mirror of
https://github.com/SCST-project/scst.git
synced 2026-05-14 09:11:27 +00:00
- Update for 2.6.28
- Small doc update in flow control area git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@632 d57e44dd-8a1f-0410-8b47-8ef2f437770f
This commit is contained in:
@@ -136,16 +136,26 @@ IMPORTANT: All LUN information (access control) MUST be configured
|
||||
|
||||
Also see SCST README file how to tune for the best performance.
|
||||
|
||||
If under high load you experience I/O stalls or see in the kernel log
|
||||
abort or reset messages, then try to reduce QueuedCommands parameter in
|
||||
iscsi-scstd.conf file for the corresponding target to some lower value,
|
||||
like 8 (default is 32). See also SCST README file for more details about
|
||||
that issue.
|
||||
|
||||
CAUTION: Working of target and initiator on the same host isn't
|
||||
======= supported. See SCST README file for details.
|
||||
|
||||
|
||||
Work if target's backstorage or link is too slow
|
||||
------------------------------------------------
|
||||
|
||||
In some cases you can experience I/O stalls or see in the kernel log
|
||||
abort or reset messages. It can happen under high I/O load, when your
|
||||
target's backstorage gets overloaded, or working over a slow link, when
|
||||
the link can't serve all the queued commands on time.
|
||||
|
||||
To work around it you can reduce the QueuedCommands parameter in
|
||||
iscsi-scstd.conf file for the corresponding target to some lower value,
|
||||
like 8 (default is 32).
|
||||
|
||||
Also see SCST README file for more details about that issue and ways to
|
||||
prevent it.
|
||||
|
||||
|
||||
Performance advices
|
||||
-------------------
|
||||
|
||||
|
||||
308
iscsi-scst/kernel/patches/put_page_callback-2.6.28.patch
Normal file
308
iscsi-scst/kernel/patches/put_page_callback-2.6.28.patch
Normal file
@@ -0,0 +1,308 @@
|
||||
diff -upr linux-2.6.28/include/linux/mm_types.h linux-2.6.28/include/linux/mm_types.h
|
||||
--- linux-2.6.28/include/linux/mm_types.h 2008-07-14 01:51:29.000000000 +0400
|
||||
+++ linux-2.6.28/include/linux/mm_types.h 2008-07-22 20:30:21.000000000 +0400
|
||||
@@ -94,6 +94,18 @@ struct page {
|
||||
void *virtual; /* Kernel virtual address (NULL if
|
||||
not kmapped, ie. highmem) */
|
||||
#endif /* WANT_PAGE_VIRTUAL */
|
||||
+
|
||||
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
|
||||
+ /*
|
||||
+ * Used to implement support for notification on zero-copy TCP transfer
|
||||
+ * completion. It might look as not good to have this field here and
|
||||
+ * it's better to have it in struct sk_buff, but it would make the code
|
||||
+ * much more complicated and fragile, since all skb then would have to
|
||||
+ * contain only pages with the same value in this field.
|
||||
+ */
|
||||
+ void *net_priv;
|
||||
+#endif
|
||||
+
|
||||
};
|
||||
|
||||
/*
|
||||
diff -upr linux-2.6.28/include/linux/net.h linux-2.6.28/include/linux/net.h
|
||||
--- linux-2.6.28/include/linux/net.h 2008-07-14 01:51:29.000000000 +0400
|
||||
+++ linux-2.6.28/include/linux/net.h 2008-07-29 20:48:07.000000000 +0400
|
||||
@@ -57,6 +57,7 @@ typedef enum {
|
||||
#include <linux/random.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
|
||||
+#include <linux/mm.h>
|
||||
|
||||
struct poll_table_struct;
|
||||
struct pipe_inode_info;
|
||||
@@ -352,5 +352,44 @@ extern int net_msg_cost;
|
||||
extern struct ratelimit_state net_ratelimit_state;
|
||||
#endif
|
||||
|
||||
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
|
||||
+/* Support for notification on zero-copy TCP transfer completion */
|
||||
+typedef void (*net_get_page_callback_t)(struct page *page);
|
||||
+typedef void (*net_put_page_callback_t)(struct page *page);
|
||||
+
|
||||
+extern net_get_page_callback_t net_get_page_callback;
|
||||
+extern net_put_page_callback_t net_put_page_callback;
|
||||
+
|
||||
+extern int net_set_get_put_page_callbacks(
|
||||
+ net_get_page_callback_t get_callback,
|
||||
+ net_put_page_callback_t put_callback);
|
||||
+
|
||||
+/*
|
||||
+ * See comment for net_set_get_put_page_callbacks() why those functions
|
||||
+ * don't need any protection.
|
||||
+ */
|
||||
+static inline void net_get_page(struct page *page)
|
||||
+{
|
||||
+ if (page->net_priv != 0)
|
||||
+ net_get_page_callback(page);
|
||||
+ get_page(page);
|
||||
+}
|
||||
+static inline void net_put_page(struct page *page)
|
||||
+{
|
||||
+ if (page->net_priv != 0)
|
||||
+ net_put_page_callback(page);
|
||||
+ put_page(page);
|
||||
+}
|
||||
+#else
|
||||
+static inline void net_get_page(struct page *page)
|
||||
+{
|
||||
+ get_page(page);
|
||||
+}
|
||||
+static inline void net_put_page(struct page *page)
|
||||
+{
|
||||
+ put_page(page);
|
||||
+}
|
||||
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
|
||||
+
|
||||
#endif /* __KERNEL__ */
|
||||
#endif /* _LINUX_NET_H */
|
||||
diff -upr linux-2.6.28/net/core/skbuff.c linux-2.6.28/net/core/skbuff.c
|
||||
--- linux-2.6.28/net/core/skbuff.c 2008-07-14 01:51:29.000000000 +0400
|
||||
+++ linux-2.6.28/net/core/skbuff.c 2008-07-22 20:28:41.000000000 +0400
|
||||
@@ -339,7 +339,7 @@ static void skb_release_data(struct sk_b
|
||||
if (skb_shinfo(skb)->nr_frags) {
|
||||
int i;
|
||||
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
|
||||
- put_page(skb_shinfo(skb)->frags[i].page);
|
||||
+ net_put_page(skb_shinfo(skb)->frags[i].page);
|
||||
}
|
||||
|
||||
if (skb_shinfo(skb)->frag_list)
|
||||
@@ -727,7 +725,7 @@ struct sk_buff *pskb_copy(struct sk_buff
|
||||
|
||||
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
|
||||
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
|
||||
- get_page(skb_shinfo(n)->frags[i].page);
|
||||
+ net_get_page(skb_shinfo(n)->frags[i].page);
|
||||
}
|
||||
skb_shinfo(n)->nr_frags = i;
|
||||
}
|
||||
@@ -792,7 +792,7 @@ int pskb_expand_head(struct sk_buff *skb
|
||||
sizeof(struct skb_shared_info));
|
||||
|
||||
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
|
||||
- get_page(skb_shinfo(skb)->frags[i].page);
|
||||
+ net_get_page(skb_shinfo(skb)->frags[i].page);
|
||||
|
||||
if (skb_shinfo(skb)->frag_list)
|
||||
skb_clone_fraglist(skb);
|
||||
@@ -1061,7 +1061,7 @@ drop_pages:
|
||||
skb_shinfo(skb)->nr_frags = i;
|
||||
|
||||
for (; i < nfrags; i++)
|
||||
- put_page(skb_shinfo(skb)->frags[i].page);
|
||||
+ net_put_page(skb_shinfo(skb)->frags[i].page);
|
||||
|
||||
if (skb_shinfo(skb)->frag_list)
|
||||
skb_drop_fraglist(skb);
|
||||
@@ -1230,7 +1230,7 @@ pull_pages:
|
||||
k = 0;
|
||||
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
|
||||
if (skb_shinfo(skb)->frags[i].size <= eat) {
|
||||
- put_page(skb_shinfo(skb)->frags[i].page);
|
||||
+ net_put_page(skb_shinfo(skb)->frags[i].page);
|
||||
eat -= skb_shinfo(skb)->frags[i].size;
|
||||
} else {
|
||||
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
|
||||
@@ -1987,7 +1987,7 @@ static inline void skb_split_no_header(s
|
||||
* where splitting is expensive.
|
||||
* 2. Split is accurately. We make this.
|
||||
*/
|
||||
- get_page(skb_shinfo(skb)->frags[i].page);
|
||||
+ net_get_page(skb_shinfo(skb)->frags[i].page);
|
||||
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
|
||||
skb_shinfo(skb1)->frags[0].size -= len - pos;
|
||||
skb_shinfo(skb)->frags[i].size = len - pos;
|
||||
@@ -2355,7 +2355,7 @@ struct sk_buff *skb_segment(struct sk_bu
|
||||
BUG_ON(i >= nfrags);
|
||||
|
||||
*frag = skb_shinfo(skb)->frags[i];
|
||||
- get_page(frag->page);
|
||||
+ net_get_page(frag->page);
|
||||
size = frag->size;
|
||||
|
||||
if (pos < offset) {
|
||||
diff -upr linux-2.6.28/net/ipv4/ip_output.c linux-2.6.28/net/ipv4/ip_output.c
|
||||
--- linux-2.6.28/net/ipv4/ip_output.c 2008-07-14 01:51:29.000000000 +0400
|
||||
+++ linux-2.6.28/net/ipv4/ip_output.c 2008-07-22 20:28:41.000000000 +0400
|
||||
@@ -1008,7 +1008,7 @@ alloc_new_skb:
|
||||
err = -EMSGSIZE;
|
||||
goto error;
|
||||
}
|
||||
- get_page(page);
|
||||
+ net_get_page(page);
|
||||
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
|
||||
frag = &skb_shinfo(skb)->frags[i];
|
||||
}
|
||||
@@ -1166,7 +1166,7 @@ ssize_t ip_append_page(struct sock *sk,
|
||||
if (skb_can_coalesce(skb, i, page, offset)) {
|
||||
skb_shinfo(skb)->frags[i-1].size += len;
|
||||
} else if (i < MAX_SKB_FRAGS) {
|
||||
- get_page(page);
|
||||
+ net_get_page(page);
|
||||
skb_fill_page_desc(skb, i, page, offset, len);
|
||||
} else {
|
||||
err = -EMSGSIZE;
|
||||
diff -upr linux-2.6.28/net/ipv4/Makefile linux-2.6.28/net/ipv4/Makefile
|
||||
--- linux-2.6.28/net/ipv4/Makefile 2008-07-14 01:51:29.000000000 +0400
|
||||
+++ linux-2.6.28/net/ipv4/Makefile 2008-07-22 20:35:05.000000000 +0400
|
||||
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
|
||||
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
|
||||
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
|
||||
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
|
||||
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
|
||||
|
||||
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
|
||||
xfrm4_output.o
|
||||
diff -upr linux-2.6.28/net/ipv4/tcp.c linux-2.6.28/net/ipv4/tcp.c
|
||||
--- linux-2.6.28/net/ipv4/tcp.c 2008-07-14 01:51:29.000000000 +0400
|
||||
+++ linux-2.6.28/net/ipv4/tcp.c 2008-07-22 20:28:41.000000000 +0400
|
||||
@@ -714,7 +714,7 @@ new_segment:
|
||||
if (can_coalesce) {
|
||||
skb_shinfo(skb)->frags[i - 1].size += copy;
|
||||
} else {
|
||||
- get_page(page);
|
||||
+ net_get_page(page);
|
||||
skb_fill_page_desc(skb, i, page, offset, copy);
|
||||
}
|
||||
|
||||
@@ -919,7 +919,7 @@ new_segment:
|
||||
goto new_segment;
|
||||
} else if (page) {
|
||||
if (off == PAGE_SIZE) {
|
||||
- put_page(page);
|
||||
+ net_put_page(page);
|
||||
TCP_PAGE(sk) = page = NULL;
|
||||
off = 0;
|
||||
}
|
||||
@@ -960,9 +960,9 @@ new_segment:
|
||||
} else {
|
||||
skb_fill_page_desc(skb, i, page, off, copy);
|
||||
if (TCP_PAGE(sk)) {
|
||||
- get_page(page);
|
||||
+ net_get_page(page);
|
||||
} else if (off + copy < PAGE_SIZE) {
|
||||
- get_page(page);
|
||||
+ net_get_page(page);
|
||||
TCP_PAGE(sk) = page;
|
||||
}
|
||||
}
|
||||
diff -upr linux-2.6.28/net/ipv4/tcp_output.c linux-2.6.28/net/ipv4/tcp_output.c
|
||||
--- linux-2.6.28/net/ipv4/tcp_output.c 2008-07-14 01:51:29.000000000 +0400
|
||||
+++ linux-2.6.28/net/ipv4/tcp_output.c 2008-07-22 20:28:41.000000000 +0400
|
||||
@@ -871,7 +871,7 @@ static void __pskb_trim_head(struct sk_b
|
||||
k = 0;
|
||||
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
|
||||
if (skb_shinfo(skb)->frags[i].size <= eat) {
|
||||
- put_page(skb_shinfo(skb)->frags[i].page);
|
||||
+ net_put_page(skb_shinfo(skb)->frags[i].page);
|
||||
eat -= skb_shinfo(skb)->frags[i].size;
|
||||
} else {
|
||||
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
|
||||
diff -upr linux-2.6.28/net/ipv4/tcp_zero_copy.c linux-2.6.28/net/ipv4/tcp_zero_copy.c
|
||||
--- linux-2.6.28/net/ipv4/tcp_zero_copy.c 2008-07-22 20:12:35.000000000 +0400
|
||||
+++ linux-2.6.28/net/ipv4/tcp_zero_copy.c 2008-07-31 21:21:13.000000000 +0400
|
||||
@@ -0,0 +1,49 @@
|
||||
+/*
|
||||
+ * Support routines for TCP zero copy transmit
|
||||
+ *
|
||||
+ * Created by Vladislav Bolkhovitin
|
||||
+ *
|
||||
+ * This program is free software; you can redistribute it and/or
|
||||
+ * modify it under the terms of the GNU General Public License
|
||||
+ * version 2 as published by the Free Software Foundation.
|
||||
+ */
|
||||
+
|
||||
+#include <linux/skbuff.h>
|
||||
+
|
||||
+net_get_page_callback_t net_get_page_callback __read_mostly;
|
||||
+EXPORT_SYMBOL(net_get_page_callback);
|
||||
+
|
||||
+net_put_page_callback_t net_put_page_callback __read_mostly;
|
||||
+EXPORT_SYMBOL(net_put_page_callback);
|
||||
+
|
||||
+/*
|
||||
+ * Caller of this function must ensure that at the moment when it's called
|
||||
+ * there are no pages in the system with net_priv field set to non-zero
|
||||
+ * value. Hence, this function, as well as net_get_page() and net_put_page(),
|
||||
+ * don't need any protection.
|
||||
+ */
|
||||
+int net_set_get_put_page_callbacks(
|
||||
+ net_get_page_callback_t get_callback,
|
||||
+ net_put_page_callback_t put_callback)
|
||||
+{
|
||||
+ int res = 0;
|
||||
+
|
||||
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
|
||||
+ (net_get_page_callback != get_callback)) {
|
||||
+ res = -EBUSY;
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
|
||||
+ (net_put_page_callback != put_callback)) {
|
||||
+ res = -EBUSY;
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ net_get_page_callback = get_callback;
|
||||
+ net_put_page_callback = put_callback;
|
||||
+
|
||||
+out:
|
||||
+ return res;
|
||||
+}
|
||||
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
|
||||
diff -upr linux-2.6.28/net/ipv6/ip6_output.c linux-2.6.28/net/ipv6/ip6_output.c
|
||||
--- linux-2.6.28/net/ipv6/ip6_output.c 2008-07-14 01:51:29.000000000 +0400
|
||||
+++ linux-2.6.28/net/ipv6/ip6_output.c 2008-07-22 20:28:41.000000000 +0400
|
||||
@@ -1362,7 +1362,7 @@ alloc_new_skb:
|
||||
err = -EMSGSIZE;
|
||||
goto error;
|
||||
}
|
||||
- get_page(page);
|
||||
+ net_get_page(page);
|
||||
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
|
||||
frag = &skb_shinfo(skb)->frags[i];
|
||||
}
|
||||
diff -upr linux-2.6.28/net/Kconfig linux-2.6.28/net/Kconfig
|
||||
--- linux-2.6.28/net/Kconfig 2008-07-14 01:51:29.000000000 +0400
|
||||
+++ linux-2.6.28/net/Kconfig 2008-07-29 21:15:39.000000000 +0400
|
||||
@@ -59,6 +59,18 @@ config INET
|
||||
|
||||
Short answer: say Y.
|
||||
|
||||
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
|
||||
+ bool "TCP/IP zero-copy transfer completion notification"
|
||||
+ depends on INET
|
||||
+ default SCST_ISCSI
|
||||
+ ---help---
|
||||
+ Adds support for sending a notification upon completion of a
|
||||
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
|
||||
+ software. Currently this is only used by the iSCSI target driver
|
||||
+ iSCSI-SCST.
|
||||
+
|
||||
+ If unsure, say N.
|
||||
+
|
||||
if INET
|
||||
source "net/ipv4/Kconfig"
|
||||
source "net/ipv6/Kconfig"
|
||||
@@ -11,6 +11,7 @@
|
||||
#include <linux/delay.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/version.h>
|
||||
|
||||
#include <scsi/scsi_tcq.h>
|
||||
#include <scsi/scsicam.h>
|
||||
@@ -1571,9 +1572,14 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
goto probe_out;
|
||||
}
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
|
||||
if (pci_find_aer_capability(pdev))
|
||||
if (pci_enable_pcie_error_reporting(pdev))
|
||||
goto probe_out;
|
||||
#else /* taken from 2.6.28 */
|
||||
/* This may fail but that's ok */
|
||||
pci_enable_pcie_error_reporting(pdev);
|
||||
#endif
|
||||
|
||||
host = scsi_host_alloc(sht, sizeof(scsi_qla_host_t));
|
||||
if (host == NULL) {
|
||||
|
||||
91
scst/README
91
scst/README
@@ -842,17 +842,20 @@ IMPORTANT: If you use on initiator some versions of Windows (at least W2K)
|
||||
See also important notes about setting block sizes >512 bytes
|
||||
for VDISK FILEIO devices above.
|
||||
|
||||
What if target's backstorage is too slow
|
||||
----------------------------------------
|
||||
|
||||
If under high load you experience I/O stalls or see in the kernel log on
|
||||
the target abort or reset messages, then your backstorage is too slow
|
||||
comparing with your target link speed and amount of simultaneously
|
||||
queued commands. On some seek intensive workloads even fast disks or
|
||||
RAIDs, which able to serve continuous data stream on 500+ MB/s speed,
|
||||
can be as slow as 0.3 MB/s. Another possible cause for that can be
|
||||
MD/LVM/RAID on your target as in http://lkml.org/lkml/2008/2/27/96
|
||||
(check the whole thread as well).
|
||||
Work if target's backstorage or link is too slow
|
||||
------------------------------------------------
|
||||
|
||||
Under high I/O load, when your target's backstorage gets overloaded, or
|
||||
working over a slow link between initiator and target, when the link
|
||||
can't serve all the queued commands on time, you can experience I/O
|
||||
stalls or see in the kernel log abort or reset messages.
|
||||
|
||||
At first, consider the case of too slow target's backstorage. On some
|
||||
seek intensive workloads even fast disks or RAIDs, which are able to serve
|
||||
continuous data stream on 500+ MB/s speed, can be as slow as 0.3 MB/s.
|
||||
Another possible cause for that can be MD/LVM/RAID on your target as in
|
||||
http://lkml.org/lkml/2008/2/27/96 (check the whole thread as well).
|
||||
|
||||
Thus, in such situations simply processing of one or more commands takes
|
||||
too long time, hence initiator decides that they are stuck on the target
|
||||
@@ -865,28 +868,21 @@ backstorage speed could be more appropriate.
|
||||
Unfortunately, currently SCST lacks dynamic I/O flow control, when the
|
||||
queue depth on the target is dynamically decreased/increased based on
|
||||
how slow/fast the backstorage speed comparing to the target link. So,
|
||||
there are only 5 possible actions, which you can do to workaround or fix
|
||||
this issue:
|
||||
there are 6 possible actions, which you can do to workaround or fix this
|
||||
issue in this case:
|
||||
|
||||
1. Ignore incoming task management (TM) commands. It's fine if there are
|
||||
not too many of them, so average performance isn't hurt and the
|
||||
corresponding device isn't put offline, i.e. if the backstorage isn't
|
||||
too much slow.
|
||||
corresponding device isn't getting put offline, i.e. if the backstorage
|
||||
isn't too slow.
|
||||
|
||||
2. Decrease /sys/block/sdX/device/queue_depth on the initiator in case
|
||||
if it's Linux (see below how) or/and SCST_MAX_TGT_DEV_COMMANDS constant
|
||||
in scst_priv.h file until you stop seeing incoming TM commands.
|
||||
ISCSI-SCST driver also has its own iSCSI specific parameter for that.
|
||||
ISCSI-SCST driver also has its own iSCSI specific parameter for that,
|
||||
see its README file.
|
||||
|
||||
3. Try to avoid such seek intensive workloads.
|
||||
|
||||
4. Insrease speed of the target's backstorage.
|
||||
|
||||
5. Implement in SCST dynamic I/O flow control. See "Dynamic I/O flow
|
||||
control" section on http://scst.sourceforge.net/contributing.html page
|
||||
for possible idea how to do it.
|
||||
|
||||
To decrease device queue depth on Linux initiators run command:
|
||||
To decrease device queue depth on Linux initiators you can run command:
|
||||
|
||||
# echo Y >/sys/block/sdX/device/queue_depth
|
||||
|
||||
@@ -896,12 +892,53 @@ limitations for Y value, it can be any value from 1 to possible maximum
|
||||
(usually, 32), so start from dividing the current value on 2, i.e. set
|
||||
16, if /sys/block/sdX/device/queue_depth contains 32.
|
||||
|
||||
3. Increase the corresponding timeout on the initiator. For Linux it is
|
||||
located in
|
||||
/sys/devices/platform/host*/session*/target*:0:0/*:0:0:1/timeout. It can
|
||||
be done automatically by a udev rule. For instance, the following
|
||||
rule will increase it to 300 seconds:
|
||||
|
||||
SUBSYSTEM=="scsi", KERNEL=="[0-9]*:[0-9]*", ACTION=="add", ATTR{type}=="0|7|14", ATTR{timeout}="300"
|
||||
|
||||
By default, this timeout is 30 or 60 seconds, depending on your distribution.
|
||||
|
||||
4. Try to avoid such seek intensive workloads.
|
||||
|
||||
5. Increase speed of the target's backstorage.
|
||||
|
||||
6. Implement in SCST dynamic I/O flow control. This will be an ultimate
|
||||
solution. See "Dynamic I/O flow control" section on
|
||||
http://scst.sourceforge.net/contributing.html page for possible
|
||||
implementation idea.
|
||||
|
||||
Next, consider the case of too slow link between initiator and target,
|
||||
when the initiator tries to simultaneously push N commands to the target
|
||||
over it. In this case time to serve those commands, i.e. send or receive
|
||||
data for them over the link, can be longer than the timeout for any single
|
||||
command, hence one or more commands in the tail of the queue can not be
|
||||
served on time less than the timeout, so the initiator will decide that
|
||||
they are stuck on the target and will try to recover.
|
||||
|
||||
Unfortunately, the target can reliably detect the conditions leading to
|
||||
the issue only in case of READ commands, when the target can see that
|
||||
commands' data transmission is getting too slow, so the dynamic flow
|
||||
control, described above, can prevent the issue. But for WRITE commands
|
||||
there are cases when target has no way to detect the issue. In this case
|
||||
you can workaround it only by increasing the corresponding timeout on
|
||||
the initiator.
|
||||
|
||||
Thus, to workaround/fix this issue in this case you can use ways 1, 2,
|
||||
3, 6 above or (7) increase speed of the link between target and
|
||||
initiator. But for write intensive workloads you may have to increase
|
||||
the timeout on initiator (way 3) in any case.
|
||||
|
||||
Note, that logged messages about QUEUE_FULL status are quite different
|
||||
by nature. This is a normal work, just SCSI flow control in action.
|
||||
Simply don't enable "mgmt_minor" logging level, or, alternatively, if
|
||||
you are confident in the worst case performance of your back-end
|
||||
storage, you can increase SCST_MAX_TGT_DEV_COMMANDS in scst_priv.h to
|
||||
64. Usually initiators don't try to push more commands on the target.
|
||||
you are confident in the worst case performance of your back-end storage
|
||||
or initiator-target link, you can increase SCST_MAX_TGT_DEV_COMMANDS in
|
||||
scst_priv.h to 64. Usually initiators don't try to push more commands on
|
||||
the target.
|
||||
|
||||
Credits
|
||||
-------
|
||||
|
||||
22
scst/kernel/export_alloc_io_context-2.6.28.patch
Normal file
22
scst/kernel/export_alloc_io_context-2.6.28.patch
Normal file
@@ -0,0 +1,22 @@
|
||||
diff -upkr linux-2.6.28/block/blk-ioc.c linux-2.6.28/block/blk-ioc.c
|
||||
--- linux-2.6.28/block/blk-ioc.c 2008-10-10 02:13:53.000000000 +0400
|
||||
+++ linux-2.6.28/block/blk-ioc.c 2008-11-25 21:27:01.000000000 +0300
|
||||
@@ -105,6 +105,7 @@ struct io_context *alloc_io_context(gfp_
|
||||
|
||||
return ret;
|
||||
}
|
||||
+EXPORT_SYMBOL(alloc_io_context);
|
||||
|
||||
/*
|
||||
* If the current task has no IO context then create one and initialise it.
|
||||
diff -upkr linux-2.6.28/include/linux/iocontext.h linux-2.6.28/include/linux/iocontext.h
|
||||
--- linux-2.6.28/include/linux/iocontext.h 2008-10-10 02:13:53.000000000 +0400
|
||||
+++ linux-2.6.28/include/linux/iocontext.h 2008-11-26 13:23:03.000000000 +0300
|
||||
@@ -103,6 +103,7 @@ static inline struct io_context *ioc_tas
|
||||
int put_io_context(struct io_context *ioc);
|
||||
void exit_io_context(void);
|
||||
struct io_context *get_io_context(gfp_t gfp_flags, int node);
|
||||
+#define SCST_ALLOC_IO_CONTEXT_EXPORTED
|
||||
struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
|
||||
void copy_io_context(struct io_context **pdst, struct io_context **psrc);
|
||||
#else
|
||||
12
scst/kernel/in-tree/Kconfig.drivers.Linux-2.6.28.patch
Normal file
12
scst/kernel/in-tree/Kconfig.drivers.Linux-2.6.28.patch
Normal file
@@ -0,0 +1,12 @@
|
||||
diff -upkr -X linux-2.6.28/Documentation/dontdiff linux-2.6.28/drivers/Kconfig linux-2.6.28/drivers/Kconfig
|
||||
--- linux-2.6.28/drivers/Kconfig 2008-07-14 01:51:29.000000000 +0400
|
||||
+++ linux-2.6.28/drivers/Kconfig 2008-07-24 14:14:46.000000000 +0400
|
||||
@@ -24,6 +24,8 @@ source "drivers/ide/Kconfig"
|
||||
|
||||
source "drivers/scsi/Kconfig"
|
||||
|
||||
+source "drivers/scst/Kconfig"
|
||||
+
|
||||
source "drivers/ata/Kconfig"
|
||||
|
||||
source "drivers/md/Kconfig"
|
||||
11
scst/kernel/in-tree/Makefile.drivers.Linux-2.6.28.patch
Normal file
11
scst/kernel/in-tree/Makefile.drivers.Linux-2.6.28.patch
Normal file
@@ -0,0 +1,11 @@
|
||||
diff -upkr -X linux-2.6.28/Documentation/dontdiff linux-2.6.28/drivers/Makefile linux-2.6.28/drivers/Makefile
|
||||
--- linux-2.6.28/drivers/Makefile 2008-07-14 01:51:29.000000000 +0400
|
||||
+++ linux-2.6.28/drivers/Makefile 2008-07-24 14:15:29.000000000 +0400
|
||||
@@ -39,6 +39,7 @@ obj-$(CONFIG_ATM) += atm/
|
||||
obj-y += macintosh/
|
||||
obj-$(CONFIG_IDE) += ide/
|
||||
obj-$(CONFIG_SCSI) += scsi/
|
||||
+obj-$(CONFIG_SCST) += scst/
|
||||
obj-$(CONFIG_ATA) += ata/
|
||||
obj-$(CONFIG_FUSION) += message/
|
||||
obj-$(CONFIG_FIREWIRE) += firewire/
|
||||
112
scst/kernel/scst_exec_req_fifo-2.6.28.patch
Normal file
112
scst/kernel/scst_exec_req_fifo-2.6.28.patch
Normal file
@@ -0,0 +1,112 @@
|
||||
diff -upr linux-2.6.28/drivers/scsi/scsi_lib.c linux-2.6.28/drivers/scsi/scsi_lib.c
|
||||
--- linux-2.6.28/drivers/scsi/scsi_lib.c 2008-07-14 01:51:29.000000000 +0400
|
||||
+++ linux-2.6.28/drivers/scsi/scsi_lib.c 2008-07-31 21:20:00.000000000 +0400
|
||||
@@ -380,7 +380,7 @@ free_bios:
|
||||
}
|
||||
|
||||
/**
|
||||
- * scsi_execute_async - insert request
|
||||
+ * __scsi_execute_async - insert request
|
||||
* @sdev: scsi device
|
||||
* @cmd: scsi command
|
||||
* @cmd_len: length of scsi cdb
|
||||
@@ -393,11 +393,14 @@ free_bios:
|
||||
* @privdata: data passed to done()
|
||||
* @done: callback function when done
|
||||
* @gfp: memory allocation flags
|
||||
+ * @at_head: insert request at head or tail of queue
|
||||
*/
|
||||
-int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
|
||||
+static inline int __scsi_execute_async(struct scsi_device *sdev,
|
||||
+ const unsigned char *cmd,
|
||||
int cmd_len, int data_direction, void *buffer, unsigned bufflen,
|
||||
int use_sg, int timeout, int retries, void *privdata,
|
||||
- void (*done)(void *, char *, int, int), gfp_t gfp)
|
||||
+ void (*done)(void *, char *, int, int), gfp_t gfp,
|
||||
+ int at_head)
|
||||
{
|
||||
struct request *req;
|
||||
struct scsi_io_context *sioc;
|
||||
@@ -434,7 +439,7 @@ int scsi_execute_async(struct scsi_devic
|
||||
sioc->data = privdata;
|
||||
sioc->done = done;
|
||||
|
||||
- blk_execute_rq_nowait(req->q, NULL, req, 1, scsi_end_async);
|
||||
+ blk_execute_rq_nowait(req->q, NULL, req, at_head, scsi_end_async);
|
||||
return 0;
|
||||
|
||||
free_req:
|
||||
@@ -443,8 +446,55 @@ free_sense:
|
||||
kmem_cache_free(scsi_io_context_cache, sioc);
|
||||
return DRIVER_ERROR << 24;
|
||||
}
|
||||
+
|
||||
+/**
|
||||
+ * scsi_execute_async - insert request
|
||||
+ * @sdev: scsi device
|
||||
+ * @cmd: scsi command
|
||||
+ * @cmd_len: length of scsi cdb
|
||||
+ * @data_direction: data direction
|
||||
+ * @buffer: data buffer (this can be a kernel buffer or scatterlist)
|
||||
+ * @bufflen: len of buffer
|
||||
+ * @use_sg: if buffer is a scatterlist this is the number of elements
|
||||
+ * @timeout: request timeout in seconds
|
||||
+ * @retries: number of times to retry request
|
||||
+ * @flags: or into request flags
|
||||
+ **/
|
||||
+int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
|
||||
+ int cmd_len, int data_direction, void *buffer,
|
||||
+ unsigned bufflen, int use_sg, int timeout,
|
||||
+ int retries, void *privdata,
|
||||
+ void (*done)(void *, char *, int, int), gfp_t gfp)
|
||||
+{
|
||||
+ return __scsi_execute_async(sdev, cmd, cmd_len, data_direction, buffer,
|
||||
+ bufflen, use_sg, timeout, retries, privdata, done, gfp, 1);
|
||||
+}
|
||||
EXPORT_SYMBOL_GPL(scsi_execute_async);
|
||||
|
||||
+/**
|
||||
+ * scsi_execute_async_fifo - insert request at tail, in FIFO order
|
||||
+ * @sdev: scsi device
|
||||
+ * @cmd: scsi command
|
||||
+ * @cmd_len: length of scsi cdb
|
||||
+ * @data_direction: data direction
|
||||
+ * @buffer: data buffer (this can be a kernel buffer or scatterlist)
|
||||
+ * @bufflen: len of buffer
|
||||
+ * @use_sg: if buffer is a scatterlist this is the number of elements
|
||||
+ * @timeout: request timeout in seconds
|
||||
+ * @retries: number of times to retry request
|
||||
+ * @flags: or into request flags
|
||||
+ **/
|
||||
+int scsi_execute_async_fifo(struct scsi_device *sdev, const unsigned char *cmd,
|
||||
+ int cmd_len, int data_direction, void *buffer,
|
||||
+ unsigned bufflen, int use_sg, int timeout, int retries,
|
||||
+ void *privdata,
|
||||
+ void (*done)(void *, char *, int, int), gfp_t gfp)
|
||||
+{
|
||||
+ return __scsi_execute_async(sdev, cmd, cmd_len, data_direction, buffer,
|
||||
+ bufflen, use_sg, timeout, retries, privdata, done, gfp, 0);
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(scsi_execute_async_fifo);
|
||||
+
|
||||
/*
|
||||
* Function: scsi_init_cmd_errh()
|
||||
*
|
||||
diff -upr linux-2.6.28/include/scsi/scsi_device.h linux-2.6.28/include/scsi/scsi_device.h
|
||||
--- linux-2.6.28/include/scsi/scsi_device.h 2008-07-14 01:51:29.000000000 +0400
|
||||
+++ linux-2.6.28/include/scsi/scsi_device.h 2008-07-31 21:20:39.000000000 +0400
|
||||
@@ -377,6 +377,14 @@ extern int scsi_execute_async(struct scs
|
||||
int timeout, int retries, void *privdata,
|
||||
void (*done)(void *, char *, int, int),
|
||||
gfp_t gfp);
|
||||
+#define SCSI_EXEC_REQ_FIFO_DEFINED
|
||||
+extern int scsi_execute_async_fifo(struct scsi_device *sdev,
|
||||
+ const unsigned char *cmd, int cmd_len,
|
||||
+ int data_direction, void *buffer,
|
||||
+ unsigned bufflen, int use_sg,
|
||||
+ int timeout, int retries, void *privdata,
|
||||
+ void (*done)(void *, char *, int, int),
|
||||
+ gfp_t gfp);
|
||||
|
||||
static inline int __must_check scsi_device_reprobe(struct scsi_device *sdev)
|
||||
{
|
||||
Reference in New Issue
Block a user