- Update for 2.6.28

- Small doc update in flow control area git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@632 d57e44dd-8a1f-0410-8b47-8ef2f437770f
2026-06-09 23:22:33 +00:00 · 2009-01-12 18:55:08 +00:00
parent e5caaf4548
commit 479a9d0a9f
8 changed files with 551 additions and 33 deletions
@@ -136,16 +136,26 @@ IMPORTANT: All LUN information (access control) MUST be configured

 Also see SCST README file how to tune for the best performance.

-If under high load you experience I/O stalls or see in the kernel log
-abort or reset messages, then try to reduce QueuedCommands parameter in
-iscsi-scstd.conf file for the corresponding target to some lower value,
-like 8 (default is 32). See also SCST README file for more details about
-that issue.
-
 CAUTION:  Working of target and initiator on the same host isn't
 =======   supported. See SCST README file for details.


+Work if target's backstorage or link is too slow
+------------------------------------------------
+
+In some cases you can experience I/O stalls or see in the kernel log
+abort or reset messages. It can happen under high I/O load, when your
+target's backstorage gets overloaded, or working over a slow link, when
+the link can't serve all the queued commands on time.
+
+To work around it, you can reduce the QueuedCommands parameter in the
+iscsi-scstd.conf file for the corresponding target to some lower value,
+like 8 (default is 32).
+
+Also see SCST README file for more details about that issue and ways to
+prevent it.
+
+
 Performance advices
 -------------------

@@ -0,0 +1,308 @@
+diff -upr linux-2.6.28/include/linux/mm_types.h linux-2.6.28/include/linux/mm_types.h
+--- linux-2.6.28/include/linux/mm_types.h	2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/include/linux/mm_types.h	2008-07-22 20:30:21.000000000 +0400
+@@ -94,6 +94,18 @@ struct page {
+ 	void *virtual;			/* Kernel virtual address (NULL if
+ 					   not kmapped, ie. highmem) */
+ #endif /* WANT_PAGE_VIRTUAL */
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+	/*
+	 * Used to implement support for notification on zero-copy TCP transfer
+	 * completion. It might look as not good to have this field here and
+	 * it's better to have it in struct sk_buff, but it would make the code
+	 * much more complicated and fragile, since all skb then would have to
+	 * contain only pages with the same value in this field.
+	 */
+	 void *net_priv;
+#endif
+
+ };
+
+ /*
+diff -upr linux-2.6.28/include/linux/net.h linux-2.6.28/include/linux/net.h
+--- linux-2.6.28/include/linux/net.h	2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/include/linux/net.h	2008-07-29 20:48:07.000000000 +0400
+@@ -57,6 +57,7 @@ typedef enum {
+ #include <linux/random.h>
+ #include <linux/wait.h>
+ #include <linux/fcntl.h>	/* For O_CLOEXEC and O_NONBLOCK */
+#include <linux/mm.h>
+
+ struct poll_table_struct;
+ struct pipe_inode_info;
+@@ -352,5 +352,44 @@ extern int net_msg_cost;
+ extern struct ratelimit_state net_ratelimit_state;
+ #endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback);
+
+/*
+ * See comment for net_set_get_put_page_callbacks() why those functions
+ * don't need any protection.
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv != 0)
+		net_get_page_callback(page);
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv != 0)
+		net_put_page_callback(page);
+	put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
+ #endif /* __KERNEL__ */
+ #endif	/* _LINUX_NET_H */
+diff -upr linux-2.6.28/net/core/skbuff.c linux-2.6.28/net/core/skbuff.c
+--- linux-2.6.28/net/core/skbuff.c	2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/core/skbuff.c	2008-07-22 20:28:41.000000000 +0400
+@@ -339,7 +339,7 @@ static void skb_release_data(struct sk_b
+ 		if (skb_shinfo(skb)->nr_frags) {
+ 			int i;
+ 			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+-				put_page(skb_shinfo(skb)->frags[i].page);
+				net_put_page(skb_shinfo(skb)->frags[i].page);
+ 		}
+ 
+ 		if (skb_shinfo(skb)->frag_list)
+@@ -727,7 +725,7 @@ struct sk_buff *pskb_copy(struct sk_buff
+ 
+ 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ 			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
+-			get_page(skb_shinfo(n)->frags[i].page);
+			net_get_page(skb_shinfo(n)->frags[i].page);
+ 		}
+ 		skb_shinfo(n)->nr_frags = i;
+ 	}
+@@ -792,7 +792,7 @@ int pskb_expand_head(struct sk_buff *skb
+ 	       sizeof(struct skb_shared_info));
+ 
+ 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+-		get_page(skb_shinfo(skb)->frags[i].page);
+		net_get_page(skb_shinfo(skb)->frags[i].page);
+ 
+ 	if (skb_shinfo(skb)->frag_list)
+ 		skb_clone_fraglist(skb);
+@@ -1061,7 +1061,7 @@ drop_pages:
+ 		skb_shinfo(skb)->nr_frags = i;
+ 
+ 		for (; i < nfrags; i++)
+-			put_page(skb_shinfo(skb)->frags[i].page);
+			net_put_page(skb_shinfo(skb)->frags[i].page);
+ 
+ 		if (skb_shinfo(skb)->frag_list)
+ 			skb_drop_fraglist(skb);
+@@ -1230,7 +1230,7 @@ pull_pages:
+ 	k = 0;
+ 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ 		if (skb_shinfo(skb)->frags[i].size <= eat) {
+-			put_page(skb_shinfo(skb)->frags[i].page);
+			net_put_page(skb_shinfo(skb)->frags[i].page);
+ 			eat -= skb_shinfo(skb)->frags[i].size;
+ 		} else {
+ 			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
+@@ -1987,7 +1987,7 @@ static inline void skb_split_no_header(s
+ 				 *    where splitting is expensive.
+ 				 * 2. Split is accurately. We make this.
+ 				 */
+-				get_page(skb_shinfo(skb)->frags[i].page);
+				net_get_page(skb_shinfo(skb)->frags[i].page);
+ 				skb_shinfo(skb1)->frags[0].page_offset += len - pos;
+ 				skb_shinfo(skb1)->frags[0].size -= len - pos;
+ 				skb_shinfo(skb)->frags[i].size	= len - pos;
+@@ -2355,7 +2355,7 @@ struct sk_buff *skb_segment(struct sk_bu
+ 			BUG_ON(i >= nfrags);
+ 
+ 			*frag = skb_shinfo(skb)->frags[i];
+-			get_page(frag->page);
+			net_get_page(frag->page);
+ 			size = frag->size;
+ 
+ 			if (pos < offset) {
+diff -upr linux-2.6.28/net/ipv4/ip_output.c linux-2.6.28/net/ipv4/ip_output.c
+--- linux-2.6.28/net/ipv4/ip_output.c	2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/ipv4/ip_output.c	2008-07-22 20:28:41.000000000 +0400
+@@ -1008,7 +1008,7 @@ alloc_new_skb:
+ 						err = -EMSGSIZE;
+ 						goto error;
+ 					}
+-					get_page(page);
+					net_get_page(page);
+ 					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
+ 					frag = &skb_shinfo(skb)->frags[i];
+ 				}
+@@ -1166,7 +1166,7 @@ ssize_t	ip_append_page(struct sock *sk, 
+ 		if (skb_can_coalesce(skb, i, page, offset)) {
+ 			skb_shinfo(skb)->frags[i-1].size += len;
+ 		} else if (i < MAX_SKB_FRAGS) {
+-			get_page(page);
+			net_get_page(page);
+ 			skb_fill_page_desc(skb, i, page, offset, len);
+ 		} else {
+ 			err = -EMSGSIZE;
+diff -upr linux-2.6.28/net/ipv4/Makefile linux-2.6.28/net/ipv4/Makefile
+--- linux-2.6.28/net/ipv4/Makefile	2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/ipv4/Makefile	2008-07-22 20:35:05.000000000 +0400
+@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
+ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
+ obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
+ obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
+ 
+ obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
+ 		      xfrm4_output.o
+diff -upr linux-2.6.28/net/ipv4/tcp.c linux-2.6.28/net/ipv4/tcp.c
+--- linux-2.6.28/net/ipv4/tcp.c	2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/ipv4/tcp.c	2008-07-22 20:28:41.000000000 +0400
+@@ -714,7 +714,7 @@ new_segment:
+ 		if (can_coalesce) {
+ 			skb_shinfo(skb)->frags[i - 1].size += copy;
+ 		} else {
+-			get_page(page);
+			net_get_page(page);
+ 			skb_fill_page_desc(skb, i, page, offset, copy);
+ 		}
+ 
+@@ -919,7 +919,7 @@ new_segment:
+ 					goto new_segment;
+ 				} else if (page) {
+ 					if (off == PAGE_SIZE) {
+-						put_page(page);
+						net_put_page(page);
+ 						TCP_PAGE(sk) = page = NULL;
+ 						off = 0;
+ 					}
+@@ -960,9 +960,9 @@ new_segment:
+ 				} else {
+ 					skb_fill_page_desc(skb, i, page, off, copy);
+ 					if (TCP_PAGE(sk)) {
+-						get_page(page);
+						net_get_page(page);
+ 					} else if (off + copy < PAGE_SIZE) {
+-						get_page(page);
+						net_get_page(page);
+ 						TCP_PAGE(sk) = page;
+ 					}
+ 				}
+diff -upr linux-2.6.28/net/ipv4/tcp_output.c linux-2.6.28/net/ipv4/tcp_output.c
+--- linux-2.6.28/net/ipv4/tcp_output.c	2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/ipv4/tcp_output.c	2008-07-22 20:28:41.000000000 +0400
+@@ -871,7 +871,7 @@ static void __pskb_trim_head(struct sk_b
+ 	k = 0;
+ 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ 		if (skb_shinfo(skb)->frags[i].size <= eat) {
+-			put_page(skb_shinfo(skb)->frags[i].page);
+			net_put_page(skb_shinfo(skb)->frags[i].page);
+ 			eat -= skb_shinfo(skb)->frags[i].size;
+ 		} else {
+ 			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
+diff -upr linux-2.6.28/net/ipv4/tcp_zero_copy.c linux-2.6.28/net/ipv4/tcp_zero_copy.c
+--- linux-2.6.28/net/ipv4/tcp_zero_copy.c	2008-07-22 20:12:35.000000000 +0400
+++ linux-2.6.28/net/ipv4/tcp_zero_copy.c	2008-07-31 21:21:13.000000000 +0400
+@@ -0,0 +1,49 @@
+/*
+ *	Support routines for TCP zero copy transmit
+ *
+ *	Created by Vladislav Bolkhovitin
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Caller of this function must ensure that at the moment when it's called
+ * there are no pages in the system with net_priv field set to non-zero
+ * value. Hence, this function, as well as net_get_page() and net_put_page(),
+ * don't need any protection.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	int res = 0;
+
+	if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+	    (net_get_page_callback != get_callback)) {
+		res = -EBUSY;
+		goto out;
+	}
+
+	if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+	    (net_put_page_callback != put_callback)) {
+		res = -EBUSY;
+		goto out;
+	}
+
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+out:
+	return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
+diff -upr linux-2.6.28/net/ipv6/ip6_output.c linux-2.6.28/net/ipv6/ip6_output.c
+--- linux-2.6.28/net/ipv6/ip6_output.c	2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/ipv6/ip6_output.c	2008-07-22 20:28:41.000000000 +0400
+@@ -1362,7 +1362,7 @@ alloc_new_skb:
+ 						err = -EMSGSIZE;
+ 						goto error;
+ 					}
+-					get_page(page);
+					net_get_page(page);
+ 					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
+ 					frag = &skb_shinfo(skb)->frags[i];
+ 				}
+diff -upr linux-2.6.28/net/Kconfig linux-2.6.28/net/Kconfig
+--- linux-2.6.28/net/Kconfig	2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/Kconfig	2008-07-29 21:15:39.000000000 +0400
+@@ -59,6 +59,18 @@ config INET
+ 
+ 	  Short answer: say Y.
+ 
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+	bool "TCP/IP zero-copy transfer completion notification"
+        depends on INET
+        default SCST_ISCSI
+	---help---
+	  Adds support for sending a notification upon completion of a
+          zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+          software. Currently this is only used by the iSCSI target driver
+          iSCSI-SCST.
+
+          If unsure, say N.
+
+ if INET
+ source "net/ipv4/Kconfig"
+ source "net/ipv6/Kconfig"
@@ -11,6 +11,7 @@
 #include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/mutex.h>
+#include <linux/version.h>

 #include <scsi/scsi_tcq.h>
 #include <scsi/scsicam.h>
@@ -1571,9 +1572,14 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 			goto probe_out;
 	}

+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
 	if (pci_find_aer_capability(pdev))
 		if (pci_enable_pcie_error_reporting(pdev))
 			goto probe_out;
+#else /* taken from 2.6.28 */
+	/* This may fail but that's ok */
+	pci_enable_pcie_error_reporting(pdev);
+#endif

 	host = scsi_host_alloc(sht, sizeof(scsi_qla_host_t));
 	if (host == NULL) {
@@ -842,17 +842,20 @@ IMPORTANT: If you use on initiator some versions of Windows (at least W2K)
 	   See also important notes about setting block sizes >512 bytes
 	   for VDISK FILEIO devices above.

-What if target's backstorage is too slow
----------------------------------------

-If under high load you experience I/O stalls or see in the kernel log on
-the target abort or reset messages, then your backstorage is too slow
-comparing with your target link speed and amount of simultaneously
-queued commands. On some seek intensive workloads even fast disks or
-RAIDs, which able to serve continuous data stream on 500+ MB/s speed,
-can be as slow as 0.3 MB/s. Another possible cause for that can be
-MD/LVM/RAID on your target as in http://lkml.org/lkml/2008/2/27/96
-(check the whole thread as well).
+Work if target's backstorage or link is too slow
+------------------------------------------------
+
+Under high I/O load, when your target's backstorage gets overloaded, or
+working over a slow link between initiator and target, when the link
+can't serve all the queued commands on time, you can experience I/O
+stalls or see in the kernel log abort or reset messages.
+
+First, consider the case of a too slow target's backstorage. On some
+seek intensive workloads even fast disks or RAIDs, which are able to
+serve a continuous data stream at 500+ MB/s, can be as slow as 0.3 MB/s.
+Another possible cause for that can be MD/LVM/RAID on your target as in
+http://lkml.org/lkml/2008/2/27/96 (check the whole thread as well).

 Thus, in such situations simply processing of one or more commands takes
 too long time, hence initiator decides that they are stuck on the target
@@ -865,28 +868,21 @@ backstorage speed could be more appropriate.
 Unfortunately, currently SCST lacks dynamic I/O flow control, when the
 queue depth on the target is dynamically decreased/increased based on
 how slow/fast the backstorage speed comparing to the target link. So,
-there are only 5 possible actions, which you can do to workaround or fix
-this issue:
+there are 6 possible actions, which you can do to workaround or fix this
+issue in this case:

 1. Ignore incoming task management (TM) commands. It's fine if there are
 not too many of them, so average performance isn't hurt and the
-corresponding device isn't put offline, i.e. if the backstorage isn't
-too much slow.
+corresponding device isn't getting put offline, i.e. if the backstorage
+isn't too slow.

 2. Decrease /sys/block/sdX/device/queue_depth on the initiator in case
 if it's Linux (see below how) or/and SCST_MAX_TGT_DEV_COMMANDS constant
 in scst_priv.h file until you stop seeing incoming TM commands.
-ISCSI-SCST driver also has its own iSCSI specific parameter for that.
+ISCSI-SCST driver also has its own iSCSI specific parameter for that,
+see its README file.

-3. Try to avoid such seek intensive workloads.
-
-4. Insrease speed of the target's backstorage.
-
-5. Implement in SCST dynamic I/O flow control. See "Dynamic I/O flow
-control" section on http://scst.sourceforge.net/contributing.html page
-for possible idea how to do it.
-
-To decrease device queue depth on Linux initiators run command:
+To decrease device queue depth on Linux initiators you can run command:

 # echo Y >/sys/block/sdX/device/queue_depth

@@ -896,12 +892,53 @@ limitations for Y value, it can be any value from 1 to possible maximum
 (usually, 32), so start from dividing the current value on 2, i.e. set
 16, if /sys/block/sdX/device/queue_depth contains 32.

+3. Increase the corresponding timeout on the initiator. For Linux it is
+located in
+/sys/devices/platform/host*/session*/target*:0:0/*:0:0:1/timeout. It can
+be done automatically by a udev rule. For instance, the following
+rule will increase it to 300 seconds:
+
+SUBSYSTEM=="scsi", KERNEL=="[0-9]*:[0-9]*", ACTION=="add", ATTR{type}=="0|7|14", ATTR{timeout}="300"
+
+By default, this timeout is 30 or 60 seconds, depending on your distribution.
+
+4. Try to avoid such seek intensive workloads.
+
+5. Increase speed of the target's backstorage.
+
+6. Implement in SCST dynamic I/O flow control. This will be an ultimate
+solution. See "Dynamic I/O flow control" section on
+http://scst.sourceforge.net/contributing.html page for possible
+implementation idea.
+
+Next, consider the case of a too slow link between initiator and target,
+when the initiator tries to simultaneously push N commands to the target
+over it. In this case the time to serve those commands, i.e. to send or
+receive data for them over the link, can be more than the timeout for
+any single command. Hence one or more commands in the tail of the queue
+can't be served before the timeout expires, so the initiator will decide
+that they are stuck on the target and will try to recover.
+
+Unfortunately, the target can reliably detect the conditions leading to
+the issue only in case of READ commands, when the target can see that
+commands' data transmission is getting too slow, so the dynamic flow
+control, described above, can prevent the issue. But for WRITE commands
+there are cases when the target has no way to detect the issue. In this
+case you can work around it only by increasing the corresponding timeout
+on the initiator.
+
+Thus, to workaround/fix this issue in this case you can use ways 1, 2,
+3, 6 above or (7) increase speed of the link between target and
+initiator. But for write intensive workloads you may have to increase
+the timeout on initiator (way 3) in any case.
+
 Note, that logged messages about QUEUE_FULL status are quite different
 by nature. This is a normal work, just SCSI flow control in action.
 Simply don't enable "mgmt_minor" logging level, or, alternatively, if
-you are confident in the worst case performance of your back-end
-storage, you can increase SCST_MAX_TGT_DEV_COMMANDS in scst_priv.h to
-64. Usually initiators don't try to push more commands on the target.
+you are confident in the worst case performance of your back-end storage
+or initiator-target link, you can increase SCST_MAX_TGT_DEV_COMMANDS in
+scst_priv.h to 64. Usually initiators don't try to push more commands on
+the target.

 Credits
 -------
@@ -0,0 +1,22 @@
+diff -upkr linux-2.6.28/block/blk-ioc.c linux-2.6.28/block/blk-ioc.c
+--- linux-2.6.28/block/blk-ioc.c	2008-10-10 02:13:53.000000000 +0400
+++ linux-2.6.28/block/blk-ioc.c	2008-11-25 21:27:01.000000000 +0300
+@@ -105,6 +105,7 @@ struct io_context *alloc_io_context(gfp_
+ 
+ 	return ret;
+ }
+EXPORT_SYMBOL(alloc_io_context);
+ 
+ /*
+  * If the current task has no IO context then create one and initialise it.
+diff -upkr linux-2.6.28/include/linux/iocontext.h linux-2.6.28/include/linux/iocontext.h
+--- linux-2.6.28/include/linux/iocontext.h	2008-10-10 02:13:53.000000000 +0400
+++ linux-2.6.28/include/linux/iocontext.h	2008-11-26 13:23:03.000000000 +0300
+@@ -103,6 +103,7 @@ static inline struct io_context *ioc_tas
+ int put_io_context(struct io_context *ioc);
+ void exit_io_context(void);
+ struct io_context *get_io_context(gfp_t gfp_flags, int node);
+#define SCST_ALLOC_IO_CONTEXT_EXPORTED
+ struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
+ void copy_io_context(struct io_context **pdst, struct io_context **psrc);
+ #else
@@ -0,0 +1,12 @@
+diff -upkr -X linux-2.6.28/Documentation/dontdiff linux-2.6.28/drivers/Kconfig linux-2.6.28/drivers/Kconfig
+--- linux-2.6.28/drivers/Kconfig	2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/drivers/Kconfig	2008-07-24 14:14:46.000000000 +0400
+@@ -24,6 +24,8 @@ source "drivers/ide/Kconfig"
+ 
+ source "drivers/scsi/Kconfig"
+ 
+source "drivers/scst/Kconfig"
+
+ source "drivers/ata/Kconfig"
+ 
+ source "drivers/md/Kconfig"
@@ -0,0 +1,11 @@
+diff -upkr -X linux-2.6.28/Documentation/dontdiff linux-2.6.28/drivers/Makefile linux-2.6.28/drivers/Makefile
+--- linux-2.6.28/drivers/Makefile	2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/drivers/Makefile	2008-07-24 14:15:29.000000000 +0400
+@@ -39,6 +39,7 @@ obj-$(CONFIG_ATM)		+= atm/
+ obj-y				+= macintosh/
+ obj-$(CONFIG_IDE)		+= ide/
+ obj-$(CONFIG_SCSI)		+= scsi/
+obj-$(CONFIG_SCST)		+= scst/
+ obj-$(CONFIG_ATA)		+= ata/
+ obj-$(CONFIG_FUSION)		+= message/
+ obj-$(CONFIG_FIREWIRE)		+= firewire/
@@ -0,0 +1,112 @@
+diff -upr linux-2.6.28/drivers/scsi/scsi_lib.c linux-2.6.28/drivers/scsi/scsi_lib.c
+--- linux-2.6.28/drivers/scsi/scsi_lib.c	2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/drivers/scsi/scsi_lib.c	2008-07-31 21:20:00.000000000 +0400
+@@ -380,7 +380,7 @@ free_bios:
+ }
+ 
+ /**
+- * scsi_execute_async - insert request
+ * __scsi_execute_async - insert request
+  * @sdev:	scsi device
+  * @cmd:	scsi command
+  * @cmd_len:	length of scsi cdb
+@@ -393,11 +393,14 @@ free_bios:
+  * @privdata:	data passed to done()
+  * @done:	callback function when done
+  * @gfp:	memory allocation flags
+ * @at_head:	insert request at head or tail of queue
+  */
+-int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
+static inline int __scsi_execute_async(struct scsi_device *sdev,
+		       const unsigned char *cmd,
+ 		       int cmd_len, int data_direction, void *buffer, unsigned bufflen,
+ 		       int use_sg, int timeout, int retries, void *privdata,
+-		       void (*done)(void *, char *, int, int), gfp_t gfp)
+		       void (*done)(void *, char *, int, int), gfp_t gfp,
+		       int at_head)
+ {
+ 	struct request *req;
+ 	struct scsi_io_context *sioc;
+@@ -434,7 +439,7 @@ int scsi_execute_async(struct scsi_devic
+ 	sioc->data = privdata;
+ 	sioc->done = done;
+ 
+-	blk_execute_rq_nowait(req->q, NULL, req, 1, scsi_end_async);
+	blk_execute_rq_nowait(req->q, NULL, req, at_head, scsi_end_async);
+ 	return 0;
+ 
+ free_req:
+@@ -443,8 +446,55 @@ free_sense:
+ 	kmem_cache_free(scsi_io_context_cache, sioc);
+ 	return DRIVER_ERROR << 24;
+ }
+
+/**
+ * scsi_execute_async - insert request
+ * @sdev:	scsi device
+ * @cmd:	scsi command
+ * @cmd_len:	length of scsi cdb
+ * @data_direction: data direction
+ * @buffer:	data buffer (this can be a kernel buffer or scatterlist)
+ * @bufflen:	len of buffer
+ * @use_sg:	if buffer is a scatterlist this is the number of elements
+ * @timeout:	request timeout in seconds
+ * @retries:	number of times to retry request
+ * @flags:	or into request flags
+ **/
+int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
+		       int cmd_len, int data_direction, void *buffer,
+		       unsigned bufflen, int use_sg, int timeout,
+		       int retries, void *privdata,
+		       void (*done)(void *, char *, int, int), gfp_t gfp)
+{
+	return __scsi_execute_async(sdev, cmd, cmd_len, data_direction, buffer,
+		bufflen, use_sg, timeout, retries, privdata, done, gfp, 1);
+}
+ EXPORT_SYMBOL_GPL(scsi_execute_async);
+ 
+/**
+ * scsi_execute_async_fifo - insert request at tail, in FIFO order
+ * @sdev:	scsi device
+ * @cmd:	scsi command
+ * @cmd_len:	length of scsi cdb
+ * @data_direction: data direction
+ * @buffer:	data buffer (this can be a kernel buffer or scatterlist)
+ * @bufflen:	len of buffer
+ * @use_sg:	if buffer is a scatterlist this is the number of elements
+ * @timeout:	request timeout in seconds
+ * @retries:	number of times to retry request
+ * @flags:	or into request flags
+ **/
+int scsi_execute_async_fifo(struct scsi_device *sdev, const unsigned char *cmd,
+		       int cmd_len, int data_direction, void *buffer,
+		       unsigned bufflen, int use_sg, int timeout, int retries,
+		       void *privdata,
+		       void (*done)(void *, char *, int, int), gfp_t gfp)
+{
+	return __scsi_execute_async(sdev, cmd, cmd_len, data_direction, buffer,
+		bufflen, use_sg, timeout, retries, privdata, done, gfp, 0);
+}
+EXPORT_SYMBOL_GPL(scsi_execute_async_fifo);
+
+ /*
+  * Function:    scsi_init_cmd_errh()
+  *
+diff -upr linux-2.6.28/include/scsi/scsi_device.h linux-2.6.28/include/scsi/scsi_device.h
+--- linux-2.6.28/include/scsi/scsi_device.h	2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/include/scsi/scsi_device.h	2008-07-31 21:20:39.000000000 +0400
+@@ -377,6 +377,14 @@ extern int scsi_execute_async(struct scs
+ 			      int timeout, int retries, void *privdata,
+ 			      void (*done)(void *, char *, int, int),
+ 			      gfp_t gfp);
+#define SCSI_EXEC_REQ_FIFO_DEFINED
+extern int scsi_execute_async_fifo(struct scsi_device *sdev,
+			      const unsigned char *cmd, int cmd_len,
+			      int data_direction, void *buffer,
+			      unsigned bufflen, int use_sg,
+			      int timeout, int retries, void *privdata,
+			      void (*done)(void *, char *, int, int),
+			      gfp_t gfp);
+ 
+ static inline int __must_check scsi_device_reprobe(struct scsi_device *sdev)
+ {