- Update for 2.6.28

- Small doc update in flow control area



git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@632 d57e44dd-8a1f-0410-8b47-8ef2f437770f
This commit is contained in:
Vladislav Bolkhovitin
2009-01-12 18:55:08 +00:00
parent e5caaf4548
commit 479a9d0a9f
8 changed files with 551 additions and 33 deletions

View File

@@ -136,16 +136,26 @@ IMPORTANT: All LUN information (access control) MUST be configured
Also see SCST README file how to tune for the best performance.
If under high load you experience I/O stalls or see in the kernel log
abort or reset messages, then try to reduce QueuedCommands parameter in
iscsi-scstd.conf file for the corresponding target to some lower value,
like 8 (default is 32). See also SCST README file for more details about
that issue.
CAUTION: Working of target and initiator on the same host isn't
======= supported. See SCST README file for details.
Work if target's backstorage or link is too slow
------------------------------------------------
In some cases you can experience I/O stalls or see in the kernel log
abort or reset messages. It can happen under high I/O load, when your
target's backstorage gets overloaded, or working over a slow link, when
the link can't serve all the queued commands on time.
To work around it you can reduce the QueuedCommands parameter in
iscsi-scstd.conf file for the corresponding target to some lower value,
like 8 (default is 32).
Also see SCST README file for more details about that issue and ways to
prevent it.
Performance advices
-------------------

View File

@@ -0,0 +1,308 @@
diff -upr linux-2.6.28/include/linux/mm_types.h linux-2.6.28/include/linux/mm_types.h
--- linux-2.6.28/include/linux/mm_types.h 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/include/linux/mm_types.h 2008-07-22 20:30:21.000000000 +0400
@@ -94,6 +94,18 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. It might look as not good to have this field here and
+ * it's better to have it in struct sk_buff, but it would make the code
+ * much more complicated and fragile, since all skb then would have to
+ * contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -upr linux-2.6.28/include/linux/net.h linux-2.6.28/include/linux/net.h
--- linux-2.6.28/include/linux/net.h 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/include/linux/net.h 2008-07-29 20:48:07.000000000 +0400
@@ -57,6 +57,7 @@ typedef enum {
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -352,5 +352,44 @@ extern int net_msg_cost;
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * See comment for net_set_get_put_page_callbacks() why those functions
+ * don't need any protection.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upr linux-2.6.28/net/core/skbuff.c linux-2.6.28/net/core/skbuff.c
--- linux-2.6.28/net/core/skbuff.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/core/skbuff.c 2008-07-22 20:28:41.000000000 +0400
@@ -339,7 +339,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_shinfo(skb)->frag_list)
@@ -727,7 +725,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -792,7 +792,7 @@ int pskb_expand_head(struct sk_buff *skb
sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_clone_fraglist(skb);
@@ -1061,7 +1061,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_drop_fraglist(skb);
@@ -1230,7 +1230,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1987,7 +1987,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2355,7 +2355,7 @@ struct sk_buff *skb_segment(struct sk_bu
BUG_ON(i >= nfrags);
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upr linux-2.6.28/net/ipv4/ip_output.c linux-2.6.28/net/ipv4/ip_output.c
--- linux-2.6.28/net/ipv4/ip_output.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/ipv4/ip_output.c 2008-07-22 20:28:41.000000000 +0400
@@ -1008,7 +1008,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1166,7 +1166,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upr linux-2.6.28/net/ipv4/Makefile linux-2.6.28/net/ipv4/Makefile
--- linux-2.6.28/net/ipv4/Makefile 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/ipv4/Makefile 2008-07-22 20:35:05.000000000 +0400
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -upr linux-2.6.28/net/ipv4/tcp.c linux-2.6.28/net/ipv4/tcp.c
--- linux-2.6.28/net/ipv4/tcp.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/ipv4/tcp.c 2008-07-22 20:28:41.000000000 +0400
@@ -714,7 +714,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -919,7 +919,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -960,9 +960,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upr linux-2.6.28/net/ipv4/tcp_output.c linux-2.6.28/net/ipv4/tcp_output.c
--- linux-2.6.28/net/ipv4/tcp_output.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/ipv4/tcp_output.c 2008-07-22 20:28:41.000000000 +0400
@@ -871,7 +871,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upr linux-2.6.28/net/ipv4/tcp_zero_copy.c linux-2.6.28/net/ipv4/tcp_zero_copy.c
--- linux-2.6.28/net/ipv4/tcp_zero_copy.c 2008-07-22 20:12:35.000000000 +0400
+++ linux-2.6.28/net/ipv4/tcp_zero_copy.c 2008-07-31 21:21:13.000000000 +0400
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Caller of this function must ensure that at the moment when it's called
+ * there are no pages in the system with net_priv field set to non-zero
+ * value. Hence, this function, as well as net_get_page() and net_put_page(),
+ * don't need any protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -upr linux-2.6.28/net/ipv6/ip6_output.c linux-2.6.28/net/ipv6/ip6_output.c
--- linux-2.6.28/net/ipv6/ip6_output.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/ipv6/ip6_output.c 2008-07-22 20:28:41.000000000 +0400
@@ -1362,7 +1362,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -upr linux-2.6.28/net/Kconfig linux-2.6.28/net/Kconfig
--- linux-2.6.28/net/Kconfig 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/Kconfig 2008-07-29 21:15:39.000000000 +0400
@@ -59,6 +59,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -11,6 +11,7 @@
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/version.h>
#include <scsi/scsi_tcq.h>
#include <scsi/scsicam.h>
@@ -1571,9 +1572,14 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
goto probe_out;
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
if (pci_find_aer_capability(pdev))
if (pci_enable_pcie_error_reporting(pdev))
goto probe_out;
#else /* taken from 2.6.28 */
/* This may fail but that's ok */
pci_enable_pcie_error_reporting(pdev);
#endif
host = scsi_host_alloc(sht, sizeof(scsi_qla_host_t));
if (host == NULL) {

View File

@@ -842,17 +842,20 @@ IMPORTANT: If you use on initiator some versions of Windows (at least W2K)
See also important notes about setting block sizes >512 bytes
for VDISK FILEIO devices above.
What if target's backstorage is too slow
----------------------------------------
If under high load you experience I/O stalls or see in the kernel log on
the target abort or reset messages, then your backstorage is too slow
comparing with your target link speed and amount of simultaneously
queued commands. On some seek intensive workloads even fast disks or
RAIDs, which able to serve continuous data stream on 500+ MB/s speed,
can be as slow as 0.3 MB/s. Another possible cause for that can be
MD/LVM/RAID on your target as in http://lkml.org/lkml/2008/2/27/96
(check the whole thread as well).
Work if target's backstorage or link is too slow
------------------------------------------------
Under high I/O load, when your target's backstorage gets overloaded, or
working over a slow link between initiator and target, when the link
can't serve all the queued commands on time, you can experience I/O
stalls or see in the kernel log abort or reset messages.
First, consider the case of a too slow target backstorage. On some
seek intensive workloads even fast disks or RAIDs, which are able to
serve a continuous data stream at 500+ MB/s, can be as slow as
0.3 MB/s. Another possible cause for that can be MD/LVM/RAID on your
target as in http://lkml.org/lkml/2008/2/27/96 (check the whole thread
as well).
Thus, in such situations simply processing of one or more commands takes
too long time, hence initiator decides that they are stuck on the target
@@ -865,28 +868,21 @@ backstorage speed could be more appropriate.
Unfortunately, currently SCST lacks dynamic I/O flow control, when the
queue depth on the target is dynamically decreased/increased based on
how slow/fast the backstorage speed comparing to the target link. So,
there are only 5 possible actions, which you can do to workaround or fix
this issue:
there are 6 possible actions, which you can do to workaround or fix this
issue in this case:
1. Ignore incoming task management (TM) commands. It's fine if there are
not too many of them, so average performance isn't hurt and the
corresponding device isn't put offline, i.e. if the backstorage isn't
too much slow.
corresponding device isn't getting put offline, i.e. if the backstorage
isn't too slow.
2. Decrease /sys/block/sdX/device/queue_depth on the initiator in case
if it's Linux (see below how) or/and SCST_MAX_TGT_DEV_COMMANDS constant
in scst_priv.h file until you stop seeing incoming TM commands.
ISCSI-SCST driver also has its own iSCSI specific parameter for that.
ISCSI-SCST driver also has its own iSCSI specific parameter for that,
see its README file.
3. Try to avoid such seek intensive workloads.
4. Insrease speed of the target's backstorage.
5. Implement in SCST dynamic I/O flow control. See "Dynamic I/O flow
control" section on http://scst.sourceforge.net/contributing.html page
for possible idea how to do it.
To decrease device queue depth on Linux initiators run command:
To decrease device queue depth on Linux initiators you can run command:
# echo Y >/sys/block/sdX/device/queue_depth
@@ -896,12 +892,53 @@ limitations for Y value, it can be any value from 1 to possible maximum
(usually, 32), so start from dividing the current value on 2, i.e. set
16, if /sys/block/sdX/device/queue_depth contains 32.
3. Increase the corresponding timeout on the initiator. For Linux it is
located in
/sys/devices/platform/host*/session*/target*:0:0/*:0:0:1/timeout. It can
be done automatically by an udev rule. For instance, the following
rule will increase it to 300 seconds:
SUBSYSTEM=="scsi", KERNEL=="[0-9]*:[0-9]*", ACTION=="add", ATTR{type}=="0|7|14", ATTR{timeout}="300"
By default, this timeout is 30 or 60 seconds, depending on your distribution.
4. Try to avoid such seek intensive workloads.
5. Increase speed of the target's backstorage.
6. Implement in SCST dynamic I/O flow control. This will be an ultimate
solution. See "Dynamic I/O flow control" section on
http://scst.sourceforge.net/contributing.html page for possible
implementation idea.
Next, consider the case of too slow link between initiator and target,
when the initiator tries to simultaneously push N commands to the target
over it. In this case time to serve those commands, i.e. send or receive
data for them over the link, can be more than the timeout for any single
command, hence one or more commands in the tail of the queue can not be
served on time less than the timeout, so the initiator will decide that
they are stuck on the target and will try to recover.
Unfortunately, the target can reliably detect the conditions leading to
the issue only in case of READ commands, when the target can see that
commands' data transmission is getting too slow, so the dynamic flow
control, described above, can prevent the issue. But for WRITE commands
there are cases when target has no way to detect the issue. In this case
you can workaround it only by increasing the corresponding timeout on
the initiator.
Thus, to workaround/fix this issue in this case you can use ways 1, 2,
3, 6 above or (7) increase speed of the link between target and
initiator. But for write intensive workloads you may have to increase
the timeout on initiator (way 3) in any case.
Note, that logged messages about QUEUE_FULL status are quite different
by nature. This is a normal work, just SCSI flow control in action.
Simply don't enable "mgmt_minor" logging level, or, alternatively, if
you are confident in the worst case performance of your back-end
storage, you can increase SCST_MAX_TGT_DEV_COMMANDS in scst_priv.h to
64. Usually initiators don't try to push more commands on the target.
you are confident in the worst case performance of your back-end storage
or initiator-target link, you can increase SCST_MAX_TGT_DEV_COMMANDS in
scst_priv.h to 64. Usually initiators don't try to push more commands on
the target.
Credits
-------

View File

@@ -0,0 +1,22 @@
diff -upkr linux-2.6.28/block/blk-ioc.c linux-2.6.28/block/blk-ioc.c
--- linux-2.6.28/block/blk-ioc.c 2008-10-10 02:13:53.000000000 +0400
+++ linux-2.6.28/block/blk-ioc.c 2008-11-25 21:27:01.000000000 +0300
@@ -105,6 +105,7 @@ struct io_context *alloc_io_context(gfp_
return ret;
}
+EXPORT_SYMBOL(alloc_io_context);
/*
* If the current task has no IO context then create one and initialise it.
diff -upkr linux-2.6.28/include/linux/iocontext.h linux-2.6.28/include/linux/iocontext.h
--- linux-2.6.28/include/linux/iocontext.h 2008-10-10 02:13:53.000000000 +0400
+++ linux-2.6.28/include/linux/iocontext.h 2008-11-26 13:23:03.000000000 +0300
@@ -103,6 +103,7 @@ static inline struct io_context *ioc_tas
int put_io_context(struct io_context *ioc);
void exit_io_context(void);
struct io_context *get_io_context(gfp_t gfp_flags, int node);
+#define SCST_ALLOC_IO_CONTEXT_EXPORTED
struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
void copy_io_context(struct io_context **pdst, struct io_context **psrc);
#else

View File

@@ -0,0 +1,12 @@
diff -upkr -X linux-2.6.28/Documentation/dontdiff linux-2.6.28/drivers/Kconfig linux-2.6.28/drivers/Kconfig
--- linux-2.6.28/drivers/Kconfig 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/drivers/Kconfig 2008-07-24 14:14:46.000000000 +0400
@@ -24,6 +24,8 @@ source "drivers/ide/Kconfig"
source "drivers/scsi/Kconfig"
+source "drivers/scst/Kconfig"
+
source "drivers/ata/Kconfig"
source "drivers/md/Kconfig"

View File

@@ -0,0 +1,11 @@
diff -upkr -X linux-2.6.28/Documentation/dontdiff linux-2.6.28/drivers/Makefile linux-2.6.28/drivers/Makefile
--- linux-2.6.28/drivers/Makefile 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/drivers/Makefile 2008-07-24 14:15:29.000000000 +0400
@@ -39,6 +39,7 @@ obj-$(CONFIG_ATM) += atm/
obj-y += macintosh/
obj-$(CONFIG_IDE) += ide/
obj-$(CONFIG_SCSI) += scsi/
+obj-$(CONFIG_SCST) += scst/
obj-$(CONFIG_ATA) += ata/
obj-$(CONFIG_FUSION) += message/
obj-$(CONFIG_FIREWIRE) += firewire/

View File

@@ -0,0 +1,112 @@
diff -upr linux-2.6.28/drivers/scsi/scsi_lib.c linux-2.6.28/drivers/scsi/scsi_lib.c
--- linux-2.6.28/drivers/scsi/scsi_lib.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/drivers/scsi/scsi_lib.c 2008-07-31 21:20:00.000000000 +0400
@@ -380,7 +380,7 @@ free_bios:
}
/**
- * scsi_execute_async - insert request
+ * __scsi_execute_async - insert request
* @sdev: scsi device
* @cmd: scsi command
* @cmd_len: length of scsi cdb
@@ -393,11 +393,14 @@ free_bios:
* @privdata: data passed to done()
* @done: callback function when done
* @gfp: memory allocation flags
+ * @at_head: insert request at head or tail of queue
*/
-int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
+static inline int __scsi_execute_async(struct scsi_device *sdev,
+ const unsigned char *cmd,
int cmd_len, int data_direction, void *buffer, unsigned bufflen,
int use_sg, int timeout, int retries, void *privdata,
- void (*done)(void *, char *, int, int), gfp_t gfp)
+ void (*done)(void *, char *, int, int), gfp_t gfp,
+ int at_head)
{
struct request *req;
struct scsi_io_context *sioc;
@@ -434,7 +439,7 @@ int scsi_execute_async(struct scsi_devic
sioc->data = privdata;
sioc->done = done;
- blk_execute_rq_nowait(req->q, NULL, req, 1, scsi_end_async);
+ blk_execute_rq_nowait(req->q, NULL, req, at_head, scsi_end_async);
return 0;
free_req:
@@ -443,8 +446,55 @@ free_sense:
kmem_cache_free(scsi_io_context_cache, sioc);
return DRIVER_ERROR << 24;
}
+
+/**
+ * scsi_execute_async - insert request
+ * @sdev: scsi device
+ * @cmd: scsi command
+ * @cmd_len: length of scsi cdb
+ * @data_direction: data direction
+ * @buffer: data buffer (this can be a kernel buffer or scatterlist)
+ * @bufflen: len of buffer
+ * @use_sg: if buffer is a scatterlist this is the number of elements
+ * @timeout: request timeout in seconds
+ * @retries: number of times to retry request
+ * @gfp: memory allocation flags
+ **/
+int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
+ int cmd_len, int data_direction, void *buffer,
+ unsigned bufflen, int use_sg, int timeout,
+ int retries, void *privdata,
+ void (*done)(void *, char *, int, int), gfp_t gfp)
+{
+ return __scsi_execute_async(sdev, cmd, cmd_len, data_direction, buffer,
+ bufflen, use_sg, timeout, retries, privdata, done, gfp, 1);
+}
EXPORT_SYMBOL_GPL(scsi_execute_async);
+/**
+ * scsi_execute_async_fifo - insert request at tail, in FIFO order
+ * @sdev: scsi device
+ * @cmd: scsi command
+ * @cmd_len: length of scsi cdb
+ * @data_direction: data direction
+ * @buffer: data buffer (this can be a kernel buffer or scatterlist)
+ * @bufflen: len of buffer
+ * @use_sg: if buffer is a scatterlist this is the number of elements
+ * @timeout: request timeout in seconds
+ * @retries: number of times to retry request
+ * @gfp: memory allocation flags
+ **/
+int scsi_execute_async_fifo(struct scsi_device *sdev, const unsigned char *cmd,
+ int cmd_len, int data_direction, void *buffer,
+ unsigned bufflen, int use_sg, int timeout, int retries,
+ void *privdata,
+ void (*done)(void *, char *, int, int), gfp_t gfp)
+{
+ return __scsi_execute_async(sdev, cmd, cmd_len, data_direction, buffer,
+ bufflen, use_sg, timeout, retries, privdata, done, gfp, 0);
+}
+EXPORT_SYMBOL_GPL(scsi_execute_async_fifo);
+
/*
* Function: scsi_init_cmd_errh()
*
diff -upr linux-2.6.28/include/scsi/scsi_device.h linux-2.6.28/include/scsi/scsi_device.h
--- linux-2.6.28/include/scsi/scsi_device.h 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/include/scsi/scsi_device.h 2008-07-31 21:20:39.000000000 +0400
@@ -377,6 +377,14 @@ extern int scsi_execute_async(struct scs
int timeout, int retries, void *privdata,
void (*done)(void *, char *, int, int),
gfp_t gfp);
+#define SCSI_EXEC_REQ_FIFO_DEFINED
+extern int scsi_execute_async_fifo(struct scsi_device *sdev,
+ const unsigned char *cmd, int cmd_len,
+ int data_direction, void *buffer,
+ unsigned bufflen, int use_sg,
+ int timeout, int retries, void *privdata,
+ void (*done)(void *, char *, int, int),
+ gfp_t gfp);
static inline int __must_check scsi_device_reprobe(struct scsi_device *sdev)
{