Update for 2.6.39

git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@3462 d57e44dd-8a1f-0410-8b47-8ef2f437770f
This commit is contained in:
Vladislav Bolkhovitin
2011-05-20 00:52:34 +00:00
parent 3a2ba412c3
commit b5a41deb28
7 changed files with 1021 additions and 18 deletions

View File

@@ -0,0 +1,390 @@
diff -upkr linux-2.6.39/include/linux/mm_types.h linux-2.6.39/include/linux/mm_types.h
--- linux-2.6.39/include/linux/mm_types.h 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/include/linux/mm_types.h 2011-05-19 10:46:24.669812999 -0400
@@ -100,6 +100,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Having this field here rather than in struct sk_buff may
+ * look wrong, but moving it there would make the code much more
+ * complicated and fragile, since every skb would then have to contain
+ * only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -upkr linux-2.6.39/include/linux/net.h linux-2.6.39/include/linux/net.h
--- linux-2.6.39/include/linux/net.h 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/include/linux/net.h 2011-05-19 10:46:24.669812999 -0400
@@ -60,6 +60,7 @@ typedef enum {
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
#include <linux/kmemcheck.h>
#include <linux/rcupdate.h>
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -294,5 +295,44 @@ extern int kernel_sock_shutdown(struct s
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * Call the registered callback when page->net_priv is set, then get_page()/
+ * put_page(). See net_set_get_put_page_callbacks() why this is unprotected.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page); /* the regular page reference is always taken */
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page); /* the regular page reference is always dropped */
+}
+#else /* option disabled: plain get_page()/put_page() wrappers */
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upkr linux-2.6.39/net/core/dev.c linux-2.6.39/net/core/dev.c
--- linux-2.6.39/net/core/dev.c 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/net/core/dev.c 2011-05-19 10:46:24.669812999 -0400
@@ -3418,7 +3418,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
diff -upkr linux-2.6.39/net/core/skbuff.c linux-2.6.39/net/core/skbuff.c
--- linux-2.6.39/net/core/skbuff.c 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/net/core/skbuff.c 2011-05-19 10:46:24.669812999 -0400
@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -325,7 +325,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_has_frag_list(skb))
@@ -732,7 +732,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -819,7 +819,7 @@ int pskb_expand_head(struct sk_buff *skb
kfree(skb->head);
} else {
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);
@@ -1097,7 +1097,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frag_list(skb))
skb_drop_fraglist(skb);
@@ -1266,7 +1266,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1367,7 +1367,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1391,7 +1391,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1401,7 +1401,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1423,7 +1423,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2056,7 +2056,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2178,7 +2178,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2200,7 +2200,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2598,7 +2598,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upkr linux-2.6.39/net/ipv4/ip_output.c linux-2.6.39/net/ipv4/ip_output.c
--- linux-2.6.39/net/ipv4/ip_output.c 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/net/ipv4/ip_output.c 2011-05-19 10:47:39.565813000 -0400
@@ -985,7 +985,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1220,7 +1220,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upkr linux-2.6.39/net/ipv4/Makefile linux-2.6.39/net/ipv4/Makefile
--- linux-2.6.39/net/ipv4/Makefile 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/net/ipv4/Makefile 2011-05-19 10:46:24.669812999 -0400
@@ -48,6 +48,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -upkr linux-2.6.39/net/ipv4/tcp.c linux-2.6.39/net/ipv4/tcp.c
--- linux-2.6.39/net/ipv4/tcp.c 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/net/ipv4/tcp.c 2011-05-19 10:46:24.673813002 -0400
@@ -815,7 +815,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1021,7 +1021,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1062,9 +1062,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upkr linux-2.6.39/net/ipv4/tcp_output.c linux-2.6.39/net/ipv4/tcp_output.c
--- linux-2.6.39/net/ipv4/tcp_output.c 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/net/ipv4/tcp_output.c 2011-05-19 10:46:24.673813002 -0400
@@ -1095,7 +1095,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upkr linux-2.6.39/net/ipv4/tcp_zero_copy.c linux-2.6.39/net/ipv4/tcp_zero_copy.c
--- linux-2.6.39/net/ipv4/tcp_zero_copy.c 2011-05-19 10:44:53.685813002 -0400
+++ linux-2.6.39/net/ipv4/tcp_zero_copy.c 2011-05-19 10:46:24.673813002 -0400
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Sets both callbacks (NULL clears them); returns 0, or -EBUSY when a
+ * different non-NULL callback is already installed. The caller must ensure
+ * that no page in the system has a non-zero net_priv field at this moment,
+ * so this function, net_get_page() and net_put_page() need no protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback; /* both checks passed */
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -upkr linux-2.6.39/net/ipv6/ip6_output.c linux-2.6.39/net/ipv6/ip6_output.c
--- linux-2.6.39/net/ipv6/ip6_output.c 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/net/ipv6/ip6_output.c 2011-05-19 10:46:24.673813002 -0400
@@ -1444,7 +1444,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -upkr linux-2.6.39/net/Kconfig linux-2.6.39/net/Kconfig
--- linux-2.6.39/net/Kconfig 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/net/Kconfig 2011-05-19 10:46:24.673813002 -0400
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -9,7 +9,6 @@
#include <linux/version.h>
#include <linux/moduleparam.h>
#include <linux/vmalloc.h>
#include <linux/smp_lock.h>
#include <linux/list.h>
#include <scsi/scsi_tcq.h>

View File

@@ -0,0 +1,532 @@
diff -upkr linux-2.6.39/block/blk-map.c linux-2.6.39/block/blk-map.c
--- linux-2.6.39/block/blk-map.c 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/block/blk-map.c 2011-05-19 10:49:02.753812997 -0400
@@ -5,6 +5,8 @@
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
#include <scsi/sg.h> /* for struct sg_iovec */
#include "blk.h"
@@ -274,6 +276,339 @@ int blk_rq_unmap_user(struct bio *bio)
}
EXPORT_SYMBOL(blk_rq_unmap_user);
+struct blk_kern_sg_work {
+ atomic_t bios_inflight; /* bios not yet completed; the last one frees us */
+ struct sg_table sg_table; /* SG table of the pages allocated for the copy */
+ struct scatterlist *src_sgl; /* the caller's original SG list */
+};
+
+static void blk_free_kern_sg_work(struct blk_kern_sg_work *bw)
+{
+ struct sg_table *sgt = &bw->sg_table;
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) {
+ struct page *pg = sg_page(sg);
+ if (pg == NULL) /* stop at the first entry without a page */
+ break;
+ __free_page(pg);
+ }
+ /* Pages are released; now free the SG table and the work itself */
+ sg_free_table(sgt);
+ kfree(bw);
+ return;
+}
+
+static void blk_bio_map_kern_endio(struct bio *bio, int err)
+{
+ struct blk_kern_sg_work *bw = bio->bi_private;
+
+ if (bw != NULL) {
+ /* Last completing bio copies back READ data (if no error) and frees */
+ BUG_ON(atomic_read(&bw->bios_inflight) <= 0);
+ if (atomic_dec_and_test(&bw->bios_inflight)) {
+ if ((bio_data_dir(bio) == READ) && (err == 0)) {
+ unsigned long flags;
+
+ local_irq_save(flags); /* to protect KMs */
+ sg_copy(bw->src_sgl, bw->sg_table.sgl, 0, 0,
+ KM_BIO_DST_IRQ, KM_BIO_SRC_IRQ);
+ local_irq_restore(flags);
+ }
+ blk_free_kern_sg_work(bw);
+ }
+ }
+ /* The bio itself is released whether or not a work was attached */
+ bio_put(bio);
+ return;
+}
+
+/*
+ * Allocates bounce pages covering @sgl and, for WRITE requests, copies the
+ * data into them. Returns 0 with the new work in *pbw, or -ENOMEM.
+ */
+static int blk_rq_copy_kern_sg(struct request *rq, struct scatterlist *sgl,
+			       int nents, struct blk_kern_sg_work **pbw,
+			       gfp_t gfp, gfp_t page_gfp)
+{
+	int res = -ENOMEM, i;	/* -ENOMEM: returned if allocating bw fails */
+	struct scatterlist *sg, *new_sgl;
+	int new_sgl_nents;
+	size_t len = 0, to_copy;
+	struct blk_kern_sg_work *bw;
+
+	bw = kzalloc(sizeof(*bw), gfp);
+	if (bw == NULL)
+		goto out;
+
+	bw->src_sgl = sgl;
+
+	for_each_sg(sgl, sg, nents, i)
+		len += sg->length;
+	to_copy = len;
+
+	new_sgl_nents = PFN_UP(len);
+
+	res = sg_alloc_table(&bw->sg_table, new_sgl_nents, gfp);
+	if (res != 0)
+		goto err_free;
+
+	new_sgl = bw->sg_table.sgl;
+
+	for_each_sg(new_sgl, sg, new_sgl_nents, i) {
+		struct page *pg = alloc_page(page_gfp);
+
+		if (pg == NULL)
+			goto err_free;
+
+		sg_assign_page(sg, pg);
+		sg->length = min_t(size_t, PAGE_SIZE, len);
+		len -= sg->length;	/* never wraps, unlike len -= PAGE_SIZE */
+	}
+
+	if (rq_data_dir(rq) == WRITE) {
+		/*
+		 * We need to limit amount of copied data to to_copy, because
+		 * sgl might have the last element in sgl not marked as last in
+		 * SG chaining.
+		 */
+		sg_copy(new_sgl, sgl, 0, to_copy, KM_USER0, KM_USER1);
+	}
+
+	*pbw = bw;
+	/*
+	 * REQ_COPY_USER name is misleading. It should be something like
+	 * REQ_HAS_TAIL_SPACE_FOR_PADDING.
+	 */
+	rq->cmd_flags |= REQ_COPY_USER;
+
+out:
+	return res;
+
+err_free:
+	blk_free_kern_sg_work(bw);
+	res = -ENOMEM;
+	goto out;
+}
+
+/*
+ * Builds bios over the pages of @sgl and appends them to @rq; @bw (may be
+ * NULL) becomes bi_private of each bio. Returns 0 or a negative error code.
+ */
+static int __blk_rq_map_kern_sg(struct request *rq, struct scatterlist *sgl,
+	int nents, struct blk_kern_sg_work *bw, gfp_t gfp)
+{
+	int res;
+	struct request_queue *q = rq->q;
+	int rw = rq_data_dir(rq);
+	int max_nr_vecs, i;
+	size_t tot_len = 0;
+	bool need_new_bio = true;
+	struct scatterlist *sg;
+	struct bio *bio = NULL, *hbio = NULL, *tbio = NULL;
+	int bios = 0;
+
+	if (unlikely((sgl == NULL) || (sgl->length == 0) || (nents <= 0))) {
+		WARN_ON(1);
+		res = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * Let's keep each bio allocation inside a single page to decrease
+	 * probability of failure.
+	 */
+	max_nr_vecs =  min_t(size_t,
+		((PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec)),
+		BIO_MAX_PAGES);
+
+	for_each_sg(sgl, sg, nents, i) {
+		struct page *page = sg_page(sg);
+		void *page_addr = page_address(page);
+		size_t len = sg->length, l;
+		size_t offset = sg->offset;
+
+		tot_len += len;
+
+		/*
+		 * Each segment must be aligned on DMA boundary and
+		 * not on stack. The last one may have unaligned
+		 * length as long as the total length is aligned to
+		 * DMA padding alignment.
+		 */
+		if (i == nents - 1)
+			l = 0;
+		else
+			l = len;
+		if (((sg->offset | l) & queue_dma_alignment(q)) ||
+		    (page_addr && object_is_on_stack(page_addr + sg->offset))) {
+			res = -EINVAL;
+			goto out_free_bios;
+		}
+
+		while (len > 0) {
+			size_t bytes;
+			int rc;
+
+			if (need_new_bio) {
+				bio = bio_kmalloc(gfp, max_nr_vecs);
+				if (bio == NULL) {
+					res = -ENOMEM;
+					goto out_free_bios;
+				}
+
+				if (rw == WRITE)
+					bio->bi_rw |= REQ_WRITE;
+
+				bios++;
+				bio->bi_private = bw;
+				bio->bi_end_io = blk_bio_map_kern_endio;
+
+				if (hbio == NULL)
+					hbio = tbio = bio;
+				else
+					tbio = tbio->bi_next = bio;
+			}
+
+			bytes = min_t(size_t, len, PAGE_SIZE - offset);
+
+			rc = bio_add_pc_page(q, bio, page, bytes, offset);
+			if (rc < 0 || (size_t)rc < bytes) { /* rc is signed */
+				if (unlikely(need_new_bio || (rc < 0))) {
+					if (rc < 0)
+						res = rc;
+					else
+						res = -EIO;
+					goto out_free_bios;
+				} else {
+					need_new_bio = true;
+					len -= rc;
+					offset += rc;
+					continue;
+				}
+			}
+
+			need_new_bio = false;
+			offset = 0;
+			len -= bytes;
+			page = nth_page(page, 1);
+		}
+	}
+
+	if (hbio == NULL) {
+		res = -EINVAL;
+		goto out_free_bios;
+	}
+
+	/* Total length must be aligned on DMA padding alignment */
+	if ((tot_len & q->dma_pad_mask) &&
+	    !(rq->cmd_flags & REQ_COPY_USER)) {
+		res = -EINVAL;
+		goto out_free_bios;
+	}
+
+	if (bw != NULL)
+		atomic_set(&bw->bios_inflight, bios);
+
+	while (hbio != NULL) {
+		bio = hbio;
+		hbio = hbio->bi_next;
+		bio->bi_next = NULL;
+
+		blk_queue_bounce(q, &bio);
+
+		res = blk_rq_append_bio(q, rq, bio);
+		if (unlikely(res != 0)) {
+			bio->bi_next = hbio;
+			hbio = bio;
+			/* We can have one or more bios bounced */
+			goto out_unmap_bios;
+		}
+	}
+
+	res = 0;
+
+	rq->buffer = NULL;
+out:
+	return res;
+
+out_unmap_bios:
+	blk_rq_unmap_kern_sg(rq, res);
+
+out_free_bios:
+	while (hbio != NULL) {
+		bio = hbio;
+		hbio = hbio->bi_next;
+		bio_put(bio);
+	}
+	goto out;
+}
+
+/**
+ * blk_rq_map_kern_sg - map kernel data to a request, for REQ_TYPE_BLOCK_PC
+ * @rq: request to fill
+ * @sgl: area to map
+ * @nents: number of elements in @sgl
+ * @gfp: memory allocation flags
+ *
+ * Description:
+ * Data will be mapped directly if possible, otherwise through a bounce
+ * copy of @sgl. Returns 0 on success or a negative error code.
+ */
+int blk_rq_map_kern_sg(struct request *rq, struct scatterlist *sgl,
+ int nents, gfp_t gfp)
+{
+ int res;
+
+ res = __blk_rq_map_kern_sg(rq, sgl, nents, NULL, gfp);
+ if (unlikely(res != 0)) { /* direct mapping failed: retry via a copy */
+ struct blk_kern_sg_work *bw = NULL;
+
+ res = blk_rq_copy_kern_sg(rq, sgl, nents, &bw,
+ gfp, rq->q->bounce_gfp | gfp);
+ if (unlikely(res != 0))
+ goto out;
+
+ res = __blk_rq_map_kern_sg(rq, bw->sg_table.sgl,
+ bw->sg_table.nents, bw, gfp);
+ if (res != 0) {
+ blk_free_kern_sg_work(bw); /* no bio completion will free it */
+ goto out;
+ }
+ }
+
+ rq->buffer = NULL;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(blk_rq_map_kern_sg);
+
+/**
+ * blk_rq_unmap_kern_sg - unmap a request with kernel sg
+ * @rq: request to unmap
+ * @err: non-zero error code, passed to the bi_end_io() of each bio
+ *
+ * Description:
+ * Unmap a rq previously mapped by blk_rq_map_kern_sg(). Must be called
+ * only in case of an error!
+ */
+void blk_rq_unmap_kern_sg(struct request *rq, int err)
+{
+ struct bio *bio = rq->bio;
+
+ while (bio) {
+ struct bio *b = bio;
+ bio = bio->bi_next; /* fetch the link before b is completed */
+ b->bi_end_io(b, err);
+ }
+ rq->bio = NULL;
+
+ return;
+}
+EXPORT_SYMBOL(blk_rq_unmap_kern_sg);
+
/**
* blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage
* @q: request queue where request should be inserted
diff -upkr linux-2.6.39/include/linux/blkdev.h linux-2.6.39/include/linux/blkdev.h
--- linux-2.6.39/include/linux/blkdev.h 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/include/linux/blkdev.h 2011-05-19 10:49:02.753812997 -0400
@@ -592,6 +592,8 @@ extern unsigned long blk_max_low_pfn, bl
#define BLK_DEFAULT_SG_TIMEOUT (60 * HZ)
#define BLK_MIN_SG_TIMEOUT (7 * HZ)
+#define SCSI_EXEC_REQ_FIFO_DEFINED
+
#ifdef CONFIG_BOUNCE
extern int init_emergency_isa_pool(void);
extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
@@ -707,6 +709,9 @@ extern int blk_rq_map_kern(struct reques
extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
struct rq_map_data *, struct sg_iovec *, int,
unsigned int, gfp_t);
+extern int blk_rq_map_kern_sg(struct request *rq, struct scatterlist *sgl,
+ int nents, gfp_t gfp);
+extern void blk_rq_unmap_kern_sg(struct request *rq, int err);
extern int blk_execute_rq(struct request_queue *, struct gendisk *,
struct request *, int);
extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
diff -upkr linux-2.6.39/include/linux/scatterlist.h linux-2.6.39/include/linux/scatterlist.h
--- linux-2.6.39/include/linux/scatterlist.h 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/include/linux/scatterlist.h 2011-05-19 10:49:02.753812997 -0400
@@ -3,6 +3,7 @@
#include <asm/types.h>
#include <asm/scatterlist.h>
+#include <asm/kmap_types.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <asm/io.h>
@@ -218,6 +219,10 @@ size_t sg_copy_from_buffer(struct scatte
size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents,
void *buf, size_t buflen);
+int sg_copy(struct scatterlist *dst_sg, struct scatterlist *src_sg,
+ int nents_to_copy, size_t copy_len,
+ enum km_type d_km_type, enum km_type s_km_type);
+
/*
* Maximum number of entries that will be allocated in one piece, if
* a list larger than this is required then chaining will be utilized.
diff -upkr linux-2.6.39/lib/scatterlist.c linux-2.6.39/lib/scatterlist.c
--- linux-2.6.39/lib/scatterlist.c 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/lib/scatterlist.c 2011-05-19 10:49:02.753812997 -0400
@@ -517,3 +517,132 @@ size_t sg_copy_to_buffer(struct scatterl
return sg_copy_buffer(sgl, nents, buf, buflen, 1);
}
EXPORT_SYMBOL(sg_copy_to_buffer);
+
+/*
+ * Copies up to copy_len bytes from src_sg to the destination position
+ * described by *pdst_sg, *pdst_len and *pdst_offs, and updates that position
+ * on return. Returns the number of bytes copied.
+ *
+ * Can switch to the next dst_sg element, so, to copy to strictly only
+ * one dst_sg element, it must be either last in the chain, or
+ * copy_len == dst_sg->length.
+ */
+static int sg_copy_elem(struct scatterlist **pdst_sg, size_t *pdst_len,
+			size_t *pdst_offs, struct scatterlist *src_sg,
+			size_t copy_len,
+			enum km_type d_km_type, enum km_type s_km_type)
+{
+	int res = 0;
+	struct scatterlist *dst_sg = *pdst_sg;
+	size_t dst_len = *pdst_len, dst_offs = *pdst_offs;
+	size_t src_len = src_sg->length, src_offs = src_sg->offset;
+	struct page *src_page = sg_page(src_sg), *dst_page = sg_page(dst_sg);
+
+	do {
+		void *saddr, *daddr;
+		size_t n;
+
+		saddr = kmap_atomic(src_page +
+					 (src_offs >> PAGE_SHIFT), s_km_type) +
+				    (src_offs & ~PAGE_MASK);
+		daddr = kmap_atomic(dst_page +
+					(dst_offs >> PAGE_SHIFT), d_km_type) +
+				    (dst_offs & ~PAGE_MASK);
+
+		if (((src_offs & ~PAGE_MASK) == 0) &&
+		    ((dst_offs & ~PAGE_MASK) == 0) &&
+		    (src_len >= PAGE_SIZE) && (dst_len >= PAGE_SIZE) &&
+		    (copy_len >= PAGE_SIZE)) {
+			copy_page(daddr, saddr);
+			n = PAGE_SIZE;
+		} else {
+			n = min_t(size_t, PAGE_SIZE - (dst_offs & ~PAGE_MASK),
+					  PAGE_SIZE - (src_offs & ~PAGE_MASK));
+			n = min(n, src_len);
+			n = min(n, dst_len);
+			n = min_t(size_t, n, copy_len);
+			memcpy(daddr, saddr, n);
+		}
+		dst_offs += n;
+		src_offs += n;
+
+		/*
+		 * Atomic kmaps are stack based since 2.6.37, so they must be
+		 * released in the reverse order of mapping: daddr first.
+		 */
+		kunmap_atomic(daddr, d_km_type);
+		kunmap_atomic(saddr, s_km_type);
+
+		res += n;
+		copy_len -= n;
+		if (copy_len == 0)
+			goto out;
+
+		src_len -= n;
+		dst_len -= n;
+		if (dst_len == 0) {
+			dst_sg = sg_next(dst_sg);
+			if (dst_sg == NULL)
+				goto out;
+			dst_page = sg_page(dst_sg);
+			dst_len = dst_sg->length;
+			dst_offs = dst_sg->offset;
+		}
+	} while (src_len > 0);
+
+out:
+	*pdst_sg = dst_sg;
+	*pdst_len = dst_len;
+	*pdst_offs = dst_offs;
+	return res;
+}
+
+/**
+ * sg_copy - copy one SG vector to another
+ * @dst_sg: destination SG
+ * @src_sg: source SG
+ * @nents_to_copy: maximum number of source entries to copy. If 0, copy all.
+ * @copy_len: maximum amount of data to copy. If 0, then copy all.
+ * @d_km_type: kmap_atomic type for the destination SG
+ * @s_km_type: kmap_atomic type for the source SG
+ *
+ * Description:
+ * Data from the source SG vector will be copied to the destination SG
+ * vector. End of the vectors will be determined by sg_next() returning
+ * NULL. Returns number of bytes copied.
+ */
+int sg_copy(struct scatterlist *dst_sg, struct scatterlist *src_sg,
+ int nents_to_copy, size_t copy_len,
+ enum km_type d_km_type, enum km_type s_km_type)
+{
+ int res = 0;
+ size_t dst_len, dst_offs;
+
+ if (copy_len == 0)
+ copy_len = 0x7FFFFFFF; /* copy all */
+
+ if (nents_to_copy == 0)
+ nents_to_copy = 0x7FFFFFFF; /* copy all */
+
+ dst_len = dst_sg->length;
+ dst_offs = dst_sg->offset;
+
+ do { /* one source element per iteration */
+ int copied = sg_copy_elem(&dst_sg, &dst_len, &dst_offs,
+ src_sg, copy_len, d_km_type, s_km_type);
+ copy_len -= copied;
+ res += copied;
+ if ((copy_len == 0) || (dst_sg == NULL)) /* limit hit or dst full */
+ goto out;
+
+ nents_to_copy--;
+ if (nents_to_copy == 0)
+ goto out;
+
+ src_sg = sg_next(src_sg);
+ } while (src_sg != NULL);
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(sg_copy);

View File

@@ -2936,6 +2936,9 @@ static void blockio_exec_rw(struct scst_cmd *cmd, struct scst_vdisk_thr *thr,
struct scst_blockio_work *blockio_work;
int bios = 0;
gfp_t gfp_mask = (cmd->noio_mem_alloc ? GFP_NOIO : GFP_KERNEL);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 39)
struct blk_plug plug;
#endif
TRACE_ENTRY();
@@ -3041,6 +3044,10 @@ static void blockio_exec_rw(struct scst_cmd *cmd, struct scst_vdisk_thr *thr,
/* +1 to prevent erroneous too early command completion */
atomic_set(&blockio_work->bios_inflight, bios+1);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 39)
blk_start_plug(&plug);
#endif
while (hbio) {
bio = hbio;
hbio = hbio->bi_next;
@@ -3048,8 +3055,12 @@ static void blockio_exec_rw(struct scst_cmd *cmd, struct scst_vdisk_thr *thr,
submit_bio((write != 0), bio);
}
if (q && q->unplug_fn)
q->unplug_fn(q);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 39)
blk_finish_plug(&plug);
#else
if (q && q->unplug_fn)
q->unplug_fn(q);
#endif
blockio_check_finish(blockio_work);

View File

@@ -567,7 +567,8 @@ out:
#ifndef CONFIG_SCST_PROC
/* Abstract vfs_unlink & path_put for different kernel versions */
/* Abstract vfs_unlink() for different kernel versions (as possible) */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39)
static inline void scst_pr_vfs_unlink_and_put(struct nameidata *nd)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
@@ -580,7 +581,15 @@ static inline void scst_pr_vfs_unlink_and_put(struct nameidata *nd)
path_put(&nd->path);
#endif
}
#else
static inline void scst_pr_vfs_unlink_and_put(struct path *path)
{
vfs_unlink(path->dentry->d_parent->d_inode, path->dentry); /* result ignored */
path_put(path); /* drops the reference callers obtained via kern_path() */
}
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39)
static inline void scst_pr_path_put(struct nameidata *nd)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
@@ -590,6 +599,7 @@ static inline void scst_pr_path_put(struct nameidata *nd)
path_put(&nd->path);
#endif
}
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)
static int scst_pr_vfs_fsync(struct file *file, loff_t loff, loff_t len)
@@ -906,13 +916,18 @@ static void scst_pr_remove_device_files(struct scst_tgt_dev *tgt_dev)
{
int res = 0;
struct scst_device *dev = tgt_dev->dev;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39)
struct nameidata nd;
#else
struct path path;
#endif
mm_segment_t old_fs = get_fs();
TRACE_ENTRY();
set_fs(KERNEL_DS);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39)
res = path_lookup(dev->pr_file_name, 0, &nd);
if (!res)
scst_pr_vfs_unlink_and_put(&nd);
@@ -926,6 +941,21 @@ static void scst_pr_remove_device_files(struct scst_tgt_dev *tgt_dev)
else
TRACE_DBG("Unable to lookup file '%s' - error %d",
dev->pr_file_name1, res);
#else
res = kern_path(dev->pr_file_name, 0, &path);
if (!res)
scst_pr_vfs_unlink_and_put(&path);
else
TRACE_DBG("Unable to lookup file '%s' - error %d",
dev->pr_file_name, res);
res = kern_path(dev->pr_file_name1, 0, &path);
if (!res)
scst_pr_vfs_unlink_and_put(&path);
else
TRACE_DBG("Unable to lookup file '%s' - error %d",
dev->pr_file_name1, res);
#endif
set_fs(old_fs);
@@ -1104,6 +1134,7 @@ write_error:
write_error_close:
filp_close(file, NULL);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39)
{
struct nameidata nd;
int rc;
@@ -1115,20 +1146,44 @@ write_error_close:
TRACE_PR("Unable to lookup '%s' - error %d",
dev->pr_file_name, rc);
}
#else
{
struct path path;
int rc;
rc = kern_path(dev->pr_file_name, 0, &path);
if (!rc)
scst_pr_vfs_unlink_and_put(&path);
else
TRACE_PR("Unable to lookup '%s' - error %d",
dev->pr_file_name, rc);
}
#endif
goto out_set_fs;
}
static int scst_pr_check_pr_path(void)
{
int res;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39)
struct nameidata nd;
#else
struct path path;
#endif
mm_segment_t old_fs = get_fs();
TRACE_ENTRY();
set_fs(KERNEL_DS);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39)
res = path_lookup(SCST_PR_DIR, 0, &nd);
scst_pr_path_put(&nd);
#else
res = kern_path(SCST_PR_DIR, 0, &path);
path_put(&path);
#endif
if (res != 0) {
PRINT_ERROR("Unable to find %s (err %d), you should create "
"this directory manually or reinstall SCST",
@@ -1136,8 +1191,6 @@ static int scst_pr_check_pr_path(void)
goto out_setfs;
}
scst_pr_path_put(&nd);
out_setfs:
set_fs(old_fs);

View File

@@ -2635,9 +2635,11 @@ static inline int scst_real_exec(struct scst_cmd *cmd)
res = scst_do_real_exec(cmd);
if (likely(res == SCST_EXEC_COMPLETED)) {
scst_post_exec_sn(cmd, true);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39)
if (cmd->dev->scsi_dev != NULL)
generic_unplug_device(
cmd->dev->scsi_dev->request_queue);
#endif
} else
sBUG();
@@ -2740,7 +2742,6 @@ static int scst_exec(struct scst_cmd **active_cmd)
{
struct scst_cmd *cmd = *active_cmd;
struct scst_cmd *ref_cmd;
struct scst_device *dev = cmd->dev;
int res = SCST_CMD_STATE_RES_CONT_NEXT, count = 0;
TRACE_ENTRY();
@@ -2805,8 +2806,10 @@ done:
if (count == 0)
goto out_put;
if (dev->scsi_dev != NULL)
generic_unplug_device(dev->scsi_dev->request_queue);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39)
if (ref_cmd->dev->scsi_dev != NULL)
generic_unplug_device(ref_cmd->dev->scsi_dev->request_queue);
#endif
out_put:
__scst_cmd_put(ref_cmd);

View File

@@ -917,10 +917,16 @@ static int scst_local_send_resp(struct scsi_cmnd *cmnd,
* This does the heavy lifting ... we pass all the commands on to the
* target driver and have it do its magic ...
*/
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37) && \
defined(CONFIG_SCST_LOCAL_FORCE_DIRECT_PROCESSING)
static int scst_local_queuecommand(struct Scsi_Host *host,
struct scsi_cmnd *SCpnt)
#else
static int scst_local_queuecommand_lck(struct scsi_cmnd *SCpnt,
void (*done)(struct scsi_cmnd *))
__acquires(&h->host_lock)
__releases(&h->host_lock)
#endif
{
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25))
struct scst_local_tgt_specific *tgt_specific = NULL;
@@ -1071,23 +1077,32 @@ static int scst_local_queuecommand_lck(struct scsi_cmnd *SCpnt,
/*
* Although starting from 2.6.37 queuecommand() called with no host_lock
* held, in reality without DEF_SCSI_QCMD() this doesn't work and leading
* to various problems like commands lost under highload. So, until that fixed
* we have to go ahead under host_lock, although absolutely don't need it.
*
* NOTE! At the moment in scst_estimate_context*() returning DIRECT contexts
* disabled, so this option doesn't have any real effect.
* held, in fact without DEF_SCSI_QCMD() it doesn't work and leads
* to various problems like hangs under high load. Most likely, it is caused
* by some non-reentrant block layer function(s). So, until that changes, we
* have to go ahead with an extra context switch. In this regard it doesn't
* matter much whether we are under host_lock or not (although we absolutely
* don't need this lock), so let's have simpler code with DEF_SCSI_QCMD().
*/
#ifdef CONFIG_SCST_LOCAL_FORCE_DIRECT_PROCESSING
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)
scst_cmd_init_done(scst_cmd, SCST_CONTEXT_DIRECT);
#else
{
/*
* NOTE! At the moment in scst_estimate_context*() returning
* DIRECT contexts disabled, so this option doesn't have any
* real effect.
*/
struct Scsi_Host *h = SCpnt->device->host;
spin_unlock_irq(h->host_lock);
scst_cmd_init_done(scst_cmd, scst_estimate_context_atomic());
spin_lock_irq(h->host_lock);
}
#endif
#else
/*
* Unfortunately, we called with IRQs disabled, so have no choice,
* We called with IRQs disabled, so have no choice,
* except to pass to the thread context.
*/
scst_cmd_init_done(scst_cmd, SCST_CONTEXT_THREAD);
@@ -1097,7 +1112,8 @@ static int scst_local_queuecommand_lck(struct scsi_cmnd *SCpnt,
return 0;
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37)
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37) && \
!defined(CONFIG_SCST_LOCAL_FORCE_DIRECT_PROCESSING)
/*
* See comment in scst_local_queuecommand_lck() near
* CONFIG_SCST_LOCAL_FORCE_DIRECT_PROCESSING
@@ -1940,4 +1956,3 @@ static void __exit scst_local_exit(void)
device_initcall(scst_local_init);
module_exit(scst_local_exit);