diff --git a/iscsi-scst/kernel/patches/put_page_callback-2.6.32.patch b/iscsi-scst/kernel/patches/put_page_callback-2.6.32.patch new file mode 100644 index 000000000..a6667a6ee --- /dev/null +++ b/iscsi-scst/kernel/patches/put_page_callback-2.6.32.patch @@ -0,0 +1,390 @@ +diff -upkr linux-2.6.32.1/include/linux/mm_types.h linux-2.6.32.1/include/linux/mm_types.h +--- linux-2.6.32.1/include/linux/mm_types.h 2009-12-03 06:51:21.000000000 +0300 ++++ linux-2.6.32.1/include/linux/mm_types.h 2009-12-16 15:22:16.000000000 +0300 +@@ -106,6 +106,18 @@ struct page { + */ + void *shadow; + #endif ++ ++#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) ++ /* ++ * Used to implement support for notification on zero-copy TCP transfer ++ * completion. It might look as not good to have this field here and ++ * it's better to have it in struct sk_buff, but it would make the code ++ * much more complicated and fragile, since all skb then would have to ++ * contain only pages with the same value in this field. 
++ */ ++ void *net_priv; ++#endif ++ + }; + + /* +diff -upkr linux-2.6.32.1/include/linux/net.h linux-2.6.32.1/include/linux/net.h +--- linux-2.6.32.1/include/linux/net.h 2009-12-03 06:51:21.000000000 +0300 ++++ linux-2.6.32.1/include/linux/net.h 2009-12-16 15:23:08.000000000 +0300 +@@ -20,6 +20,7 @@ + + #include + #include ++#include + + #define NPROTO AF_MAX + +@@ -361,5 +362,44 @@ static const struct proto_ops name##_ops + extern struct ratelimit_state net_ratelimit_state; + #endif + ++#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) ++/* Support for notification on zero-copy TCP transfer completion */ ++typedef void (*net_get_page_callback_t)(struct page *page); ++typedef void (*net_put_page_callback_t)(struct page *page); ++ ++extern net_get_page_callback_t net_get_page_callback; ++extern net_put_page_callback_t net_put_page_callback; ++ ++extern int net_set_get_put_page_callbacks( ++ net_get_page_callback_t get_callback, ++ net_put_page_callback_t put_callback); ++ ++/* ++ * See comment for net_set_get_put_page_callbacks() why those functions ++ * don't need any protection. 
++ */ ++static inline void net_get_page(struct page *page) ++{ ++ if (page->net_priv != 0) ++ net_get_page_callback(page); ++ get_page(page); ++} ++static inline void net_put_page(struct page *page) ++{ ++ if (page->net_priv != 0) ++ net_put_page_callback(page); ++ put_page(page); ++} ++#else ++static inline void net_get_page(struct page *page) ++{ ++ get_page(page); ++} ++static inline void net_put_page(struct page *page) ++{ ++ put_page(page); ++} ++#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */ ++ + #endif /* __KERNEL__ */ + #endif /* _LINUX_NET_H */ +diff -upkr linux-2.6.32.1/net/core/dev.c linux-2.6.32.1/net/core/dev.c +--- linux-2.6.32.1/net/core/dev.c 2009-12-03 06:51:21.000000000 +0300 ++++ linux-2.6.32.1/net/core/dev.c 2009-12-16 15:22:16.000000000 +0300 +@@ -2516,7 +2516,7 @@ pull: + skb_shinfo(skb)->frags[0].size -= grow; + + if (unlikely(!skb_shinfo(skb)->frags[0].size)) { +- put_page(skb_shinfo(skb)->frags[0].page); ++ net_put_page(skb_shinfo(skb)->frags[0].page); + memmove(skb_shinfo(skb)->frags, + skb_shinfo(skb)->frags + 1, + --skb_shinfo(skb)->nr_frags); +diff -upkr linux-2.6.32.1/net/core/skbuff.c linux-2.6.32.1/net/core/skbuff.c +--- linux-2.6.32.1/net/core/skbuff.c 2009-12-03 06:51:21.000000000 +0300 ++++ linux-2.6.32.1/net/core/skbuff.c 2009-12-16 15:22:16.000000000 +0300 +@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_ + static void sock_pipe_buf_release(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) + { +- put_page(buf->page); ++ net_put_page(buf->page); + } + + static void sock_pipe_buf_get(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) + { +- get_page(buf->page); ++ net_get_page(buf->page); + } + + static int sock_pipe_buf_steal(struct pipe_inode_info *pipe, +@@ -344,7 +344,7 @@ static void skb_release_data(struct sk_b + if (skb_shinfo(skb)->nr_frags) { + int i; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) +- put_page(skb_shinfo(skb)->frags[i].page); ++ 
net_put_page(skb_shinfo(skb)->frags[i].page); + } + + if (skb_has_frags(skb)) +@@ -762,7 +762,7 @@ struct sk_buff *pskb_copy(struct sk_buff + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; +- get_page(skb_shinfo(n)->frags[i].page); ++ net_get_page(skb_shinfo(n)->frags[i].page); + } + skb_shinfo(n)->nr_frags = i; + } +@@ -828,7 +828,7 @@ int pskb_expand_head(struct sk_buff *skb + sizeof(struct skb_shared_info)); + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) +- get_page(skb_shinfo(skb)->frags[i].page); ++ net_get_page(skb_shinfo(skb)->frags[i].page); + + if (skb_has_frags(skb)) + skb_clone_fraglist(skb); +@@ -1102,7 +1102,7 @@ drop_pages: + skb_shinfo(skb)->nr_frags = i; + + for (; i < nfrags; i++) +- put_page(skb_shinfo(skb)->frags[i].page); ++ net_put_page(skb_shinfo(skb)->frags[i].page); + + if (skb_has_frags(skb)) + skb_drop_fraglist(skb); +@@ -1271,7 +1271,7 @@ pull_pages: + k = 0; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + if (skb_shinfo(skb)->frags[i].size <= eat) { +- put_page(skb_shinfo(skb)->frags[i].page); ++ net_put_page(skb_shinfo(skb)->frags[i].page); + eat -= skb_shinfo(skb)->frags[i].size; + } else { + skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; +@@ -1372,7 +1372,7 @@ EXPORT_SYMBOL(skb_copy_bits); + */ + static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) + { +- put_page(spd->pages[i]); ++ net_put_page(spd->pages[i]); + } + + static inline struct page *linear_to_page(struct page *page, unsigned int *len, +@@ -1396,7 +1396,7 @@ new_page: + off = sk->sk_sndmsg_off; + mlen = PAGE_SIZE - off; + if (mlen < 64 && mlen < *len) { +- put_page(p); ++ net_put_page(p); + goto new_page; + } + +@@ -1406,7 +1406,7 @@ new_page: + memcpy(page_address(p) + off, page_address(page) + *offset, *len); + sk->sk_sndmsg_off += *len; + *offset = off; +- get_page(p); ++ net_get_page(p); + + return p; + } +@@ -1427,7 +1427,7 @@ static inline int spd_fill_page(struct s + 
if (!page) + return 1; + } else +- get_page(page); ++ net_get_page(page); + + spd->pages[spd->nr_pages] = page; + spd->partial[spd->nr_pages].len = *len; +@@ -2057,7 +2057,7 @@ static inline void skb_split_no_header(s + * where splitting is expensive. + * 2. Split is accurately. We make this. + */ +- get_page(skb_shinfo(skb)->frags[i].page); ++ net_get_page(skb_shinfo(skb)->frags[i].page); + skb_shinfo(skb1)->frags[0].page_offset += len - pos; + skb_shinfo(skb1)->frags[0].size -= len - pos; + skb_shinfo(skb)->frags[i].size = len - pos; +@@ -2179,7 +2179,7 @@ int skb_shift(struct sk_buff *tgt, struc + to++; + + } else { +- get_page(fragfrom->page); ++ net_get_page(fragfrom->page); + fragto->page = fragfrom->page; + fragto->page_offset = fragfrom->page_offset; + fragto->size = todo; +@@ -2201,7 +2201,7 @@ int skb_shift(struct sk_buff *tgt, struc + fragto = &skb_shinfo(tgt)->frags[merge]; + + fragto->size += fragfrom->size; +- put_page(fragfrom->page); ++ net_put_page(fragfrom->page); + } + + /* Reposition in the original skb */ +@@ -2599,7 +2599,7 @@ struct sk_buff *skb_segment(struct sk_bu + + while (pos < offset + len && i < nfrags) { + *frag = skb_shinfo(skb)->frags[i]; +- get_page(frag->page); ++ net_get_page(frag->page); + size = frag->size; + + if (pos < offset) { +diff -upkr linux-2.6.32.1/net/ipv4/ip_output.c linux-2.6.32.1/net/ipv4/ip_output.c +--- linux-2.6.32.1/net/ipv4/ip_output.c 2009-12-03 06:51:21.000000000 +0300 ++++ linux-2.6.32.1/net/ipv4/ip_output.c 2009-12-16 15:22:16.000000000 +0300 +@@ -1020,7 +1020,7 @@ alloc_new_skb: + err = -EMSGSIZE; + goto error; + } +- get_page(page); ++ net_get_page(page); + skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); + frag = &skb_shinfo(skb)->frags[i]; + } +@@ -1178,7 +1178,7 @@ ssize_t ip_append_page(struct sock *sk, + if (skb_can_coalesce(skb, i, page, offset)) { + skb_shinfo(skb)->frags[i-1].size += len; + } else if (i < MAX_SKB_FRAGS) { +- get_page(page); ++ net_get_page(page); + skb_fill_page_desc(skb, 
i, page, offset, len); + } else { + err = -EMSGSIZE; +diff -upkr linux-2.6.32.1/net/ipv4/Makefile linux-2.6.32.1/net/ipv4/Makefile +--- linux-2.6.32.1/net/ipv4/Makefile 2009-12-03 06:51:21.000000000 +0300 ++++ linux-2.6.32.1/net/ipv4/Makefile 2009-12-16 15:22:16.000000000 +0300 +@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o + obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o + obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o + obj-$(CONFIG_NETLABEL) += cipso_ipv4.o ++obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o + + obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ + xfrm4_output.o +diff -upkr linux-2.6.32.1/net/ipv4/tcp.c linux-2.6.32.1/net/ipv4/tcp.c +--- linux-2.6.32.1/net/ipv4/tcp.c 2009-12-03 06:51:21.000000000 +0300 ++++ linux-2.6.32.1/net/ipv4/tcp.c 2009-12-16 15:22:16.000000000 +0300 +@@ -799,7 +799,7 @@ new_segment: + if (can_coalesce) { + skb_shinfo(skb)->frags[i - 1].size += copy; + } else { +- get_page(page); ++ net_get_page(page); + skb_fill_page_desc(skb, i, page, offset, copy); + } + +@@ -1007,7 +1007,7 @@ new_segment: + goto new_segment; + } else if (page) { + if (off == PAGE_SIZE) { +- put_page(page); ++ net_put_page(page); + TCP_PAGE(sk) = page = NULL; + off = 0; + } +@@ -1048,9 +1048,9 @@ new_segment: + } else { + skb_fill_page_desc(skb, i, page, off, copy); + if (TCP_PAGE(sk)) { +- get_page(page); ++ net_get_page(page); + } else if (off + copy < PAGE_SIZE) { +- get_page(page); ++ net_get_page(page); + TCP_PAGE(sk) = page; + } + } +diff -upkr linux-2.6.32.1/net/ipv4/tcp_output.c linux-2.6.32.1/net/ipv4/tcp_output.c +--- linux-2.6.32.1/net/ipv4/tcp_output.c 2009-12-03 06:51:21.000000000 +0300 ++++ linux-2.6.32.1/net/ipv4/tcp_output.c 2009-12-16 15:22:16.000000000 +0300 +@@ -909,7 +909,7 @@ static void __pskb_trim_head(struct sk_b + k = 0; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + if (skb_shinfo(skb)->frags[i].size <= eat) { +- put_page(skb_shinfo(skb)->frags[i].page); ++ 
net_put_page(skb_shinfo(skb)->frags[i].page); + eat -= skb_shinfo(skb)->frags[i].size; + } else { + skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; +diff -upkr linux-2.6.32.1/net/ipv4/tcp_zero_copy.c linux-2.6.32.1/net/ipv4/tcp_zero_copy.c +--- linux-2.6.32.1/net/ipv4/tcp_zero_copy.c 2009-12-08 17:41:11.000000000 +0300 ++++ linux-2.6.32.1/net/ipv4/tcp_zero_copy.c 2009-12-16 15:22:16.000000000 +0300 +@@ -0,0 +1,49 @@ ++/* ++ * Support routines for TCP zero copy transmit ++ * ++ * Created by Vladislav Bolkhovitin ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * version 2 as published by the Free Software Foundation. ++ */ ++ ++#include ++ ++net_get_page_callback_t net_get_page_callback __read_mostly; ++EXPORT_SYMBOL(net_get_page_callback); ++ ++net_put_page_callback_t net_put_page_callback __read_mostly; ++EXPORT_SYMBOL(net_put_page_callback); ++ ++/* ++ * Caller of this function must ensure that at the moment when it's called ++ * there are no pages in the system with net_priv field set to non-zero ++ * value. Hence, this function, as well as net_get_page() and net_put_page(), ++ * don't need any protection. 
++ */ ++int net_set_get_put_page_callbacks( ++ net_get_page_callback_t get_callback, ++ net_put_page_callback_t put_callback) ++{ ++ int res = 0; ++ ++ if ((net_get_page_callback != NULL) && (get_callback != NULL) && ++ (net_get_page_callback != get_callback)) { ++ res = -EBUSY; ++ goto out; ++ } ++ ++ if ((net_put_page_callback != NULL) && (put_callback != NULL) && ++ (net_put_page_callback != put_callback)) { ++ res = -EBUSY; ++ goto out; ++ } ++ ++ net_get_page_callback = get_callback; ++ net_put_page_callback = put_callback; ++ ++out: ++ return res; ++} ++EXPORT_SYMBOL(net_set_get_put_page_callbacks); +diff -upkr linux-2.6.32.1/net/ipv6/ip6_output.c linux-2.6.32.1/net/ipv6/ip6_output.c +--- linux-2.6.32.1/net/ipv6/ip6_output.c 2009-12-03 06:51:21.000000000 +0300 ++++ linux-2.6.32.1/net/ipv6/ip6_output.c 2009-12-16 15:22:16.000000000 +0300 +@@ -1379,7 +1379,7 @@ alloc_new_skb: + err = -EMSGSIZE; + goto error; + } +- get_page(page); ++ net_get_page(page); + skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); + frag = &skb_shinfo(skb)->frags[i]; + } +diff -upkr linux-2.6.32.1/net/Kconfig linux-2.6.32.1/net/Kconfig +--- linux-2.6.32.1/net/Kconfig 2009-12-03 06:51:21.000000000 +0300 ++++ linux-2.6.32.1/net/Kconfig 2009-12-16 15:22:16.000000000 +0300 +@@ -72,6 +72,18 @@ config INET + + Short answer: say Y. + ++config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION ++ bool "TCP/IP zero-copy transfer completion notification" ++ depends on INET ++ default SCST_ISCSI ++ ---help--- ++ Adds support for sending a notification upon completion of a ++ zero-copy TCP/IP transfer. This can speed up certain TCP/IP ++ software. Currently this is only used by the iSCSI target driver ++ iSCSI-SCST. ++ ++ If unsure, say N. 
++ + if INET + source "net/ipv4/Kconfig" + source "net/ipv6/Kconfig" diff --git a/scst/kernel/io_context-2.6.32.patch b/scst/kernel/io_context-2.6.32.patch new file mode 100644 index 000000000..bd855dc1e --- /dev/null +++ b/scst/kernel/io_context-2.6.32.patch @@ -0,0 +1,61 @@ +diff -upkr linux-2.6.32.1/block/blk-ioc.c linux-2.6.32.1/block/blk-ioc.c +--- linux-2.6.32.1/block/blk-ioc.c 2009-12-03 06:51:21.000000000 +0300 ++++ linux-2.6.32.1/block/blk-ioc.c 2009-12-16 15:20:36.000000000 +0300 +@@ -65,6 +65,21 @@ static void cfq_exit(struct io_context * + rcu_read_unlock(); + } + ++void __exit_io_context(struct io_context *ioc) ++{ ++ if (ioc == NULL) ++ return; ++ ++ if (atomic_dec_and_test(&ioc->nr_tasks)) { ++ if (ioc->aic && ioc->aic->exit) ++ ioc->aic->exit(ioc->aic); ++ cfq_exit(ioc); ++ ++ put_io_context(ioc); ++ } ++} ++EXPORT_SYMBOL(__exit_io_context); ++ + /* Called by the exitting task */ + void exit_io_context(void) + { +@@ -75,13 +90,7 @@ void exit_io_context(void) + current->io_context = NULL; + task_unlock(current); + +- if (atomic_dec_and_test(&ioc->nr_tasks)) { +- if (ioc->aic && ioc->aic->exit) +- ioc->aic->exit(ioc->aic); +- cfq_exit(ioc); +- +- put_io_context(ioc); +- } ++ __exit_io_context(ioc); + } + + struct io_context *alloc_io_context(gfp_t gfp_flags, int node) +@@ -105,6 +114,7 @@ struct io_context *alloc_io_context(gfp_ + + return ret; + } ++EXPORT_SYMBOL(alloc_io_context); + + /* + * If the current task has no IO context then create one and initialise it. 
+diff -upkr linux-2.6.32.1/include/linux/iocontext.h linux-2.6.32.1/include/linux/iocontext.h +--- linux-2.6.32.1/include/linux/iocontext.h 2009-12-03 06:51:21.000000000 +0300 ++++ linux-2.6.32.1/include/linux/iocontext.h 2009-12-16 15:20:36.000000000 +0300 +@@ -103,7 +103,9 @@ static inline struct io_context *ioc_tas + int put_io_context(struct io_context *ioc); + void exit_io_context(void); + struct io_context *get_io_context(gfp_t gfp_flags, int node); ++#define SCST_IO_CONTEXT + struct io_context *alloc_io_context(gfp_t gfp_flags, int node); ++void __exit_io_context(struct io_context *ioc); + void copy_io_context(struct io_context **pdst, struct io_context **psrc); + #else + static inline void exit_io_context(void) diff --git a/scst/kernel/readahead-2.6.32.patch b/scst/kernel/readahead-2.6.32.patch new file mode 100644 index 000000000..3ff3f37cd --- /dev/null +++ b/scst/kernel/readahead-2.6.32.patch @@ -0,0 +1,12 @@ +diff -upkr linux-2.6.32.1/mm/readahead.c linux-2.6.32.1/mm/readahead.c +--- linux-2.6.32.1/mm/readahead.c 2009-12-03 06:51:21.000000000 +0300 ++++ linux-2.6.32.1/mm/readahead.c 2009-12-16 15:21:00.000000000 +0300 +@@ -547,5 +547,8 @@ page_cache_async_readahead(struct addres + + /* do read-ahead */ + ondemand_readahead(mapping, ra, filp, true, offset, req_size); ++ ++ if (PageUptodate(page)) ++ blk_run_backing_dev(mapping->backing_dev_info, NULL); + } + EXPORT_SYMBOL_GPL(page_cache_async_readahead); diff --git a/scst/kernel/scst_exec_req_fifo-2.6.32.patch b/scst/kernel/scst_exec_req_fifo-2.6.32.patch new file mode 100644 index 000000000..3400b7b56 --- /dev/null +++ b/scst/kernel/scst_exec_req_fifo-2.6.32.patch @@ -0,0 +1,528 @@ +diff -upkr linux-2.6.32.1/block/blk-map.c linux-2.6.32.1/block/blk-map.c +--- linux-2.6.32.1/block/blk-map.c 2009-12-03 06:51:21.000000000 +0300 ++++ linux-2.6.32.1/block/blk-map.c 2009-12-16 15:21:35.000000000 +0300 +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include + #include /* for struct sg_iovec */ + + 
#include "blk.h" +@@ -271,6 +272,336 @@ int blk_rq_unmap_user(struct bio *bio) + } + EXPORT_SYMBOL(blk_rq_unmap_user); + ++struct blk_kern_sg_work { ++ atomic_t bios_inflight; ++ struct sg_table sg_table; ++ struct scatterlist *src_sgl; ++}; ++ ++static void blk_free_kern_sg_work(struct blk_kern_sg_work *bw) ++{ ++ sg_free_table(&bw->sg_table); ++ kfree(bw); ++ return; ++} ++ ++static void blk_bio_map_kern_endio(struct bio *bio, int err) ++{ ++ struct blk_kern_sg_work *bw = bio->bi_private; ++ ++ if (bw != NULL) { ++ /* Decrement the bios in processing and, if zero, free */ ++ BUG_ON(atomic_read(&bw->bios_inflight) <= 0); ++ if (atomic_dec_and_test(&bw->bios_inflight)) { ++ if ((bio_data_dir(bio) == READ) && (err == 0)) { ++ unsigned long flags; ++ ++ local_irq_save(flags); /* to protect KMs */ ++ sg_copy(bw->src_sgl, bw->sg_table.sgl, 0, 0, ++ KM_BIO_DST_IRQ, KM_BIO_SRC_IRQ); ++ local_irq_restore(flags); ++ } ++ blk_free_kern_sg_work(bw); ++ } ++ } ++ ++ bio_put(bio); ++ return; ++} ++ ++static int blk_rq_copy_kern_sg(struct request *rq, struct scatterlist *sgl, ++ int nents, struct blk_kern_sg_work **pbw, ++ gfp_t gfp, gfp_t page_gfp) ++{ ++ int res = 0, i; ++ struct scatterlist *sg; ++ struct scatterlist *new_sgl; ++ int new_sgl_nents; ++ size_t len = 0, to_copy; ++ struct blk_kern_sg_work *bw; ++ ++ bw = kzalloc(sizeof(*bw), gfp); ++ if (bw == NULL) ++ goto out_free_bw; /* kfree(NULL) is a no-op; res becomes -ENOMEM */ ++ ++ bw->src_sgl = sgl; ++ ++ for_each_sg(sgl, sg, nents, i) ++ len += sg->length; ++ to_copy = len; ++ ++ new_sgl_nents = PFN_UP(len); ++ ++ res = sg_alloc_table(&bw->sg_table, new_sgl_nents, gfp); ++ if (res != 0) ++ goto out_free_bw; ++ ++ new_sgl = bw->sg_table.sgl; ++ ++ for_each_sg(new_sgl, sg, new_sgl_nents, i) { ++ struct page *pg; ++ ++ pg = alloc_page(page_gfp); ++ if (pg == NULL) ++ goto err_free_new_sgl; ++ ++ sg_assign_page(sg, pg); ++ sg->length = min_t(size_t, PAGE_SIZE, len); ++ ++ len -= PAGE_SIZE; ++ } ++ ++ if (rq_data_dir(rq) == WRITE) { ++ /* ++ * We need to limit amount of copied
data to to_copy, because ++ * sgl might have the last element in sgl not marked as last in ++ * SG chaining. ++ */ ++ sg_copy(new_sgl, sgl, 0, to_copy, ++ KM_USER0, KM_USER1); ++ } ++ ++ *pbw = bw; ++ /* ++ * REQ_COPY_USER name is misleading. It should be something like ++ * REQ_HAS_TAIL_SPACE_FOR_PADDING. ++ */ ++ rq->cmd_flags |= REQ_COPY_USER; ++ ++out: ++ return res; ++ ++err_free_new_sgl: ++ for_each_sg(new_sgl, sg, new_sgl_nents, i) { ++ struct page *pg = sg_page(sg); ++ if (pg == NULL) ++ break; ++ __free_page(pg); ++ } ++ sg_free_table(&bw->sg_table); ++ ++out_free_bw: ++ kfree(bw); ++ res = -ENOMEM; ++ goto out; ++} ++ ++static int __blk_rq_map_kern_sg(struct request *rq, struct scatterlist *sgl, ++ int nents, struct blk_kern_sg_work *bw, gfp_t gfp) ++{ ++ int res; ++ struct request_queue *q = rq->q; ++ int rw = rq_data_dir(rq); ++ int max_nr_vecs, i; ++ size_t tot_len; ++ bool need_new_bio; ++ struct scatterlist *sg, *prev_sg = NULL; ++ struct bio *bio = NULL, *hbio = NULL, *tbio = NULL; ++ int bios; ++ ++ if (unlikely((sgl == NULL) || (sgl->length == 0) || (nents <= 0))) { ++ WARN_ON(1); ++ res = -EINVAL; ++ goto out; ++ } ++ ++ /* ++ * Let's keep each bio allocation inside a single page to decrease ++ * probability of failure. ++ */ ++ max_nr_vecs = min_t(size_t, ++ ((PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec)), ++ BIO_MAX_PAGES); ++ ++ need_new_bio = true; ++ tot_len = 0; ++ bios = 0; ++ for_each_sg(sgl, sg, nents, i) { ++ struct page *page = sg_page(sg); ++ void *page_addr = page_address(page); ++ size_t len = sg->length, l; ++ size_t offset = sg->offset; ++ ++ tot_len += len; ++ prev_sg = sg; ++ ++ /* ++ * Each segment must be aligned on DMA boundary and ++ * not on stack. The last one may have unaligned ++ * length as long as the total length is aligned to ++ * DMA padding alignment. 
++ */ ++ if (i == nents - 1) ++ l = 0; ++ else ++ l = len; ++ if (((sg->offset | l) & queue_dma_alignment(q)) || ++ (page_addr && object_is_on_stack(page_addr + sg->offset))) { ++ res = -EINVAL; ++ goto out_free_bios; ++ } ++ ++ while (len > 0) { ++ size_t bytes; ++ int rc; ++ ++ if (need_new_bio) { ++ bio = bio_kmalloc(gfp, max_nr_vecs); ++ if (bio == NULL) { ++ res = -ENOMEM; ++ goto out_free_bios; ++ } ++ ++ if (rw == WRITE) ++ bio->bi_rw |= 1 << BIO_RW; ++ ++ bios++; ++ bio->bi_private = bw; ++ bio->bi_end_io = blk_bio_map_kern_endio; ++ ++ if (hbio == NULL) ++ hbio = tbio = bio; ++ else ++ tbio = tbio->bi_next = bio; ++ } ++ ++ bytes = min_t(size_t, len, PAGE_SIZE - offset); ++ ++ rc = bio_add_pc_page(q, bio, page, bytes, offset); ++ if (rc < 0 || (size_t)rc < bytes) { ++ if (unlikely(need_new_bio || (rc < 0))) { ++ if (rc < 0) ++ res = rc; ++ else ++ res = -EIO; ++ goto out_free_bios; ++ } else { ++ need_new_bio = true; ++ len -= rc; ++ offset += rc; ++ continue; ++ } ++ } ++ ++ need_new_bio = false; ++ offset = 0; ++ len -= bytes; ++ page = nth_page(page, 1); ++ } ++ } ++ ++ if (hbio == NULL) { ++ res = -EINVAL; ++ goto out_free_bios; ++ } ++ ++ /* Total length must be aligned on DMA padding alignment */ ++ if ((tot_len & q->dma_pad_mask) && ++ !(rq->cmd_flags & REQ_COPY_USER)) { ++ res = -EINVAL; ++ goto out_free_bios; ++ } ++ ++ if (bw != NULL) ++ atomic_set(&bw->bios_inflight, bios); ++ ++ while (hbio != NULL) { ++ bio = hbio; ++ hbio = hbio->bi_next; ++ bio->bi_next = NULL; ++ ++ blk_queue_bounce(q, &bio); ++ ++ res = blk_rq_append_bio(q, rq, bio); ++ if (unlikely(res != 0)) { ++ bio->bi_next = hbio; ++ hbio = bio; ++ /* We can have one or more bios bounced */ ++ goto out_unmap_bios; ++ } ++ } ++ ++ rq->buffer = NULL; ++out: ++ return res; ++ ++out_free_bios: ++ while (hbio != NULL) { ++ bio = hbio; ++ hbio = hbio->bi_next; ++ bio_put(bio); ++ } ++ goto out; ++ ++out_unmap_bios: ++ blk_rq_unmap_kern_sg(rq, res); ++ goto out; ++} ++ ++/** ++ * blk_rq_map_kern_sg - map
kernel data to a request, for REQ_TYPE_BLOCK_PC ++ * @rq: request to fill ++ * @sgl: area to map ++ * @nents: number of elements in @sgl ++ * @gfp: memory allocation flags ++ * ++ * Description: ++ * Data will be mapped directly if possible. Otherwise a bounce ++ * buffer will be used. ++ */ ++int blk_rq_map_kern_sg(struct request *rq, struct scatterlist *sgl, ++ int nents, gfp_t gfp) ++{ ++ int res; ++ ++ res = __blk_rq_map_kern_sg(rq, sgl, nents, NULL, gfp); ++ if (unlikely(res != 0)) { ++ struct blk_kern_sg_work *bw = NULL; ++ ++ res = blk_rq_copy_kern_sg(rq, sgl, nents, &bw, ++ gfp, rq->q->bounce_gfp | gfp); ++ if (unlikely(res != 0)) ++ goto out; ++ ++ res = __blk_rq_map_kern_sg(rq, bw->sg_table.sgl, ++ bw->sg_table.nents, bw, gfp); ++ if (res != 0) { ++ blk_free_kern_sg_work(bw); ++ goto out; ++ } ++ } ++ ++ rq->buffer = NULL; ++ ++out: ++ return res; ++} ++EXPORT_SYMBOL(blk_rq_map_kern_sg); ++ ++/** ++ * blk_rq_unmap_kern_sg - unmap a request with kernel sg ++ * @rq: request to unmap ++ * @err: non-zero error code ++ * ++ * Description: ++ * Unmap a rq previously mapped by blk_rq_map_kern_sg(). Must be called ++ * only in case of an error! 
++ */ ++void blk_rq_unmap_kern_sg(struct request *rq, int err) ++{ ++ struct bio *bio = rq->bio; ++ ++ while (bio) { ++ struct bio *b = bio; ++ bio = bio->bi_next; ++ b->bi_end_io(b, err); ++ } ++ rq->bio = NULL; ++ ++ return; ++} ++EXPORT_SYMBOL(blk_rq_unmap_kern_sg); ++ + /** + * blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage + * @q: request queue where request should be inserted +diff -upkr linux-2.6.32.1/include/linux/blkdev.h linux-2.6.32.1/include/linux/blkdev.h +--- linux-2.6.32.1/include/linux/blkdev.h 2009-12-03 06:51:21.000000000 +0300 ++++ linux-2.6.32.1/include/linux/blkdev.h 2009-12-16 15:21:35.000000000 +0300 +@@ -708,6 +708,8 @@ extern unsigned long blk_max_low_pfn, bl + #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) + #define BLK_MIN_SG_TIMEOUT (7 * HZ) + ++#define SCSI_EXEC_REQ_FIFO_DEFINED ++ + #ifdef CONFIG_BOUNCE + extern int init_emergency_isa_pool(void); + extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); +@@ -812,6 +814,9 @@ extern int blk_rq_map_kern(struct reques + extern int blk_rq_map_user_iov(struct request_queue *, struct request *, + struct rq_map_data *, struct sg_iovec *, int, + unsigned int, gfp_t); ++extern int blk_rq_map_kern_sg(struct request *rq, struct scatterlist *sgl, ++ int nents, gfp_t gfp); ++extern void blk_rq_unmap_kern_sg(struct request *rq, int err); + extern int blk_execute_rq(struct request_queue *, struct gendisk *, + struct request *, int); + extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, +diff -upkr linux-2.6.32.1/include/linux/scatterlist.h linux-2.6.32.1/include/linux/scatterlist.h +--- linux-2.6.32.1/include/linux/scatterlist.h 2009-12-03 06:51:21.000000000 +0300 ++++ linux-2.6.32.1/include/linux/scatterlist.h 2009-12-16 15:21:35.000000000 +0300 +@@ -3,6 +3,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -218,6 +219,10 @@ size_t sg_copy_from_buffer(struct scatte + size_t sg_copy_to_buffer(struct scatterlist 
*sgl, unsigned int nents, + void *buf, size_t buflen); + ++int sg_copy(struct scatterlist *dst_sg, struct scatterlist *src_sg, ++ int nents_to_copy, size_t copy_len, ++ enum km_type d_km_type, enum km_type s_km_type); ++ + /* + * Maximum number of entries that will be allocated in one piece, if + * a list larger than this is required then chaining will be utilized. +diff -upkr linux-2.6.32.1/lib/scatterlist.c linux-2.6.32.1/lib/scatterlist.c +--- linux-2.6.32.1/lib/scatterlist.c 2009-12-03 06:51:21.000000000 +0300 ++++ linux-2.6.32.1/lib/scatterlist.c 2009-12-16 15:21:35.000000000 +0300 +@@ -493,3 +493,132 @@ size_t sg_copy_to_buffer(struct scatterl + return sg_copy_buffer(sgl, nents, buf, buflen, 1); + } + EXPORT_SYMBOL(sg_copy_to_buffer); ++ ++/* ++ * Can switch to the next dst_sg element, so, to copy to strictly only ++ * one dst_sg element, it must be either last in the chain, or ++ * copy_len == dst_sg->length. ++ */ ++static int sg_copy_elem(struct scatterlist **pdst_sg, size_t *pdst_len, ++ size_t *pdst_offs, struct scatterlist *src_sg, ++ size_t copy_len, ++ enum km_type d_km_type, enum km_type s_km_type) ++{ ++ int res = 0; ++ struct scatterlist *dst_sg; ++ size_t src_len, dst_len, src_offs, dst_offs; ++ struct page *src_page, *dst_page; ++ ++ dst_sg = *pdst_sg; ++ dst_len = *pdst_len; ++ dst_offs = *pdst_offs; ++ dst_page = sg_page(dst_sg); ++ ++ src_page = sg_page(src_sg); ++ src_len = src_sg->length; ++ src_offs = src_sg->offset; ++ ++ do { ++ void *saddr, *daddr; ++ size_t n; ++ ++ saddr = kmap_atomic(src_page + ++ (src_offs >> PAGE_SHIFT), s_km_type) + ++ (src_offs & ~PAGE_MASK); ++ daddr = kmap_atomic(dst_page + ++ (dst_offs >> PAGE_SHIFT), d_km_type) + ++ (dst_offs & ~PAGE_MASK); ++ ++ if (((src_offs & ~PAGE_MASK) == 0) && ++ ((dst_offs & ~PAGE_MASK) == 0) && ++ (src_len >= PAGE_SIZE) && (dst_len >= PAGE_SIZE) && ++ (copy_len >= PAGE_SIZE)) { ++ copy_page(daddr, saddr); ++ n = PAGE_SIZE; ++ } else { ++ n = min_t(size_t, PAGE_SIZE - (dst_offs & 
~PAGE_MASK), ++ PAGE_SIZE - (src_offs & ~PAGE_MASK)); ++ n = min(n, src_len); ++ n = min(n, dst_len); ++ n = min_t(size_t, n, copy_len); ++ memcpy(daddr, saddr, n); ++ } ++ dst_offs += n; ++ src_offs += n; ++ ++ kunmap_atomic(saddr, s_km_type); ++ kunmap_atomic(daddr, d_km_type); ++ ++ res += n; ++ copy_len -= n; ++ if (copy_len == 0) ++ goto out; ++ ++ src_len -= n; ++ dst_len -= n; ++ if (dst_len == 0) { ++ dst_sg = sg_next(dst_sg); ++ if (dst_sg == NULL) ++ goto out; ++ dst_page = sg_page(dst_sg); ++ dst_len = dst_sg->length; ++ dst_offs = dst_sg->offset; ++ } ++ } while (src_len > 0); ++ ++out: ++ *pdst_sg = dst_sg; ++ *pdst_len = dst_len; ++ *pdst_offs = dst_offs; ++ return res; ++} ++ ++/** ++ * sg_copy - copy one SG vector to another ++ * @dst_sg: destination SG ++ * @src_sg: source SG ++ * @nents_to_copy: maximum number of entries to copy ++ * @copy_len: maximum amount of data to copy. If 0, then copy all. ++ * @d_km_type: kmap_atomic type for the destination SG ++ * @s_km_type: kmap_atomic type for the source SG ++ * ++ * Description: ++ * Data from the source SG vector will be copied to the destination SG ++ * vector. End of the vectors will be determined by sg_next() returning ++ * NULL. Returns number of bytes copied. 
++ */ ++int sg_copy(struct scatterlist *dst_sg, struct scatterlist *src_sg, ++ int nents_to_copy, size_t copy_len, ++ enum km_type d_km_type, enum km_type s_km_type) ++{ ++ int res = 0; ++ size_t dst_len, dst_offs; ++ ++ if (copy_len == 0) ++ copy_len = 0x7FFFFFFF; /* copy all */ ++ ++ if (nents_to_copy == 0) ++ nents_to_copy = 0x7FFFFFFF; /* copy all */ ++ ++ dst_len = dst_sg->length; ++ dst_offs = dst_sg->offset; ++ ++ do { ++ int copied = sg_copy_elem(&dst_sg, &dst_len, &dst_offs, ++ src_sg, copy_len, d_km_type, s_km_type); ++ copy_len -= copied; ++ res += copied; ++ if ((copy_len == 0) || (dst_sg == NULL)) ++ goto out; ++ ++ nents_to_copy--; ++ if (nents_to_copy == 0) ++ goto out; ++ ++ src_sg = sg_next(src_sg); ++ } while (src_sg != NULL); ++ ++out: ++ return res; ++} ++EXPORT_SYMBOL(sg_copy); diff --git a/scst/src/dev_handlers/scst_user.c b/scst/src/dev_handlers/scst_user.c index 8b77139b0..a0e0cfd47 100644 --- a/scst/src/dev_handlers/scst_user.c +++ b/scst/src/dev_handlers/scst_user.c @@ -1365,6 +1365,8 @@ static int dev_user_process_reply_exec(struct scst_user_cmd *ucmd, TRACE_DBG("ucmd %p, status %d, resp_data_len %d", ucmd, ereply->status, ereply->resp_data_len); + cmd->atomic = 0; + if (ereply->resp_data_len != 0) { if (ucmd->ubuff == 0) { int pages, rc; diff --git a/scst/src/dev_handlers/scst_vdisk.c b/scst/src/dev_handlers/scst_vdisk.c index 81b977caa..4e85259f2 100644 --- a/scst/src/dev_handlers/scst_vdisk.c +++ b/scst/src/dev_handlers/scst_vdisk.c @@ -831,12 +831,12 @@ static int vdisk_attach(struct scst_device *dev) dev_id_len = scnprintf(dev_id_str, sizeof(dev_id_str), "%llx", dev_id_num); - write_lock(&vdisk_t10_dev_id_rwlock); + write_lock_bh(&vdisk_t10_dev_id_rwlock); i = strlen(virt_dev->name) + 1; /* for ' ' */ memset(virt_dev->t10_dev_id, ' ', i + dev_id_len); memcpy(virt_dev->t10_dev_id, virt_dev->name, i-1); memcpy(virt_dev->t10_dev_id + i, dev_id_str, dev_id_len); - write_unlock(&vdisk_t10_dev_id_rwlock); + 
write_unlock_bh(&vdisk_t10_dev_id_rwlock); out: TRACE_EXIT(); @@ -1476,10 +1476,10 @@ static void vdisk_exec_inquiry(struct scst_cmd *cmd) else memcpy(&buf[num + 4], SCST_FIO_VENDOR, 8); - read_lock(&vdisk_t10_dev_id_rwlock); + read_lock_bh(&vdisk_t10_dev_id_rwlock); i = strlen(virt_dev->t10_dev_id); memcpy(&buf[num + 12], virt_dev->t10_dev_id, i); - read_unlock(&vdisk_t10_dev_id_rwlock); + read_unlock_bh(&vdisk_t10_dev_id_rwlock); buf[num + 3] = 8 + i; num += buf[num + 3]; @@ -2316,8 +2316,6 @@ static int vdisk_fsync(struct scst_vdisk_thr *thr, loff_t loff, struct scst_vdisk_dev *virt_dev = (struct scst_vdisk_dev *)dev->dh_priv; struct file *file = thr->fd; - struct inode *inode; - struct address_space *mapping; TRACE_ENTRY(); @@ -2326,12 +2324,18 @@ static int vdisk_fsync(struct scst_vdisk_thr *thr, loff_t loff, virt_dev->o_direct_flag || virt_dev->nullio) goto out; - inode = file->f_dentry->d_inode; - mapping = file->f_mapping; - - res = sync_page_range(inode, mapping, loff, len); +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32) + res = sync_page_range(file->f_dentry->d_inode, file->f_mapping, + loff, len); +#else +#if 0 /* For sparse files we might need to sync metadata as well */ + res = generic_write_sync(file, loff, len); +#else + res = filemap_write_and_wait_range(file->f_mapping, loff, len); +#endif +#endif if (unlikely(res != 0)) { - PRINT_ERROR("sync_page_range() failed (%d)", res); + PRINT_ERROR("sync range failed (%d)", res); if (cmd != NULL) { scst_set_cmd_error(cmd, SCST_LOAD_SENSE(scst_sense_write_error)); @@ -3916,7 +3920,7 @@ static ssize_t vdisk_sysfs_t10_dev_id_store(struct kobject *kobj, dev = container_of(kobj, struct scst_device, dev_kobj); virt_dev = (struct scst_vdisk_dev *)dev->dh_priv; - write_lock(&vdisk_t10_dev_id_rwlock); + write_lock_bh(&vdisk_t10_dev_id_rwlock); if ((count > sizeof(virt_dev->t10_dev_id)) || ((count == sizeof(virt_dev->t10_dev_id)) && @@ -3944,7 +3948,7 @@ static ssize_t vdisk_sysfs_t10_dev_id_store(struct kobject 
*kobj, virt_dev->t10_dev_id); out_unlock: - write_unlock(&vdisk_t10_dev_id_rwlock); + write_unlock_bh(&vdisk_t10_dev_id_rwlock); TRACE_EXIT_RES(res); return res; @@ -3962,9 +3966,9 @@ static ssize_t vdisk_sysfs_t10_dev_id_show(struct kobject *kobj, dev = container_of(kobj, struct scst_device, dev_kobj); virt_dev = (struct scst_vdisk_dev *)dev->dh_priv; - read_lock(&vdisk_t10_dev_id_rwlock); + read_lock_bh(&vdisk_t10_dev_id_rwlock); pos = sprintf(buf, "%s\n", virt_dev->t10_dev_id); - read_unlock(&vdisk_t10_dev_id_rwlock); + read_unlock_bh(&vdisk_t10_dev_id_rwlock); TRACE_EXIT_RES(pos); return pos; @@ -4038,10 +4042,10 @@ static int vdisk_read_proc(struct seq_file *seq, struct scst_dev_type *dev_type) seq_printf(seq, " "); c++; } - read_lock(&vdisk_t10_dev_id_rwlock); + read_lock_bh(&vdisk_t10_dev_id_rwlock); seq_printf(seq, "%-45s %-16s\n", virt_dev->file_name, virt_dev->t10_dev_id); - read_unlock(&vdisk_t10_dev_id_rwlock); + read_unlock_bh(&vdisk_t10_dev_id_rwlock); } mutex_unlock(&scst_vdisk_mutex); out: @@ -4335,7 +4339,7 @@ static int vdisk_proc_mgmt_cmd(const char *buffer, int length, goto out_up; } - write_lock(&vdisk_t10_dev_id_rwlock); + write_lock_bh(&vdisk_t10_dev_id_rwlock); slen = (strlen(t10_dev_id) <= (sizeof(virt_dev->t10_dev_id)-1) ? 
strlen(t10_dev_id) : @@ -4347,7 +4351,7 @@ static int vdisk_proc_mgmt_cmd(const char *buffer, int length, PRINT_INFO("T10 device id for device %s changed to %s", virt_dev->name, virt_dev->t10_dev_id); - write_unlock(&vdisk_t10_dev_id_rwlock); + write_unlock_bh(&vdisk_t10_dev_id_rwlock); } res = length; diff --git a/scst/src/scst_targ.c b/scst/src/scst_targ.c index 28dcf6795..314c38924 100644 --- a/scst/src/scst_targ.c +++ b/scst/src/scst_targ.c @@ -138,10 +138,11 @@ static int scst_init_cmd(struct scst_cmd *cmd, enum scst_exec_context *context) goto out; } + EXTRACHECKS_BUG_ON(*context == SCST_CONTEXT_SAME); + /* Small context optimization */ if (((*context == SCST_CONTEXT_TASKLET) || - (*context == SCST_CONTEXT_DIRECT_ATOMIC) || - ((*context == SCST_CONTEXT_SAME) && scst_cmd_atomic(cmd))) && + (*context == SCST_CONTEXT_DIRECT_ATOMIC)) && scst_cmd_is_expected_set(cmd)) { if (cmd->expected_data_direction & SCST_DATA_WRITE) { if (!test_bit(SCST_TGT_DEV_AFTER_INIT_WR_ATOMIC, @@ -1106,6 +1107,7 @@ void scst_rx_data(struct scst_cmd *cmd, int status, } #endif cmd->state = SCST_CMD_STATE_TGT_PRE_EXEC; + /* Small context optimization */ if ((pref_context == SCST_CONTEXT_TASKLET) || (pref_context == SCST_CONTEXT_DIRECT_ATOMIC) ||