Put_page_callback and scst_exec_req_fifo patches for RHEL5/6

git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@7066 d57e44dd-8a1f-0410-8b47-8ef2f437770f
This commit is contained in:
Vladislav Bolkhovitin
2016-12-20 05:49:07 +00:00
parent 6c95c681ac
commit 80e49beea8
3 changed files with 767 additions and 0 deletions

View File

@@ -0,0 +1,257 @@
diff -upr linux-2.6.18/include/linux/mm.h linux-2.6.18/include/linux/mm.h
--- linux-2.6.18/include/linux/mm.h 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/include/linux/mm.h 2007-08-07 19:35:51.000000000 +0400
@@ -277,6 +277,15 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Ideally this field would live in struct sk_buff rather
+ * than here, but that would make the code much more complicated and
+ * fragile when maintained as a separate patch, since every skb would
+ * then have to contain only pages with the same value in this
+ * field.
+ */
+ void *net_priv;
};
#define page_private(page) ((page)->private)
diff -upr linux-2.6.18/include/linux/net.h linux-2.6.18/include/linux/net.h
--- linux-2.6.18/include/linux/net.h 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/include/linux/net.h 2007-08-29 18:28:21.000000000 +0400
@@ -56,6 +56,7 @@ typedef enum {
#ifdef __KERNEL__
#include <linux/stringify.h>
+#include <linux/mm.h>
#define SOCK_ASYNC_NOSPACE 0
#define SOCK_ASYNC_WAITDATA 1
@@ -324,5 +325,30 @@ extern int net_msg_cost;
extern int net_msg_burst;
#endif
+/* Support for notification on zero-copy TCP transfer completion */
+#define CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv != NULL)	/* page carries notification data */
+		net_get_page_callback(page);	/* run the registered get hook */
+	get_page(page);	/* always take the regular page reference too */
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv != NULL)	/* page carries notification data */
+		net_put_page_callback(page);	/* run the registered put hook */
+	put_page(page);	/* always drop the regular page reference too */
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upr linux-2.6.18/net/core/skbuff.c linux-2.6.18/net/core/skbuff.c
--- linux-2.6.18/net/core/skbuff.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/core/skbuff.c 2007-08-07 19:35:51.000000000 +0400
@@ -324,7 +324,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_shinfo(skb)->frag_list)
@@ -666,7 +666,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -720,7 +720,7 @@ int pskb_expand_head(struct sk_buff *skb
memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_clone_fraglist(skb);
@@ -902,7 +902,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_drop_fraglist(skb);
@@ -1071,7 +1071,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1653,7 +1653,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2021,7 +2021,7 @@ struct sk_buff *skb_segment(struct sk_bu
BUG_ON(i >= nfrags);
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upr linux-2.6.18/net/core/utils.c linux-2.6.18/net/core/utils.c
--- linux-2.6.18/net/core/utils.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/core/utils.c 2007-08-23 19:49:40.000000000 +0400
@@ -24,11 +24,15 @@
#include <linux/random.h>
#include <linux/percpu.h>
#include <linux/init.h>
+#include <linux/skbuff.h>
#include <asm/byteorder.h>
#include <asm/system.h>
#include <asm/uaccess.h>
+net_get_page_callback_t net_get_page_callback __read_mostly;
+net_put_page_callback_t net_put_page_callback __read_mostly;
+
/*
This is a maximally equidistributed combined Tausworthe generator
based on code from GNU Scientific Library 1.5 (30 Jun 2004)
@@ -203,3 +203,32 @@ __be32 in_aton(const char *str)
}
EXPORT_SYMBOL(in_aton);
+
+/*
+ * Install the get/put page callbacks used by net_get_page() and
+ * net_put_page(). Returns 0 on success, or -EBUSY (changing nothing) if
+ * a different non-NULL callback is already installed for either slot.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* An installed get callback may only be cleared or set to itself. */
+	if (net_get_page_callback != NULL && get_callback != NULL &&
+	    net_get_page_callback != get_callback)
+		return -EBUSY;
+
+	/* The same rule applies to the put callback. */
+	if (net_put_page_callback != NULL && put_callback != NULL &&
+	    net_put_page_callback != put_callback)
+		return -EBUSY;
+
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
+
+EXPORT_SYMBOL(net_get_page_callback);
+EXPORT_SYMBOL(net_put_page_callback);
diff -upr linux-2.6.18/net/ipv4/ip_output.c linux-2.6.18/net/ipv4/ip_output.c
--- linux-2.6.18/net/ipv4/ip_output.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/ipv4/ip_output.c 2007-08-07 19:37:24.000000000 +0400
@@ -1006,7 +1006,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1166,7 +1166,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upr linux-2.6.18/net/ipv4/tcp.c linux-2.6.18/net/ipv4/tcp.c
--- linux-2.6.18/net/ipv4/tcp.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/ipv4/tcp.c 2007-08-07 19:35:51.000000000 +0400
@@ -560,7 +560,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -763,7 +763,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -804,9 +804,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upr linux-2.6.18/net/ipv4/tcp_output.c linux-2.6.18/net/ipv4/tcp_output.c
--- linux-2.6.18/net/ipv4/tcp_output.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/ipv4/tcp_output.c 2007-08-07 19:35:51.000000000 +0400
@@ -659,7 +659,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upr linux-2.6.18/net/ipv6/ip6_output.c linux-2.6.18/net/ipv6/ip6_output.c
--- linux-2.6.18/net/ipv6/ip6_output.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/ipv6/ip6_output.c 2007-08-07 19:35:51.000000000 +0400
@@ -1212,7 +1212,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}

View File

@@ -0,0 +1,401 @@
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/include/linux/Kbuild linux-2.6.32-131.4.1.el6.x86_64.new/include/linux/Kbuild
--- linux-2.6.32-131.4.1.el6.x86_64.orig/include/linux/Kbuild 2011-07-09 00:47:13.884215174 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/include/linux/Kbuild 2011-07-09 00:47:47.530389221 +0200
@@ -113,6 +113,7 @@ header-y += map_to_7segment.h
header-y += matroxfb.h
header-y += meye.h
header-y += minix_fs.h
+header-y += mm.h
header-y += mmtimer.h
header-y += mqueue.h
header-y += mtio.h
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/include/linux/mm_types.h linux-2.6.32-131.4.1.el6.x86_64.new/include/linux/mm_types.h
--- linux-2.6.32-131.4.1.el6.x86_64.orig/include/linux/mm_types.h 2011-07-09 00:47:13.893191775 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/include/linux/mm_types.h 2011-07-09 00:47:47.533311169 +0200
@@ -106,6 +106,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. It might seem wrong to have this field here rather than
+ * in struct sk_buff, but moving it there would make the code much more
+ * complicated and fragile, since every skb would then have to contain
+ * only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/include/linux/net.h linux-2.6.32-131.4.1.el6.x86_64.new/include/linux/net.h
--- linux-2.6.32-131.4.1.el6.x86_64.orig/include/linux/net.h 2011-07-09 00:47:13.867629724 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/include/linux/net.h 2011-07-09 00:47:47.536207157 +0200
@@ -20,6 +20,7 @@
#include <linux/socket.h>
#include <asm/socket.h>
+#include <linux/mm.h>
#define NPROTO AF_MAX
@@ -365,5 +366,44 @@ static const struct proto_ops name##_ops
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * See the comment on net_set_get_put_page_callbacks() for why these
+ * functions don't need any protection.
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv != NULL)	/* page carries notification data */
+		net_get_page_callback(page);	/* run the registered get hook */
+	get_page(page);	/* always take the regular page reference too */
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv != NULL)	/* page carries notification data */
+		net_put_page_callback(page);	/* run the registered put hook */
+	put_page(page);	/* always drop the regular page reference too */
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page); /* zero-copy notification compiled out: plain get_page() */
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page); /* zero-copy notification compiled out: plain put_page() */
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/core/dev.c linux-2.6.32-131.4.1.el6.x86_64.new/net/core/dev.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/core/dev.c 2011-07-09 00:47:14.491417046 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/core/dev.c 2011-07-09 00:47:47.538194361 +0200
@@ -2903,7 +2903,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags);
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/core/skbuff.c linux-2.6.32-131.4.1.el6.x86_64.new/net/core/skbuff.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/core/skbuff.c 2011-07-09 00:47:14.491417046 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/core/skbuff.c 2011-07-09 00:47:47.542260687 +0200
@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -344,7 +344,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_has_frags(skb))
@@ -766,7 +766,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -832,7 +832,7 @@ int pskb_expand_head(struct sk_buff *skb
sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_clone_fraglist(skb);
@@ -1106,7 +1106,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_drop_fraglist(skb);
@@ -1275,7 +1275,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1376,7 +1376,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1400,7 +1400,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1410,7 +1410,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1431,7 +1431,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2061,7 +2061,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2183,7 +2183,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2205,7 +2205,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2600,7 +2600,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/ip_output.c linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/ip_output.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/ip_output.c 2011-07-09 00:47:14.538469946 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/ip_output.c 2011-07-09 00:47:47.544855733 +0200
@@ -981,7 +981,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1213,7 +1213,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/Makefile linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/Makefile
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/Makefile 2011-07-09 00:47:14.540204846 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/Makefile 2011-07-09 00:47:47.544855733 +0200
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/tcp.c linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/tcp.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/tcp.c 2011-07-09 00:47:14.522494769 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/tcp.c 2011-07-09 00:47:47.548279863 +0200
@@ -799,7 +799,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1007,7 +1007,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1048,9 +1048,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/tcp_output.c linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/tcp_output.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/tcp_output.c 2011-07-09 00:47:14.535532920 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/tcp_output.c 2011-07-09 00:47:47.548279863 +0200
@@ -909,7 +909,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/tcp_zero_copy.c linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/tcp_zero_copy.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/tcp_zero_copy.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/tcp_zero_copy.c 2011-07-09 00:47:47.548279863 +0200
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Install the get/put page callbacks used by net_get_page() and
+ * net_put_page(). Returns 0 on success, or -EBUSY (changing nothing) if
+ * a different non-NULL callback is already installed for either slot.
+ *
+ * The caller must ensure that, at the moment this function is called, no
+ * page in the system has its net_priv field set to a non-zero value.
+ * Hence this function, as well as net_get_page() and net_put_page(),
+ * doesn't need any protection.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* An installed get callback may only be cleared or set to itself. */
+	if (net_get_page_callback != NULL && get_callback != NULL &&
+	    net_get_page_callback != get_callback)
+		return -EBUSY;
+
+	/* The same rule applies to the put callback. */
+	if (net_put_page_callback != NULL && put_callback != NULL &&
+	    net_put_page_callback != put_callback)
+		return -EBUSY;
+
+	/* Both checks passed: install (or clear, for NULL) both callbacks. */
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv6/ip6_output.c linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv6/ip6_output.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv6/ip6_output.c 2011-07-09 00:47:14.498374289 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv6/ip6_output.c 2011-07-09 00:47:47.557327813 +0200
@@ -1370,7 +1370,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/Kconfig linux-2.6.32-131.4.1.el6.x86_64.new/net/Kconfig
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/Kconfig 2011-07-09 00:47:14.452504858 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/Kconfig 2011-07-09 00:47:47.557327813 +0200
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -0,0 +1,109 @@
diff -upr linux-2.6.18/drivers/scsi/scsi_lib.c linux-2.6.18/drivers/scsi/scsi_lib.c
--- linux-2.6.18/drivers/scsi/scsi_lib.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/drivers/scsi/scsi_lib.c 2007-07-04 21:15:32.000000000 +0400
@@ -367,7 +367,7 @@ free_bios:
}
/**
- * scsi_execute_async - insert request
+ * __scsi_execute_async - insert request
* @sdev: scsi device
* @cmd: scsi command
* @cmd_len: length of scsi cdb
@@ -378,11 +378,14 @@ free_bios:
* @timeout: request timeout in seconds
* @retries: number of times to retry request
* @flags: or into request flags
+ * @at_head: insert request at head or tail of queue
**/
-int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
+static inline int __scsi_execute_async(struct scsi_device *sdev,
+ const unsigned char *cmd,
int cmd_len, int data_direction, void *buffer, unsigned bufflen,
int use_sg, int timeout, int retries, void *privdata,
- void (*done)(void *, char *, int, int), gfp_t gfp)
+ void (*done)(void *, char *, int, int), gfp_t gfp,
+ int at_head)
{
struct request *req;
struct scsi_io_context *sioc;
@@ -418,7 +421,7 @@ int scsi_execute_async(struct scsi_devic
sioc->data = privdata;
sioc->done = done;
- blk_execute_rq_nowait(req->q, NULL, req, 1, scsi_end_async);
+ blk_execute_rq_nowait(req->q, NULL, req, at_head, scsi_end_async);
return 0;
free_req:
@@ -427,8 +430,53 @@ free_sense:
kfree(sioc);
return DRIVER_ERROR << 24;
}
+
+/**
+ * scsi_execute_async - insert request at head of queue
+ * @sdev: scsi device
+ * @cmd: scsi command
+ * @cmd_len: length of scsi cdb
+ * @data_direction: data direction
+ * @buffer: data buffer (this can be a kernel buffer or scatterlist)
+ * @bufflen: len of buffer
+ * @use_sg: if buffer is a scatterlist this is the number of elements
+ * @timeout: request timeout in seconds
+ * @retries: number of times to retry request
+ * @privdata: caller data stored together with the @done callback
+ **/
+int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
+ int cmd_len, int data_direction, void *buffer, unsigned bufflen,
+ int use_sg, int timeout, int retries, void *privdata,
+ void (*done)(void *, char *, int, int), gfp_t gfp)
+{
+ return __scsi_execute_async(sdev, cmd, cmd_len, data_direction, buffer,
+ bufflen, use_sg, timeout, retries, privdata, done, gfp, 1); /* at_head = 1 */
+}
EXPORT_SYMBOL_GPL(scsi_execute_async);
+/**
+ * scsi_execute_async_fifo - insert request at tail, in FIFO order
+ * @sdev: scsi device
+ * @cmd: scsi command
+ * @cmd_len: length of scsi cdb
+ * @data_direction: data direction
+ * @buffer: data buffer (this can be a kernel buffer or scatterlist)
+ * @bufflen: len of buffer
+ * @use_sg: if buffer is a scatterlist this is the number of elements
+ * @timeout: request timeout in seconds
+ * @retries: number of times to retry request
+ * @privdata: caller data stored together with the @done callback
+ **/
+int scsi_execute_async_fifo(struct scsi_device *sdev, const unsigned char *cmd,
+ int cmd_len, int data_direction, void *buffer, unsigned bufflen,
+ int use_sg, int timeout, int retries, void *privdata,
+ void (*done)(void *, char *, int, int), gfp_t gfp)
+{
+ return __scsi_execute_async(sdev, cmd, cmd_len, data_direction, buffer,
+ bufflen, use_sg, timeout, retries, privdata, done, gfp, 0); /* at_head = 0 */
+}
+EXPORT_SYMBOL_GPL(scsi_execute_async_fifo);
+
/*
* Function: scsi_init_cmd_errh()
*
diff -upr linux-2.6.18/include/scsi/scsi_device.h linux-2.6.18/include/scsi/scsi_device.h
--- linux-2.6.18/include/scsi/scsi_device.h 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/include/scsi/scsi_device.h 2007-07-04 21:15:32.000000000 +0400
@@ -335,6 +335,13 @@ extern int scsi_execute_async(struct scs
int timeout, int retries, void *privdata,
void (*done)(void *, char *, int, int),
gfp_t gfp);
+#define SCSI_EXEC_REQ_FIFO_DEFINED
+extern int scsi_execute_async_fifo(struct scsi_device *sdev,
+ const unsigned char *cmd, int cmd_len, int data_direction,
+ void *buffer, unsigned bufflen, int use_sg,
+ int timeout, int retries, void *privdata,
+ void (*done)(void *, char *, int, int),
+ gfp_t gfp);
static inline void scsi_device_reprobe(struct scsi_device *sdev)
{