Merge branch 'svn-trunk'

This commit is contained in:
Bart Van Assche
2018-11-17 14:32:05 -08:00
59 changed files with 0 additions and 21419 deletions

View File

@@ -1,248 +0,0 @@
diff -upr linux-2.6.16.29/include/linux/mm.h linux-2.6.16.29/include/linux/mm.h
--- linux-2.6.16.29/include/linux/mm.h 2006-09-12 22:02:10.000000000 +0400
+++ linux-2.6.16.29/include/linux/mm.h 2007-08-07 19:54:27.000000000 +0400
@@ -263,6 +263,15 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
+ /*
+ * Used to implement notification on zero-copy TCP transfer
+ * completion. This field does not really belong here; struct sk_buff
+ * would be a better home for it. However, that would make the code
+ * much more complicated and fragile when maintained as a separate
+ * patch, because every skb would then have to contain only pages
+ * that share the same value of this field.
+ */
+ void *net_priv;
};
#define page_private(page) ((page)->private)
diff -upr linux-2.6.16.29/include/linux/net.h linux-2.6.16.29/include/linux/net.h
--- linux-2.6.16.29/include/linux/net.h 2006-09-12 22:02:10.000000000 +0400
+++ linux-2.6.16.29/include/linux/net.h 2007-08-29 18:31:40.000000000 +0400
@@ -57,6 +57,7 @@ typedef enum {
#define __SO_ACCEPTCON (1 << 16) /* performed a listen */
#ifdef __KERNEL__
+#include <linux/mm.h>
#define SOCK_ASYNC_NOSPACE 0
#define SOCK_ASYNC_WAITDATA 1
@@ -294,5 +295,30 @@ extern int net_msg_cost;
extern int net_msg_burst;
#endif
+/* Support for notification on zero-copy TCP transfer completion */
+#define CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+/* Installed hooks, invoked by net_get_page()/net_put_page() below. */
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+/* Sets both hooks at once; returns 0 on success or -EBUSY on conflict. */
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv)		/* only pages with net_priv set notify the hook */
+		net_get_page_callback(page);
+	get_page(page);			/* the page reference itself is always taken */
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv)		/* only pages with net_priv set notify the hook */
+		net_put_page_callback(page);
+	put_page(page);			/* the page reference itself is always dropped */
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upr linux-2.6.16.29/net/core/skbuff.c linux-2.6.16.29/net/core/skbuff.c
--- linux-2.6.16.29/net/core/skbuff.c 2006-09-12 22:02:10.000000000 +0400
+++ linux-2.6.16.29/net/core/skbuff.c 2007-08-07 19:55:51.000000000 +0400
@@ -271,7 +271,7 @@ void skb_release_data(struct sk_buff *sk
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_shinfo(skb)->frag_list)
@@ -588,7 +588,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -642,7 +642,7 @@ int pskb_expand_head(struct sk_buff *skb
memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_clone_fraglist(skb);
@@ -794,7 +794,7 @@ int ___pskb_trim(struct sk_buff *skb, un
return -ENOMEM;
}
if (len <= offset) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb)->nr_frags--;
} else {
skb_shinfo(skb)->frags[i].size = len - offset;
@@ -940,7 +940,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1522,7 +1522,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
diff -upr linux-2.6.16.29/net/core/utils.c linux-2.6.16.29/net/core/utils.c
--- linux-2.6.16.29/net/core/utils.c 2006-09-12 22:02:10.000000000 +0400
+++ linux-2.6.16.29/net/core/utils.c 2007-08-23 19:54:03.000000000 +0400
@@ -24,11 +24,15 @@
#include <linux/random.h>
#include <linux/percpu.h>
#include <linux/init.h>
+#include <linux/skbuff.h>
#include <asm/byteorder.h>
#include <asm/system.h>
#include <asm/uaccess.h>
+net_get_page_callback_t net_get_page_callback __read_mostly;
+net_put_page_callback_t net_put_page_callback __read_mostly;
+
/*
This is a maximally equidistributed combined Tausworthe generator
based on code from GNU Scientific Library 1.5 (30 Jun 2004)
@@ -190,3 +194,32 @@ __be32 in_aton(const char *str)
}
EXPORT_SYMBOL(in_aton);
+
+/*
+ * Install the zero-copy page get/put hooks. A NULL argument never conflicts
+ * and is stored as given, so passing two NULLs clears both hooks.
+ * Returns -EBUSY, changing nothing, if either slot already holds a
+ * different non-NULL hook; returns 0 otherwise.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* A slot is busy when old and new hooks are both set and differ. */
+	int get_busy = net_get_page_callback && get_callback &&
+		       net_get_page_callback != get_callback;
+	int put_busy = net_put_page_callback && put_callback &&
+		       net_put_page_callback != put_callback;
+
+	if (get_busy || put_busy)
+		return -EBUSY;
+
+	/* No conflict: replace both hooks together. */
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+	return 0;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
+
+EXPORT_SYMBOL(net_get_page_callback);
+EXPORT_SYMBOL(net_put_page_callback);
diff -upr linux-2.6.16.29/net/ipv4/ip_output.c linux-2.6.16.29/net/ipv4/ip_output.c
--- linux-2.6.16.29/net/ipv4/ip_output.c 2006-09-12 22:02:10.000000000 +0400
+++ linux-2.6.16.29/net/ipv4/ip_output.c 2007-08-07 19:54:27.000000000 +0400
@@ -997,7 +997,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1155,7 +1155,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upr linux-2.6.16.29/net/ipv4/tcp.c linux-2.6.16.29/net/ipv4/tcp.c
--- linux-2.6.16.29/net/ipv4/tcp.c 2006-09-12 22:02:10.000000000 +0400
+++ linux-2.6.16.29/net/ipv4/tcp.c 2007-08-07 19:54:27.000000000 +0400
@@ -558,7 +558,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -767,7 +767,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -808,9 +808,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upr linux-2.6.16.29/net/ipv4/tcp_output.c linux-2.6.16.29/net/ipv4/tcp_output.c
--- linux-2.6.16.29/net/ipv4/tcp_output.c 2006-09-12 22:02:10.000000000 +0400
+++ linux-2.6.16.29/net/ipv4/tcp_output.c 2007-08-07 19:54:27.000000000 +0400
@@ -633,7 +633,7 @@ static unsigned char *__pskb_trim_head(s
k = 0;
for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upr linux-2.6.16.29/net/ipv6/ip6_output.c linux-2.6.16.29/net/ipv6/ip6_output.c
--- linux-2.6.16.29/net/ipv6/ip6_output.c 2006-09-12 22:02:10.000000000 +0400
+++ linux-2.6.16.29/net/ipv6/ip6_output.c 2007-08-07 19:54:27.000000000 +0400
@@ -1100,7 +1100,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}

View File

@@ -1,257 +0,0 @@
diff -upr linux-2.6.18.1/include/linux/mm.h linux-2.6.18.1/include/linux/mm.h
--- linux-2.6.18.1/include/linux/mm.h 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18.1/include/linux/mm.h 2007-08-07 19:35:51.000000000 +0400
@@ -267,6 +267,15 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
+ /*
+ * Used to implement notification on zero-copy TCP transfer
+ * completion. This field does not really belong here; struct sk_buff
+ * would be a better home for it. However, that would make the code
+ * much more complicated and fragile when maintained as a separate
+ * patch, because every skb would then have to contain only pages
+ * that share the same value of this field.
+ */
+ void *net_priv;
};
#define page_private(page) ((page)->private)
diff -upr linux-2.6.18.1/include/linux/net.h linux-2.6.18.1/include/linux/net.h
--- linux-2.6.18.1/include/linux/net.h 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18.1/include/linux/net.h 2007-08-29 18:28:21.000000000 +0400
@@ -56,6 +56,7 @@ typedef enum {
#ifdef __KERNEL__
#include <linux/stringify.h>
+#include <linux/mm.h>
#define SOCK_ASYNC_NOSPACE 0
#define SOCK_ASYNC_WAITDATA 1
@@ -304,5 +305,30 @@ extern int net_msg_cost;
extern int net_msg_burst;
#endif
+/* Support for notification on zero-copy TCP transfer completion */
+#define CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+/* Installed hooks, invoked by net_get_page()/net_put_page() below. */
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+/* Sets both hooks at once; returns 0 on success or -EBUSY on conflict. */
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv)		/* only pages with net_priv set notify the hook */
+		net_get_page_callback(page);
+	get_page(page);			/* the page reference itself is always taken */
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv)		/* only pages with net_priv set notify the hook */
+		net_put_page_callback(page);
+	put_page(page);			/* the page reference itself is always dropped */
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upr linux-2.6.18.1/net/core/skbuff.c linux-2.6.18.1/net/core/skbuff.c
--- linux-2.6.18.1/net/core/skbuff.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18.1/net/core/skbuff.c 2007-08-07 19:35:51.000000000 +0400
@@ -309,7 +309,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_shinfo(skb)->frag_list)
@@ -646,7 +646,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -700,7 +700,7 @@ int pskb_expand_head(struct sk_buff *skb
memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_clone_fraglist(skb);
@@ -882,7 +882,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_drop_fraglist(skb);
@@ -1051,7 +1051,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1633,7 +1633,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2002,7 +2002,7 @@ struct sk_buff *skb_segment(struct sk_bu
BUG_ON(i >= nfrags);
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upr linux-2.6.18.1/net/core/utils.c linux-2.6.18.1/net/core/utils.c
--- linux-2.6.18.1/net/core/utils.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18.1/net/core/utils.c 2007-08-23 19:49:40.000000000 +0400
@@ -24,11 +24,15 @@
#include <linux/random.h>
#include <linux/percpu.h>
#include <linux/init.h>
+#include <linux/skbuff.h>
#include <asm/byteorder.h>
#include <asm/system.h>
#include <asm/uaccess.h>
+net_get_page_callback_t net_get_page_callback __read_mostly;
+net_put_page_callback_t net_put_page_callback __read_mostly;
+
/*
This is a maximally equidistributed combined Tausworthe generator
based on code from GNU Scientific Library 1.5 (30 Jun 2004)
@@ -191,3 +195,32 @@ __be32 in_aton(const char *str)
}
EXPORT_SYMBOL(in_aton);
+
+/*
+ * Install the zero-copy page get/put hooks. A NULL argument never conflicts
+ * and is stored as given, so passing two NULLs clears both hooks.
+ * Returns -EBUSY, changing nothing, if either slot already holds a
+ * different non-NULL hook; returns 0 otherwise.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* A slot is busy when old and new hooks are both set and differ. */
+	int get_busy = net_get_page_callback && get_callback &&
+		       net_get_page_callback != get_callback;
+	int put_busy = net_put_page_callback && put_callback &&
+		       net_put_page_callback != put_callback;
+
+	if (get_busy || put_busy)
+		return -EBUSY;
+
+	/* No conflict: replace both hooks together. */
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+	return 0;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
+
+EXPORT_SYMBOL(net_get_page_callback);
+EXPORT_SYMBOL(net_put_page_callback);
diff -upr linux-2.6.18.1/net/ipv4/ip_output.c linux-2.6.18.1/net/ipv4/ip_output.c
--- linux-2.6.18.1/net/ipv4/ip_output.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18.1/net/ipv4/ip_output.c 2007-08-07 19:37:24.000000000 +0400
@@ -999,7 +999,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1159,7 +1159,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upr linux-2.6.18.1/net/ipv4/tcp.c linux-2.6.18.1/net/ipv4/tcp.c
--- linux-2.6.18.1/net/ipv4/tcp.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18.1/net/ipv4/tcp.c 2007-08-07 19:35:51.000000000 +0400
@@ -559,7 +559,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -762,7 +762,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -803,9 +803,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upr linux-2.6.18.1/net/ipv4/tcp_output.c linux-2.6.18.1/net/ipv4/tcp_output.c
--- linux-2.6.18.1/net/ipv4/tcp_output.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18.1/net/ipv4/tcp_output.c 2007-08-07 19:35:51.000000000 +0400
@@ -657,7 +657,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upr linux-2.6.18.1/net/ipv6/ip6_output.c linux-2.6.18.1/net/ipv6/ip6_output.c
--- linux-2.6.18.1/net/ipv6/ip6_output.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18.1/net/ipv6/ip6_output.c 2007-08-07 19:35:51.000000000 +0400
@@ -1149,7 +1149,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}

View File

@@ -1,259 +0,0 @@
diff -upr linux-2.6.21.1/include/linux/mm_types.h linux-2.6.21.1/include/linux/mm_types.h
--- linux-2.6.21.1/include/linux/mm_types.h 2007-04-28 01:49:26.000000000 +0400
+++ linux-2.6.21.1/include/linux/mm_types.h 2007-07-04 12:56:56.000000000 +0400
@@ -62,6 +62,15 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
+ /*
+ * Used to implement notification on zero-copy TCP transfer
+ * completion. This field does not really belong here; struct sk_buff
+ * would be a better home for it. However, that would make the code
+ * much more complicated and fragile when maintained as a separate
+ * patch, because every skb would then have to contain only pages
+ * that share the same value of this field.
+ */
+ void *net_priv;
};
#endif /* _LINUX_MM_TYPES_H */
diff -upr linux-2.6.21.1/include/linux/net.h linux-2.6.21.1/include/linux/net.h
--- linux-2.6.21.1/include/linux/net.h 2007-04-28 01:49:26.000000000 +0400
+++ linux-2.6.21.1/include/linux/net.h 2007-08-29 14:57:06.000000000 +0400
@@ -57,6 +57,7 @@ typedef enum {
#ifdef __KERNEL__
#include <linux/stringify.h>
#include <linux/random.h>
+#include <linux/mm.h>
#define SOCK_ASYNC_NOSPACE 0
#define SOCK_ASYNC_WAITDATA 1
@@ -319,5 +320,30 @@ extern int net_msg_cost;
extern int net_msg_burst;
#endif
+/* Support for notification on zero-copy TCP transfer completion */
+#define CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+/* Installed hooks, invoked by net_get_page()/net_put_page() below. */
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+/* Sets both hooks at once; returns 0 on success or -EBUSY on conflict. */
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv)		/* only pages with net_priv set notify the hook */
+		net_get_page_callback(page);
+	get_page(page);			/* the page reference itself is always taken */
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv)		/* only pages with net_priv set notify the hook */
+		net_put_page_callback(page);
+	put_page(page);			/* the page reference itself is always dropped */
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upr linux-2.6.21.1/net/core/skbuff.c linux-2.6.21.1/net/core/skbuff.c
--- linux-2.6.21.1/net/core/skbuff.c 2007-04-28 01:49:26.000000000 +0400
+++ linux-2.6.21.1/net/core/skbuff.c 2007-07-04 13:18:04.000000000 +0400
@@ -257,7 +257,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_shinfo(skb)->frag_list)
@@ -596,7 +596,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -650,7 +650,7 @@ int pskb_expand_head(struct sk_buff *skb
memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_clone_fraglist(skb);
@@ -832,7 +832,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_drop_fraglist(skb);
@@ -1001,7 +1001,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1583,7 +1583,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -1951,7 +1951,7 @@ struct sk_buff *skb_segment(struct sk_bu
BUG_ON(i >= nfrags);
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upr linux-2.6.21.1/net/core/utils.c linux-2.6.21.1/net/core/utils.c
--- linux-2.6.21.1/net/core/utils.c 2007-04-28 01:49:26.000000000 +0400
+++ linux-2.6.21.1/net/core/utils.c 2007-08-23 19:38:32.000000000 +0400
@@ -25,6 +25,7 @@
#include <linux/random.h>
#include <linux/percpu.h>
#include <linux/init.h>
+#include <linux/skbuff.h>
#include <asm/byteorder.h>
#include <asm/system.h>
@@ -33,6 +34,9 @@
int net_msg_cost = 5*HZ;
int net_msg_burst = 10;
+net_get_page_callback_t net_get_page_callback __read_mostly;
+net_put_page_callback_t net_put_page_callback __read_mostly;
+
/*
* All net warning printk()s should be guarded by this function.
*/
@@ -290,3 +294,32 @@ out:
}
EXPORT_SYMBOL(in6_pton);
+
+/*
+ * Install the zero-copy page get/put hooks. A NULL argument never conflicts
+ * and is stored as given, so passing two NULLs clears both hooks.
+ * Returns -EBUSY, changing nothing, if either slot already holds a
+ * different non-NULL hook; returns 0 otherwise.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* A slot is busy when old and new hooks are both set and differ. */
+	int get_busy = net_get_page_callback && get_callback &&
+		       net_get_page_callback != get_callback;
+	int put_busy = net_put_page_callback && put_callback &&
+		       net_put_page_callback != put_callback;
+
+	if (get_busy || put_busy)
+		return -EBUSY;
+
+	/* No conflict: replace both hooks together. */
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+	return 0;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
+
+EXPORT_SYMBOL(net_get_page_callback);
+EXPORT_SYMBOL(net_put_page_callback);
diff -upr linux-2.6.21.1/net/ipv4/ip_output.c linux-2.6.21.1/net/ipv4/ip_output.c
--- linux-2.6.21.1/net/ipv4/ip_output.c 2007-04-28 01:49:26.000000000 +0400
+++ linux-2.6.21.1/net/ipv4/ip_output.c 2007-07-04 13:17:53.000000000 +0400
@@ -991,7 +991,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1151,7 +1151,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upr linux-2.6.21.1/net/ipv4/tcp.c linux-2.6.21.1/net/ipv4/tcp.c
--- linux-2.6.21.1/net/ipv4/tcp.c 2007-04-28 01:49:26.000000000 +0400
+++ linux-2.6.21.1/net/ipv4/tcp.c 2007-07-04 13:17:58.000000000 +0400
@@ -561,7 +561,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -764,7 +764,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -805,9 +805,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upr linux-2.6.21.1/net/ipv4/tcp_output.c linux-2.6.21.1/net/ipv4/tcp_output.c
--- linux-2.6.21.1/net/ipv4/tcp_output.c 2007-04-28 01:49:26.000000000 +0400
+++ linux-2.6.21.1/net/ipv4/tcp_output.c 2007-07-04 13:17:05.000000000 +0400
@@ -722,7 +722,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upr linux-2.6.21.1/net/ipv6/ip6_output.c linux-2.6.21.1/net/ipv6/ip6_output.c
--- linux-2.6.21.1/net/ipv6/ip6_output.c 2007-04-28 01:49:26.000000000 +0400
+++ linux-2.6.21.1/net/ipv6/ip6_output.c 2007-07-04 13:17:49.000000000 +0400
@@ -1227,7 +1227,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}

View File

@@ -1,259 +0,0 @@
diff -upr linux-2.6.22/include/linux/mm_types.h linux-2.6.22/include/linux/mm_types.h
--- linux-2.6.22/include/linux/mm_types.h 2007-07-09 03:32:17.000000000 +0400
+++ linux-2.6.22/include/linux/mm_types.h 2007-08-07 19:12:10.000000000 +0400
@@ -78,6 +78,15 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
+ /*
+ * Used to implement notification on zero-copy TCP transfer
+ * completion. This field does not really belong here; struct sk_buff
+ * would be a better home for it. However, that would make the code
+ * much more complicated and fragile when maintained as a separate
+ * patch, because every skb would then have to contain only pages
+ * that share the same value of this field.
+ */
+ void *net_priv;
};
#endif /* _LINUX_MM_TYPES_H */
diff -upr linux-2.6.22/include/linux/net.h linux-2.6.22/include/linux/net.h
--- linux-2.6.22/include/linux/net.h 2007-07-09 03:32:17.000000000 +0400
+++ linux-2.6.22/include/linux/net.h 2007-08-29 18:18:56.000000000 +0400
@@ -57,6 +57,7 @@ typedef enum {
#ifdef __KERNEL__
#include <linux/stringify.h>
#include <linux/random.h>
+#include <linux/mm.h>
#define SOCK_ASYNC_NOSPACE 0
#define SOCK_ASYNC_WAITDATA 1
@@ -319,5 +320,30 @@ extern int net_msg_cost;
extern int net_msg_burst;
#endif
+/* Support for notification on zero-copy TCP transfer completion */
+#define CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+/* Installed hooks, invoked by net_get_page()/net_put_page() below. */
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+/* Sets both hooks at once; returns 0 on success or -EBUSY on conflict. */
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv)		/* only pages with net_priv set notify the hook */
+		net_get_page_callback(page);
+	get_page(page);			/* the page reference itself is always taken */
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv)		/* only pages with net_priv set notify the hook */
+		net_put_page_callback(page);
+	put_page(page);			/* the page reference itself is always dropped */
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upr linux-2.6.22/net/core/skbuff.c linux-2.6.22/net/core/skbuff.c
--- linux-2.6.22/net/core/skbuff.c 2007-07-09 03:32:17.000000000 +0400
+++ linux-2.6.22/net/core/skbuff.c 2007-08-07 19:12:10.000000000 +0400
@@ -262,7 +262,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_shinfo(skb)->frag_list)
@@ -590,7 +590,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -653,7 +653,7 @@ int pskb_expand_head(struct sk_buff *skb
sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_clone_fraglist(skb);
@@ -850,7 +850,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_drop_fraglist(skb);
@@ -1019,7 +1019,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1588,7 +1588,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -1963,7 +1963,7 @@ struct sk_buff *skb_segment(struct sk_bu
BUG_ON(i >= nfrags);
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upr linux-2.6.22/net/core/utils.c linux-2.6.22/net/core/utils.c
--- linux-2.6.22/net/core/utils.c 2007-07-09 03:32:17.000000000 +0400
+++ linux-2.6.22/net/core/utils.c 2007-08-23 19:44:28.000000000 +0400
@@ -25,6 +25,7 @@
#include <linux/random.h>
#include <linux/percpu.h>
#include <linux/init.h>
+#include <linux/skbuff.h>
#include <asm/byteorder.h>
#include <asm/system.h>
@@ -35,6 +36,9 @@ int net_msg_burst __read_mostly = 10;
int net_msg_warn __read_mostly = 1;
EXPORT_SYMBOL(net_msg_warn);
+net_get_page_callback_t net_get_page_callback __read_mostly;
+net_put_page_callback_t net_put_page_callback __read_mostly;
+
/*
* All net warning printk()s should be guarded by this function.
*/
@@ -292,3 +296,32 @@ out:
}
EXPORT_SYMBOL(in6_pton);
+
+/*
+ * Install the zero-copy page get/put hooks. A NULL argument never conflicts
+ * and is stored as given, so passing two NULLs clears both hooks.
+ * Returns -EBUSY, changing nothing, if either slot already holds a
+ * different non-NULL hook; returns 0 otherwise.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* A slot is busy when old and new hooks are both set and differ. */
+	int get_busy = net_get_page_callback && get_callback &&
+		       net_get_page_callback != get_callback;
+	int put_busy = net_put_page_callback && put_callback &&
+		       net_put_page_callback != put_callback;
+
+	if (get_busy || put_busy)
+		return -EBUSY;
+
+	/* No conflict: replace both hooks together. */
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+	return 0;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
+
+EXPORT_SYMBOL(net_get_page_callback);
+EXPORT_SYMBOL(net_put_page_callback);
diff -upr linux-2.6.22/net/ipv4/ip_output.c linux-2.6.22/net/ipv4/ip_output.c
--- linux-2.6.22/net/ipv4/ip_output.c 2007-07-09 03:32:17.000000000 +0400
+++ linux-2.6.22/net/ipv4/ip_output.c 2007-08-07 19:12:10.000000000 +0400
@@ -996,7 +996,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1154,7 +1154,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upr linux-2.6.22/net/ipv4/tcp.c linux-2.6.22/net/ipv4/tcp.c
--- linux-2.6.22/net/ipv4/tcp.c 2007-07-09 03:32:17.000000000 +0400
+++ linux-2.6.22/net/ipv4/tcp.c 2007-08-07 19:12:10.000000000 +0400
@@ -560,7 +560,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -764,7 +764,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -805,9 +805,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upr linux-2.6.22/net/ipv4/tcp_output.c linux-2.6.22/net/ipv4/tcp_output.c
--- linux-2.6.22/net/ipv4/tcp_output.c 2007-07-09 03:32:17.000000000 +0400
+++ linux-2.6.22/net/ipv4/tcp_output.c 2007-08-07 19:12:10.000000000 +0400
@@ -721,7 +721,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upr linux-2.6.22/net/ipv6/ip6_output.c linux-2.6.22/net/ipv6/ip6_output.c
--- linux-2.6.22/net/ipv6/ip6_output.c 2007-07-09 03:32:17.000000000 +0400
+++ linux-2.6.22/net/ipv6/ip6_output.c 2007-08-07 19:12:10.000000000 +0400
@@ -1291,7 +1291,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}

View File

@@ -1,259 +0,0 @@
diff -upkr linux-2.6.23/include/linux/mm_types.h linux-2.6.23/include/linux/mm_types.h
--- linux-2.6.23/include/linux/mm_types.h 2007-10-10 00:31:38.000000000 +0400
+++ linux-2.6.23/include/linux/mm_types.h 2007-10-10 13:42:46.000000000 +0400
@@ -78,6 +78,15 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. This field does not really belong here; struct sk_buff
+ * would be a better place for it, but that would make the code much
+ * more complicated and fragile when maintained as a separate patch,
+ * since every skb would then have to contain only pages with the same
+ * value in this field.
+ */
+ void *net_priv;
};
#endif /* _LINUX_MM_TYPES_H */
diff -upkr linux-2.6.23/include/linux/net.h linux-2.6.23/include/linux/net.h
--- linux-2.6.23/include/linux/net.h 2007-10-10 00:31:38.000000000 +0400
+++ linux-2.6.23/include/linux/net.h 2007-10-10 13:42:46.000000000 +0400
@@ -57,6 +57,7 @@ typedef enum {
#ifdef __KERNEL__
#include <linux/stringify.h>
#include <linux/random.h>
+#include <linux/mm.h>
#define SOCK_ASYNC_NOSPACE 0
#define SOCK_ASYNC_WAITDATA 1
@@ -319,5 +320,30 @@ extern int net_msg_cost;
extern int net_msg_burst;
#endif
+/* Support for notification on zero-copy TCP transfer completion */
+#define CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv != NULL)	/* tagged page: tell the hook first */
+		(*net_get_page_callback)(page);
+	get_page(page);			/* then the regular get_page() */
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv != NULL)	/* tagged page: tell the hook first */
+		(*net_put_page_callback)(page);
+	put_page(page);			/* then the regular put_page() */
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upkr linux-2.6.23/net/core/skbuff.c linux-2.6.23/net/core/skbuff.c
--- linux-2.6.23/net/core/skbuff.c 2007-10-10 00:31:38.000000000 +0400
+++ linux-2.6.23/net/core/skbuff.c 2007-10-10 13:42:46.000000000 +0400
@@ -262,7 +262,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_shinfo(skb)->frag_list)
@@ -601,7 +601,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -664,7 +664,7 @@ int pskb_expand_head(struct sk_buff *skb
sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_clone_fraglist(skb);
@@ -862,7 +862,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_drop_fraglist(skb);
@@ -1031,7 +1031,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1600,7 +1600,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -1976,7 +1976,7 @@ struct sk_buff *skb_segment(struct sk_bu
BUG_ON(i >= nfrags);
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upkr linux-2.6.23/net/core/utils.c linux-2.6.23/net/core/utils.c
--- linux-2.6.23/net/core/utils.c 2007-10-10 00:31:38.000000000 +0400
+++ linux-2.6.23/net/core/utils.c 2007-10-10 13:43:13.000000000 +0400
@@ -25,6 +25,7 @@
#include <linux/random.h>
#include <linux/percpu.h>
#include <linux/init.h>
+#include <linux/skbuff.h>
#include <net/sock.h>
#include <asm/byteorder.h>
@@ -36,6 +37,9 @@ int net_msg_burst __read_mostly = 10;
int net_msg_warn __read_mostly = 1;
EXPORT_SYMBOL(net_msg_warn);
+net_get_page_callback_t net_get_page_callback __read_mostly;
+net_put_page_callback_t net_put_page_callback __read_mostly;
+
/*
* All net warning printk()s should be guarded by this function.
*/
@@ -293,3 +297,32 @@ out:
}
EXPORT_SYMBOL(in6_pton);
+
+/*
+ * Set both page hooks. Fails with -EBUSY, changing nothing, when a slot
+ * already holds a different non-NULL hook; otherwise returns 0.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* A registered get hook may only be re-registered or cleared. */
+	if (get_callback != NULL && net_get_page_callback != NULL &&
+	    get_callback != net_get_page_callback)
+		return -EBUSY;
+
+	if (put_callback != NULL && net_put_page_callback != NULL &&
+	    put_callback != net_put_page_callback)
+		return -EBUSY;
+
+	/* Both slots are written together; NULL arguments clear them. */
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
+
+EXPORT_SYMBOL(net_get_page_callback);
+EXPORT_SYMBOL(net_put_page_callback);
diff -upkr linux-2.6.23/net/ipv4/ip_output.c linux-2.6.23/net/ipv4/ip_output.c
--- linux-2.6.23/net/ipv4/ip_output.c 2007-10-10 00:31:38.000000000 +0400
+++ linux-2.6.23/net/ipv4/ip_output.c 2007-10-10 13:42:46.000000000 +0400
@@ -999,7 +999,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1157,7 +1157,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upkr linux-2.6.23/net/ipv4/tcp.c linux-2.6.23/net/ipv4/tcp.c
--- linux-2.6.23/net/ipv4/tcp.c 2007-10-10 00:31:38.000000000 +0400
+++ linux-2.6.23/net/ipv4/tcp.c 2007-10-10 13:42:46.000000000 +0400
@@ -560,7 +560,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -765,7 +765,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -806,9 +806,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upkr linux-2.6.23/net/ipv4/tcp_output.c linux-2.6.23/net/ipv4/tcp_output.c
--- linux-2.6.23/net/ipv4/tcp_output.c 2007-10-10 00:31:38.000000000 +0400
+++ linux-2.6.23/net/ipv4/tcp_output.c 2007-10-10 13:42:46.000000000 +0400
@@ -729,7 +729,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upkr linux-2.6.23/net/ipv6/ip6_output.c linux-2.6.23/net/ipv6/ip6_output.c
--- linux-2.6.23/net/ipv6/ip6_output.c 2007-10-10 00:31:38.000000000 +0400
+++ linux-2.6.23/net/ipv6/ip6_output.c 2007-10-10 13:42:46.000000000 +0400
@@ -1295,7 +1295,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}

View File

@@ -1,259 +0,0 @@
diff -upkr linux-2.6.24/include/linux/mm_types.h linux-2.6.24/include/linux/mm_types.h
--- linux-2.6.24/include/linux/mm_types.h 2008-01-25 01:58:37.000000000 +0300
+++ linux-2.6.24/include/linux/mm_types.h 2008-01-28 11:11:16.000000000 +0300
@@ -88,6 +88,15 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. This field does not really belong here; struct sk_buff
+ * would be a better place for it, but that would make the code much
+ * more complicated and fragile when maintained as a separate patch,
+ * since every skb would then have to contain only pages with the same
+ * value in this field.
+ */
+ void *net_priv;
};
/*
diff -upkr linux-2.6.24/include/linux/net.h linux-2.6.24/include/linux/net.h
--- linux-2.6.24/include/linux/net.h 2008-01-25 01:58:37.000000000 +0300
+++ linux-2.6.24/include/linux/net.h 2008-01-28 11:11:16.000000000 +0300
@@ -58,6 +58,7 @@ typedef enum {
#ifdef __KERNEL__
#include <linux/stringify.h>
#include <linux/random.h>
+#include <linux/mm.h>
#define SOCK_ASYNC_NOSPACE 0
#define SOCK_ASYNC_WAITDATA 1
@@ -332,5 +333,30 @@ extern int net_msg_cost;
extern int net_msg_burst;
#endif
+/* Support for notification on zero-copy TCP transfer completion */
+#define CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv != NULL)	/* tagged page: tell the hook first */
+		(*net_get_page_callback)(page);
+	get_page(page);			/* then the regular get_page() */
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv != NULL)	/* tagged page: tell the hook first */
+		(*net_put_page_callback)(page);
+	put_page(page);			/* then the regular put_page() */
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upkr linux-2.6.24/net/core/skbuff.c linux-2.6.24/net/core/skbuff.c
--- linux-2.6.24/net/core/skbuff.c 2008-01-25 01:58:37.000000000 +0300
+++ linux-2.6.24/net/core/skbuff.c 2008-01-28 11:11:16.000000000 +0300
@@ -262,7 +262,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_shinfo(skb)->frag_list)
@@ -596,7 +596,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -659,7 +659,7 @@ int pskb_expand_head(struct sk_buff *skb
sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_clone_fraglist(skb);
@@ -856,7 +856,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_drop_fraglist(skb);
@@ -1025,7 +1025,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1594,7 +1594,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -1970,7 +1970,7 @@ struct sk_buff *skb_segment(struct sk_bu
BUG_ON(i >= nfrags);
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upkr linux-2.6.24/net/core/utils.c linux-2.6.24/net/core/utils.c
--- linux-2.6.24/net/core/utils.c 2008-01-25 01:58:37.000000000 +0300
+++ linux-2.6.24/net/core/utils.c 2008-01-28 11:11:16.000000000 +0300
@@ -25,6 +25,7 @@
#include <linux/random.h>
#include <linux/percpu.h>
#include <linux/init.h>
+#include <linux/skbuff.h>
#include <net/sock.h>
#include <asm/byteorder.h>
@@ -36,6 +37,9 @@ int net_msg_burst __read_mostly = 10;
int net_msg_warn __read_mostly = 1;
EXPORT_SYMBOL(net_msg_warn);
+net_get_page_callback_t net_get_page_callback __read_mostly;
+net_put_page_callback_t net_put_page_callback __read_mostly;
+
/*
* All net warning printk()s should be guarded by this function.
*/
@@ -293,3 +297,32 @@ out:
}
EXPORT_SYMBOL(in6_pton);
+
+/*
+ * Set both page hooks. Fails with -EBUSY, changing nothing, when a slot
+ * already holds a different non-NULL hook; otherwise returns 0.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* A registered get hook may only be re-registered or cleared. */
+	if (get_callback != NULL && net_get_page_callback != NULL &&
+	    get_callback != net_get_page_callback)
+		return -EBUSY;
+
+	if (put_callback != NULL && net_put_page_callback != NULL &&
+	    put_callback != net_put_page_callback)
+		return -EBUSY;
+
+	/* Both slots are written together; NULL arguments clear them. */
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
+
+EXPORT_SYMBOL(net_get_page_callback);
+EXPORT_SYMBOL(net_put_page_callback);
diff -upkr linux-2.6.24/net/ipv4/ip_output.c linux-2.6.24/net/ipv4/ip_output.c
--- linux-2.6.24/net/ipv4/ip_output.c 2008-01-25 01:58:37.000000000 +0300
+++ linux-2.6.24/net/ipv4/ip_output.c 2008-01-28 11:11:16.000000000 +0300
@@ -999,7 +999,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1157,7 +1157,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upkr linux-2.6.24/net/ipv4/tcp.c linux-2.6.24/net/ipv4/tcp.c
--- linux-2.6.24/net/ipv4/tcp.c 2008-01-25 01:58:37.000000000 +0300
+++ linux-2.6.24/net/ipv4/tcp.c 2008-01-28 11:11:16.000000000 +0300
@@ -561,7 +561,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -766,7 +766,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -807,9 +807,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upkr linux-2.6.24/net/ipv4/tcp_output.c linux-2.6.24/net/ipv4/tcp_output.c
--- linux-2.6.24/net/ipv4/tcp_output.c 2008-01-25 01:58:37.000000000 +0300
+++ linux-2.6.24/net/ipv4/tcp_output.c 2008-01-28 11:11:16.000000000 +0300
@@ -794,7 +794,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upkr linux-2.6.24/net/ipv6/ip6_output.c linux-2.6.24/net/ipv6/ip6_output.c
--- linux-2.6.24/net/ipv6/ip6_output.c 2008-01-25 01:58:37.000000000 +0300
+++ linux-2.6.24/net/ipv6/ip6_output.c 2008-01-28 11:11:16.000000000 +0300
@@ -1299,7 +1299,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}

View File

@@ -1,264 +0,0 @@
diff -upkr linux-2.6.25/include/linux/mm_types.h linux-2.6.25/include/linux/mm_types.h
--- linux-2.6.25/include/linux/mm_types.h 2008-04-17 06:49:44.000000000 +0400
+++ linux-2.6.25/include/linux/mm_types.h 2008-04-18 13:38:38.000000000 +0400
@@ -88,6 +88,17 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
+
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. This field does not really belong here; struct sk_buff
+ * would be a better place for it, but that would make the code much
+ * more complicated and fragile when maintained as a separate patch,
+ * since every skb would then have to contain only pages with the same
+ * value in this field.
+ */
+ void *net_priv;
+
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
unsigned long page_cgroup;
#endif
diff -upkr linux-2.6.25/include/linux/net.h linux-2.6.25/include/linux/net.h
--- linux-2.6.25/include/linux/net.h 2008-04-17 06:49:44.000000000 +0400
+++ linux-2.6.25/include/linux/net.h 2008-04-18 13:37:06.000000000 +0400
@@ -59,6 +59,7 @@ typedef enum {
#ifdef __KERNEL__
#include <linux/stringify.h>
#include <linux/random.h>
+#include <linux/mm.h>
#define SOCK_ASYNC_NOSPACE 0
#define SOCK_ASYNC_WAITDATA 1
@@ -341,5 +342,30 @@ extern int net_msg_cost;
extern int net_msg_burst;
#endif
+/* Support for notification on zero-copy TCP transfer completion */
+#define CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv != NULL)	/* tagged page: tell the hook first */
+		(*net_get_page_callback)(page);
+	get_page(page);			/* then the regular get_page() */
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv != NULL)	/* tagged page: tell the hook first */
+		(*net_put_page_callback)(page);
+	put_page(page);			/* then the regular put_page() */
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upkr linux-2.6.25/net/core/skbuff.c linux-2.6.25/net/core/skbuff.c
--- linux-2.6.25/net/core/skbuff.c 2008-04-17 06:49:44.000000000 +0400
+++ linux-2.6.25/net/core/skbuff.c 2008-04-18 13:37:06.000000000 +0400
@@ -297,7 +297,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_shinfo(skb)->frag_list)
@@ -631,7 +631,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -694,7 +694,7 @@ int pskb_expand_head(struct sk_buff *skb
sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_clone_fraglist(skb);
@@ -891,7 +891,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_drop_fraglist(skb);
@@ -1060,7 +1060,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1840,7 +1840,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2215,7 +2215,7 @@ struct sk_buff *skb_segment(struct sk_bu
BUG_ON(i >= nfrags);
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upkr linux-2.6.25/net/core/utils.c linux-2.6.25/net/core/utils.c
--- linux-2.6.25/net/core/utils.c 2008-04-17 06:49:44.000000000 +0400
+++ linux-2.6.25/net/core/utils.c 2008-04-18 13:39:40.000000000 +0400
@@ -25,6 +25,7 @@
#include <linux/random.h>
#include <linux/percpu.h>
#include <linux/init.h>
+#include <linux/skbuff.h>
#include <net/sock.h>
#include <asm/byteorder.h>
@@ -36,6 +37,12 @@ int net_msg_burst __read_mostly = 10;
int net_msg_warn __read_mostly = 1;
EXPORT_SYMBOL(net_msg_warn);
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
/*
* All net warning printk()s should be guarded by this function.
*/
@@ -283,6 +287,32 @@ out:
EXPORT_SYMBOL(in6_pton);
+/*
+ * Set both page hooks. Fails with -EBUSY, changing nothing, when a slot
+ * already holds a different non-NULL hook; otherwise returns 0.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* A registered get hook may only be re-registered or cleared. */
+	if (get_callback != NULL && net_get_page_callback != NULL &&
+	    get_callback != net_get_page_callback)
+		return -EBUSY;
+
+	if (put_callback != NULL && net_put_page_callback != NULL &&
+	    put_callback != net_put_page_callback)
+		return -EBUSY;
+
+	/* Both slots are written together; NULL arguments clear them. */
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
+
void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
__be32 from, __be32 to, int pseudohdr)
{
diff -upkr linux-2.6.25/net/ipv4/ip_output.c linux-2.6.25/net/ipv4/ip_output.c
--- linux-2.6.25/net/ipv4/ip_output.c 2008-04-17 06:49:44.000000000 +0400
+++ linux-2.6.25/net/ipv4/ip_output.c 2008-04-18 13:37:06.000000000 +0400
@@ -1017,7 +1017,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1175,7 +1175,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upkr linux-2.6.25/net/ipv4/tcp.c linux-2.6.25/net/ipv4/tcp.c
--- linux-2.6.25/net/ipv4/tcp.c 2008-04-17 06:49:44.000000000 +0400
+++ linux-2.6.25/net/ipv4/tcp.c 2008-04-18 13:37:06.000000000 +0400
@@ -713,7 +713,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -918,7 +918,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -959,9 +959,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upkr linux-2.6.25/net/ipv4/tcp_output.c linux-2.6.25/net/ipv4/tcp_output.c
--- linux-2.6.25/net/ipv4/tcp_output.c 2008-04-17 06:49:44.000000000 +0400
+++ linux-2.6.25/net/ipv4/tcp_output.c 2008-04-18 13:37:06.000000000 +0400
@@ -805,7 +805,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upkr linux-2.6.25/net/ipv6/ip6_output.c linux-2.6.25/net/ipv6/ip6_output.c
--- linux-2.6.25/net/ipv6/ip6_output.c 2008-04-17 06:49:44.000000000 +0400
+++ linux-2.6.25/net/ipv6/ip6_output.c 2008-04-18 13:37:06.000000000 +0400
@@ -1332,7 +1332,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}

View File

@@ -1,308 +0,0 @@
diff -upr linux-2.6.26/include/linux/mm_types.h linux-2.6.26/include/linux/mm_types.h
--- linux-2.6.26/include/linux/mm_types.h 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.26/include/linux/mm_types.h 2008-07-22 20:30:21.000000000 +0400
@@ -91,6 +91,18 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Having this field here may not look good, and struct
+ * sk_buff may seem a better place for it, but that would make the code
+ * much more complicated and fragile, since every skb would then have to
+ * contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
unsigned long page_cgroup;
#endif
diff -upr linux-2.6.26/include/linux/net.h linux-2.6.26/include/linux/net.h
--- linux-2.6.26/include/linux/net.h 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.26/include/linux/net.h 2008-07-29 20:48:07.000000000 +0400
@@ -60,6 +60,7 @@ typedef enum {
#ifdef __KERNEL__
#include <linux/stringify.h>
#include <linux/random.h>
+#include <linux/mm.h>
#define SOCK_ASYNC_NOSPACE 0
#define SOCK_ASYNC_WAITDATA 1
@@ -342,5 +343,44 @@ extern int net_msg_cost;
extern int net_msg_burst;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * See comment for net_set_get_put_page_callbacks() why those functions
+ * don't need any protection.
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv != NULL)	/* tagged page: tell the hook first */
+		(*net_get_page_callback)(page);
+	get_page(page);			/* then the regular get_page() */
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv != NULL)	/* tagged page: tell the hook first */
+		(*net_put_page_callback)(page);
+	put_page(page);			/* then the regular put_page() */
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page); /* notification support not configured: plain get_page() */
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page); /* notification support not configured: plain put_page() */
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upr linux-2.6.26/net/core/skbuff.c linux-2.6.26/net/core/skbuff.c
--- linux-2.6.26/net/core/skbuff.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.26/net/core/skbuff.c 2008-07-22 20:28:41.000000000 +0400
@@ -321,7 +321,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_shinfo(skb)->frag_list)
@@ -655,7 +655,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -718,7 +718,7 @@ int pskb_expand_head(struct sk_buff *skb
sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_clone_fraglist(skb);
@@ -987,7 +987,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_drop_fraglist(skb);
@@ -1156,7 +1156,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1944,7 +1944,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2319,7 +2319,7 @@ struct sk_buff *skb_segment(struct sk_bu
BUG_ON(i >= nfrags);
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upr linux-2.6.26/net/ipv4/ip_output.c linux-2.6.26/net/ipv4/ip_output.c
--- linux-2.6.26/net/ipv4/ip_output.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.26/net/ipv4/ip_output.c 2008-07-22 20:28:41.000000000 +0400
@@ -1009,7 +1009,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1167,7 +1167,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upr linux-2.6.26/net/ipv4/Makefile linux-2.6.26/net/ipv4/Makefile
--- linux-2.6.26/net/ipv4/Makefile 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.26/net/ipv4/Makefile 2008-07-22 20:35:05.000000000 +0400
@@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -upr linux-2.6.26/net/ipv4/tcp.c linux-2.6.26/net/ipv4/tcp.c
--- linux-2.6.26/net/ipv4/tcp.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.26/net/ipv4/tcp.c 2008-07-22 20:28:41.000000000 +0400
@@ -716,7 +716,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -921,7 +921,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -962,9 +962,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upr linux-2.6.26/net/ipv4/tcp_output.c linux-2.6.26/net/ipv4/tcp_output.c
--- linux-2.6.26/net/ipv4/tcp_output.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.26/net/ipv4/tcp_output.c 2008-07-22 20:28:41.000000000 +0400
@@ -805,7 +805,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upr linux-2.6.26/net/ipv4/tcp_zero_copy.c linux-2.6.26/net/ipv4/tcp_zero_copy.c
--- linux-2.6.26/net/ipv4/tcp_zero_copy.c 2008-07-22 20:12:35.000000000 +0400
+++ linux-2.6.26/net/ipv4/tcp_zero_copy.c 2008-07-31 21:21:13.000000000 +0400
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Register (or, when passed NULL, clear) the page get/put callbacks.
+ * Returns 0 on success, or -EBUSY if a different callback is already set.
+ * The caller must ensure that no page has a non-zero net_priv at call time;
+ * hence this function, net_get_page() and net_put_page() need no protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY; /* a different get callback is already registered */
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY; /* a different put callback is already registered */
+ goto out;
+ }
+
+ net_get_page_callback = get_callback; /* both are stored only if both checks pass */
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -upr linux-2.6.26/net/ipv6/ip6_output.c linux-2.6.26/net/ipv6/ip6_output.c
--- linux-2.6.26/net/ipv6/ip6_output.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.26/net/ipv6/ip6_output.c 2008-07-22 20:28:41.000000000 +0400
@@ -1336,7 +1336,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -upr linux-2.6.26/net/Kconfig linux-2.6.26/net/Kconfig
--- linux-2.6.26/net/Kconfig 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.26/net/Kconfig 2008-07-29 21:15:39.000000000 +0400
@@ -62,6 +62,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,308 +0,0 @@
diff -upr linux-2.6.27/include/linux/mm_types.h linux-2.6.27/include/linux/mm_types.h
--- linux-2.6.27/include/linux/mm_types.h 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.27/include/linux/mm_types.h 2008-07-22 20:30:21.000000000 +0400
@@ -92,6 +92,18 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. It might seem wrong to keep this field here rather than
+ * in struct sk_buff, but moving it there would make the code much more
+ * complicated and fragile, since every skb would then have to contain
+ * only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
unsigned long page_cgroup;
#endif
diff -upr linux-2.6.27/include/linux/net.h linux-2.6.27/include/linux/net.h
--- linux-2.6.27/include/linux/net.h 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.27/include/linux/net.h 2008-07-29 20:48:07.000000000 +0400
@@ -57,6 +57,7 @@ typedef enum {
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -354,5 +354,44 @@ extern int net_msg_cost;
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback; /* run by net_get_page() */
+extern net_put_page_callback_t net_put_page_callback; /* run by net_put_page() */
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * get_page()/put_page() wrappers that first invoke the registered callback
+ * if page->net_priv is set. No protection needed; see the comment for
+ * net_set_get_put_page_callbacks(). */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page); /* callback pointer is not NULL-checked */
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page); /* callback pointer is not NULL-checked */
+ put_page(page);
+}
+#else /* no callbacks: plain get_page()/put_page() */
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upr linux-2.6.27/net/core/skbuff.c linux-2.6.27/net/core/skbuff.c
--- linux-2.6.27/net/core/skbuff.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.27/net/core/skbuff.c 2008-07-22 20:28:41.000000000 +0400
@@ -319,7 +319,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_shinfo(skb)->frag_list)
@@ -658,7 +658,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -721,7 +721,7 @@ int pskb_expand_head(struct sk_buff *skb
sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_clone_fraglist(skb);
@@ -990,7 +990,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_drop_fraglist(skb);
@@ -1159,7 +1159,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1916,7 +1916,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2284,7 +2284,7 @@ struct sk_buff *skb_segment(struct sk_bu
BUG_ON(i >= nfrags);
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upr linux-2.6.27/net/ipv4/ip_output.c linux-2.6.27/net/ipv4/ip_output.c
--- linux-2.6.27/net/ipv4/ip_output.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.27/net/ipv4/ip_output.c 2008-07-22 20:28:41.000000000 +0400
@@ -1007,7 +1007,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1165,7 +1165,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upr linux-2.6.27/net/ipv4/Makefile linux-2.6.27/net/ipv4/Makefile
--- linux-2.6.27/net/ipv4/Makefile 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.27/net/ipv4/Makefile 2008-07-22 20:35:05.000000000 +0400
@@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -upr linux-2.6.27/net/ipv4/tcp.c linux-2.6.27/net/ipv4/tcp.c
--- linux-2.6.27/net/ipv4/tcp.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.27/net/ipv4/tcp.c 2008-07-22 20:28:41.000000000 +0400
@@ -712,7 +712,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -917,7 +917,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -958,9 +958,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upr linux-2.6.27/net/ipv4/tcp_output.c linux-2.6.27/net/ipv4/tcp_output.c
--- linux-2.6.27/net/ipv4/tcp_output.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.27/net/ipv4/tcp_output.c 2008-07-22 20:28:41.000000000 +0400
@@ -854,7 +854,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upr linux-2.6.27/net/ipv4/tcp_zero_copy.c linux-2.6.27/net/ipv4/tcp_zero_copy.c
--- linux-2.6.27/net/ipv4/tcp_zero_copy.c 2008-07-22 20:12:35.000000000 +0400
+++ linux-2.6.27/net/ipv4/tcp_zero_copy.c 2008-07-31 21:21:13.000000000 +0400
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Register (or, when passed NULL, clear) the page get/put callbacks.
+ * Returns 0 on success, or -EBUSY if a different callback is already set.
+ * The caller must ensure that no page has a non-zero net_priv at call time;
+ * hence this function, net_get_page() and net_put_page() need no protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY; /* a different get callback is already registered */
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY; /* a different put callback is already registered */
+ goto out;
+ }
+
+ net_get_page_callback = get_callback; /* both are stored only if both checks pass */
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -upr linux-2.6.27/net/ipv6/ip6_output.c linux-2.6.27/net/ipv6/ip6_output.c
--- linux-2.6.27/net/ipv6/ip6_output.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.27/net/ipv6/ip6_output.c 2008-07-22 20:28:41.000000000 +0400
@@ -1349,7 +1349,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -upr linux-2.6.27/net/Kconfig linux-2.6.27/net/Kconfig
--- linux-2.6.27/net/Kconfig 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.27/net/Kconfig 2008-07-29 21:15:39.000000000 +0400
@@ -59,6 +59,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,308 +0,0 @@
diff -upr linux-2.6.28/include/linux/mm_types.h linux-2.6.28/include/linux/mm_types.h
--- linux-2.6.28/include/linux/mm_types.h 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/include/linux/mm_types.h 2008-07-22 20:30:21.000000000 +0400
@@ -94,6 +94,18 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. It might seem wrong to keep this field here rather than
+ * in struct sk_buff, but moving it there would make the code much more
+ * complicated and fragile, since every skb would then have to contain
+ * only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -upr linux-2.6.28/include/linux/net.h linux-2.6.28/include/linux/net.h
--- linux-2.6.28/include/linux/net.h 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/include/linux/net.h 2008-07-29 20:48:07.000000000 +0400
@@ -57,6 +57,7 @@ typedef enum {
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -352,5 +352,44 @@ extern int net_msg_cost;
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback; /* run by net_get_page() */
+extern net_put_page_callback_t net_put_page_callback; /* run by net_put_page() */
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * get_page()/put_page() wrappers that first invoke the registered callback
+ * if page->net_priv is set. No protection needed; see the comment for
+ * net_set_get_put_page_callbacks(). */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page); /* callback pointer is not NULL-checked */
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page); /* callback pointer is not NULL-checked */
+ put_page(page);
+}
+#else /* no callbacks: plain get_page()/put_page() */
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upr linux-2.6.28/net/core/skbuff.c linux-2.6.28/net/core/skbuff.c
--- linux-2.6.28/net/core/skbuff.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/core/skbuff.c 2008-07-22 20:28:41.000000000 +0400
@@ -339,7 +339,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_shinfo(skb)->frag_list)
@@ -727,7 +727,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -792,7 +792,7 @@ int pskb_expand_head(struct sk_buff *skb
sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_clone_fraglist(skb);
@@ -1061,7 +1061,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_drop_fraglist(skb);
@@ -1230,7 +1230,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1987,7 +1987,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2355,7 +2355,7 @@ struct sk_buff *skb_segment(struct sk_bu
BUG_ON(i >= nfrags);
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upr linux-2.6.28/net/ipv4/ip_output.c linux-2.6.28/net/ipv4/ip_output.c
--- linux-2.6.28/net/ipv4/ip_output.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/ipv4/ip_output.c 2008-07-22 20:28:41.000000000 +0400
@@ -1008,7 +1008,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1166,7 +1166,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upr linux-2.6.28/net/ipv4/Makefile linux-2.6.28/net/ipv4/Makefile
--- linux-2.6.28/net/ipv4/Makefile 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/ipv4/Makefile 2008-07-22 20:35:05.000000000 +0400
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -upr linux-2.6.28/net/ipv4/tcp.c linux-2.6.28/net/ipv4/tcp.c
--- linux-2.6.28/net/ipv4/tcp.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/ipv4/tcp.c 2008-07-22 20:28:41.000000000 +0400
@@ -714,7 +714,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -919,7 +919,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -960,9 +960,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upr linux-2.6.28/net/ipv4/tcp_output.c linux-2.6.28/net/ipv4/tcp_output.c
--- linux-2.6.28/net/ipv4/tcp_output.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/ipv4/tcp_output.c 2008-07-22 20:28:41.000000000 +0400
@@ -871,7 +871,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upr linux-2.6.28/net/ipv4/tcp_zero_copy.c linux-2.6.28/net/ipv4/tcp_zero_copy.c
--- linux-2.6.28/net/ipv4/tcp_zero_copy.c 2008-07-22 20:12:35.000000000 +0400
+++ linux-2.6.28/net/ipv4/tcp_zero_copy.c 2008-07-31 21:21:13.000000000 +0400
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Register (or, when passed NULL, clear) the page get/put callbacks.
+ * Returns 0 on success, or -EBUSY if a different callback is already set.
+ * The caller must ensure that no page has a non-zero net_priv at call time;
+ * hence this function, net_get_page() and net_put_page() need no protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY; /* a different get callback is already registered */
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY; /* a different put callback is already registered */
+ goto out;
+ }
+
+ net_get_page_callback = get_callback; /* both are stored only if both checks pass */
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -upr linux-2.6.28/net/ipv6/ip6_output.c linux-2.6.28/net/ipv6/ip6_output.c
--- linux-2.6.28/net/ipv6/ip6_output.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/ipv6/ip6_output.c 2008-07-22 20:28:41.000000000 +0400
@@ -1362,7 +1362,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -upr linux-2.6.28/net/Kconfig linux-2.6.28/net/Kconfig
--- linux-2.6.28/net/Kconfig 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.28/net/Kconfig 2008-07-29 21:15:39.000000000 +0400
@@ -59,6 +59,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,360 +0,0 @@
diff -upkr linux-2.6.29/include/linux/mm_types.h linux-2.6.29/include/linux/mm_types.h
--- linux-2.6.29/include/linux/mm_types.h 2009-03-24 02:12:14.000000000 +0300
+++ linux-2.6.29/include/linux/mm_types.h 2009-03-25 12:13:18.000000000 +0300
@@ -94,6 +94,18 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. It might seem wrong to keep this field here rather than
+ * in struct sk_buff, but moving it there would make the code much more
+ * complicated and fragile, since every skb would then have to contain
+ * only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -upkr linux-2.6.29/include/linux/net.h linux-2.6.29/include/linux/net.h
--- linux-2.6.29/include/linux/net.h 2009-03-24 02:12:14.000000000 +0300
+++ linux-2.6.29/include/linux/net.h 2009-03-25 12:13:18.000000000 +0300
@@ -57,6 +57,7 @@ typedef enum {
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -352,5 +353,44 @@ static const struct proto_ops name##_ops
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback; /* run by net_get_page() */
+extern net_put_page_callback_t net_put_page_callback; /* run by net_put_page() */
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * get_page()/put_page() wrappers that first invoke the registered callback
+ * if page->net_priv is set. No protection needed; see the comment for
+ * net_set_get_put_page_callbacks(). */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page); /* callback pointer is not NULL-checked */
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page); /* callback pointer is not NULL-checked */
+ put_page(page);
+}
+#else /* no callbacks: plain get_page()/put_page() */
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upkr linux-2.6.29/net/core/skbuff.c linux-2.6.29/net/core/skbuff.c
--- linux-2.6.29/net/core/skbuff.c 2009-03-24 02:12:14.000000000 +0300
+++ linux-2.6.29/net/core/skbuff.c 2009-03-25 13:57:33.000000000 +0300
@@ -73,13 +73,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -327,7 +327,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_shinfo(skb)->frag_list)
@@ -716,7 +716,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -781,7 +781,7 @@ int pskb_expand_head(struct sk_buff *skb
sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_clone_fraglist(skb);
@@ -1050,7 +1050,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_drop_fraglist(skb);
@@ -1219,7 +1219,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1322,7 +1322,7 @@ fault:
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int len,
@@ -1352,7 +1352,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = len;
@@ -1977,7 +1977,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2098,7 +2098,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2120,7 +2120,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2514,7 +2514,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upkr linux-2.6.29/net/ipv4/ip_output.c linux-2.6.29/net/ipv4/ip_output.c
--- linux-2.6.29/net/ipv4/ip_output.c 2009-03-24 02:12:14.000000000 +0300
+++ linux-2.6.29/net/ipv4/ip_output.c 2009-03-25 12:13:18.000000000 +0300
@@ -1013,7 +1013,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1171,7 +1171,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upkr linux-2.6.29/net/ipv4/Makefile linux-2.6.29/net/ipv4/Makefile
--- linux-2.6.29/net/ipv4/Makefile 2009-03-24 02:12:14.000000000 +0300
+++ linux-2.6.29/net/ipv4/Makefile 2009-03-25 12:13:18.000000000 +0300
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -upkr linux-2.6.29/net/ipv4/tcp.c linux-2.6.29/net/ipv4/tcp.c
--- linux-2.6.29/net/ipv4/tcp.c 2009-03-24 02:12:14.000000000 +0300
+++ linux-2.6.29/net/ipv4/tcp.c 2009-03-25 12:13:18.000000000 +0300
@@ -720,7 +720,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -925,7 +925,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -966,9 +966,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upkr linux-2.6.29/net/ipv4/tcp_output.c linux-2.6.29/net/ipv4/tcp_output.c
--- linux-2.6.29/net/ipv4/tcp_output.c 2009-03-24 02:12:14.000000000 +0300
+++ linux-2.6.29/net/ipv4/tcp_output.c 2009-03-25 12:13:18.000000000 +0300
@@ -871,7 +871,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upkr linux-2.6.29/net/ipv4/tcp_zero_copy.c linux-2.6.29/net/ipv4/tcp_zero_copy.c
--- linux-2.6.29/net/ipv4/tcp_zero_copy.c 2009-03-25 14:03:29.000000000 +0300
+++ linux-2.6.29/net/ipv4/tcp_zero_copy.c 2009-03-25 12:13:18.000000000 +0300
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Sets both page callbacks. Returns 0 on success, or -EBUSY (changing nothing)
+ * if a non-NULL argument would replace a different callback that is already
+ * set. The caller must ensure that no page in the system has a non-zero
+ * net_priv field when this is called; hence this function, net_get_page() and
+ * net_put_page() don't need any protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -upkr linux-2.6.29/net/ipv6/ip6_output.c linux-2.6.29/net/ipv6/ip6_output.c
--- linux-2.6.29/net/ipv6/ip6_output.c 2009-03-24 02:12:14.000000000 +0300
+++ linux-2.6.29/net/ipv6/ip6_output.c 2009-03-25 12:13:18.000000000 +0300
@@ -1394,7 +1394,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -upkr linux-2.6.29/net/Kconfig linux-2.6.29/net/Kconfig
--- linux-2.6.29/net/Kconfig 2009-03-24 02:12:14.000000000 +0300
+++ linux-2.6.29/net/Kconfig 2009-03-25 12:13:18.000000000 +0300
@@ -54,6 +54,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,378 +0,0 @@
diff -upkr linux-2.6.30.1/include/linux/mm_types.h linux-2.6.30.1/include/linux/mm_types.h
--- linux-2.6.30.1/include/linux/mm_types.h 2009-06-10 07:05:27.000000000 +0400
+++ linux-2.6.30.1/include/linux/mm_types.h 2009-07-01 15:20:24.000000000 +0400
@@ -98,6 +98,18 @@ struct page {
#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
unsigned long debug_flags; /* Use atomic bitops on this */
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field in struct sk_buff might look cleaner,
+ * but it would make the code much more complicated and fragile, since
+ * every skb would then have to contain only pages with the same value
+ * in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -upkr linux-2.6.30.1/include/linux/net.h linux-2.6.30.1/include/linux/net.h
--- linux-2.6.30.1/include/linux/net.h 2009-06-10 07:05:27.000000000 +0400
+++ linux-2.6.30.1/include/linux/net.h 2009-07-01 15:20:24.000000000 +0400
@@ -57,6 +57,7 @@ typedef enum {
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -356,5 +357,44 @@ static const struct proto_ops name##_ops
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * If net_priv is non-zero, the registered callback runs before get/put_page.
+ * See net_set_get_put_page_callbacks() for why no protection is needed.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upkr linux-2.6.30.1/net/core/skbuff.c linux-2.6.30.1/net/core/skbuff.c
--- linux-2.6.30.1/net/core/skbuff.c 2009-06-10 07:05:27.000000000 +0400
+++ linux-2.6.30.1/net/core/skbuff.c 2009-07-01 15:55:08.000000000 +0400
@@ -75,13 +75,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -335,7 +335,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_shinfo(skb)->frag_list)
@@ -750,7 +750,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -816,7 +816,7 @@ int pskb_expand_head(struct sk_buff *skb
sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_clone_fraglist(skb);
@@ -1088,7 +1088,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_drop_fraglist(skb);
@@ -1257,7 +1257,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1362,7 +1362,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1386,7 +1386,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1396,7 +1396,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1417,7 +1417,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2057,7 +2057,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2179,7 +2179,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2201,7 +2201,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2600,7 +2600,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upkr linux-2.6.30.1/net/ipv4/ip_output.c linux-2.6.30.1/net/ipv4/ip_output.c
--- linux-2.6.30.1/net/ipv4/ip_output.c 2009-06-10 07:05:27.000000000 +0400
+++ linux-2.6.30.1/net/ipv4/ip_output.c 2009-07-01 15:55:08.000000000 +0400
@@ -1018,7 +1018,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1176,7 +1176,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upkr linux-2.6.30.1/net/ipv4/Makefile linux-2.6.30.1/net/ipv4/Makefile
--- linux-2.6.30.1/net/ipv4/Makefile 2009-06-10 07:05:27.000000000 +0400
+++ linux-2.6.30.1/net/ipv4/Makefile 2009-07-01 15:55:08.000000000 +0400
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -upkr linux-2.6.30.1/net/ipv4/tcp.c linux-2.6.30.1/net/ipv4/tcp.c
--- linux-2.6.30.1/net/ipv4/tcp.c 2009-06-10 07:05:27.000000000 +0400
+++ linux-2.6.30.1/net/ipv4/tcp.c 2009-07-01 15:55:08.000000000 +0400
@@ -760,7 +760,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -963,7 +963,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1004,9 +1004,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upkr linux-2.6.30.1/net/ipv4/tcp_output.c linux-2.6.30.1/net/ipv4/tcp_output.c
--- linux-2.6.30.1/net/ipv4/tcp_output.c 2009-06-10 07:05:27.000000000 +0400
+++ linux-2.6.30.1/net/ipv4/tcp_output.c 2009-07-01 15:55:08.000000000 +0400
@@ -889,7 +889,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upkr linux-2.6.30.1/net/ipv4/tcp_zero_copy.c linux-2.6.30.1/net/ipv4/tcp_zero_copy.c
--- linux-2.6.30.1/net/ipv4/tcp_zero_copy.c 2009-06-16 21:19:51.000000000 +0400
+++ linux-2.6.30.1/net/ipv4/tcp_zero_copy.c 2009-07-01 15:55:08.000000000 +0400
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Sets both page callbacks. Returns 0 on success, or -EBUSY (changing nothing)
+ * if a non-NULL argument would replace a different callback that is already
+ * set. The caller must ensure that no page in the system has a non-zero
+ * net_priv field when this is called; hence this function, net_get_page() and
+ * net_put_page() don't need any protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -upkr linux-2.6.30.1/net/ipv6/ip6_output.c linux-2.6.30.1/net/ipv6/ip6_output.c
--- linux-2.6.30.1/net/ipv6/ip6_output.c 2009-06-10 07:05:27.000000000 +0400
+++ linux-2.6.30.1/net/ipv6/ip6_output.c 2009-07-01 15:55:08.000000000 +0400
@@ -1394,7 +1394,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -upkr linux-2.6.30.1/net/Kconfig linux-2.6.30.1/net/Kconfig
--- linux-2.6.30.1/net/Kconfig 2009-06-10 07:05:27.000000000 +0400
+++ linux-2.6.30.1/net/Kconfig 2009-07-01 15:55:08.000000000 +0400
@@ -52,6 +52,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,390 +0,0 @@
diff -upkr linux-2.6.31/include/linux/mm_types.h linux-2.6.31/include/linux/mm_types.h
--- linux-2.6.31/include/linux/mm_types.h 2009-09-10 02:13:59.000000000 +0400
+++ linux-2.6.31/include/linux/mm_types.h 2009-09-23 14:17:05.000000000 +0400
@@ -106,6 +106,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field in struct sk_buff might look cleaner,
+ * but it would make the code much more complicated and fragile, since
+ * every skb would then have to contain only pages with the same value
+ * in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -upkr linux-2.6.31/include/linux/net.h linux-2.6.31/include/linux/net.h
--- linux-2.6.31/include/linux/net.h 2009-09-10 02:13:59.000000000 +0400
+++ linux-2.6.31/include/linux/net.h 2009-09-23 14:17:05.000000000 +0400
@@ -57,6 +57,7 @@ typedef enum {
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -356,5 +357,44 @@ static const struct proto_ops name##_ops
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * If net_priv is non-zero, the registered callback runs before get/put_page.
+ * See net_set_get_put_page_callbacks() for why no protection is needed.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upkr linux-2.6.31/net/core/dev.c linux-2.6.31/net/core/dev.c
--- linux-2.6.31/net/core/dev.c 2009-09-10 02:13:59.000000000 +0400
+++ linux-2.6.31/net/core/dev.c 2009-09-23 14:18:41.000000000 +0400
@@ -2474,7 +2474,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags);
diff -upkr linux-2.6.31/net/core/skbuff.c linux-2.6.31/net/core/skbuff.c
--- linux-2.6.31/net/core/skbuff.c 2009-09-10 02:13:59.000000000 +0400
+++ linux-2.6.31/net/core/skbuff.c 2009-09-23 14:17:05.000000000 +0400
@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -344,7 +344,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_has_frags(skb))
@@ -765,7 +765,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -831,7 +831,7 @@ int pskb_expand_head(struct sk_buff *skb
sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_clone_fraglist(skb);
@@ -1105,7 +1105,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_drop_fraglist(skb);
@@ -1274,7 +1274,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1375,7 +1375,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1399,7 +1399,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1409,7 +1409,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1430,7 +1430,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2060,7 +2060,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2182,7 +2182,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2204,7 +2204,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2602,7 +2602,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upkr linux-2.6.31/net/ipv4/ip_output.c linux-2.6.31/net/ipv4/ip_output.c
--- linux-2.6.31/net/ipv4/ip_output.c 2009-09-10 02:13:59.000000000 +0400
+++ linux-2.6.31/net/ipv4/ip_output.c 2009-09-23 14:17:05.000000000 +0400
@@ -1019,7 +1019,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1177,7 +1177,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upkr linux-2.6.31/net/ipv4/Makefile linux-2.6.31/net/ipv4/Makefile
--- linux-2.6.31/net/ipv4/Makefile 2009-09-10 02:13:59.000000000 +0400
+++ linux-2.6.31/net/ipv4/Makefile 2009-09-23 14:17:05.000000000 +0400
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -upkr linux-2.6.31/net/ipv4/tcp.c linux-2.6.31/net/ipv4/tcp.c
--- linux-2.6.31/net/ipv4/tcp.c 2009-09-10 02:13:59.000000000 +0400
+++ linux-2.6.31/net/ipv4/tcp.c 2009-09-23 14:17:05.000000000 +0400
@@ -762,7 +762,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -970,7 +970,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1011,9 +1011,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upkr linux-2.6.31/net/ipv4/tcp_output.c linux-2.6.31/net/ipv4/tcp_output.c
--- linux-2.6.31/net/ipv4/tcp_output.c 2009-09-10 02:13:59.000000000 +0400
+++ linux-2.6.31/net/ipv4/tcp_output.c 2009-09-23 14:17:05.000000000 +0400
@@ -890,7 +890,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upkr linux-2.6.31/net/ipv4/tcp_zero_copy.c linux-2.6.31/net/ipv4/tcp_zero_copy.c
--- linux-2.6.31/net/ipv4/tcp_zero_copy.c 2009-09-25 21:51:49.000000000 +0400
+++ linux-2.6.31/net/ipv4/tcp_zero_copy.c 2009-09-23 14:17:05.000000000 +0400
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Sets both page callbacks. Returns 0 on success, or -EBUSY (changing nothing)
+ * if a non-NULL argument would replace a different callback that is already
+ * set. The caller must ensure that no page in the system has a non-zero
+ * net_priv field when this is called; hence this function, net_get_page() and
+ * net_put_page() don't need any protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -upkr linux-2.6.31/net/ipv6/ip6_output.c linux-2.6.31/net/ipv6/ip6_output.c
--- linux-2.6.31/net/ipv6/ip6_output.c 2009-09-10 02:13:59.000000000 +0400
+++ linux-2.6.31/net/ipv6/ip6_output.c 2009-09-23 14:17:05.000000000 +0400
@@ -1394,7 +1394,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -upkr linux-2.6.31/net/Kconfig linux-2.6.31/net/Kconfig
--- linux-2.6.31/net/Kconfig 2009-09-10 02:13:59.000000000 +0400
+++ linux-2.6.31/net/Kconfig 2009-09-23 14:17:05.000000000 +0400
@@ -52,6 +52,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,390 +0,0 @@
diff -upkr linux-2.6.32.1/include/linux/mm_types.h linux-2.6.32.1/include/linux/mm_types.h
--- linux-2.6.32.1/include/linux/mm_types.h 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/include/linux/mm_types.h 2009-12-16 15:22:16.000000000 +0300
@@ -106,6 +106,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field in struct sk_buff might look cleaner,
+ * but it would make the code much more complicated and fragile, since
+ * every skb would then have to contain only pages with the same value
+ * in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -upkr linux-2.6.32.1/include/linux/net.h linux-2.6.32.1/include/linux/net.h
--- linux-2.6.32.1/include/linux/net.h 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/include/linux/net.h 2009-12-16 15:23:08.000000000 +0300
@@ -20,6 +20,7 @@
#include <linux/socket.h>
#include <asm/socket.h>
+#include <linux/mm.h>
#define NPROTO AF_MAX
@@ -361,5 +362,44 @@ static const struct proto_ops name##_ops
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * If net_priv is non-zero, the registered callback runs before get/put_page.
+ * See net_set_get_put_page_callbacks() for why no protection is needed.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upkr linux-2.6.32.1/net/core/dev.c linux-2.6.32.1/net/core/dev.c
--- linux-2.6.32.1/net/core/dev.c 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/net/core/dev.c 2009-12-16 15:22:16.000000000 +0300
@@ -2516,7 +2516,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags);
diff -upkr linux-2.6.32.1/net/core/skbuff.c linux-2.6.32.1/net/core/skbuff.c
--- linux-2.6.32.1/net/core/skbuff.c 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/net/core/skbuff.c 2009-12-16 15:22:16.000000000 +0300
@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -344,7 +344,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_has_frags(skb))
@@ -762,7 +762,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -828,7 +828,7 @@ int pskb_expand_head(struct sk_buff *skb
sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_clone_fraglist(skb);
@@ -1102,7 +1102,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_drop_fraglist(skb);
@@ -1271,7 +1271,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1372,7 +1372,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1396,7 +1396,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1406,7 +1406,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1427,7 +1427,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2057,7 +2057,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2179,7 +2179,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2201,7 +2201,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2599,7 +2599,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upkr linux-2.6.32.1/net/ipv4/ip_output.c linux-2.6.32.1/net/ipv4/ip_output.c
--- linux-2.6.32.1/net/ipv4/ip_output.c 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/net/ipv4/ip_output.c 2009-12-16 15:22:16.000000000 +0300
@@ -1020,7 +1020,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1178,7 +1178,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upkr linux-2.6.32.1/net/ipv4/Makefile linux-2.6.32.1/net/ipv4/Makefile
--- linux-2.6.32.1/net/ipv4/Makefile 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/net/ipv4/Makefile 2009-12-16 15:22:16.000000000 +0300
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -upkr linux-2.6.32.1/net/ipv4/tcp.c linux-2.6.32.1/net/ipv4/tcp.c
--- linux-2.6.32.1/net/ipv4/tcp.c 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/net/ipv4/tcp.c 2009-12-16 15:22:16.000000000 +0300
@@ -799,7 +799,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1007,7 +1007,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1048,9 +1048,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upkr linux-2.6.32.1/net/ipv4/tcp_output.c linux-2.6.32.1/net/ipv4/tcp_output.c
--- linux-2.6.32.1/net/ipv4/tcp_output.c 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/net/ipv4/tcp_output.c 2009-12-16 15:22:16.000000000 +0300
@@ -909,7 +909,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upkr linux-2.6.32.1/net/ipv4/tcp_zero_copy.c linux-2.6.32.1/net/ipv4/tcp_zero_copy.c
--- linux-2.6.32.1/net/ipv4/tcp_zero_copy.c 2009-12-08 17:41:11.000000000 +0300
+++ linux-2.6.32.1/net/ipv4/tcp_zero_copy.c 2009-12-16 15:22:16.000000000 +0300
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Register the get/put page callbacks, or clear them by passing NULL.
+ *
+ * Returns 0 on success, or -EBUSY when a different non-NULL callback is
+ * already registered; in that case nothing is changed.
+ *
+ * The caller must guarantee that no page in the system has a non-zero
+ * net_priv while this runs. That is why neither this function nor
+ * net_get_page()/net_put_page() use any locking.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* A different, already registered get callback may not be replaced. */
+	if (net_get_page_callback && get_callback &&
+	    get_callback != net_get_page_callback)
+		return -EBUSY;
+
+	/* The same rule applies to the put callback. */
+	if (net_put_page_callback && put_callback &&
+	    put_callback != net_put_page_callback)
+		return -EBUSY;
+
+	/* Install (or, with NULL arguments, clear) both callbacks. */
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -upkr linux-2.6.32.1/net/ipv6/ip6_output.c linux-2.6.32.1/net/ipv6/ip6_output.c
--- linux-2.6.32.1/net/ipv6/ip6_output.c 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/net/ipv6/ip6_output.c 2009-12-16 15:22:16.000000000 +0300
@@ -1379,7 +1379,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -upkr linux-2.6.32.1/net/Kconfig linux-2.6.32.1/net/Kconfig
--- linux-2.6.32.1/net/Kconfig 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/net/Kconfig 2009-12-16 15:22:16.000000000 +0300
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,390 +0,0 @@
diff -upkr linux-2.6.33/include/linux/mm_types.h linux-2.6.33/include/linux/mm_types.h
--- linux-2.6.33/include/linux/mm_types.h 2010-02-24 21:52:17.000000000 +0300
+++ linux-2.6.33/include/linux/mm_types.h 2010-03-01 15:42:39.000000000 +0300
@@ -106,6 +106,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field here may look wrong, and struct
+ * sk_buff may seem a better home for it, but that would make the code
+ * much more complicated and fragile, since every skb would then have
+ * to contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -upkr linux-2.6.33/include/linux/net.h linux-2.6.33/include/linux/net.h
--- linux-2.6.33/include/linux/net.h 2010-02-24 21:52:17.000000000 +0300
+++ linux-2.6.33/include/linux/net.h 2010-03-01 15:42:39.000000000 +0300
@@ -20,6 +20,7 @@
#include <linux/socket.h>
#include <asm/socket.h>
+#include <linux/mm.h>
#define NPROTO AF_MAX
@@ -284,5 +285,44 @@ extern int kernel_sock_shutdown(struct s
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * Pages with net_priv set notify the callback before the refcount change.
+ * No locking needed: see the comment at net_set_get_put_page_callbacks().
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_get_page_callback(page);
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_put_page_callback(page);
+	put_page(page);
+}
+#else /* feature disabled: plain page reference counting, no callbacks */
+static inline void net_get_page(struct page *page)
+{
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upkr linux-2.6.33/net/core/dev.c linux-2.6.33/net/core/dev.c
--- linux-2.6.33/net/core/dev.c 2010-02-24 21:52:17.000000000 +0300
+++ linux-2.6.33/net/core/dev.c 2010-03-01 15:42:39.000000000 +0300
@@ -2652,7 +2652,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags);
diff -upkr linux-2.6.33/net/core/skbuff.c linux-2.6.33/net/core/skbuff.c
--- linux-2.6.33/net/core/skbuff.c 2010-02-24 21:52:17.000000000 +0300
+++ linux-2.6.33/net/core/skbuff.c 2010-03-01 15:42:39.000000000 +0300
@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -344,7 +344,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_has_frags(skb))
@@ -765,7 +765,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -831,7 +831,7 @@ int pskb_expand_head(struct sk_buff *skb
sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_clone_fraglist(skb);
@@ -1105,7 +1105,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_drop_fraglist(skb);
@@ -1274,7 +1274,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1375,7 +1375,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1399,7 +1399,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1409,7 +1409,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1430,7 +1430,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2060,7 +2060,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2182,7 +2182,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2204,7 +2204,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2602,7 +2602,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upkr linux-2.6.33/net/ipv4/ip_output.c linux-2.6.33/net/ipv4/ip_output.c
--- linux-2.6.33/net/ipv4/ip_output.c 2010-02-24 21:52:17.000000000 +0300
+++ linux-2.6.33/net/ipv4/ip_output.c 2010-03-01 15:42:39.000000000 +0300
@@ -1023,7 +1023,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1181,7 +1181,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upkr linux-2.6.33/net/ipv4/Makefile linux-2.6.33/net/ipv4/Makefile
--- linux-2.6.33/net/ipv4/Makefile 2010-02-24 21:52:17.000000000 +0300
+++ linux-2.6.33/net/ipv4/Makefile 2010-03-01 15:42:39.000000000 +0300
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -upkr linux-2.6.33/net/ipv4/tcp.c linux-2.6.33/net/ipv4/tcp.c
--- linux-2.6.33/net/ipv4/tcp.c 2010-02-24 21:52:17.000000000 +0300
+++ linux-2.6.33/net/ipv4/tcp.c 2010-03-01 15:42:39.000000000 +0300
@@ -800,7 +800,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1008,7 +1008,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1049,9 +1049,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upkr linux-2.6.33/net/ipv4/tcp_output.c linux-2.6.33/net/ipv4/tcp_output.c
--- linux-2.6.33/net/ipv4/tcp_output.c 2010-02-24 21:52:17.000000000 +0300
+++ linux-2.6.33/net/ipv4/tcp_output.c 2010-03-01 15:42:39.000000000 +0300
@@ -1076,7 +1076,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upkr linux-2.6.33/net/ipv4/tcp_zero_copy.c linux-2.6.33/net/ipv4/tcp_zero_copy.c
--- linux-2.6.33/net/ipv4/tcp_zero_copy.c 2010-03-01 17:30:31.000000000 +0300
+++ linux-2.6.33/net/ipv4/tcp_zero_copy.c 2010-03-01 15:42:39.000000000 +0300
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Register the get/put page callbacks, or clear them by passing NULL.
+ *
+ * Returns 0 on success, or -EBUSY when a different non-NULL callback is
+ * already registered; in that case nothing is changed.
+ *
+ * The caller must guarantee that no page in the system has a non-zero
+ * net_priv while this runs. That is why neither this function nor
+ * net_get_page()/net_put_page() use any locking.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* A different, already registered get callback may not be replaced. */
+	if (net_get_page_callback && get_callback &&
+	    get_callback != net_get_page_callback)
+		return -EBUSY;
+
+	/* The same rule applies to the put callback. */
+	if (net_put_page_callback && put_callback &&
+	    put_callback != net_put_page_callback)
+		return -EBUSY;
+
+	/* Install (or, with NULL arguments, clear) both callbacks. */
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -upkr linux-2.6.33/net/ipv6/ip6_output.c linux-2.6.33/net/ipv6/ip6_output.c
--- linux-2.6.33/net/ipv6/ip6_output.c 2010-02-24 21:52:17.000000000 +0300
+++ linux-2.6.33/net/ipv6/ip6_output.c 2010-03-01 15:42:39.000000000 +0300
@@ -1378,7 +1378,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -upkr linux-2.6.33/net/Kconfig linux-2.6.33/net/Kconfig
--- linux-2.6.33/net/Kconfig 2010-02-24 21:52:17.000000000 +0300
+++ linux-2.6.33/net/Kconfig 2010-03-01 15:42:39.000000000 +0300
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,390 +0,0 @@
diff -upkr linux-2.6.34/include/linux/mm_types.h linux-2.6.34/include/linux/mm_types.h
--- linux-2.6.34/include/linux/mm_types.h 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.34/include/linux/mm_types.h 2010-05-24 14:51:40.000000000 +0400
@@ -100,6 +100,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field here may look wrong, and struct
+ * sk_buff may seem a better home for it, but that would make the code
+ * much more complicated and fragile, since every skb would then have
+ * to contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -upkr linux-2.6.34/include/linux/net.h linux-2.6.34/include/linux/net.h
--- linux-2.6.34/include/linux/net.h 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.34/include/linux/net.h 2010-05-24 14:51:40.000000000 +0400
@@ -20,6 +20,7 @@
#include <linux/socket.h>
#include <asm/socket.h>
+#include <linux/mm.h>
#define NPROTO AF_MAX
@@ -288,5 +289,44 @@ extern int kernel_sock_shutdown(struct s
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * Pages with net_priv set notify the callback before the refcount change.
+ * No locking needed: see the comment at net_set_get_put_page_callbacks().
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_get_page_callback(page);
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_put_page_callback(page);
+	put_page(page);
+}
+#else /* feature disabled: plain page reference counting, no callbacks */
+static inline void net_get_page(struct page *page)
+{
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upkr linux-2.6.34/net/core/dev.c linux-2.6.34/net/core/dev.c
--- linux-2.6.34/net/core/dev.c 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.34/net/core/dev.c 2010-05-24 14:51:40.000000000 +0400
@@ -2732,7 +2732,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags);
diff -upkr linux-2.6.34/net/core/skbuff.c linux-2.6.34/net/core/skbuff.c
--- linux-2.6.34/net/core/skbuff.c 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.34/net/core/skbuff.c 2010-05-24 14:51:40.000000000 +0400
@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -344,7 +344,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_has_frags(skb))
@@ -765,7 +765,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -831,7 +831,7 @@ int pskb_expand_head(struct sk_buff *skb
sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_clone_fraglist(skb);
@@ -1105,7 +1105,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_drop_fraglist(skb);
@@ -1274,7 +1274,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1375,7 +1375,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1399,7 +1399,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1409,7 +1409,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1430,7 +1430,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2060,7 +2060,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2182,7 +2182,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2204,7 +2204,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2602,7 +2602,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upkr linux-2.6.34/net/ipv4/ip_output.c linux-2.6.34/net/ipv4/ip_output.c
--- linux-2.6.34/net/ipv4/ip_output.c 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.34/net/ipv4/ip_output.c 2010-05-24 14:51:40.000000000 +0400
@@ -1024,7 +1024,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1182,7 +1182,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upkr linux-2.6.34/net/ipv4/Makefile linux-2.6.34/net/ipv4/Makefile
--- linux-2.6.34/net/ipv4/Makefile 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.34/net/ipv4/Makefile 2010-05-24 14:51:40.000000000 +0400
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -upkr linux-2.6.34/net/ipv4/tcp.c linux-2.6.34/net/ipv4/tcp.c
--- linux-2.6.34/net/ipv4/tcp.c 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.34/net/ipv4/tcp.c 2010-05-24 14:51:40.000000000 +0400
@@ -800,7 +800,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1009,7 +1009,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1050,9 +1050,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upkr linux-2.6.34/net/ipv4/tcp_output.c linux-2.6.34/net/ipv4/tcp_output.c
--- linux-2.6.34/net/ipv4/tcp_output.c 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.34/net/ipv4/tcp_output.c 2010-05-24 14:51:40.000000000 +0400
@@ -1084,7 +1084,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upkr linux-2.6.34/net/ipv4/tcp_zero_copy.c linux-2.6.34/net/ipv4/tcp_zero_copy.c
--- linux-2.6.34/net/ipv4/tcp_zero_copy.c 2010-03-01 17:30:31.000000000 +0300
+++ linux-2.6.34/net/ipv4/tcp_zero_copy.c 2010-05-24 14:51:40.000000000 +0400
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Install (or, with NULL arguments, clear) the global get/put page callbacks.
+ *
+ * Returns 0 on success, or -EBUSY without changing anything when a callback
+ * is already installed and a different non-NULL one is requested.
+ *
+ * The caller must guarantee that no page in the system has a non-zero
+ * net_priv field at the time of the call. For that reason neither this
+ * function nor net_get_page()/net_put_page() use any protection.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	net_get_page_callback_t cur_get = net_get_page_callback;
+	net_put_page_callback_t cur_put = net_put_page_callback;
+
+	/* An installed get callback can only be kept or cleared. */
+	if (cur_get && get_callback && cur_get != get_callback)
+		return -EBUSY;
+
+	/* The same rule applies to the put callback. */
+	if (cur_put && put_callback && cur_put != put_callback)
+		return -EBUSY;
+
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -upkr linux-2.6.34/net/ipv6/ip6_output.c linux-2.6.34/net/ipv6/ip6_output.c
--- linux-2.6.34/net/ipv6/ip6_output.c 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.34/net/ipv6/ip6_output.c 2010-05-24 14:51:40.000000000 +0400
@@ -1382,7 +1382,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -upkr linux-2.6.34/net/Kconfig linux-2.6.34/net/Kconfig
--- linux-2.6.34/net/Kconfig 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.34/net/Kconfig 2010-05-24 14:51:40.000000000 +0400
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,390 +0,0 @@
diff -upkr linux-2.6.35/include/linux/mm_types.h linux-2.6.35/include/linux/mm_types.h
--- linux-2.6.35/include/linux/mm_types.h 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.35/include/linux/mm_types.h 2010-05-24 14:51:40.000000000 +0400
@@ -100,6 +100,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field here may look wrong, and struct
+ * sk_buff may seem the better place for it, but that would make the
+ * code much more complicated and fragile, since every skb would then
+ * have to contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -upkr linux-2.6.35/include/linux/net.h linux-2.6.35/include/linux/net.h
--- linux-2.6.35/include/linux/net.h 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.35/include/linux/net.h 2010-05-24 14:51:40.000000000 +0400
@@ -20,6 +20,7 @@
#include <linux/socket.h>
#include <asm/socket.h>
+#include <linux/mm.h>
#define NPROTO AF_MAX
@@ -291,5 +292,44 @@ extern int kernel_sock_shutdown(struct s
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback);
+
+/*
+ * Run the registered callback for pages with net_priv set, then get/put.
+ * No protection: see the comment for net_set_get_put_page_callbacks().
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_get_page_callback(page);
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_put_page_callback(page);
+	put_page(page);
+}
+#else /* !CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+static inline void net_get_page(struct page *page)
+{
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upkr linux-2.6.35/net/core/dev.c linux-2.6.35/net/core/dev.c
--- linux-2.6.35/net/core/dev.c 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.35/net/core/dev.c 2010-05-24 14:51:40.000000000 +0400
@@ -3130,7 +3130,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags);
diff -upkr linux-2.6.35/net/core/skbuff.c linux-2.6.35/net/core/skbuff.c
--- linux-2.6.35/net/core/skbuff.c 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.35/net/core/skbuff.c 2010-05-24 14:51:40.000000000 +0400
@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -337,7 +337,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_has_frags(skb))
@@ -754,7 +754,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -820,7 +820,7 @@ int pskb_expand_head(struct sk_buff *skb
sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_clone_fraglist(skb);
@@ -1097,7 +1097,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_drop_fraglist(skb);
@@ -1266,7 +1266,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1367,7 +1367,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1391,7 +1391,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1401,7 +1401,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1423,7 +1423,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2056,7 +2056,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2178,7 +2178,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2200,7 +2200,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2598,7 +2598,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upkr linux-2.6.35/net/ipv4/ip_output.c linux-2.6.35/net/ipv4/ip_output.c
--- linux-2.6.35/net/ipv4/ip_output.c 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.35/net/ipv4/ip_output.c 2010-05-24 14:51:40.000000000 +0400
@@ -1035,7 +1035,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1194,7 +1194,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upkr linux-2.6.35/net/ipv4/Makefile linux-2.6.35/net/ipv4/Makefile
--- linux-2.6.35/net/ipv4/Makefile 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.35/net/ipv4/Makefile 2010-05-24 14:51:40.000000000 +0400
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -upkr linux-2.6.35/net/ipv4/tcp.c linux-2.6.35/net/ipv4/tcp.c
--- linux-2.6.35/net/ipv4/tcp.c 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.35/net/ipv4/tcp.c 2010-05-24 14:51:40.000000000 +0400
@@ -801,7 +801,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1010,7 +1010,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1051,9 +1051,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upkr linux-2.6.35/net/ipv4/tcp_output.c linux-2.6.35/net/ipv4/tcp_output.c
--- linux-2.6.35/net/ipv4/tcp_output.c 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.35/net/ipv4/tcp_output.c 2010-05-24 14:51:40.000000000 +0400
@@ -1085,7 +1085,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upkr linux-2.6.35/net/ipv4/tcp_zero_copy.c linux-2.6.35/net/ipv4/tcp_zero_copy.c
--- linux-2.6.35/net/ipv4/tcp_zero_copy.c 2010-03-01 17:30:31.000000000 +0300
+++ linux-2.6.35/net/ipv4/tcp_zero_copy.c 2010-05-24 14:51:40.000000000 +0400
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Install (or, with NULL arguments, clear) the global get/put page callbacks.
+ *
+ * Returns 0 on success, or -EBUSY without changing anything when a callback
+ * is already installed and a different non-NULL one is requested.
+ *
+ * The caller must guarantee that no page in the system has a non-zero
+ * net_priv field at the time of the call. For that reason neither this
+ * function nor net_get_page()/net_put_page() use any protection.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	net_get_page_callback_t cur_get = net_get_page_callback;
+	net_put_page_callback_t cur_put = net_put_page_callback;
+
+	/* An installed get callback can only be kept or cleared. */
+	if (cur_get && get_callback && cur_get != get_callback)
+		return -EBUSY;
+
+	/* The same rule applies to the put callback. */
+	if (cur_put && put_callback && cur_put != put_callback)
+		return -EBUSY;
+
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -upkr linux-2.6.35/net/ipv6/ip6_output.c linux-2.6.35/net/ipv6/ip6_output.c
--- linux-2.6.35/net/ipv6/ip6_output.c 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.35/net/ipv6/ip6_output.c 2010-05-24 14:51:40.000000000 +0400
@@ -1383,7 +1383,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -upkr linux-2.6.35/net/Kconfig linux-2.6.35/net/Kconfig
--- linux-2.6.35/net/Kconfig 2010-05-17 01:17:36.000000000 +0400
+++ linux-2.6.35/net/Kconfig 2010-05-24 14:51:40.000000000 +0400
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,390 +0,0 @@
diff -upkr linux-2.6.36/include/linux/mm_types.h linux-2.6.36/include/linux/mm_types.h
--- linux-2.6.36/include/linux/mm_types.h 2010-10-21 00:30:22.000000000 +0400
+++ linux-2.6.36/include/linux/mm_types.h 2010-10-26 12:01:40.651752329 +0400
@@ -100,6 +100,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field here may look wrong, and struct
+ * sk_buff may seem the better place for it, but that would make the
+ * code much more complicated and fragile, since every skb would then
+ * have to contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -upkr linux-2.6.36/include/linux/net.h linux-2.6.36/include/linux/net.h
--- linux-2.6.36/include/linux/net.h 2010-10-21 00:30:22.000000000 +0400
+++ linux-2.6.36/include/linux/net.h 2010-10-26 12:01:40.651752329 +0400
@@ -20,6 +20,7 @@
#include <linux/socket.h>
#include <asm/socket.h>
+#include <linux/mm.h>
#define NPROTO AF_MAX
@@ -291,5 +292,44 @@ extern int kernel_sock_shutdown(struct s
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback);
+
+/*
+ * Run the registered callback for pages with net_priv set, then get/put.
+ * No protection: see the comment for net_set_get_put_page_callbacks().
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_get_page_callback(page);
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_put_page_callback(page);
+	put_page(page);
+}
+#else /* !CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+static inline void net_get_page(struct page *page)
+{
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upkr linux-2.6.36/net/core/dev.c linux-2.6.36/net/core/dev.c
--- linux-2.6.36/net/core/dev.c 2010-10-21 00:30:22.000000000 +0400
+++ linux-2.6.36/net/core/dev.c 2010-10-26 12:01:40.651752329 +0400
@@ -3140,7 +3140,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
diff -upkr linux-2.6.36/net/core/skbuff.c linux-2.6.36/net/core/skbuff.c
--- linux-2.6.36/net/core/skbuff.c 2010-10-21 00:30:22.000000000 +0400
+++ linux-2.6.36/net/core/skbuff.c 2010-10-26 12:01:40.655752708 +0400
@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -337,7 +337,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_has_frags(skb))
@@ -754,7 +754,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -820,7 +820,7 @@ int pskb_expand_head(struct sk_buff *skb
offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_clone_fraglist(skb);
@@ -1097,7 +1097,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_drop_fraglist(skb);
@@ -1266,7 +1266,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1367,7 +1367,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1391,7 +1391,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1401,7 +1401,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1423,7 +1423,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2056,7 +2056,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2178,7 +2178,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2200,7 +2200,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2601,7 +2601,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upkr linux-2.6.36/net/ipv4/ip_output.c linux-2.6.36/net/ipv4/ip_output.c
--- linux-2.6.36/net/ipv4/ip_output.c 2010-10-21 00:30:22.000000000 +0400
+++ linux-2.6.36/net/ipv4/ip_output.c 2010-10-26 12:01:40.655752708 +0400
@@ -1040,7 +1040,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1199,7 +1199,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upkr linux-2.6.36/net/ipv4/Makefile linux-2.6.36/net/ipv4/Makefile
--- linux-2.6.36/net/ipv4/Makefile 2010-10-21 00:30:22.000000000 +0400
+++ linux-2.6.36/net/ipv4/Makefile 2010-10-26 12:01:40.655752708 +0400
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -upkr linux-2.6.36/net/ipv4/tcp.c linux-2.6.36/net/ipv4/tcp.c
--- linux-2.6.36/net/ipv4/tcp.c 2010-10-21 00:30:22.000000000 +0400
+++ linux-2.6.36/net/ipv4/tcp.c 2010-10-26 12:01:40.659752056 +0400
@@ -806,7 +806,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1015,7 +1015,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1056,9 +1056,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upkr linux-2.6.36/net/ipv4/tcp_output.c linux-2.6.36/net/ipv4/tcp_output.c
--- linux-2.6.36/net/ipv4/tcp_output.c 2010-10-21 00:30:22.000000000 +0400
+++ linux-2.6.36/net/ipv4/tcp_output.c 2010-10-26 12:01:40.659752056 +0400
@@ -1086,7 +1086,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upkr linux-2.6.36/net/ipv4/tcp_zero_copy.c linux-2.6.36/net/ipv4/tcp_zero_copy.c
--- linux-2.6.36/net/ipv4/tcp_zero_copy.c 2010-10-26 12:02:24.519252006 +0400
+++ linux-2.6.36/net/ipv4/tcp_zero_copy.c 2010-10-26 12:01:40.659752056 +0400
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Install (or, with NULL arguments, clear) the global get/put page callbacks.
+ *
+ * Returns 0 on success, or -EBUSY without changing anything when a callback
+ * is already installed and a different non-NULL one is requested.
+ *
+ * The caller must guarantee that no page in the system has a non-zero
+ * net_priv field at the time of the call. For that reason neither this
+ * function nor net_get_page()/net_put_page() use any protection.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	net_get_page_callback_t cur_get = net_get_page_callback;
+	net_put_page_callback_t cur_put = net_put_page_callback;
+
+	/* An installed get callback can only be kept or cleared. */
+	if (cur_get && get_callback && cur_get != get_callback)
+		return -EBUSY;
+
+	/* The same rule applies to the put callback. */
+	if (cur_put && put_callback && cur_put != put_callback)
+		return -EBUSY;
+
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -upkr linux-2.6.36/net/ipv6/ip6_output.c linux-2.6.36/net/ipv6/ip6_output.c
--- linux-2.6.36/net/ipv6/ip6_output.c 2010-10-21 00:30:22.000000000 +0400
+++ linux-2.6.36/net/ipv6/ip6_output.c 2010-10-26 12:01:40.659752056 +0400
@@ -1391,7 +1391,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -upkr linux-2.6.36/net/Kconfig linux-2.6.36/net/Kconfig
--- linux-2.6.36/net/Kconfig 2010-10-21 00:30:22.000000000 +0400
+++ linux-2.6.36/net/Kconfig 2010-10-26 12:01:40.659752056 +0400
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,390 +0,0 @@
diff -upkr linux-2.6.37/include/linux/mm_types.h linux-2.6.37/include/linux/mm_types.h
--- linux-2.6.37/include/linux/mm_types.h 2011-01-05 03:50:19.000000000 +0300
+++ linux-2.6.37/include/linux/mm_types.h 2011-01-08 16:43:27.966430897 +0300
@@ -100,6 +100,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field here may look wrong, and struct
+ * sk_buff may seem the better place for it, but that would make the
+ * code much more complicated and fragile, since every skb would then
+ * have to contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -upkr linux-2.6.37/include/linux/net.h linux-2.6.37/include/linux/net.h
--- linux-2.6.37/include/linux/net.h 2011-01-05 03:50:19.000000000 +0300
+++ linux-2.6.37/include/linux/net.h 2011-04-01 00:14:47.839036295 +0400
@@ -60,6 +60,7 @@ typedef enum {
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
#include <linux/kmemcheck.h>
#include <linux/rcupdate.h>
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -293,5 +294,44 @@ extern int kernel_sock_shutdown(struct s
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback);
+
+/*
+ * Run the registered callback for pages with net_priv set, then get/put.
+ * No protection: see the comment for net_set_get_put_page_callbacks().
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_get_page_callback(page);
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_put_page_callback(page);
+	put_page(page);
+}
+#else /* !CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+static inline void net_get_page(struct page *page)
+{
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upkr linux-2.6.37/net/core/dev.c linux-2.6.37/net/core/dev.c
--- linux-2.6.37/net/core/dev.c 2011-01-05 03:50:19.000000000 +0300
+++ linux-2.6.37/net/core/dev.c 2011-01-08 16:43:27.970432318 +0300
@@ -3225,7 +3225,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
diff -upkr linux-2.6.37/net/core/skbuff.c linux-2.6.37/net/core/skbuff.c
--- linux-2.6.37/net/core/skbuff.c 2011-01-05 03:50:19.000000000 +0300
+++ linux-2.6.37/net/core/skbuff.c 2011-01-08 16:43:59.575095480 +0300
@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -324,7 +324,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_has_frag_list(skb))
@@ -730,7 +730,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -806,7 +806,7 @@ int pskb_expand_head(struct sk_buff *skb
kfree(skb->head);
} else {
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);
@@ -1083,7 +1083,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frag_list(skb))
skb_drop_fraglist(skb);
@@ -1252,7 +1252,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1353,7 +1353,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1377,7 +1377,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1387,7 +1387,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1409,7 +1409,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2042,7 +2042,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2164,7 +2164,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2186,7 +2186,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2587,7 +2587,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upkr linux-2.6.37/net/ipv4/ip_output.c linux-2.6.37/net/ipv4/ip_output.c
--- linux-2.6.37/net/ipv4/ip_output.c 2011-01-05 03:50:19.000000000 +0300
+++ linux-2.6.37/net/ipv4/ip_output.c 2011-01-08 16:43:27.974431382 +0300
@@ -1042,7 +1042,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1201,7 +1201,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upkr linux-2.6.37/net/ipv4/Makefile linux-2.6.37/net/ipv4/Makefile
--- linux-2.6.37/net/ipv4/Makefile 2011-01-05 03:50:19.000000000 +0300
+++ linux-2.6.37/net/ipv4/Makefile 2011-01-08 16:43:27.974431382 +0300
@@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -upkr linux-2.6.37/net/ipv4/tcp.c linux-2.6.37/net/ipv4/tcp.c
--- linux-2.6.37/net/ipv4/tcp.c 2011-01-05 03:50:19.000000000 +0300
+++ linux-2.6.37/net/ipv4/tcp.c 2011-01-08 16:43:27.974431382 +0300
@@ -806,7 +806,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1015,7 +1015,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1056,9 +1056,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upkr linux-2.6.37/net/ipv4/tcp_output.c linux-2.6.37/net/ipv4/tcp_output.c
--- linux-2.6.37/net/ipv4/tcp_output.c 2011-01-05 03:50:19.000000000 +0300
+++ linux-2.6.37/net/ipv4/tcp_output.c 2011-01-08 16:43:27.974431382 +0300
@@ -1082,7 +1082,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upkr linux-2.6.37/net/ipv4/tcp_zero_copy.c linux-2.6.37/net/ipv4/tcp_zero_copy.c
--- linux-2.6.37/net/ipv4/tcp_zero_copy.c 2011-01-10 18:06:50.466106393 +0300
+++ linux-2.6.37/net/ipv4/tcp_zero_copy.c 2011-01-08 16:43:27.978431250 +0300
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * The caller of this function must ensure that, at the moment it is called,
+ * no page in the system has its net_priv field set to a non-zero value.
+ * Hence this function, as well as net_get_page() and net_put_page(), does
+ * not need any protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -upkr linux-2.6.37/net/ipv6/ip6_output.c linux-2.6.37/net/ipv6/ip6_output.c
--- linux-2.6.37/net/ipv6/ip6_output.c 2011-01-05 03:50:19.000000000 +0300
+++ linux-2.6.37/net/ipv6/ip6_output.c 2011-01-08 16:43:27.978431250 +0300
@@ -1383,7 +1383,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -upkr linux-2.6.37/net/Kconfig linux-2.6.37/net/Kconfig
--- linux-2.6.37/net/Kconfig 2011-01-05 03:50:19.000000000 +0300
+++ linux-2.6.37/net/Kconfig 2011-01-08 16:43:27.978431250 +0300
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,390 +0,0 @@
diff -upkr linux-2.6.38/include/linux/mm_types.h linux-2.6.38/include/linux/mm_types.h
--- linux-2.6.38/include/linux/mm_types.h 2011-03-15 04:20:32.000000000 +0300
+++ linux-2.6.38/include/linux/mm_types.h 2011-03-18 17:17:58.577360968 +0300
@@ -100,6 +100,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Having this field here may look wrong, and struct sk_buff
+ * may seem a better place for it, but that would make the code much more
+ * complicated and fragile, since every skb would then have to contain
+ * only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -upkr linux-2.6.38/include/linux/net.h linux-2.6.38/include/linux/net.h
--- linux-2.6.38/include/linux/net.h 2011-03-15 04:20:32.000000000 +0300
+++ linux-2.6.38/include/linux/net.h 2011-04-01 21:47:43.293184996 +0400
@@ -60,6 +60,7 @@ typedef enum {
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
#include <linux/kmemcheck.h>
#include <linux/rcupdate.h>
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -293,5 +294,44 @@ extern int kernel_sock_shutdown(struct s
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * See the comment for net_set_get_put_page_callbacks() for why these
+ * functions don't need any protection.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upkr linux-2.6.38/net/core/dev.c linux-2.6.38/net/core/dev.c
--- linux-2.6.38/net/core/dev.c 2011-03-15 04:20:32.000000000 +0300
+++ linux-2.6.38/net/core/dev.c 2011-03-18 17:47:20.257627966 +0300
@@ -3350,7 +3350,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
diff -upkr linux-2.6.38/net/core/skbuff.c linux-2.6.38/net/core/skbuff.c
--- linux-2.6.38/net/core/skbuff.c 2011-03-15 04:20:32.000000000 +0300
+++ linux-2.6.38/net/core/skbuff.c 2011-03-18 17:47:20.257627966 +0300
@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -325,7 +325,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_has_frag_list(skb))
@@ -733,7 +733,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -820,7 +820,7 @@ int pskb_expand_head(struct sk_buff *skb
kfree(skb->head);
} else {
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);
@@ -1098,7 +1098,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frag_list(skb))
skb_drop_fraglist(skb);
@@ -1267,7 +1267,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1368,7 +1368,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1392,7 +1392,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1402,7 +1402,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1424,7 +1424,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2057,7 +2057,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2179,7 +2179,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2201,7 +2201,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2602,7 +2602,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upkr linux-2.6.38/net/ipv4/ip_output.c linux-2.6.38/net/ipv4/ip_output.c
--- linux-2.6.38/net/ipv4/ip_output.c 2011-03-15 04:20:32.000000000 +0300
+++ linux-2.6.38/net/ipv4/ip_output.c 2011-03-18 17:47:20.257627966 +0300
@@ -1041,7 +1041,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1200,7 +1200,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upkr linux-2.6.38/net/ipv4/Makefile linux-2.6.38/net/ipv4/Makefile
--- linux-2.6.38/net/ipv4/Makefile 2011-03-15 04:20:32.000000000 +0300
+++ linux-2.6.38/net/ipv4/Makefile 2011-03-18 17:17:58.585356965 +0300
@@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -upkr linux-2.6.38/net/ipv4/tcp.c linux-2.6.38/net/ipv4/tcp.c
--- linux-2.6.38/net/ipv4/tcp.c 2011-03-15 04:20:32.000000000 +0300
+++ linux-2.6.38/net/ipv4/tcp.c 2011-03-18 17:47:20.257627966 +0300
@@ -806,7 +806,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1015,7 +1015,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1056,9 +1056,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upkr linux-2.6.38/net/ipv4/tcp_output.c linux-2.6.38/net/ipv4/tcp_output.c
--- linux-2.6.38/net/ipv4/tcp_output.c 2011-03-15 04:20:32.000000000 +0300
+++ linux-2.6.38/net/ipv4/tcp_output.c 2011-03-18 17:47:20.257627966 +0300
@@ -1094,7 +1094,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upkr linux-2.6.38/net/ipv4/tcp_zero_copy.c linux-2.6.38/net/ipv4/tcp_zero_copy.c
--- linux-2.6.38/net/ipv4/tcp_zero_copy.c 2011-03-21 16:53:50.845657069 +0300
+++ linux-2.6.38/net/ipv4/tcp_zero_copy.c 2011-03-18 17:47:20.257627966 +0300
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * The caller of this function must ensure that, at the moment it is called,
+ * no page in the system has its net_priv field set to a non-zero value.
+ * Hence this function, as well as net_get_page() and net_put_page(), does
+ * not need any protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -upkr linux-2.6.38/net/ipv6/ip6_output.c linux-2.6.38/net/ipv6/ip6_output.c
--- linux-2.6.38/net/ipv6/ip6_output.c 2011-03-15 04:20:32.000000000 +0300
+++ linux-2.6.38/net/ipv6/ip6_output.c 2011-03-18 17:47:20.257627966 +0300
@@ -1386,7 +1386,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -upkr linux-2.6.38/net/Kconfig linux-2.6.38/net/Kconfig
--- linux-2.6.38/net/Kconfig 2011-03-15 04:20:32.000000000 +0300
+++ linux-2.6.38/net/Kconfig 2011-03-18 17:17:58.589354968 +0300
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,390 +0,0 @@
diff -upkr linux-2.6.39/include/linux/mm_types.h linux-2.6.39/include/linux/mm_types.h
--- linux-2.6.39/include/linux/mm_types.h 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/include/linux/mm_types.h 2011-05-19 10:46:24.669812999 -0400
@@ -100,6 +100,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Having this field here may look wrong, and struct sk_buff
+ * may seem a better place for it, but that would make the code much more
+ * complicated and fragile, since every skb would then have to contain
+ * only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -upkr linux-2.6.39/include/linux/net.h linux-2.6.39/include/linux/net.h
--- linux-2.6.39/include/linux/net.h 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/include/linux/net.h 2011-05-19 10:46:24.669812999 -0400
@@ -60,6 +60,7 @@ typedef enum {
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
#include <linux/kmemcheck.h>
#include <linux/rcupdate.h>
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -294,5 +295,44 @@ extern int kernel_sock_shutdown(struct s
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * See the comment for net_set_get_put_page_callbacks() for why these
+ * functions don't need any protection.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upkr linux-2.6.39/net/core/dev.c linux-2.6.39/net/core/dev.c
--- linux-2.6.39/net/core/dev.c 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/net/core/dev.c 2011-05-19 10:46:24.669812999 -0400
@@ -3418,7 +3418,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
diff -upkr linux-2.6.39/net/core/skbuff.c linux-2.6.39/net/core/skbuff.c
--- linux-2.6.39/net/core/skbuff.c 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/net/core/skbuff.c 2011-05-19 10:46:24.669812999 -0400
@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -325,7 +325,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_has_frag_list(skb))
@@ -732,7 +732,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -819,7 +819,7 @@ int pskb_expand_head(struct sk_buff *skb
kfree(skb->head);
} else {
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);
@@ -1097,7 +1097,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frag_list(skb))
skb_drop_fraglist(skb);
@@ -1266,7 +1266,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1367,7 +1367,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1391,7 +1391,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1401,7 +1401,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1423,7 +1423,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2056,7 +2056,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2178,7 +2178,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2200,7 +2200,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2598,7 +2598,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upkr linux-2.6.39/net/ipv4/ip_output.c linux-2.6.39/net/ipv4/ip_output.c
--- linux-2.6.39/net/ipv4/ip_output.c 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/net/ipv4/ip_output.c 2011-05-19 10:47:39.565813000 -0400
@@ -985,7 +985,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1220,7 +1220,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upkr linux-2.6.39/net/ipv4/Makefile linux-2.6.39/net/ipv4/Makefile
--- linux-2.6.39/net/ipv4/Makefile 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/net/ipv4/Makefile 2011-05-19 10:46:24.669812999 -0400
@@ -48,6 +48,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -upkr linux-2.6.39/net/ipv4/tcp.c linux-2.6.39/net/ipv4/tcp.c
--- linux-2.6.39/net/ipv4/tcp.c 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/net/ipv4/tcp.c 2011-05-19 10:46:24.673813002 -0400
@@ -815,7 +815,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1021,7 +1021,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1062,9 +1062,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upkr linux-2.6.39/net/ipv4/tcp_output.c linux-2.6.39/net/ipv4/tcp_output.c
--- linux-2.6.39/net/ipv4/tcp_output.c 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/net/ipv4/tcp_output.c 2011-05-19 10:46:24.673813002 -0400
@@ -1095,7 +1095,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upkr linux-2.6.39/net/ipv4/tcp_zero_copy.c linux-2.6.39/net/ipv4/tcp_zero_copy.c
--- linux-2.6.39/net/ipv4/tcp_zero_copy.c 2011-05-19 10:44:53.685813002 -0400
+++ linux-2.6.39/net/ipv4/tcp_zero_copy.c 2011-05-19 10:46:24.673813002 -0400
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * The caller of this function must ensure that, at the moment it is called,
+ * no page in the system has its net_priv field set to a non-zero value.
+ * Hence this function, as well as net_get_page() and net_put_page(), does
+ * not need any protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -upkr linux-2.6.39/net/ipv6/ip6_output.c linux-2.6.39/net/ipv6/ip6_output.c
--- linux-2.6.39/net/ipv6/ip6_output.c 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/net/ipv6/ip6_output.c 2011-05-19 10:46:24.673813002 -0400
@@ -1444,7 +1444,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -upkr linux-2.6.39/net/Kconfig linux-2.6.39/net/Kconfig
--- linux-2.6.39/net/Kconfig 2011-05-19 00:06:34.000000000 -0400
+++ linux-2.6.39/net/Kconfig 2011-05-19 10:46:24.673813002 -0400
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,390 +0,0 @@
diff -upkr linux-3.0.0/include/linux/mm_types.h linux-3.0.0/include/linux/mm_types.h
--- linux-3.0.0/include/linux/mm_types.h 2011-07-21 22:17:23.000000000 -0400
+++ linux-3.0.0/include/linux/mm_types.h 2011-07-22 19:22:23.643231201 -0400
@@ -100,6 +100,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. This field may look misplaced here, and struct sk_buff
+ * may seem a better home for it, but moving it there would make the
+ * code much more complicated and fragile, since every skb would then
+ * have to contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
typedef unsigned long __nocast vm_flags_t;
diff -upkr linux-3.0.0/include/linux/net.h linux-3.0.0/include/linux/net.h
--- linux-3.0.0/include/linux/net.h 2011-07-21 22:17:23.000000000 -0400
+++ linux-3.0.0/include/linux/net.h 2011-07-22 19:22:23.643231201 -0400
@@ -61,6 +61,7 @@ typedef enum {
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
#include <linux/kmemcheck.h>
#include <linux/rcupdate.h>
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -289,5 +290,44 @@ extern int kernel_sock_shutdown(struct s
MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \
"-type-" __stringify(type))
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * If page->net_priv is set, call the callback before get_page()/put_page().
+ * See net_set_get_put_page_callbacks() for why no protection is needed.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upkr linux-3.0.0/net/core/dev.c linux-3.0.0/net/core/dev.c
--- linux-3.0.0/net/core/dev.c 2011-07-21 22:17:23.000000000 -0400
+++ linux-3.0.0/net/core/dev.c 2011-07-22 19:41:18.491230784 -0400
@@ -3414,7 +3414,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
diff -upkr linux-3.0.0/net/core/skbuff.c linux-3.0.0/net/core/skbuff.c
--- linux-3.0.0/net/core/skbuff.c 2011-07-21 22:17:23.000000000 -0400
+++ linux-3.0.0/net/core/skbuff.c 2011-07-22 19:41:18.491230784 -0400
@@ -77,13 +77,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -326,7 +326,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_has_frag_list(skb))
@@ -733,7 +733,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -820,7 +820,7 @@ int pskb_expand_head(struct sk_buff *skb
kfree(skb->head);
} else {
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);
@@ -1098,7 +1098,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frag_list(skb))
skb_drop_fraglist(skb);
@@ -1267,7 +1267,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1368,7 +1368,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1392,7 +1392,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1402,7 +1402,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1424,7 +1424,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2057,7 +2057,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2179,7 +2179,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2201,7 +2201,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2599,7 +2599,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upkr linux-3.0.0/net/ipv4/ip_output.c linux-3.0.0/net/ipv4/ip_output.c
--- linux-3.0.0/net/ipv4/ip_output.c 2011-07-21 22:17:23.000000000 -0400
+++ linux-3.0.0/net/ipv4/ip_output.c 2011-07-22 19:41:18.491230784 -0400
@@ -987,7 +987,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1224,7 +1224,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upkr linux-3.0.0/net/ipv4/Makefile linux-3.0.0/net/ipv4/Makefile
--- linux-3.0.0/net/ipv4/Makefile 2011-07-21 22:17:23.000000000 -0400
+++ linux-3.0.0/net/ipv4/Makefile 2011-07-22 19:22:23.647231201 -0400
@@ -48,6 +48,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -upkr linux-3.0.0/net/ipv4/tcp.c linux-3.0.0/net/ipv4/tcp.c
--- linux-3.0.0/net/ipv4/tcp.c 2011-07-21 22:17:23.000000000 -0400
+++ linux-3.0.0/net/ipv4/tcp.c 2011-07-22 19:41:18.491230784 -0400
@@ -815,7 +815,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1022,7 +1022,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1063,9 +1063,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upkr linux-3.0.0/net/ipv4/tcp_output.c linux-3.0.0/net/ipv4/tcp_output.c
--- linux-3.0.0/net/ipv4/tcp_output.c 2011-07-21 22:17:23.000000000 -0400
+++ linux-3.0.0/net/ipv4/tcp_output.c 2011-07-22 19:41:18.491230784 -0400
@@ -1095,7 +1095,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upkr linux-3.0.0/net/ipv4/tcp_zero_copy.c linux-3.0.0/net/ipv4/tcp_zero_copy.c
--- linux-3.0.0/net/ipv4/tcp_zero_copy.c 2011-07-22 19:18:28.400145288 -0400
+++ linux-3.0.0/net/ipv4/tcp_zero_copy.c 2011-07-22 19:41:18.491230784 -0400
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Sets both callbacks, or returns -EBUSY without changing either if that
+ * would replace an installed callback with a different non-NULL one. The
+ * caller must ensure no page has a non-zero net_priv field at call time;
+ * hence this function, net_get_page() and net_put_page() need no protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -upkr linux-3.0.0/net/ipv6/ip6_output.c linux-3.0.0/net/ipv6/ip6_output.c
--- linux-3.0.0/net/ipv6/ip6_output.c 2011-07-21 22:17:23.000000000 -0400
+++ linux-3.0.0/net/ipv6/ip6_output.c 2011-07-22 19:41:18.491230784 -0400
@@ -1446,7 +1446,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -upkr linux-3.0.0/net/Kconfig linux-3.0.0/net/Kconfig
--- linux-3.0.0/net/Kconfig 2011-07-21 22:17:23.000000000 -0400
+++ linux-3.0.0/net/Kconfig 2011-07-22 19:22:23.647231201 -0400
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,418 +0,0 @@
=== modified file 'linux-3.1-scst/include/linux/mm_types.h'
--- linux-3.1-orig/include/linux/mm_types.h 2011-10-26 20:34:50 +0000
+++ linux-3.1-scst/include/linux/mm_types.h 2011-10-26 20:57:41 +0000
@@ -124,6 +124,17 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. This field may look misplaced here, and struct sk_buff
+ * may seem a better home for it, but moving it there would make the
+ * code much more complicated and fragile, since every skb would then
+ * have to contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* If another subsystem starts using the double word pairing for atomic
=== modified file 'linux-3.1-scst/include/linux/net.h'
--- linux-3.1-orig/include/linux/net.h 2011-10-26 20:34:50 +0000
+++ linux-3.1-scst/include/linux/net.h 2011-10-26 20:57:41 +0000
@@ -61,6 +61,7 @@ typedef enum {
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
#include <linux/kmemcheck.h>
#include <linux/rcupdate.h>
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -289,5 +290,44 @@ extern int kernel_sock_shutdown(struct s
MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \
"-type-" __stringify(type))
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * If page->net_priv is set, call the callback before get_page()/put_page().
+ * See net_set_get_put_page_callbacks() for why no protection is needed.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
=== modified file 'linux-3.1-scst/net/Kconfig'
--- linux-3.1-orig/net/Kconfig 2011-10-26 20:34:50 +0000
+++ linux-3.1-scst/net/Kconfig 2011-10-26 20:57:41 +0000
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'linux-3.1-scst/net/core/dev.c'
--- linux-3.1-orig/net/core/dev.c 2011-10-26 20:34:50 +0000
+++ linux-3.1-scst/net/core/dev.c 2011-10-26 20:57:41 +0000
@@ -3432,7 +3432,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
=== modified file 'linux-3.1-scst/net/core/skbuff.c'
--- linux-3.1-orig/net/core/skbuff.c 2011-10-26 20:34:50 +0000
+++ linux-3.1-scst/net/core/skbuff.c 2011-10-26 20:57:41 +0000
@@ -77,13 +77,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -326,7 +326,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
/*
@@ -640,7 +640,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -655,7 +655,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
/* skb frags release userspace buffers */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
uarg->callback(uarg);
@@ -820,7 +820,7 @@ struct sk_buff *pskb_copy(struct sk_buff
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -911,7 +911,7 @@ int pskb_expand_head(struct sk_buff *skb
goto nofrags;
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);
@@ -1191,7 +1191,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frag_list(skb))
skb_drop_fraglist(skb);
@@ -1360,7 +1360,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1475,7 +1475,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1499,7 +1499,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1509,7 +1509,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1531,7 +1531,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2164,7 +2164,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2286,7 +2286,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2308,7 +2308,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2706,7 +2706,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
=== modified file 'linux-3.1-scst/net/ipv4/Makefile'
--- linux-3.1-orig/net/ipv4/Makefile 2011-10-26 20:34:50 +0000
+++ linux-3.1-scst/net/ipv4/Makefile 2011-10-26 20:57:41 +0000
@@ -48,6 +48,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
=== modified file 'linux-3.1-scst/net/ipv4/ip_output.c'
--- linux-3.1-orig/net/ipv4/ip_output.c 2011-10-26 20:34:50 +0000
+++ linux-3.1-scst/net/ipv4/ip_output.c 2011-10-26 20:57:41 +0000
@@ -994,7 +994,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1231,7 +1231,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'linux-3.1-scst/net/ipv4/tcp.c'
--- linux-3.1-orig/net/ipv4/tcp.c 2011-10-26 20:34:50 +0000
+++ linux-3.1-scst/net/ipv4/tcp.c 2011-10-26 20:57:41 +0000
@@ -815,7 +815,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1022,7 +1022,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1063,9 +1063,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
=== modified file 'linux-3.1-scst/net/ipv4/tcp_output.c'
--- linux-3.1-orig/net/ipv4/tcp_output.c 2011-10-26 20:34:50 +0000
+++ linux-3.1-scst/net/ipv4/tcp_output.c 2011-10-26 20:57:41 +0000
@@ -1095,7 +1095,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
=== added file 'net/ipv4/tcp_zero_copy.c'
--- linux-3.1-orig/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ linux-3.1-scst/net/ipv4/tcp_zero_copy.c 2011-10-26 20:57:41 +0000
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Sets both callbacks, or returns -EBUSY without changing either if that
+ * would replace an installed callback with a different non-NULL one. The
+ * caller must ensure no page has a non-zero net_priv field at call time;
+ * hence this function, net_get_page() and net_put_page() need no protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
=== modified file 'linux-3.1-scst/net/ipv6/ip6_output.c'
--- linux-3.1-orig/net/ipv6/ip6_output.c 2011-10-26 20:34:50 +0000
+++ linux-3.1-scst/net/ipv6/ip6_output.c 2011-10-26 20:57:41 +0000
@@ -1485,7 +1485,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}

View File

@@ -1,420 +0,0 @@
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 4222aff..0d2ac7d 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -130,7 +130,7 @@ static int page_chain_free(struct page *page)
struct page *tmp;
int i = 0;
page_chain_for_each_safe(page, tmp) {
- put_page(page);
+ net_put_page(page);
++i;
}
return i;
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 9e56eb4..74fe728 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -527,7 +527,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
int j;
for (j = 0; j < num_pages; j++)
- put_page(page[i + j]);
+ net_put_page(page[i + j]);
return -EFAULT;
}
truesize = size * PAGE_SIZE;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 5824971..83e0eaa 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1013,7 +1013,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
int j;
for (j = 0; j < num_pages; j++)
- put_page(page[i + j]);
+ net_put_page(page[i + j]);
return -EFAULT;
}
truesize = size * PAGE_SIZE;
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 55a62ca..dcb9fdf 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1360,7 +1360,7 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
rq->buf_info[ring_idx][i].page) {
pci_unmap_page(adapter->pdev, rxd->addr,
rxd->len, PCI_DMA_FROMDEVICE);
- put_page(rq->buf_info[ring_idx][i].page);
+ net_put_page(rq->buf_info[ring_idx][i].page);
rq->buf_info[ring_idx][i].page = NULL;
}
}
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 36efb41..019681c 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1292,7 +1292,7 @@ static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
skb->truesize += txp->size;
/* Take an extra reference to offset xen_netbk_idx_release */
- get_page(netbk->mmap_pages[pending_idx]);
+ net_get_page(netbk->mmap_pages[pending_idx]);
xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
}
}
@@ -1774,7 +1774,7 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
} while (!pending_tx_is_head(netbk, peek));
netbk->mmap_pages[pending_idx]->mapping = 0;
- put_page(netbk->mmap_pages[pending_idx]);
+ net_put_page(netbk->mmap_pages[pending_idx]);
netbk->mmap_pages[pending_idx] = NULL;
}
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 10a9a17..1a01f46 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -177,6 +177,17 @@ struct page {
#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
int _last_nid;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. This field may look misplaced here, and struct sk_buff
+ * may seem a better home for it, but moving it there would make the
+ * code much more complicated and fragile, since every skb would then
+ * have to contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
diff --git a/include/linux/net.h b/include/linux/net.h
index 65545ac..288d185 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/stringify.h>
+#include <linux/mm.h>
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
@@ -278,6 +279,45 @@ extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
extern int kernel_sock_shutdown(struct socket *sock,
enum sock_shutdown_cmd how);
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * If page->net_priv is set, call the callback before get_page()/put_page().
+ * See net_set_get_put_page_callbacks() for why no protection is needed.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ded45ec..b5c6dda 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2075,7 +2075,7 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag)
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -2098,7 +2098,7 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f)
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
diff --git a/net/Kconfig b/net/Kconfig
index 2ddc904..ec9bfbd 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -74,6 +74,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 815a224..f53c802 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -51,7 +51,7 @@ void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
for (i = 0; i < num_pages; i++) {
if (dirty)
set_page_dirty_lock(pages[i]);
- put_page(pages[i]);
+ net_put_page(pages[i]);
}
kfree(pages);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 20ee14d..e3734cf 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -427,7 +427,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
@@ -473,7 +473,7 @@ static void skb_clone_fraglist(struct sk_buff *skb)
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -793,7 +793,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1629,7 +1629,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1682,7 +1682,7 @@ static bool spd_fill_page(struct splice_pipe_desc *spd,
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
@@ -2681,7 +2681,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
copy);
frg_cnt++;
pfrag->offset += copy;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
skb->truesize += copy;
atomic_add(copy, &sk->sk_wmem_alloc);
diff --git a/net/core/sock.c b/net/core/sock.c
index 50a345e..f9fba8e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1804,7 +1804,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
}
if (pfrag->offset < pfrag->size)
return true;
- put_page(pfrag->page);
+ net_put_page(pfrag->page);
}
/* We restrict high order allocations to users that can afford to wait */
@@ -2505,7 +2505,7 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
+ net_put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 089cb9f..bc38b0e 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -52,6 +52,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6ca5873..014503d2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1006,7 +1006,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
@@ -1227,7 +1227,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1a2e249..b512ddc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -897,7 +897,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
@@ -1193,7 +1193,7 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
pfrag->offset += copy;
}
diff --git a/net/ipv4/tcp_zero_copy.c b/net/ipv4/tcp_zero_copy.c
new file mode 100644
index 0000000..99d41fa
--- /dev/null
+++ b/net/ipv4/tcp_zero_copy.c
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly; /* set by net_set_get_put_page_callbacks() */
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly; /* set by net_set_get_put_page_callbacks() */
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Set both page hooks (NULL clears one). Returns 0, or -EBUSY with nothing
+ * changed if a non-NULL hook would replace a different non-NULL one. The
+ * caller must ensure no page in the system has a non-zero net_priv at that
+ * time; hence this, net_get_page() and net_put_page() need no protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) { /* other get hook in place */
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) { /* other put hook in place */
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback; /* both checks passed: store both */
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index b98b8e0..2df0fda 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1432,7 +1432,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
--
1.8.4.5

View File

@@ -1,432 +0,0 @@
=== modified file 'drivers/block/drbd/drbd_receiver.c'
--- old/drivers/block/drbd/drbd_receiver.c 2013-07-23 02:45:53 +0000
+++ new/drivers/block/drbd/drbd_receiver.c 2013-07-23 03:30:56 +0000
@@ -130,7 +130,7 @@ static int page_chain_free(struct page *
struct page *tmp;
int i = 0;
page_chain_for_each_safe(page, tmp) {
- put_page(page);
+ net_put_page(page);
++i;
}
return i;
=== modified file 'drivers/net/macvtap.c'
--- old/drivers/net/macvtap.c 2013-07-23 02:45:53 +0000
+++ new/drivers/net/macvtap.c 2013-07-23 03:30:56 +0000
@@ -527,7 +527,7 @@ static int zerocopy_sg_from_iovec(struct
int j;
for (j = 0; j < num_pages; j++)
- put_page(page[i + j]);
+ net_put_page(page[i + j]);
return -EFAULT;
}
truesize = size * PAGE_SIZE;
=== modified file 'drivers/net/tun.c'
--- old/drivers/net/tun.c 2013-07-23 02:45:53 +0000
+++ new/drivers/net/tun.c 2013-07-23 03:30:56 +0000
@@ -1013,7 +1013,7 @@ static int zerocopy_sg_from_iovec(struct
int j;
for (j = 0; j < num_pages; j++)
- put_page(page[i + j]);
+ net_put_page(page[i + j]);
return -EFAULT;
}
truesize = size * PAGE_SIZE;
=== modified file 'drivers/net/vmxnet3/vmxnet3_drv.c'
--- old/drivers/net/vmxnet3/vmxnet3_drv.c 2013-07-23 02:45:53 +0000
+++ new/drivers/net/vmxnet3/vmxnet3_drv.c 2013-07-23 03:30:56 +0000
@@ -1360,7 +1360,7 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_que
rq->buf_info[ring_idx][i].page) {
pci_unmap_page(adapter->pdev, rxd->addr,
rxd->len, PCI_DMA_FROMDEVICE);
- put_page(rq->buf_info[ring_idx][i].page);
+ net_put_page(rq->buf_info[ring_idx][i].page);
rq->buf_info[ring_idx][i].page = NULL;
}
}
=== modified file 'drivers/net/xen-netback/netback.c'
--- old/drivers/net/xen-netback/netback.c 2013-07-23 02:45:53 +0000
+++ new/drivers/net/xen-netback/netback.c 2013-07-23 03:30:56 +0000
@@ -1258,7 +1258,7 @@ static void xen_netbk_fill_frags(struct
skb->truesize += txp->size;
/* Take an extra reference to offset xen_netbk_idx_release */
- get_page(netbk->mmap_pages[pending_idx]);
+ net_get_page(netbk->mmap_pages[pending_idx]);
xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
}
}
@@ -1740,7 +1740,7 @@ static void xen_netbk_idx_release(struct
} while (!pending_tx_is_head(netbk, peek));
netbk->mmap_pages[pending_idx]->mapping = 0;
- put_page(netbk->mmap_pages[pending_idx]);
+ net_put_page(netbk->mmap_pages[pending_idx]);
netbk->mmap_pages[pending_idx] = NULL;
}
=== modified file 'include/linux/mm_types.h'
--- old/include/linux/mm_types.h 2013-07-23 02:45:53 +0000
+++ new/include/linux/mm_types.h 2013-07-23 03:30:56 +0000
@@ -177,6 +177,17 @@ struct page {
#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
int _last_nid;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field here may look wrong, and struct sk_buff
+ * may seem a better place for it, but that would make the code
+ * much more complicated and fragile, since every skb would then have to
+ * contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
=== modified file 'include/linux/net.h'
--- old/include/linux/net.h 2013-07-23 02:45:53 +0000
+++ new/include/linux/net.h 2013-07-23 03:30:56 +0000
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/stringify.h>
+#include <linux/mm.h>
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
@@ -270,6 +271,45 @@ extern int kernel_sock_ioctl(struct sock
extern int kernel_sock_shutdown(struct socket *sock,
enum sock_shutdown_cmd how);
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * See the comment on net_set_get_put_page_callbacks() for why these
+ * functions don't need any protection.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0) /* only pages with net_priv set notify the hook */
+ net_get_page_callback(page); /* not NULL-checked; called before get_page() */
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0) /* only pages with net_priv set notify the hook */
+ net_put_page_callback(page); /* not NULL-checked; called before put_page() */
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page); /* #else branch (notification support off): no hook call */
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page); /* #else branch (notification support off): no hook call */
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
=== modified file 'include/linux/skbuff.h'
--- old/include/linux/skbuff.h 2013-07-23 02:45:53 +0000
+++ new/include/linux/skbuff.h 2013-07-23 03:30:56 +0000
@@ -2074,7 +2074,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -2097,7 +2097,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
=== modified file 'net/Kconfig'
--- old/net/Kconfig 2013-07-23 02:45:53 +0000
+++ new/net/Kconfig 2013-07-23 03:30:56 +0000
@@ -74,6 +74,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'net/ceph/pagevec.c'
--- old/net/ceph/pagevec.c 2013-07-23 02:45:53 +0000
+++ new/net/ceph/pagevec.c 2013-07-23 03:30:56 +0000
@@ -51,7 +51,7 @@ void ceph_put_page_vector(struct page **
for (i = 0; i < num_pages; i++) {
if (dirty)
set_page_dirty_lock(pages[i]);
- put_page(pages[i]);
+ net_put_page(pages[i]);
}
kfree(pages);
}
=== modified file 'net/core/skbuff.c'
--- old/net/core/skbuff.c 2013-07-23 02:45:53 +0000
+++ new/net/core/skbuff.c 2013-07-23 03:30:56 +0000
@@ -77,13 +77,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -457,7 +457,7 @@ struct sk_buff *__netdev_alloc_skb(struc
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
@@ -503,7 +503,7 @@ static void skb_clone_fraglist(struct sk
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -826,7 +826,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1662,7 +1662,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1715,7 +1715,7 @@ static bool spd_fill_page(struct splice_
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
@@ -2714,7 +2714,7 @@ int skb_append_datato_frags(struct sock
copy);
frg_cnt++;
pfrag->offset += copy;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
skb->truesize += copy;
atomic_add(copy, &sk->sk_wmem_alloc);
=== modified file 'net/core/sock.c'
--- old/net/core/sock.c 2013-07-23 02:45:53 +0000
+++ new/net/core/sock.c 2013-07-23 03:30:56 +0000
@@ -1804,7 +1804,7 @@ bool sk_page_frag_refill(struct sock *sk
}
if (pfrag->offset < pfrag->size)
return true;
- put_page(pfrag->page);
+ net_put_page(pfrag->page);
}
/* We restrict high order allocations to users that can afford to wait */
@@ -2504,7 +2504,7 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
+ net_put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
=== modified file 'net/ipv4/Makefile'
--- old/net/ipv4/Makefile 2013-07-23 02:45:53 +0000
+++ new/net/ipv4/Makefile 2013-07-23 03:30:56 +0000
@@ -52,6 +52,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
=== modified file 'net/ipv4/ip_output.c'
--- old/net/ipv4/ip_output.c 2013-07-23 02:45:53 +0000
+++ new/net/ipv4/ip_output.c 2013-07-23 03:30:56 +0000
@@ -1006,7 +1006,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
@@ -1227,7 +1227,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'net/ipv4/tcp.c'
--- old/net/ipv4/tcp.c 2013-07-23 02:45:53 +0000
+++ new/net/ipv4/tcp.c 2013-07-23 03:30:56 +0000
@@ -885,7 +885,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
@@ -1174,7 +1174,7 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
pfrag->offset += copy;
}
=== added file 'net/ipv4/tcp_zero_copy.c'
--- old/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ new/net/ipv4/tcp_zero_copy.c 2013-07-23 03:30:56 +0000
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly; /* set by net_set_get_put_page_callbacks() */
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly; /* set by net_set_get_put_page_callbacks() */
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Set both page hooks (NULL clears one). Returns 0, or -EBUSY with nothing
+ * changed if a non-NULL hook would replace a different non-NULL one. The
+ * caller must ensure no page in the system has a non-zero net_priv at that
+ * time; hence this, net_get_page() and net_put_page() need no protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) { /* other get hook in place */
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) { /* other put hook in place */
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback; /* both checks passed: store both */
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
=== modified file 'net/ipv6/ip6_output.c'
--- old/net/ipv6/ip6_output.c 2013-07-23 02:45:53 +0000
+++ new/net/ipv6/ip6_output.c 2013-07-23 03:30:56 +0000
@@ -1436,7 +1436,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,

View File

@@ -1,445 +0,0 @@
=== modified file 'drivers/block/drbd/drbd_receiver.c'
--- old/drivers/block/drbd/drbd_receiver.c 2013-09-28 00:14:38 +0000
+++ new/drivers/block/drbd/drbd_receiver.c 2013-09-28 04:15:59 +0000
@@ -130,7 +130,7 @@ static int page_chain_free(struct page *
struct page *tmp;
int i = 0;
page_chain_for_each_safe(page, tmp) {
- put_page(page);
+ net_put_page(page);
++i;
}
return i;
=== modified file 'drivers/net/macvtap.c'
--- old/drivers/net/macvtap.c 2013-09-28 00:14:38 +0000
+++ new/drivers/net/macvtap.c 2013-09-28 02:59:05 +0000
@@ -597,7 +597,7 @@ static int zerocopy_sg_from_iovec(struct
int j;
for (j = 0; j < num_pages; j++)
- put_page(page[i + j]);
+ net_put_page(page[i + j]);
return -EFAULT;
}
truesize = size * PAGE_SIZE;
=== modified file 'drivers/net/tun.c'
--- old/drivers/net/tun.c 2013-09-28 00:14:38 +0000
+++ new/drivers/net/tun.c 2013-09-28 02:59:05 +0000
@@ -1011,7 +1011,7 @@ static int zerocopy_sg_from_iovec(struct
int j;
for (j = 0; j < num_pages; j++)
- put_page(page[i + j]);
+ net_put_page(page[i + j]);
return -EFAULT;
}
truesize = size * PAGE_SIZE;
=== modified file 'drivers/net/vmxnet3/vmxnet3_drv.c'
--- old/drivers/net/vmxnet3/vmxnet3_drv.c 2013-09-28 00:14:38 +0000
+++ new/drivers/net/vmxnet3/vmxnet3_drv.c 2013-09-28 02:59:05 +0000
@@ -1360,7 +1360,7 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_que
rq->buf_info[ring_idx][i].page) {
pci_unmap_page(adapter->pdev, rxd->addr,
rxd->len, PCI_DMA_FROMDEVICE);
- put_page(rq->buf_info[ring_idx][i].page);
+ net_put_page(rq->buf_info[ring_idx][i].page);
rq->buf_info[ring_idx][i].page = NULL;
}
}
=== modified file 'drivers/net/xen-netback/netback.c'
--- old/drivers/net/xen-netback/netback.c 2013-09-28 00:14:38 +0000
+++ new/drivers/net/xen-netback/netback.c 2013-09-28 02:59:05 +0000
@@ -1265,7 +1265,7 @@ static void xen_netbk_fill_frags(struct
skb->truesize += txp->size;
/* Take an extra reference to offset xen_netbk_idx_release */
- get_page(netbk->mmap_pages[pending_idx]);
+ net_get_page(netbk->mmap_pages[pending_idx]);
xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
}
}
@@ -1747,7 +1747,7 @@ static void xen_netbk_idx_release(struct
} while (!pending_tx_is_head(netbk, peek));
netbk->mmap_pages[pending_idx]->mapping = 0;
- put_page(netbk->mmap_pages[pending_idx]);
+ net_put_page(netbk->mmap_pages[pending_idx]);
netbk->mmap_pages[pending_idx] = NULL;
}
=== modified file 'include/linux/mm_types.h'
--- old/include/linux/mm_types.h 2013-09-28 00:14:38 +0000
+++ new/include/linux/mm_types.h 2013-09-28 02:59:05 +0000
@@ -177,6 +177,17 @@ struct page {
#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
int _last_nid;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field here may look wrong, and struct sk_buff
+ * may seem a better place for it, but that would make the code
+ * much more complicated and fragile, since every skb would then have to
+ * contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
=== modified file 'include/linux/net.h'
--- old/include/linux/net.h 2013-09-28 00:14:38 +0000
+++ new/include/linux/net.h 2013-09-28 02:59:05 +0000
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/stringify.h>
+#include <linux/mm.h>
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
@@ -270,6 +271,45 @@ extern int kernel_sock_ioctl(struct sock
extern int kernel_sock_shutdown(struct socket *sock,
enum sock_shutdown_cmd how);
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * See the comment on net_set_get_put_page_callbacks() for why these
+ * functions don't need any protection.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0) /* only pages with net_priv set notify the hook */
+ net_get_page_callback(page); /* not NULL-checked; called before get_page() */
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0) /* only pages with net_priv set notify the hook */
+ net_put_page_callback(page); /* not NULL-checked; called before put_page() */
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page); /* #else branch (notification support off): no hook call */
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page); /* #else branch (notification support off): no hook call */
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
=== modified file 'include/linux/skbuff.h'
--- old/include/linux/skbuff.h 2013-09-28 00:14:38 +0000
+++ new/include/linux/skbuff.h 2013-09-28 02:59:05 +0000
@@ -1976,7 +1976,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -1999,7 +1999,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
=== modified file 'net/Kconfig'
--- old/net/Kconfig 2013-09-28 00:14:38 +0000
+++ new/net/Kconfig 2013-09-28 02:59:05 +0000
@@ -75,6 +75,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'net/ceph/pagevec.c'
--- old/net/ceph/pagevec.c 2013-09-28 00:14:38 +0000
+++ new/net/ceph/pagevec.c 2013-09-28 04:18:46 +0000
@@ -51,7 +51,7 @@ void ceph_put_page_vector(struct page **
for (i = 0; i < num_pages; i++) {
if (dirty)
set_page_dirty_lock(pages[i]);
- put_page(pages[i]);
+ net_put_page(pages[i]);
}
kfree(pages);
}
=== modified file 'net/core/skbuff.c'
--- old/net/core/skbuff.c 2013-09-28 00:14:38 +0000
+++ new/net/core/skbuff.c 2013-09-28 02:59:05 +0000
@@ -77,13 +77,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -452,7 +452,7 @@ struct sk_buff *__netdev_alloc_skb(struc
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
@@ -498,7 +498,7 @@ static void skb_clone_fraglist(struct sk
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -826,7 +826,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)page_private(head);
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1651,7 +1651,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1704,7 +1704,7 @@ static bool spd_fill_page(struct splice_
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
@@ -2708,7 +2708,7 @@ int skb_append_datato_frags(struct sock
copy);
frg_cnt++;
pfrag->offset += copy;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
skb->truesize += copy;
atomic_add(copy, &sk->sk_wmem_alloc);
=== modified file 'net/core/sock.c'
--- old/net/core/sock.c 2013-09-28 00:14:38 +0000
+++ new/net/core/sock.c 2013-09-28 02:59:05 +0000
@@ -1825,7 +1825,7 @@ bool sk_page_frag_refill(struct sock *sk
}
if (pfrag->offset < pfrag->size)
return true;
- put_page(pfrag->page);
+ net_put_page(pfrag->page);
}
/* We restrict high order allocations to users that can afford to wait */
@@ -2530,7 +2530,7 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
+ net_put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
=== modified file 'net/ipv4/Makefile'
--- old/net/ipv4/Makefile 2013-09-28 00:14:38 +0000
+++ new/net/ipv4/Makefile 2013-09-28 02:59:05 +0000
@@ -53,6 +53,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
=== modified file 'net/ipv4/ip_output.c'
--- old/net/ipv4/ip_output.c 2013-09-28 00:14:38 +0000
+++ new/net/ipv4/ip_output.c 2013-09-28 02:59:05 +0000
@@ -998,7 +998,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
@@ -1219,7 +1219,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'net/ipv4/tcp.c'
--- old/net/ipv4/tcp.c 2013-09-28 00:14:38 +0000
+++ new/net/ipv4/tcp.c 2013-09-28 02:59:05 +0000
@@ -888,7 +888,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
@@ -1184,7 +1184,7 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
pfrag->offset += copy;
}
=== added file 'net/ipv4/tcp_zero_copy.c'
--- old/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ new/net/ipv4/tcp_zero_copy.c 2013-09-28 02:59:05 +0000
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly; /* set by net_set_get_put_page_callbacks() */
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly; /* set by net_set_get_put_page_callbacks() */
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Set both page hooks (NULL clears one). Returns 0, or -EBUSY with nothing
+ * changed if a non-NULL hook would replace a different non-NULL one. The
+ * caller must ensure no page in the system has a non-zero net_priv at that
+ * time; hence this, net_get_page() and net_put_page() need no protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) { /* other get hook in place */
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) { /* other put hook in place */
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback; /* both checks passed: store both */
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
=== modified file 'net/ipv6/ip6_output.c'
--- old/net/ipv6/ip6_output.c 2013-09-28 00:14:38 +0000
+++ new/net/ipv6/ip6_output.c 2013-09-28 02:59:05 +0000
@@ -1443,7 +1443,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
=== modified file 'net/netfilter/nfnetlink_queue_core.c'
--- old/net/netfilter/nfnetlink_queue_core.c 2013-09-28 00:14:38 +0000
+++ new/net/netfilter/nfnetlink_queue_core.c 2013-09-28 02:59:05 +0000
@@ -258,7 +258,7 @@ nfqnl_zcopy(struct sk_buff *to, const st
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
__skb_fill_page_desc(to, 0, page, offset, plen);
- get_page(page);
+ net_get_page(page);
j = 1;
len -= plen;
}

View File

@@ -1,407 +0,0 @@
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index cc29cd3..ba34c70 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -130,7 +130,7 @@ static int page_chain_free(struct page *page)
struct page *tmp;
int i = 0;
page_chain_for_each_safe(page, tmp) {
- put_page(page);
+ net_put_page(page);
++i;
}
return i;
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 7e2788c..70d390c 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1369,7 +1369,7 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
rq->buf_info[ring_idx][i].page) {
dma_unmap_page(&adapter->pdev->dev, rxd->addr,
rxd->len, PCI_DMA_FROMDEVICE);
- put_page(rq->buf_info[ring_idx][i].page);
+ net_put_page(rq->buf_info[ring_idx][i].page);
rq->buf_info[ring_idx][i].page = NULL;
}
}
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 6255850..12a6f14 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1055,7 +1055,7 @@ static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
skb->truesize += txp->size;
/* Take an extra reference to offset xenvif_idx_release */
- get_page(vif->mmap_pages[pending_idx]);
+ net_get_page(vif->mmap_pages[pending_idx]);
xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
}
}
@@ -1525,7 +1525,7 @@ static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
} while (!pending_tx_is_head(vif, peek));
- put_page(vif->mmap_pages[pending_idx]);
+ net_put_page(vif->mmap_pages[pending_idx]);
vif->mmap_pages[pending_idx] = NULL;
}
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8e082f1..15f10c8 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -177,6 +177,17 @@ struct page {
#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
int _last_nid;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field here may look wrong, and struct sk_buff
+ * might seem a better home for it, but that would make the code much
+ * more complicated and fragile, since every skb would then have to
+ * contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
diff --git a/include/linux/net.h b/include/linux/net.h
index 41103f8..54c2bffb 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/stringify.h>
+#include <linux/mm.h>
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
@@ -278,6 +279,45 @@ extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
extern int kernel_sock_shutdown(struct socket *sock,
enum sock_shutdown_cmd how);
+#ifdef CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+/* Hooks for notification on zero-copy TCP transfer completion. */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback);
+
+/*
+ * Call the registered callback when page->net_priv is set, then get/put the
+ * page. See net_set_get_put_page_callbacks() for why no locking is needed.
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv)
+		net_get_page_callback(page);
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv)
+		net_put_page_callback(page);
+	put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index efa1649..5efff79 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1975,7 +1975,7 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag)
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -1998,7 +1998,7 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f)
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
diff --git a/net/Kconfig b/net/Kconfig
index b50dacc..88ed9df 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -75,6 +75,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 815a224..f53c802 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -51,7 +51,7 @@ void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
for (i = 0; i < num_pages; i++) {
if (dirty)
set_page_dirty_lock(pages[i]);
- put_page(pages[i]);
+ net_put_page(pages[i]);
}
kfree(pages);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2c7baa8..3196557 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -422,7 +422,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
@@ -468,7 +468,7 @@ static void skb_clone_fraglist(struct sk_buff *skb)
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -793,7 +793,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
if (!page) {
while (head) {
struct page *next = (struct page *)page_private(head);
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1618,7 +1618,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1671,7 +1671,7 @@ static bool spd_fill_page(struct splice_pipe_desc *spd,
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
@@ -2675,7 +2675,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
copy);
frg_cnt++;
pfrag->offset += copy;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
skb->truesize += copy;
atomic_add(copy, &sk->sk_wmem_alloc);
diff --git a/net/core/sock.c b/net/core/sock.c
index 5cec994..c756279 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1847,7 +1847,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
}
if (pfrag->offset < pfrag->size)
return true;
- put_page(pfrag->page);
+ net_put_page(pfrag->page);
}
/* We restrict high order allocations to users that can afford to wait */
@@ -2599,7 +2599,7 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
+ net_put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 4b81e91..b88113f 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -53,6 +53,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3982eab..d37f078 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1003,7 +1003,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
@@ -1224,7 +1224,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index be5246e..a349ddd 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -896,7 +896,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
@@ -1192,7 +1192,7 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
pfrag->offset += copy;
}
diff --git a/net/ipv4/tcp_zero_copy.c b/net/ipv4/tcp_zero_copy.c
new file mode 100644
index 0000000..430147e
--- /dev/null
+++ b/net/ipv4/tcp_zero_copy.c
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Install the get/put page callbacks; returns 0, or -EBUSY if a different
+ * non-NULL callback is already set. The caller must guarantee that no page
+ * in the system has a non-zero net_priv while this runs, which is why this
+ * function, net_get_page() and net_put_page() do without any locking.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* Refuse to displace a different, already installed "get" hook. */
+	if (get_callback && net_get_page_callback &&
+	    get_callback != net_get_page_callback)
+		return -EBUSY;
+
+	/* Likewise for the "put" hook. */
+	if (put_callback && net_put_page_callback &&
+	    put_callback != net_put_page_callback)
+		return -EBUSY;
+
+	/*
+	 * Both checks passed, and nothing was modified on the failure
+	 * paths above. Install the pair together; passing NULL for a
+	 * callback clears it.
+	 */
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index b6fa35e..b6f3389 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1413,7 +1413,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index ae2e5c1..2ad9e87 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -258,7 +258,7 @@ nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen)
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
__skb_fill_page_desc(to, 0, page, offset, plen);
- get_page(page);
+ net_get_page(page);
j = 1;
len -= plen;
}
--
1.8.4.5

View File

@@ -1,419 +0,0 @@
=== modified file 'drivers/block/drbd/drbd_receiver.c'
--- old/drivers/block/drbd/drbd_receiver.c 2013-11-30 00:34:22 +0000
+++ new/drivers/block/drbd/drbd_receiver.c 2013-11-30 00:55:02 +0000
@@ -130,7 +130,7 @@ static int page_chain_free(struct page *
struct page *tmp;
int i = 0;
page_chain_for_each_safe(page, tmp) {
- put_page(page);
+ net_put_page(page);
++i;
}
return i;
=== modified file 'drivers/net/vmxnet3/vmxnet3_drv.c'
--- old/drivers/net/vmxnet3/vmxnet3_drv.c 2013-11-30 00:34:22 +0000
+++ new/drivers/net/vmxnet3/vmxnet3_drv.c 2013-11-30 00:55:02 +0000
@@ -1369,7 +1369,7 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_que
rq->buf_info[ring_idx][i].page) {
dma_unmap_page(&adapter->pdev->dev, rxd->addr,
rxd->len, PCI_DMA_FROMDEVICE);
- put_page(rq->buf_info[ring_idx][i].page);
+ net_put_page(rq->buf_info[ring_idx][i].page);
rq->buf_info[ring_idx][i].page = NULL;
}
}
=== modified file 'drivers/net/xen-netback/netback.c'
--- old/drivers/net/xen-netback/netback.c 2013-11-30 00:34:22 +0000
+++ new/drivers/net/xen-netback/netback.c 2013-11-30 00:55:02 +0000
@@ -1054,7 +1054,7 @@ static void xenvif_fill_frags(struct xen
skb->truesize += txp->size;
/* Take an extra reference to offset xenvif_idx_release */
- get_page(vif->mmap_pages[pending_idx]);
+ net_get_page(vif->mmap_pages[pending_idx]);
xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
}
}
@@ -1524,7 +1524,7 @@ static void xenvif_idx_release(struct xe
} while (!pending_tx_is_head(vif, peek));
- put_page(vif->mmap_pages[pending_idx]);
+ net_put_page(vif->mmap_pages[pending_idx]);
vif->mmap_pages[pending_idx] = NULL;
}
=== modified file 'include/linux/mm_types.h'
--- old/include/linux/mm_types.h 2013-11-30 00:34:22 +0000
+++ new/include/linux/mm_types.h 2013-11-30 00:55:02 +0000
@@ -177,6 +177,17 @@ struct page {
#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
int _last_nid;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field here may look wrong, and struct sk_buff
+ * might seem a better home for it, but that would make the code much
+ * more complicated and fragile, since every skb would then have to
+ * contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
=== modified file 'include/linux/net.h'
--- old/include/linux/net.h 2013-11-30 00:34:22 +0000
+++ new/include/linux/net.h 2013-11-30 00:55:02 +0000
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/stringify.h>
+#include <linux/mm.h>
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
@@ -270,6 +271,45 @@ extern int kernel_sock_ioctl(struct sock
extern int kernel_sock_shutdown(struct socket *sock,
enum sock_shutdown_cmd how);
+#ifdef CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+/* Hooks for notification on zero-copy TCP transfer completion. */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback);
+
+/*
+ * Call the registered callback when page->net_priv is set, then get/put the
+ * page. See net_set_get_put_page_callbacks() for why no locking is needed.
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv)
+		net_get_page_callback(page);
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv)
+		net_put_page_callback(page);
+	put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
=== modified file 'include/linux/skbuff.h'
--- old/include/linux/skbuff.h 2013-11-30 00:34:22 +0000
+++ new/include/linux/skbuff.h 2013-11-30 00:55:02 +0000
@@ -1979,7 +1979,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -2002,7 +2002,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
=== modified file 'net/Kconfig'
--- old/net/Kconfig 2013-11-30 00:34:22 +0000
+++ new/net/Kconfig 2013-11-30 00:55:02 +0000
@@ -75,6 +75,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'net/ceph/pagevec.c'
--- old/net/ceph/pagevec.c 2013-11-30 00:34:22 +0000
+++ new/net/ceph/pagevec.c 2013-11-30 00:55:02 +0000
@@ -51,7 +51,7 @@ void ceph_put_page_vector(struct page **
for (i = 0; i < num_pages; i++) {
if (dirty)
set_page_dirty_lock(pages[i]);
- put_page(pages[i]);
+ net_put_page(pages[i]);
}
kfree(pages);
}
=== modified file 'net/core/skbuff.c'
--- old/net/core/skbuff.c 2013-11-30 00:34:22 +0000
+++ new/net/core/skbuff.c 2013-11-30 00:55:02 +0000
@@ -77,13 +77,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -452,7 +452,7 @@ struct sk_buff *__netdev_alloc_skb(struc
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
@@ -498,7 +498,7 @@ static void skb_clone_fraglist(struct sk
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -826,7 +826,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)page_private(head);
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1651,7 +1651,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1704,7 +1704,7 @@ static bool spd_fill_page(struct splice_
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
@@ -2708,7 +2708,7 @@ int skb_append_datato_frags(struct sock
copy);
frg_cnt++;
pfrag->offset += copy;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
skb->truesize += copy;
atomic_add(copy, &sk->sk_wmem_alloc);
=== modified file 'net/core/sock.c'
--- old/net/core/sock.c 2013-11-30 00:34:22 +0000
+++ new/net/core/sock.c 2013-11-30 00:55:02 +0000
@@ -1847,7 +1847,7 @@ bool sk_page_frag_refill(struct sock *sk
}
if (pfrag->offset < pfrag->size)
return true;
- put_page(pfrag->page);
+ net_put_page(pfrag->page);
}
/* We restrict high order allocations to users that can afford to wait */
@@ -2599,7 +2599,7 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
+ net_put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
=== modified file 'net/ipv4/Makefile'
--- old/net/ipv4/Makefile 2013-11-30 00:34:22 +0000
+++ new/net/ipv4/Makefile 2013-11-30 00:55:02 +0000
@@ -53,6 +53,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
=== modified file 'net/ipv4/ip_output.c'
--- old/net/ipv4/ip_output.c 2013-11-30 00:34:22 +0000
+++ new/net/ipv4/ip_output.c 2013-11-30 00:55:02 +0000
@@ -1003,7 +1003,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
@@ -1224,7 +1224,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'net/ipv4/tcp.c'
--- old/net/ipv4/tcp.c 2013-11-30 00:34:22 +0000
+++ new/net/ipv4/tcp.c 2013-11-30 00:55:02 +0000
@@ -902,7 +902,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
@@ -1198,7 +1198,7 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
pfrag->offset += copy;
}
=== added file 'net/ipv4/tcp_zero_copy.c'
--- old/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ new/net/ipv4/tcp_zero_copy.c 2013-11-30 00:55:02 +0000
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Install the get/put page callbacks; returns 0, or -EBUSY if a different
+ * non-NULL callback is already set. The caller must guarantee that no page
+ * in the system has a non-zero net_priv while this runs, which is why this
+ * function, net_get_page() and net_put_page() do without any locking.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* Refuse to displace a different, already installed "get" hook. */
+	if (get_callback && net_get_page_callback &&
+	    get_callback != net_get_page_callback)
+		return -EBUSY;
+
+	/* Likewise for the "put" hook. */
+	if (put_callback && net_put_page_callback &&
+	    put_callback != net_put_page_callback)
+		return -EBUSY;
+
+	/*
+	 * Both checks passed, and nothing was modified on the failure
+	 * paths above. Install the pair together; passing NULL for a
+	 * callback clears it.
+	 */
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
=== modified file 'net/ipv6/ip6_output.c'
--- old/net/ipv6/ip6_output.c 2013-11-30 00:34:22 +0000
+++ new/net/ipv6/ip6_output.c 2013-11-30 00:55:02 +0000
@@ -1412,7 +1412,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
=== modified file 'net/netfilter/nfnetlink_queue_core.c'
--- old/net/netfilter/nfnetlink_queue_core.c 2013-11-30 00:34:22 +0000
+++ new/net/netfilter/nfnetlink_queue_core.c 2013-11-30 00:55:02 +0000
@@ -258,7 +258,7 @@ nfqnl_zcopy(struct sk_buff *to, const st
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
__skb_fill_page_desc(to, 0, page, offset, plen);
- get_page(page);
+ net_get_page(page);
j = 1;
len -= plen;
}

View File

@@ -1,403 +0,0 @@
=== modified file 'drivers/block/drbd/drbd_receiver.c'
--- old/drivers/block/drbd/drbd_receiver.c 2014-02-20 05:26:12 +0000
+++ new/drivers/block/drbd/drbd_receiver.c 2014-02-20 05:35:42 +0000
@@ -130,7 +130,7 @@ static int page_chain_free(struct page *
struct page *tmp;
int i = 0;
page_chain_for_each_safe(page, tmp) {
- put_page(page);
+ net_put_page(page);
++i;
}
return i;
=== modified file 'drivers/net/vmxnet3/vmxnet3_drv.c'
--- old/drivers/net/vmxnet3/vmxnet3_drv.c 2014-02-20 05:26:12 +0000
+++ new/drivers/net/vmxnet3/vmxnet3_drv.c 2014-02-20 05:35:42 +0000
@@ -1369,7 +1369,7 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_que
rq->buf_info[ring_idx][i].page) {
dma_unmap_page(&adapter->pdev->dev, rxd->addr,
rxd->len, PCI_DMA_FROMDEVICE);
- put_page(rq->buf_info[ring_idx][i].page);
+ net_put_page(rq->buf_info[ring_idx][i].page);
rq->buf_info[ring_idx][i].page = NULL;
}
}
=== modified file 'drivers/net/xen-netback/netback.c'
--- old/drivers/net/xen-netback/netback.c 2014-02-20 05:26:12 +0000
+++ new/drivers/net/xen-netback/netback.c 2014-02-20 05:35:42 +0000
@@ -1080,7 +1080,7 @@ static void xenvif_fill_frags(struct xen
skb->truesize += txp->size;
/* Take an extra reference to offset xenvif_idx_release */
- get_page(vif->mmap_pages[pending_idx]);
+ net_get_page(vif->mmap_pages[pending_idx]);
xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
}
}
@@ -1760,7 +1760,7 @@ static void xenvif_idx_release(struct xe
} while (!pending_tx_is_head(vif, peek));
- put_page(vif->mmap_pages[pending_idx]);
+ net_put_page(vif->mmap_pages[pending_idx]);
vif->mmap_pages[pending_idx] = NULL;
}
=== modified file 'include/linux/mm_types.h'
--- old/include/linux/mm_types.h 2014-02-20 05:26:12 +0000
+++ new/include/linux/mm_types.h 2014-02-20 05:35:42 +0000
@@ -195,6 +195,17 @@ struct page {
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
int _last_cpupid;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field here may look wrong, and struct sk_buff
+ * might seem a better home for it, but that would make the code much
+ * more complicated and fragile, since every skb would then have to
+ * contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
=== modified file 'include/linux/net.h'
--- old/include/linux/net.h 2014-02-20 05:26:12 +0000
+++ new/include/linux/net.h 2014-02-20 05:35:42 +0000
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/stringify.h>
+#include <linux/mm.h>
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
@@ -295,6 +296,45 @@ int kernel_sendpage(struct socket *sock,
int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
+#ifdef CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+/* Hooks for notification on zero-copy TCP transfer completion. */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback);
+
+/*
+ * Call the registered callback when page->net_priv is set, then get/put the
+ * page. See net_set_get_put_page_callbacks() for why no locking is needed.
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv)
+		net_get_page_callback(page);
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv)
+		net_put_page_callback(page);
+	put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
=== modified file 'include/linux/skbuff.h'
--- old/include/linux/skbuff.h 2014-02-20 05:26:12 +0000
+++ new/include/linux/skbuff.h 2014-02-20 05:35:42 +0000
@@ -1974,7 +1974,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -1997,7 +1997,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
=== modified file 'net/Kconfig'
--- old/net/Kconfig 2014-02-20 05:26:12 +0000
+++ new/net/Kconfig 2014-02-20 05:35:42 +0000
@@ -75,6 +75,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'net/ceph/pagevec.c'
--- old/net/ceph/pagevec.c 2014-02-20 05:26:12 +0000
+++ new/net/ceph/pagevec.c 2014-02-20 05:35:42 +0000
@@ -51,7 +51,7 @@ void ceph_put_page_vector(struct page **
for (i = 0; i < num_pages; i++) {
if (dirty)
set_page_dirty_lock(pages[i]);
- put_page(pages[i]);
+ net_put_page(pages[i]);
}
kfree(pages);
}
=== modified file 'net/core/skbuff.c'
--- old/net/core/skbuff.c 2014-02-20 05:26:12 +0000
+++ new/net/core/skbuff.c 2014-02-20 05:35:42 +0000
@@ -422,7 +422,7 @@ struct sk_buff *__netdev_alloc_skb(struc
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
@@ -480,7 +480,7 @@ static void skb_clone_fraglist(struct sk
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -805,7 +805,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)page_private(head);
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1648,7 +1648,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1701,7 +1701,7 @@ static bool spd_fill_page(struct splice_
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
@@ -2716,7 +2716,7 @@ int skb_append_datato_frags(struct sock
copy);
frg_cnt++;
pfrag->offset += copy;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
skb->truesize += copy;
atomic_add(copy, &sk->sk_wmem_alloc);
=== modified file 'net/core/sock.c'
--- old/net/core/sock.c 2014-02-20 05:26:12 +0000
+++ new/net/core/sock.c 2014-02-20 05:35:42 +0000
@@ -1862,7 +1862,7 @@ bool skb_page_frag_refill(unsigned int s
}
if (pfrag->offset + sz <= pfrag->size)
return true;
- put_page(pfrag->page);
+ net_put_page(pfrag->page);
}
/* We restrict high order allocations to users that can afford to wait */
@@ -2624,7 +2624,7 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
+ net_put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
=== modified file 'net/ipv4/Makefile'
--- old/net/ipv4/Makefile 2014-02-20 05:26:12 +0000
+++ new/net/ipv4/Makefile 2014-02-20 05:35:42 +0000
@@ -53,6 +53,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
=== modified file 'net/ipv4/ip_output.c'
--- old/net/ipv4/ip_output.c 2014-02-20 05:26:12 +0000
+++ new/net/ipv4/ip_output.c 2014-02-20 05:35:42 +0000
@@ -1005,7 +1005,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
@@ -1231,7 +1231,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'net/ipv4/tcp.c'
--- old/net/ipv4/tcp.c 2014-02-20 05:26:12 +0000
+++ new/net/ipv4/tcp.c 2014-02-20 05:35:42 +0000
@@ -898,7 +898,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
@@ -1194,7 +1194,7 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
pfrag->offset += copy;
}
=== added file 'net/ipv4/tcp_zero_copy.c'
--- old/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ new/net/ipv4/tcp_zero_copy.c 2014-02-20 05:35:42 +0000
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * net_set_get_put_page_callbacks - install or clear the page callbacks
+ *
+ * Passing NULL for a callback clears it. Returns 0 on success, or -EBUSY,
+ * leaving both callbacks untouched, if a different non-NULL callback is
+ * already installed.
+ *
+ * The caller must ensure that no page has a non-zero net_priv when this is
+ * called; hence this function, net_get_page() and net_put_page() need no
+ * locking.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* Refuse to swap an installed get callback for a different one. */
+	if (net_get_page_callback != NULL && get_callback != NULL &&
+	    net_get_page_callback != get_callback)
+		return -EBUSY;
+
+	/* Same rule for the put callback; nothing has been modified yet. */
+	if (net_put_page_callback != NULL && put_callback != NULL &&
+	    net_put_page_callback != put_callback)
+		return -EBUSY;
+
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
=== modified file 'net/ipv6/ip6_output.c'
--- old/net/ipv6/ip6_output.c 2014-02-20 05:26:12 +0000
+++ new/net/ipv6/ip6_output.c 2014-02-20 05:35:42 +0000
@@ -1431,7 +1431,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
=== modified file 'net/netfilter/nfnetlink_queue_core.c'
--- old/net/netfilter/nfnetlink_queue_core.c 2014-02-20 05:26:12 +0000
+++ new/net/netfilter/nfnetlink_queue_core.c 2014-02-20 05:35:42 +0000
@@ -258,7 +258,7 @@ nfqnl_zcopy(struct sk_buff *to, const st
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
__skb_fill_page_desc(to, 0, page, offset, plen);
- get_page(page);
+ net_get_page(page);
j = 1;
len -= plen;
}

View File

@@ -1,419 +0,0 @@
=== modified file 'drivers/block/drbd/drbd_receiver.c'
--- old/drivers/block/drbd/drbd_receiver.c 2014-01-30 00:25:53 +0000
+++ new/drivers/block/drbd/drbd_receiver.c 2014-01-30 01:02:34 +0000
@@ -130,7 +130,7 @@ static int page_chain_free(struct page *
struct page *tmp;
int i = 0;
page_chain_for_each_safe(page, tmp) {
- put_page(page);
+ net_put_page(page);
++i;
}
return i;
=== modified file 'drivers/net/vmxnet3/vmxnet3_drv.c'
--- old/drivers/net/vmxnet3/vmxnet3_drv.c 2014-01-30 00:25:53 +0000
+++ new/drivers/net/vmxnet3/vmxnet3_drv.c 2014-01-30 01:02:34 +0000
@@ -1369,7 +1369,7 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_que
rq->buf_info[ring_idx][i].page) {
dma_unmap_page(&adapter->pdev->dev, rxd->addr,
rxd->len, PCI_DMA_FROMDEVICE);
- put_page(rq->buf_info[ring_idx][i].page);
+ net_put_page(rq->buf_info[ring_idx][i].page);
rq->buf_info[ring_idx][i].page = NULL;
}
}
=== modified file 'drivers/net/xen-netback/netback.c'
--- old/drivers/net/xen-netback/netback.c 2014-01-30 00:25:53 +0000
+++ new/drivers/net/xen-netback/netback.c 2014-01-30 01:02:34 +0000
@@ -1080,7 +1080,7 @@ static void xenvif_fill_frags(struct xen
skb->truesize += txp->size;
/* Take an extra reference to offset xenvif_idx_release */
- get_page(vif->mmap_pages[pending_idx]);
+ net_get_page(vif->mmap_pages[pending_idx]);
xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
}
}
@@ -1760,7 +1760,7 @@ static void xenvif_idx_release(struct xe
} while (!pending_tx_is_head(vif, peek));
- put_page(vif->mmap_pages[pending_idx]);
+ net_put_page(vif->mmap_pages[pending_idx]);
vif->mmap_pages[pending_idx] = NULL;
}
=== modified file 'include/linux/mm_types.h'
--- old/include/linux/mm_types.h 2014-01-30 00:25:53 +0000
+++ new/include/linux/mm_types.h 2014-01-30 01:02:34 +0000
@@ -195,6 +195,17 @@ struct page {
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
int _last_cpupid;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field here may look wrong, and struct sk_buff
+ * may seem the better place, but that would make the code much more
+ * complicated and fragile, since every skb would then have to contain
+ * only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
=== modified file 'include/linux/net.h'
--- old/include/linux/net.h 2014-01-30 00:25:53 +0000
+++ new/include/linux/net.h 2014-01-30 01:02:34 +0000
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/stringify.h>
+#include <linux/mm.h>
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
@@ -295,6 +296,45 @@ int kernel_sendpage(struct socket *sock,
int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback);
+
+/*
+ * Take/drop a page reference, first invoking the registered callback when
+ * page->net_priv is set. See net_set_get_put_page_callbacks() on locking.
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_get_page_callback(page);
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_put_page_callback(page);
+	put_page(page);
+}
+#else /* plain get_page()/put_page() wrappers when the option is off */
+static inline void net_get_page(struct page *page)
+{
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
=== modified file 'include/linux/skbuff.h'
--- old/include/linux/skbuff.h 2014-01-30 00:25:53 +0000
+++ new/include/linux/skbuff.h 2014-01-30 01:02:34 +0000
@@ -1974,7 +1974,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -1997,7 +1997,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
=== modified file 'net/Kconfig'
--- old/net/Kconfig 2014-01-30 00:25:53 +0000
+++ new/net/Kconfig 2014-01-30 01:02:34 +0000
@@ -75,6 +75,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'net/ceph/pagevec.c'
--- old/net/ceph/pagevec.c 2014-01-30 00:25:53 +0000
+++ new/net/ceph/pagevec.c 2014-01-30 01:02:34 +0000
@@ -51,7 +51,7 @@ void ceph_put_page_vector(struct page **
for (i = 0; i < num_pages; i++) {
if (dirty)
set_page_dirty_lock(pages[i]);
- put_page(pages[i]);
+ net_put_page(pages[i]);
}
kfree(pages);
}
=== modified file 'net/core/skbuff.c'
--- old/net/core/skbuff.c 2014-01-30 00:25:53 +0000
+++ new/net/core/skbuff.c 2014-01-30 01:02:34 +0000
@@ -77,13 +77,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -452,7 +452,7 @@ struct sk_buff *__netdev_alloc_skb(struc
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
@@ -510,7 +510,7 @@ static void skb_clone_fraglist(struct sk
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -835,7 +835,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)page_private(head);
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1678,7 +1678,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1731,7 +1731,7 @@ static bool spd_fill_page(struct splice_
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
@@ -2746,7 +2746,7 @@ int skb_append_datato_frags(struct sock
copy);
frg_cnt++;
pfrag->offset += copy;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
skb->truesize += copy;
atomic_add(copy, &sk->sk_wmem_alloc);
=== modified file 'net/core/sock.c'
--- old/net/core/sock.c 2014-01-30 00:25:53 +0000
+++ new/net/core/sock.c 2014-01-30 01:02:34 +0000
@@ -1862,7 +1862,7 @@ bool skb_page_frag_refill(unsigned int s
}
if (pfrag->offset + sz <= pfrag->size)
return true;
- put_page(pfrag->page);
+ net_put_page(pfrag->page);
}
/* We restrict high order allocations to users that can afford to wait */
@@ -2624,7 +2624,7 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
+ net_put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
=== modified file 'net/ipv4/Makefile'
--- old/net/ipv4/Makefile 2014-01-30 00:25:53 +0000
+++ new/net/ipv4/Makefile 2014-01-30 01:02:34 +0000
@@ -53,6 +53,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
=== modified file 'net/ipv4/ip_output.c'
--- old/net/ipv4/ip_output.c 2014-01-30 00:25:53 +0000
+++ new/net/ipv4/ip_output.c 2014-01-30 01:02:34 +0000
@@ -1005,7 +1005,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
@@ -1231,7 +1231,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'net/ipv4/tcp.c'
--- old/net/ipv4/tcp.c 2014-01-30 00:25:53 +0000
+++ new/net/ipv4/tcp.c 2014-01-30 01:02:34 +0000
@@ -898,7 +898,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
@@ -1194,7 +1194,7 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
pfrag->offset += copy;
}
=== added file 'net/ipv4/tcp_zero_copy.c'
--- old/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ new/net/ipv4/tcp_zero_copy.c 2014-01-30 01:02:34 +0000
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * net_set_get_put_page_callbacks - install or clear the page callbacks
+ *
+ * Passing NULL for a callback clears it. Returns 0 on success, or -EBUSY,
+ * leaving both callbacks untouched, if a different non-NULL callback is
+ * already installed.
+ *
+ * The caller must ensure that no page has a non-zero net_priv when this is
+ * called; hence this function, net_get_page() and net_put_page() need no
+ * locking.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* Refuse to swap an installed get callback for a different one. */
+	if (net_get_page_callback != NULL && get_callback != NULL &&
+	    net_get_page_callback != get_callback)
+		return -EBUSY;
+
+	/* Same rule for the put callback; nothing has been modified yet. */
+	if (net_put_page_callback != NULL && put_callback != NULL &&
+	    net_put_page_callback != put_callback)
+		return -EBUSY;
+
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
=== modified file 'net/ipv6/ip6_output.c'
--- old/net/ipv6/ip6_output.c 2014-01-30 00:25:53 +0000
+++ new/net/ipv6/ip6_output.c 2014-01-30 01:02:34 +0000
@@ -1431,7 +1431,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
=== modified file 'net/netfilter/nfnetlink_queue_core.c'
--- old/net/netfilter/nfnetlink_queue_core.c 2014-01-30 00:25:53 +0000
+++ new/net/netfilter/nfnetlink_queue_core.c 2014-01-30 01:02:34 +0000
@@ -258,7 +258,7 @@ nfqnl_zcopy(struct sk_buff *to, const st
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
__skb_fill_page_desc(to, 0, page, offset, plen);
- get_page(page);
+ net_get_page(page);
j = 1;
len -= plen;
}

View File

@@ -1,364 +0,0 @@
=== modified file 'drivers/block/drbd/drbd_receiver.c'
--- old/drivers/block/drbd/drbd_receiver.c 2014-04-17 22:02:06 +0000
+++ new/drivers/block/drbd/drbd_receiver.c 2014-04-17 22:48:38 +0000
@@ -130,7 +130,7 @@ static int page_chain_free(struct page *
struct page *tmp;
int i = 0;
page_chain_for_each_safe(page, tmp) {
- put_page(page);
+ net_put_page(page);
++i;
}
return i;
=== modified file 'include/linux/mm_types.h'
--- old/include/linux/mm_types.h 2014-04-17 22:02:06 +0000
+++ new/include/linux/mm_types.h 2014-04-17 22:48:38 +0000
@@ -195,6 +195,17 @@ struct page {
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
int _last_cpupid;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field here may look wrong, and struct sk_buff
+ * may seem the better place, but that would make the code much more
+ * complicated and fragile, since every skb would then have to contain
+ * only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
=== modified file 'include/linux/net.h'
--- old/include/linux/net.h 2014-04-17 22:02:06 +0000
+++ new/include/linux/net.h 2014-04-17 22:48:38 +0000
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/stringify.h>
+#include <linux/mm.h>
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
@@ -292,6 +293,45 @@ int kernel_sendpage(struct socket *sock,
int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback);
+
+/*
+ * Take/drop a page reference, first invoking the registered callback when
+ * page->net_priv is set. See net_set_get_put_page_callbacks() on locking.
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_get_page_callback(page);
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_put_page_callback(page);
+	put_page(page);
+}
+#else /* plain get_page()/put_page() wrappers when the option is off */
+static inline void net_get_page(struct page *page)
+{
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
=== modified file 'include/linux/skbuff.h'
--- old/include/linux/skbuff.h 2014-04-17 22:02:06 +0000
+++ new/include/linux/skbuff.h 2014-04-17 22:48:38 +0000
@@ -2056,7 +2056,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -2079,7 +2079,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
=== modified file 'net/Kconfig'
--- old/net/Kconfig 2014-04-17 22:02:06 +0000
+++ new/net/Kconfig 2014-04-17 22:48:38 +0000
@@ -75,6 +75,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'net/ceph/pagevec.c'
--- old/net/ceph/pagevec.c 2014-04-17 22:02:06 +0000
+++ new/net/ceph/pagevec.c 2014-04-17 22:48:38 +0000
@@ -51,7 +51,7 @@ void ceph_put_page_vector(struct page **
for (i = 0; i < num_pages; i++) {
if (dirty)
set_page_dirty_lock(pages[i]);
- put_page(pages[i]);
+ net_put_page(pages[i]);
}
kfree(pages);
}
=== modified file 'net/core/skbuff.c'
--- old/net/core/skbuff.c 2014-04-17 22:02:06 +0000
+++ new/net/core/skbuff.c 2014-04-17 22:48:38 +0000
@@ -425,7 +425,7 @@ struct sk_buff *__netdev_alloc_skb(struc
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
@@ -483,7 +483,7 @@ static void skb_clone_fraglist(struct sk
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -804,7 +804,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)page_private(head);
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1647,7 +1647,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1700,7 +1700,7 @@ static bool spd_fill_page(struct splice_
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
@@ -2159,7 +2159,7 @@ skb_zerocopy(struct sk_buff *to, struct
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
__skb_fill_page_desc(to, 0, page, offset, plen);
- get_page(page);
+ net_get_page(page);
j = 1;
len -= plen;
}
@@ -2813,7 +2813,7 @@ int skb_append_datato_frags(struct sock
copy);
frg_cnt++;
pfrag->offset += copy;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
skb->truesize += copy;
atomic_add(copy, &sk->sk_wmem_alloc);
=== modified file 'net/core/sock.c'
--- old/net/core/sock.c 2014-04-17 22:02:06 +0000
+++ new/net/core/sock.c 2014-04-17 22:48:38 +0000
@@ -1839,7 +1839,7 @@ bool skb_page_frag_refill(unsigned int s
}
if (pfrag->offset + sz <= pfrag->size)
return true;
- put_page(pfrag->page);
+ net_put_page(pfrag->page);
}
order = SKB_FRAG_PAGE_ORDER;
@@ -2602,7 +2602,7 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
+ net_put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
=== modified file 'net/ipv4/Makefile'
--- old/net/ipv4/Makefile 2014-04-17 22:02:06 +0000
+++ new/net/ipv4/Makefile 2014-04-17 22:48:38 +0000
@@ -53,6 +53,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
=== modified file 'net/ipv4/ip_output.c'
--- old/net/ipv4/ip_output.c 2014-04-17 22:02:06 +0000
+++ new/net/ipv4/ip_output.c 2014-04-17 22:48:38 +0000
@@ -1004,7 +1004,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
@@ -1230,7 +1230,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'net/ipv4/tcp.c'
--- old/net/ipv4/tcp.c 2014-04-17 22:02:06 +0000
+++ new/net/ipv4/tcp.c 2014-04-17 22:48:38 +0000
@@ -939,7 +939,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
@@ -1238,7 +1238,7 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
pfrag->offset += copy;
}
=== added file 'net/ipv4/tcp_zero_copy.c'
--- old/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ new/net/ipv4/tcp_zero_copy.c 2014-04-17 22:48:38 +0000
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * net_set_get_put_page_callbacks - install or clear the page callbacks
+ *
+ * Passing NULL for a callback clears it. Returns 0 on success, or -EBUSY,
+ * leaving both callbacks untouched, if a different non-NULL callback is
+ * already installed.
+ *
+ * The caller must ensure that no page has a non-zero net_priv when this is
+ * called; hence this function, net_get_page() and net_put_page() need no
+ * locking.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* Refuse to swap an installed get callback for a different one. */
+	if (net_get_page_callback != NULL && get_callback != NULL &&
+	    net_get_page_callback != get_callback)
+		return -EBUSY;
+
+	/* Same rule for the put callback; nothing has been modified yet. */
+	if (net_put_page_callback != NULL && put_callback != NULL &&
+	    net_put_page_callback != put_callback)
+		return -EBUSY;
+
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
=== modified file 'net/ipv6/ip6_output.c'
--- old/net/ipv6/ip6_output.c 2014-04-17 22:02:06 +0000
+++ new/net/ipv6/ip6_output.c 2014-04-17 22:48:38 +0000
@@ -1455,7 +1455,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,

View File

@@ -1,364 +0,0 @@
=== modified file 'drivers/block/drbd/drbd_receiver.c'
--- old/drivers/block/drbd/drbd_receiver.c 2014-06-18 01:32:48 +0000
+++ new/drivers/block/drbd/drbd_receiver.c 2014-06-18 01:44:08 +0000
@@ -131,7 +131,7 @@ static int page_chain_free(struct page *
struct page *tmp;
int i = 0;
page_chain_for_each_safe(page, tmp) {
- put_page(page);
+ net_put_page(page);
++i;
}
return i;
=== modified file 'include/linux/mm_types.h'
--- old/include/linux/mm_types.h 2014-06-18 01:32:48 +0000
+++ new/include/linux/mm_types.h 2014-06-18 01:44:08 +0000
@@ -196,6 +196,17 @@ struct page {
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
int _last_cpupid;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field here may look wrong, and struct sk_buff
+ * may seem the better place, but that would make the code much more
+ * complicated and fragile, since every skb would then have to contain
+ * only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
=== modified file 'include/linux/net.h'
--- old/include/linux/net.h 2014-06-18 01:32:48 +0000
+++ new/include/linux/net.h 2014-06-18 01:44:08 +0000
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/stringify.h>
+#include <linux/mm.h>
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
@@ -285,6 +286,45 @@ int kernel_sendpage(struct socket *sock,
int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback);
+
+/*
+ * Take/drop a page reference, first invoking the registered callback when
+ * page->net_priv is set. See net_set_get_put_page_callbacks() on locking.
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_get_page_callback(page);
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_put_page_callback(page);
+	put_page(page);
+}
+#else /* plain get_page()/put_page() wrappers when the option is off */
+static inline void net_get_page(struct page *page)
+{
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
=== modified file 'include/linux/skbuff.h'
--- old/include/linux/skbuff.h 2014-06-18 01:32:48 +0000
+++ new/include/linux/skbuff.h 2014-06-18 01:44:08 +0000
@@ -2113,7 +2113,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -2136,7 +2136,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
=== modified file 'net/Kconfig'
--- old/net/Kconfig 2014-06-18 01:32:48 +0000
+++ new/net/Kconfig 2014-06-18 01:44:08 +0000
@@ -75,6 +75,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'net/ceph/pagevec.c'
--- old/net/ceph/pagevec.c 2014-06-18 01:32:48 +0000
+++ new/net/ceph/pagevec.c 2014-06-18 01:44:08 +0000
@@ -51,7 +51,7 @@ void ceph_put_page_vector(struct page **
for (i = 0; i < num_pages; i++) {
if (dirty)
set_page_dirty_lock(pages[i]);
- put_page(pages[i]);
+ net_put_page(pages[i]);
}
kfree(pages);
}
=== modified file 'net/core/skbuff.c'
--- old/net/core/skbuff.c 2014-06-18 01:32:48 +0000
+++ new/net/core/skbuff.c 2014-06-18 01:44:08 +0000
@@ -425,7 +425,7 @@ struct sk_buff *__netdev_alloc_skb(struc
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
@@ -483,7 +483,7 @@ static void skb_clone_fraglist(struct sk
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -804,7 +804,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)page_private(head);
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1647,7 +1647,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1700,7 +1700,7 @@ static bool spd_fill_page(struct splice_
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
@@ -2159,7 +2159,7 @@ skb_zerocopy(struct sk_buff *to, struct
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
__skb_fill_page_desc(to, 0, page, offset, plen);
- get_page(page);
+ net_get_page(page);
j = 1;
len -= plen;
}
@@ -2813,7 +2813,7 @@ int skb_append_datato_frags(struct sock
copy);
frg_cnt++;
pfrag->offset += copy;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
skb->truesize += copy;
atomic_add(copy, &sk->sk_wmem_alloc);
=== modified file 'net/core/sock.c'
--- old/net/core/sock.c 2014-06-18 01:32:48 +0000
+++ new/net/core/sock.c 2014-06-18 01:44:08 +0000
@@ -1888,7 +1888,7 @@ bool skb_page_frag_refill(unsigned int s
}
if (pfrag->offset + sz <= pfrag->size)
return true;
- put_page(pfrag->page);
+ net_put_page(pfrag->page);
}
order = SKB_FRAG_PAGE_ORDER;
@@ -2651,7 +2651,7 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
+ net_put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
=== modified file 'net/ipv4/Makefile'
--- old/net/ipv4/Makefile 2014-06-18 01:32:48 +0000
+++ new/net/ipv4/Makefile 2014-06-18 01:44:08 +0000
@@ -53,6 +53,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o xfrm4_protocol.o
=== modified file 'net/ipv4/ip_output.c'
--- old/net/ipv4/ip_output.c 2014-06-18 01:32:48 +0000
+++ new/net/ipv4/ip_output.c 2014-06-18 01:44:08 +0000
@@ -1047,7 +1047,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
@@ -1272,7 +1272,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'net/ipv4/tcp.c'
--- old/net/ipv4/tcp.c 2014-06-18 01:32:48 +0000
+++ new/net/ipv4/tcp.c 2014-06-18 01:44:08 +0000
@@ -939,7 +939,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
@@ -1238,7 +1238,7 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
pfrag->offset += copy;
}
=== added file 'net/ipv4/tcp_zero_copy.c'
--- old/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ new/net/ipv4/tcp_zero_copy.c 2014-06-18 01:44:08 +0000
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Install or clear (by passing NULL) the callbacks that net_get_page()
+ * and net_put_page() invoke for pages with net_priv set.
+ *
+ * Returns 0 on success, or -EBUSY without changing anything if a callback
+ * is already installed and a different non-NULL one is passed.
+ *
+ * The caller must ensure that at the moment of the call there are no pages
+ * in the system with a non-zero net_priv field. Hence this function, as well
+ * as net_get_page() and net_put_page(), doesn't need any protection.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* A different "get" callback is already installed. */
+	if (net_get_page_callback != NULL && get_callback != NULL &&
+	    net_get_page_callback != get_callback)
+		return -EBUSY;
+
+	/* A different "put" callback is already installed. */
+	if (net_put_page_callback != NULL && put_callback != NULL &&
+	    net_put_page_callback != put_callback)
+		return -EBUSY;
+
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
=== modified file 'net/ipv6/ip6_output.c'
--- old/net/ipv6/ip6_output.c 2014-06-18 01:32:48 +0000
+++ new/net/ipv6/ip6_output.c 2014-06-18 01:44:08 +0000
@@ -1461,7 +1461,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,

View File

@@ -1,364 +0,0 @@
=== modified file 'drivers/block/drbd/drbd_receiver.c'
--- old/drivers/block/drbd/drbd_receiver.c 2014-08-19 01:00:36 +0000
+++ new/drivers/block/drbd/drbd_receiver.c 2014-08-19 01:17:24 +0000
@@ -132,7 +132,7 @@ static int page_chain_free(struct page *
struct page *tmp;
int i = 0;
page_chain_for_each_safe(page, tmp) {
- put_page(page);
+ net_put_page(page);
++i;
}
return i;
=== modified file 'include/linux/mm_types.h'
--- old/include/linux/mm_types.h 2014-08-19 01:00:36 +0000
+++ new/include/linux/mm_types.h 2014-08-19 01:17:24 +0000
@@ -196,6 +196,17 @@ struct page {
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
int _last_cpupid;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+	/*
+	 * Used to implement support for notification on zero-copy TCP transfer
+	 * completion. Keeping this field here rather than in struct sk_buff
+	 * may look wrong, but moving it there would make the code much more
+	 * complicated and fragile, since every skb would then have to contain
+	 * only pages with the same value in this field.
+	 */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
=== modified file 'include/linux/net.h'
--- old/include/linux/net.h 2014-08-19 01:00:36 +0000
+++ new/include/linux/net.h 2014-08-19 01:17:24 +0000
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/stringify.h>
+#include <linux/mm.h>
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
@@ -285,6 +286,45 @@ int kernel_sendpage(struct socket *sock,
int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(net_get_page_callback_t get_callback,
+					  net_put_page_callback_t put_callback);
+
+/*
+ * Take or drop a page reference. A page whose net_priv field is set is first
+ * reported to the corresponding callback. See the comment for
+ * net_set_get_put_page_callbacks() for why no protection is needed here.
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_get_page_callback(page);
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_put_page_callback(page);
+	put_page(page);
+}
+#else /* notification support disabled: plain page reference counting */
+static inline void net_get_page(struct page *page)
+{
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
=== modified file 'include/linux/skbuff.h'
--- old/include/linux/skbuff.h 2014-08-19 01:00:36 +0000
+++ new/include/linux/skbuff.h 2014-08-19 01:17:24 +0000
@@ -2139,7 +2139,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -2162,7 +2162,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
=== modified file 'net/Kconfig'
--- old/net/Kconfig 2014-08-19 01:00:36 +0000
+++ new/net/Kconfig 2014-08-19 01:17:24 +0000
@@ -75,6 +75,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'net/ceph/pagevec.c'
--- old/net/ceph/pagevec.c 2014-08-19 01:00:36 +0000
+++ new/net/ceph/pagevec.c 2014-08-19 01:17:24 +0000
@@ -51,7 +51,7 @@ void ceph_put_page_vector(struct page **
for (i = 0; i < num_pages; i++) {
if (dirty)
set_page_dirty_lock(pages[i]);
- put_page(pages[i]);
+ net_put_page(pages[i]);
}
if (is_vmalloc_addr(pages))
vfree(pages);
=== modified file 'net/core/skbuff.c'
--- old/net/core/skbuff.c 2014-08-19 01:00:36 +0000
+++ new/net/core/skbuff.c 2014-08-19 01:17:24 +0000
@@ -425,7 +425,7 @@ struct sk_buff *__netdev_alloc_skb(struc
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
@@ -483,7 +483,7 @@ static void skb_clone_fraglist(struct sk
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -807,7 +807,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)page_private(head);
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1654,7 +1654,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1707,7 +1707,7 @@ static bool spd_fill_page(struct splice_
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
@@ -2166,7 +2166,7 @@ skb_zerocopy(struct sk_buff *to, struct
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
__skb_fill_page_desc(to, 0, page, offset, plen);
- get_page(page);
+ net_get_page(page);
j = 1;
len -= plen;
}
@@ -2820,7 +2820,7 @@ int skb_append_datato_frags(struct sock
copy);
frg_cnt++;
pfrag->offset += copy;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
skb->truesize += copy;
atomic_add(copy, &sk->sk_wmem_alloc);
=== modified file 'net/core/sock.c'
--- old/net/core/sock.c 2014-08-19 01:00:36 +0000
+++ new/net/core/sock.c 2014-08-19 01:17:24 +0000
@@ -1888,7 +1888,7 @@ bool skb_page_frag_refill(unsigned int s
}
if (pfrag->offset + sz <= pfrag->size)
return true;
- put_page(pfrag->page);
+ net_put_page(pfrag->page);
}
order = SKB_FRAG_PAGE_ORDER;
@@ -2651,7 +2651,7 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
+ net_put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
=== modified file 'net/ipv4/Makefile'
--- old/net/ipv4/Makefile 2014-08-19 01:00:36 +0000
+++ new/net/ipv4/Makefile 2014-08-19 01:17:24 +0000
@@ -53,6 +53,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o xfrm4_protocol.o
=== modified file 'net/ipv4/ip_output.c'
--- old/net/ipv4/ip_output.c 2014-08-19 01:00:36 +0000
+++ new/net/ipv4/ip_output.c 2014-08-19 01:17:24 +0000
@@ -1046,7 +1046,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
@@ -1271,7 +1271,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'net/ipv4/tcp.c'
--- old/net/ipv4/tcp.c 2014-08-19 01:00:36 +0000
+++ new/net/ipv4/tcp.c 2014-08-19 01:17:24 +0000
@@ -939,7 +939,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
@@ -1238,7 +1238,7 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
pfrag->offset += copy;
}
=== added file 'net/ipv4/tcp_zero_copy.c'
--- old/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ new/net/ipv4/tcp_zero_copy.c 2014-08-19 01:17:24 +0000
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Install or clear (by passing NULL) the callbacks that net_get_page()
+ * and net_put_page() invoke for pages with net_priv set.
+ *
+ * Returns 0 on success, or -EBUSY without changing anything if a callback
+ * is already installed and a different non-NULL one is passed.
+ *
+ * The caller must ensure that at the moment of the call there are no pages
+ * in the system with a non-zero net_priv field. Hence this function, as well
+ * as net_get_page() and net_put_page(), doesn't need any protection.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* A different "get" callback is already installed. */
+	if (net_get_page_callback != NULL && get_callback != NULL &&
+	    net_get_page_callback != get_callback)
+		return -EBUSY;
+
+	/* A different "put" callback is already installed. */
+	if (net_put_page_callback != NULL && put_callback != NULL &&
+	    net_put_page_callback != put_callback)
+		return -EBUSY;
+
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
=== modified file 'net/ipv6/ip6_output.c'
--- old/net/ipv6/ip6_output.c 2014-08-19 01:00:36 +0000
+++ new/net/ipv6/ip6_output.c 2014-08-19 01:17:24 +0000
@@ -1475,7 +1475,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,

View File

@@ -1,364 +0,0 @@
=== modified file 'drivers/block/drbd/drbd_receiver.c'
--- old/drivers/block/drbd/drbd_receiver.c 2014-11-21 03:17:49 +0000
+++ new/drivers/block/drbd/drbd_receiver.c 2014-11-21 03:51:00 +0000
@@ -132,7 +132,7 @@ static int page_chain_free(struct page *
struct page *tmp;
int i = 0;
page_chain_for_each_safe(page, tmp) {
- put_page(page);
+ net_put_page(page);
++i;
}
return i;
=== modified file 'include/linux/mm_types.h'
--- old/include/linux/mm_types.h 2014-11-21 03:17:49 +0000
+++ new/include/linux/mm_types.h 2014-11-21 03:51:00 +0000
@@ -196,6 +196,17 @@ struct page {
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
int _last_cpupid;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+	/*
+	 * Used to implement support for notification on zero-copy TCP transfer
+	 * completion. Keeping this field here rather than in struct sk_buff
+	 * may look wrong, but moving it there would make the code much more
+	 * complicated and fragile, since every skb would then have to contain
+	 * only pages with the same value in this field.
+	 */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
=== modified file 'include/linux/net.h'
--- old/include/linux/net.h 2014-11-21 03:17:49 +0000
+++ new/include/linux/net.h 2014-11-21 03:51:00 +0000
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/stringify.h>
+#include <linux/mm.h>
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
@@ -285,6 +286,45 @@ int kernel_sendpage(struct socket *sock,
int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(net_get_page_callback_t get_callback,
+					  net_put_page_callback_t put_callback);
+
+/*
+ * Take or drop a page reference. A page whose net_priv field is set is first
+ * reported to the corresponding callback. See the comment for
+ * net_set_get_put_page_callbacks() for why no protection is needed here.
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_get_page_callback(page);
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_put_page_callback(page);
+	put_page(page);
+}
+#else /* notification support disabled: plain page reference counting */
+static inline void net_get_page(struct page *page)
+{
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
=== modified file 'include/linux/skbuff.h'
--- old/include/linux/skbuff.h 2014-11-21 03:17:49 +0000
+++ new/include/linux/skbuff.h 2014-11-21 03:51:00 +0000
@@ -2145,7 +2145,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -2168,7 +2168,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
=== modified file 'net/Kconfig'
--- old/net/Kconfig 2014-11-21 03:17:49 +0000
+++ new/net/Kconfig 2014-11-21 03:51:00 +0000
@@ -75,6 +75,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'net/ceph/pagevec.c'
--- old/net/ceph/pagevec.c 2014-11-21 03:17:49 +0000
+++ new/net/ceph/pagevec.c 2014-11-21 03:51:00 +0000
@@ -51,7 +51,7 @@ void ceph_put_page_vector(struct page **
for (i = 0; i < num_pages; i++) {
if (dirty)
set_page_dirty_lock(pages[i]);
- put_page(pages[i]);
+ net_put_page(pages[i]);
}
if (is_vmalloc_addr(pages))
vfree(pages);
=== modified file 'net/core/skbuff.c'
--- old/net/core/skbuff.c 2014-11-21 03:17:49 +0000
+++ new/net/core/skbuff.c 2014-11-21 03:51:00 +0000
@@ -426,7 +426,7 @@ struct sk_buff *__netdev_alloc_skb(struc
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
@@ -484,7 +484,7 @@ static void skb_clone_fraglist(struct sk
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -808,7 +808,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)page_private(head);
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1655,7 +1655,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1708,7 +1708,7 @@ static bool spd_fill_page(struct splice_
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
@@ -2167,7 +2167,7 @@ skb_zerocopy(struct sk_buff *to, struct
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
__skb_fill_page_desc(to, 0, page, offset, plen);
- get_page(page);
+ net_get_page(page);
j = 1;
len -= plen;
}
@@ -2821,7 +2821,7 @@ int skb_append_datato_frags(struct sock
copy);
frg_cnt++;
pfrag->offset += copy;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
skb->truesize += copy;
atomic_add(copy, &sk->sk_wmem_alloc);
=== modified file 'net/core/sock.c'
--- old/net/core/sock.c 2014-11-21 03:17:49 +0000
+++ new/net/core/sock.c 2014-11-21 03:51:00 +0000
@@ -1881,7 +1881,7 @@ bool skb_page_frag_refill(unsigned int s
}
if (pfrag->offset + sz <= pfrag->size)
return true;
- put_page(pfrag->page);
+ net_put_page(pfrag->page);
}
pfrag->offset = 0;
@@ -2645,7 +2645,7 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
+ net_put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
=== modified file 'net/ipv4/Makefile'
--- old/net/ipv4/Makefile 2014-11-21 03:17:49 +0000
+++ new/net/ipv4/Makefile 2014-11-21 03:51:00 +0000
@@ -54,6 +54,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o xfrm4_protocol.o
=== modified file 'net/ipv4/ip_output.c'
--- old/net/ipv4/ip_output.c 2014-11-21 03:17:49 +0000
+++ new/net/ipv4/ip_output.c 2014-11-21 03:51:00 +0000
@@ -1051,7 +1051,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
@@ -1276,7 +1276,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'net/ipv4/tcp.c'
--- old/net/ipv4/tcp.c 2014-11-21 03:17:49 +0000
+++ new/net/ipv4/tcp.c 2014-11-21 03:51:00 +0000
@@ -950,7 +950,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
@@ -1251,7 +1251,7 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
pfrag->offset += copy;
}
=== added file 'net/ipv4/tcp_zero_copy.c'
--- old/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ new/net/ipv4/tcp_zero_copy.c 2014-11-21 03:51:00 +0000
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Install or clear (by passing NULL) the callbacks that net_get_page()
+ * and net_put_page() invoke for pages with net_priv set.
+ *
+ * Returns 0 on success, or -EBUSY without changing anything if a callback
+ * is already installed and a different non-NULL one is passed.
+ *
+ * The caller must ensure that at the moment of the call there are no pages
+ * in the system with a non-zero net_priv field. Hence this function, as well
+ * as net_get_page() and net_put_page(), doesn't need any protection.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* A different "get" callback is already installed. */
+	if (net_get_page_callback != NULL && get_callback != NULL &&
+	    net_get_page_callback != get_callback)
+		return -EBUSY;
+
+	/* A different "put" callback is already installed. */
+	if (net_put_page_callback != NULL && put_callback != NULL &&
+	    net_put_page_callback != put_callback)
+		return -EBUSY;
+
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
=== modified file 'net/ipv6/ip6_output.c'
--- old/net/ipv6/ip6_output.c 2014-11-21 03:17:49 +0000
+++ new/net/ipv6/ip6_output.c 2014-11-21 03:51:00 +0000
@@ -1477,7 +1477,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,

View File

@@ -1,387 +0,0 @@
Subject: [PATCH] put_page_callback
---
drivers/block/drbd/drbd_receiver.c | 2 +-
include/linux/mm_types.h | 11 +++++++++
include/linux/net.h | 40 ++++++++++++++++++++++++++++++
include/linux/skbuff.h | 4 +--
net/Kconfig | 12 +++++++++
net/ceph/pagevec.c | 2 +-
net/core/skbuff.c | 14 +++++------
net/core/sock.c | 4 +--
net/ipv4/Makefile | 1 +
net/ipv4/ip_output.c | 4 +--
net/ipv4/tcp.c | 4 +--
net/ipv4/tcp_zero_copy.c | 50 ++++++++++++++++++++++++++++++++++++++
net/ipv6/ip6_output.c | 2 +-
13 files changed, 132 insertions(+), 18 deletions(-)
create mode 100644 net/ipv4/tcp_zero_copy.c
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 6960fb0..8fa4016 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -132,7 +132,7 @@ static int page_chain_free(struct page *page)
struct page *tmp;
int i = 0;
page_chain_for_each_safe(page, tmp) {
- put_page(page);
+ net_put_page(page);
++i;
}
return i;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6e0b286..5706a4d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -196,6 +196,17 @@ struct page {
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
int _last_cpupid;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+	/*
+	 * Used to implement support for notification on zero-copy TCP transfer
+	 * completion. Keeping this field here rather than in struct sk_buff
+	 * may look wrong, but moving it there would make the code much more
+	 * complicated and fragile, since every skb would then have to contain
+	 * only pages with the same value in this field.
+	 */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
diff --git a/include/linux/net.h b/include/linux/net.h
index 17d8339..f784384 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/stringify.h>
+#include <linux/mm.h>
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
@@ -285,6 +286,45 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset,
int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * See the comment for net_set_get_put_page_callbacks() for why these
+ * functions don't need any protection.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6c8b6f6..edf6195 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2250,7 +2250,7 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag)
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -2273,7 +2273,7 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f)
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
diff --git a/net/Kconfig b/net/Kconfig
index 99815b5..ac45213 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -76,6 +76,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 5550130..993f710 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -51,7 +51,7 @@ void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
for (i = 0; i < num_pages; i++) {
if (dirty)
set_page_dirty_lock(pages[i]);
- put_page(pages[i]);
+ net_put_page(pages[i]);
}
if (is_vmalloc_addr(pages))
vfree(pages);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 32e31c2..6eb3a9e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -437,7 +437,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
@@ -495,7 +495,7 @@ static void skb_clone_fraglist(struct sk_buff *skb)
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -822,7 +822,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
if (!page) {
while (head) {
struct page *next = (struct page *)page_private(head);
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1669,7 +1669,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1722,7 +1722,7 @@ static bool spd_fill_page(struct splice_pipe_desc *spd,
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
@@ -2181,7 +2181,7 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
__skb_fill_page_desc(to, 0, page, offset, plen);
- get_page(page);
+ net_get_page(page);
j = 1;
len -= plen;
}
@@ -2835,7 +2835,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
copy);
frg_cnt++;
pfrag->offset += copy;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
skb->truesize += copy;
atomic_add(copy, &sk->sk_wmem_alloc);
diff --git a/net/core/sock.c b/net/core/sock.c
index 15e0c67..e8ea0df 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1830,7 +1830,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
}
if (pfrag->offset + sz <= pfrag->size)
return true;
- put_page(pfrag->page);
+ net_put_page(pfrag->page);
}
pfrag->offset = 0;
@@ -2581,7 +2581,7 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
+ net_put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 518c04e..4072a87 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -57,6 +57,7 @@ obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_GENEVE) += geneve.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o xfrm4_protocol.o
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index bc6471d..ab9e262 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1051,7 +1051,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
@@ -1276,7 +1276,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 38c2bcb..f089a7a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -949,7 +949,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
@@ -1250,7 +1250,7 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
pfrag->offset += copy;
}
diff --git a/net/ipv4/tcp_zero_copy.c b/net/ipv4/tcp_zero_copy.c
new file mode 100644
index 0000000..430147e
--- /dev/null
+++ b/net/ipv4/tcp_zero_copy.c
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * The caller of this function must ensure that, at the moment it is
+ * called, no page in the system has its net_priv field set to a non-zero
+ * value. Hence this function, as well as net_get_page() and
+ * net_put_page(), does not need any protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 8e950c2..8cb4760 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1472,7 +1472,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
--
2.1.2

View File

@@ -1,387 +0,0 @@
Subject: [PATCH] put_page_callback
---
drivers/block/drbd/drbd_receiver.c | 2 +-
include/linux/mm_types.h | 11 +++++++++
include/linux/net.h | 40 ++++++++++++++++++++++++++++++
include/linux/skbuff.h | 4 +--
net/Kconfig | 12 +++++++++
net/ceph/pagevec.c | 2 +-
net/core/skbuff.c | 14 +++++------
net/core/sock.c | 4 +--
net/ipv4/Makefile | 1 +
net/ipv4/ip_output.c | 4 +--
net/ipv4/tcp.c | 4 +--
net/ipv4/tcp_zero_copy.c | 50 ++++++++++++++++++++++++++++++++++++++
net/ipv6/ip6_output.c | 2 +-
13 files changed, 132 insertions(+), 18 deletions(-)
create mode 100644 net/ipv4/tcp_zero_copy.c
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index d169b4a..ec913c2 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -132,7 +132,7 @@ static int page_chain_free(struct page *page)
struct page *tmp;
int i = 0;
page_chain_for_each_safe(page, tmp) {
- put_page(page);
+ net_put_page(page);
++i;
}
return i;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6d34aa2..a536ed7 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -197,6 +197,17 @@ struct page {
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
int _last_cpupid;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field in struct sk_buff might look cleaner,
+ * but it would make the code much more complicated and fragile, since
+ * every skb would then have to contain only pages with the same value
+ * in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
diff --git a/include/linux/net.h b/include/linux/net.h
index 17d8339..f784384 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/stringify.h>
+#include <linux/mm.h>
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
@@ -285,6 +286,45 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset,
int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * See the comment for net_set_get_put_page_callbacks() for why these
+ * functions don't need any protection.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 85ab7d7..fb82e86 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2254,7 +2254,7 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag)
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -2277,7 +2277,7 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f)
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
diff --git a/net/Kconfig b/net/Kconfig
index ff9ffc1..a270579 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -76,6 +76,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 5550130..993f710 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -51,7 +51,7 @@ void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
for (i = 0; i < num_pages; i++) {
if (dirty)
set_page_dirty_lock(pages[i]);
- put_page(pages[i]);
+ net_put_page(pages[i]);
}
if (is_vmalloc_addr(pages))
vfree(pages);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 62c67be..35074d3 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -479,7 +479,7 @@ static struct sk_buff *__alloc_rx_skb(unsigned int length, gfp_t gfp_mask,
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length, gfp_mask,
@@ -592,7 +592,7 @@ static void skb_clone_fraglist(struct sk_buff *skb)
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -920,7 +920,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
if (!page) {
while (head) {
struct page *next = (struct page *)page_private(head);
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1767,7 +1767,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1820,7 +1820,7 @@ static bool spd_fill_page(struct splice_pipe_desc *spd,
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
@@ -2279,7 +2279,7 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
__skb_fill_page_desc(to, 0, page, offset, plen);
- get_page(page);
+ net_get_page(page);
j = 1;
len -= plen;
}
@@ -2933,7 +2933,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
copy);
frg_cnt++;
pfrag->offset += copy;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
skb->truesize += copy;
atomic_add(copy, &sk->sk_wmem_alloc);
diff --git a/net/core/sock.c b/net/core/sock.c
index 1c7a33d..ec95185 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1864,7 +1864,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
}
if (pfrag->offset + sz <= pfrag->size)
return true;
- put_page(pfrag->page);
+ net_put_page(pfrag->page);
}
pfrag->offset = 0;
@@ -2615,7 +2615,7 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
+ net_put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 518c04e..4072a87 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -57,6 +57,7 @@ obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_GENEVE) += geneve.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o xfrm4_protocol.o
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 2e2f687..87f5bff 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1052,7 +1052,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
@@ -1277,7 +1277,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3075723..396b887 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -931,7 +931,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
@@ -1232,7 +1232,7 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
pfrag->offset += copy;
}
diff --git a/net/ipv4/tcp_zero_copy.c b/net/ipv4/tcp_zero_copy.c
new file mode 100644
index 0000000..430147e
--- /dev/null
+++ b/net/ipv4/tcp_zero_copy.c
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * The caller of this function must ensure that, at the moment it is
+ * called, no page in the system has its net_priv field set to a non-zero
+ * value. Hence this function, as well as net_get_page() and
+ * net_put_page(), does not need any protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3f5aa99..777f267 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1460,7 +1460,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
--
1.9.1

View File

@@ -1,287 +0,0 @@
=== modified file 'include/linux/mm_types.h'
--- old/include/linux/mm_types.h 2012-01-10 22:58:17 +0000
+++ new/include/linux/mm_types.h 2012-01-10 23:02:48 +0000
@@ -149,6 +149,17 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field in struct sk_buff might look cleaner,
+ * but it would make the code much more complicated and fragile, since
+ * every skb would then have to contain only pages with the same value
+ * in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* If another subsystem starts using the double word pairing for atomic
=== modified file 'include/linux/net.h'
--- old/include/linux/net.h 2012-01-10 22:58:17 +0000
+++ new/include/linux/net.h 2012-01-10 23:02:48 +0000
@@ -61,6 +61,7 @@ typedef enum {
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
#include <linux/kmemcheck.h>
#include <linux/rcupdate.h>
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -289,5 +290,44 @@ extern int kernel_sock_shutdown(struct s
MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \
"-type-" __stringify(type))
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * See the comment for net_set_get_put_page_callbacks() for why these
+ * functions don't need any protection.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
=== modified file 'include/linux/skbuff.h'
--- old/include/linux/skbuff.h 2012-01-10 22:58:17 +0000
+++ new/include/linux/skbuff.h 2012-01-10 23:15:31 +0000
@@ -1712,7 +1712,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -1735,7 +1735,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
=== modified file 'net/Kconfig'
--- old/net/Kconfig 2012-01-10 22:58:17 +0000
+++ new/net/Kconfig 2012-01-10 23:02:48 +0000
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'net/core/skbuff.c'
--- old/net/core/skbuff.c 2012-01-10 22:58:17 +0000
+++ new/net/core/skbuff.c 2012-01-10 23:02:48 +0000
@@ -654,7 +654,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1493,7 +1493,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1517,7 +1517,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1527,7 +1527,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1549,7 +1549,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
=== modified file 'net/ipv4/Makefile'
--- old/net/ipv4/Makefile 2012-01-10 22:58:17 +0000
+++ new/net/ipv4/Makefile 2012-01-10 23:02:48 +0000
@@ -48,6 +48,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
=== modified file 'net/ipv4/ip_output.c'
--- old/net/ipv4/ip_output.c 2012-01-10 22:58:17 +0000
+++ new/net/ipv4/ip_output.c 2012-01-10 23:02:48 +0000
@@ -1232,7 +1232,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'net/ipv4/tcp.c'
--- old/net/ipv4/tcp.c 2012-01-10 22:58:17 +0000
+++ new/net/ipv4/tcp.c 2012-01-10 23:02:48 +0000
@@ -815,7 +815,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1022,7 +1022,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1062,9 +1062,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
=== added file 'net/ipv4/tcp_zero_copy.c'
--- old/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ new/net/ipv4/tcp_zero_copy.c 2012-01-10 23:43:22 +0000
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * The caller of this function must ensure that, at the moment it is
+ * called, no page in the system has its net_priv field set to a non-zero
+ * value. Hence this function, as well as net_get_page() and
+ * net_put_page(), does not need any protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);

View File

@@ -1,303 +0,0 @@
=== modified file 'include/linux/mm_types.h'
--- old/include/linux/mm_types.h 2012-01-10 22:58:17 +0000
+++ new/include/linux/mm_types.h 2012-01-10 23:02:48 +0000
@@ -149,6 +149,17 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field in struct sk_buff might look cleaner,
+ * but it would make the code much more complicated and fragile, since
+ * every skb would then have to contain only pages with the same value
+ * in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* If another subsystem starts using the double word pairing for atomic
=== modified file 'include/linux/net.h'
--- old/include/linux/net.h 2012-01-10 22:58:17 +0000
+++ new/include/linux/net.h 2012-01-10 23:02:48 +0000
@@ -61,6 +61,7 @@ typedef enum {
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
#include <linux/kmemcheck.h>
#include <linux/rcupdate.h>
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -289,5 +290,44 @@ extern int kernel_sock_shutdown(struct s
MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \
"-type-" __stringify(type))
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * See the comment for net_set_get_put_page_callbacks() for why these
+ * functions don't need any protection.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
=== modified file 'include/linux/skbuff.h'
--- old/include/linux/skbuff.h 2012-01-10 22:58:17 +0000
+++ new/include/linux/skbuff.h 2012-01-10 23:15:31 +0000
@@ -1712,7 +1712,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -1735,7 +1735,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
=== modified file 'net/Kconfig'
--- old/net/Kconfig 2012-01-10 22:58:17 +0000
+++ new/net/Kconfig 2012-01-10 23:02:48 +0000
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'net/core/skbuff.c'
--- old/net/core/skbuff.c 2012-01-10 22:58:17 +0000
+++ new/net/core/skbuff.c 2012-01-10 23:02:48 +0000
@@ -77,13 +77,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -654,7 +654,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1493,7 +1493,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1517,7 +1517,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1527,7 +1527,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1549,7 +1549,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
=== modified file 'net/ipv4/Makefile'
--- old/net/ipv4/Makefile 2012-01-10 22:58:17 +0000
+++ new/net/ipv4/Makefile 2012-01-10 23:02:48 +0000
@@ -48,6 +48,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
=== modified file 'net/ipv4/ip_output.c'
--- old/net/ipv4/ip_output.c 2012-01-10 22:58:17 +0000
+++ new/net/ipv4/ip_output.c 2012-01-10 23:02:48 +0000
@@ -1232,7 +1232,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'net/ipv4/tcp.c'
--- old/net/ipv4/tcp.c 2012-01-10 22:58:17 +0000
+++ new/net/ipv4/tcp.c 2012-01-10 23:02:48 +0000
@@ -815,7 +815,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1022,7 +1022,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1062,9 +1062,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
=== added file 'net/ipv4/tcp_zero_copy.c'
--- old/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ new/net/ipv4/tcp_zero_copy.c 2012-01-10 23:43:22 +0000
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Install (or, with NULL arguments, clear) the page get/put callbacks.
+ * Returns 0, or -EBUSY if a different non-NULL callback is already set.
+ * The caller must ensure that no page has a non-zero net_priv during this
+ * call; that is why neither it nor net_get_page()/net_put_page() locks.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);

View File

@@ -1,303 +0,0 @@
=== modified file 'include/linux/mm_types.h'
--- old/include/linux/mm_types.h 2012-03-19 23:46:01 +0000
+++ new/include/linux/mm_types.h 2012-03-23 21:39:19 +0000
@@ -149,6 +149,17 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement notification on zero-copy TCP transfer completion.
+ * Keeping this field in struct sk_buff might look cleaner, but that
+ * would make the code much more complicated and fragile, because every
+ * skb would then have to contain only pages that share the same value
+ * for this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
=== modified file 'include/linux/net.h'
--- old/include/linux/net.h 2012-03-19 23:46:01 +0000
+++ new/include/linux/net.h 2012-03-23 21:39:19 +0000
@@ -61,6 +61,7 @@ typedef enum {
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
#include <linux/kmemcheck.h>
#include <linux/rcupdate.h>
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -289,5 +290,44 @@ extern int kernel_sock_shutdown(struct s
MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \
"-type-" __stringify(type))
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * Pages with net_priv set invoke the registered callback first. For why no
+ * locking is needed, see the comment on net_set_get_put_page_callbacks().
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
=== modified file 'include/linux/skbuff.h'
--- old/include/linux/skbuff.h 2012-03-19 23:46:01 +0000
+++ new/include/linux/skbuff.h 2012-03-23 21:39:19 +0000
@@ -1702,7 +1702,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -1725,7 +1725,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
=== modified file 'net/Kconfig'
--- old/net/Kconfig 2012-03-19 23:46:01 +0000
+++ new/net/Kconfig 2012-03-23 21:39:19 +0000
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'net/core/skbuff.c'
--- old/net/core/skbuff.c 2012-03-19 23:46:01 +0000
+++ new/net/core/skbuff.c 2012-03-23 21:39:19 +0000
@@ -77,13 +77,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -702,7 +702,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1542,7 +1542,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1566,7 +1566,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1576,7 +1576,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1598,7 +1598,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
=== modified file 'net/ipv4/Makefile'
--- old/net/ipv4/Makefile 2012-03-19 23:46:01 +0000
+++ new/net/ipv4/Makefile 2012-03-23 21:39:19 +0000
@@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
=== modified file 'net/ipv4/ip_output.c'
--- old/net/ipv4/ip_output.c 2012-03-19 23:46:01 +0000
+++ new/net/ipv4/ip_output.c 2012-03-23 21:39:19 +0000
@@ -1246,7 +1246,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'net/ipv4/tcp.c'
--- old/net/ipv4/tcp.c 2012-03-19 23:46:01 +0000
+++ new/net/ipv4/tcp.c 2012-03-23 21:39:19 +0000
@@ -813,7 +813,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1025,7 +1025,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
sk->sk_sndmsg_page = page = NULL;
off = 0;
}
@@ -1065,9 +1065,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (sk->sk_sndmsg_page) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
sk->sk_sndmsg_page = page;
}
}
=== added file 'net/ipv4/tcp_zero_copy.c'
--- old/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ new/net/ipv4/tcp_zero_copy.c 2012-03-23 21:39:19 +0000
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Install (or, with NULL arguments, clear) the page get/put callbacks.
+ * Returns 0, or -EBUSY if a different non-NULL callback is already set.
+ * The caller must ensure that no page has a non-zero net_priv during this
+ * call; that is why neither it nor net_get_page()/net_put_page() locks.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);

View File

@@ -1,303 +0,0 @@
=== modified file 'include/linux/mm_types.h'
--- old/include/linux/mm_types.h 2012-05-24 00:24:40 +0000
+++ new/include/linux/mm_types.h 2012-05-24 00:29:53 +0000
@@ -149,6 +149,17 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement notification on zero-copy TCP transfer completion.
+ * Keeping this field in struct sk_buff might look cleaner, but that
+ * would make the code much more complicated and fragile, because every
+ * skb would then have to contain only pages that share the same value
+ * for this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
=== modified file 'include/linux/net.h'
--- old/include/linux/net.h 2012-05-24 00:24:40 +0000
+++ new/include/linux/net.h 2012-05-24 00:29:53 +0000
@@ -61,6 +61,7 @@ typedef enum {
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
#include <linux/kmemcheck.h>
#include <linux/rcupdate.h>
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -290,5 +291,44 @@ extern int kernel_sock_shutdown(struct s
MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \
"-type-" __stringify(type))
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * Pages with net_priv set invoke the registered callback first. For why no
+ * locking is needed, see the comment on net_set_get_put_page_callbacks().
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
=== modified file 'include/linux/skbuff.h'
--- old/include/linux/skbuff.h 2012-05-24 00:24:40 +0000
+++ new/include/linux/skbuff.h 2012-05-24 00:29:53 +0000
@@ -1744,7 +1744,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -1767,7 +1767,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
=== modified file 'net/Kconfig'
--- old/net/Kconfig 2012-05-24 00:24:40 +0000
+++ new/net/Kconfig 2012-05-24 00:29:53 +0000
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'net/core/skbuff.c'
--- old/net/core/skbuff.c 2012-05-24 00:24:40 +0000
+++ new/net/core/skbuff.c 2012-05-24 00:29:53 +0000
@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -702,7 +702,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1544,7 +1544,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1568,7 +1568,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1578,7 +1578,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1600,7 +1600,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
=== modified file 'net/ipv4/Makefile'
--- old/net/ipv4/Makefile 2012-05-24 00:24:40 +0000
+++ new/net/ipv4/Makefile 2012-05-24 00:29:53 +0000
@@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
=== modified file 'net/ipv4/ip_output.c'
--- old/net/ipv4/ip_output.c 2012-05-24 00:24:40 +0000
+++ new/net/ipv4/ip_output.c 2012-05-24 00:29:53 +0000
@@ -1245,7 +1245,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'net/ipv4/tcp.c'
--- old/net/ipv4/tcp.c 2012-05-24 00:24:40 +0000
+++ new/net/ipv4/tcp.c 2012-05-24 00:29:53 +0000
@@ -816,7 +816,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1026,7 +1026,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
sk->sk_sndmsg_page = page = NULL;
off = 0;
}
@@ -1066,9 +1066,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (sk->sk_sndmsg_page) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
sk->sk_sndmsg_page = page;
}
}
=== added file 'net/ipv4/tcp_zero_copy.c'
--- old/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ new/net/ipv4/tcp_zero_copy.c 2012-05-24 00:29:53 +0000
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Install (or, with NULL arguments, clear) the page get/put callbacks.
+ * Returns 0, or -EBUSY if a different non-NULL callback is already set.
+ * The caller must ensure that no page has a non-zero net_priv during this
+ * call; that is why neither it nor net_get_page()/net_put_page() locks.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);

View File

@@ -1,328 +0,0 @@
=== modified file 'include/linux/mm_types.h'
--- old/include/linux/mm_types.h 2012-08-08 02:57:29 +0000
+++ new/include/linux/mm_types.h 2012-08-08 22:59:04 +0000
@@ -160,6 +160,17 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement notification on zero-copy TCP transfer completion.
+ * Keeping this field in struct sk_buff might look cleaner, but that
+ * would make the code much more complicated and fragile, because every
+ * skb would then have to contain only pages that share the same value
+ * for this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
=== modified file 'include/linux/net.h'
--- old/include/linux/net.h 2012-08-08 02:57:29 +0000
+++ new/include/linux/net.h 2012-08-08 22:59:04 +0000
@@ -61,6 +61,7 @@ typedef enum {
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
#include <linux/kmemcheck.h>
#include <linux/rcupdate.h>
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -303,6 +304,45 @@ extern int kernel_sock_ioctl(struct sock
extern int kernel_sock_shutdown(struct socket *sock,
enum sock_shutdown_cmd how);
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * Pages with net_priv set invoke the registered callback first. For why no
+ * locking is needed, see the comment on net_set_get_put_page_callbacks().
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
=== modified file 'include/linux/skbuff.h'
--- old/include/linux/skbuff.h 2012-08-08 02:57:29 +0000
+++ new/include/linux/skbuff.h 2012-08-08 22:59:04 +0000
@@ -1756,7 +1756,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -1779,7 +1779,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
=== modified file 'net/Kconfig'
--- old/net/Kconfig 2012-08-08 02:57:29 +0000
+++ new/net/Kconfig 2012-08-08 22:59:04 +0000
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'net/core/skbuff.c'
--- old/net/core/skbuff.c 2012-08-08 02:57:29 +0000
+++ new/net/core/skbuff.c 2012-08-08 22:59:04 +0000
@@ -77,13 +77,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -321,12 +321,12 @@ refill:
}
if (likely(nc->page)) {
if (nc->offset + fragsz > PAGE_SIZE) {
- put_page(nc->page);
+ net_put_page(nc->page);
goto refill;
}
data = page_address(nc->page) + nc->offset;
nc->offset += fragsz;
- get_page(nc->page);
+ net_get_page(nc->page);
}
local_irq_restore(flags);
return data;
@@ -359,7 +359,7 @@ struct sk_buff *__netdev_alloc_skb(struc
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
@@ -411,7 +411,7 @@ static void skb_clone_fraglist(struct sk
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -742,7 +742,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1568,7 +1568,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1595,7 +1595,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1640,7 +1640,7 @@ static bool spd_fill_page(struct splice_
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
=== modified file 'net/ipv4/Makefile'
--- old/net/ipv4/Makefile 2012-08-08 02:57:29 +0000
+++ new/net/ipv4/Makefile 2012-08-08 22:59:04 +0000
@@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
=== modified file 'net/ipv4/ip_output.c'
--- old/net/ipv4/ip_output.c 2012-08-08 02:57:29 +0000
+++ new/net/ipv4/ip_output.c 2012-08-08 22:59:04 +0000
@@ -1245,7 +1245,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'net/ipv4/tcp.c'
--- old/net/ipv4/tcp.c 2012-08-08 02:57:29 +0000
+++ new/net/ipv4/tcp.c 2012-08-08 22:59:04 +0000
@@ -882,7 +882,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1105,7 +1105,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
sk->sk_sndmsg_page = page = NULL;
off = 0;
}
@@ -1145,9 +1145,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (sk->sk_sndmsg_page) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
sk->sk_sndmsg_page = page;
}
}
=== added file 'net/ipv4/tcp_zero_copy.c'
--- old/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ new/net/ipv4/tcp_zero_copy.c 2012-08-08 22:59:04 +0000
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Install (or, with NULL arguments, clear) the page get/put callbacks.
+ * Returns 0, or -EBUSY if a different non-NULL callback is already set.
+ * The caller must ensure that no page has a non-zero net_priv during this
+ * call; that is why neither it nor net_get_page()/net_put_page() locks.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);

View File

@@ -1,335 +0,0 @@
=== modified file 'drivers/net/xen-netback/netback.c'
--- old/drivers/net/xen-netback/netback.c 2012-10-01 18:39:34 +0000
+++ new/drivers/net/xen-netback/netback.c 2012-10-01 21:38:56 +0000
@@ -1052,7 +1052,7 @@ static void xen_netbk_fill_frags(struct
skb->truesize += txp->size;
/* Take an extra reference to offset xen_netbk_idx_release */
- get_page(netbk->mmap_pages[pending_idx]);
+ net_get_page(netbk->mmap_pages[pending_idx]);
xen_netbk_idx_release(netbk, pending_idx);
}
}
@@ -1496,7 +1496,7 @@ static void xen_netbk_idx_release(struct
xenvif_put(vif);
netbk->mmap_pages[pending_idx]->mapping = 0;
- put_page(netbk->mmap_pages[pending_idx]);
+ net_put_page(netbk->mmap_pages[pending_idx]);
netbk->mmap_pages[pending_idx] = NULL;
}
=== modified file 'include/linux/mm_types.h'
--- old/include/linux/mm_types.h 2012-10-01 18:39:34 +0000
+++ new/include/linux/mm_types.h 2012-10-01 21:38:56 +0000
@@ -176,6 +176,17 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. It might look as not good to have this field here and
+ * it's better to have it in struct sk_buff, but it would make the code
+ * much more complicated and fragile, since all skb then would have to
+ * contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
=== modified file 'include/linux/net.h'
--- old/include/linux/net.h 2012-10-01 18:39:34 +0000
+++ new/include/linux/net.h 2012-10-01 21:38:56 +0000
@@ -61,6 +61,7 @@ typedef enum {
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
#include <linux/kmemcheck.h>
#include <linux/rcupdate.h>
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -305,6 +306,45 @@ extern int kernel_sock_ioctl(struct sock
extern int kernel_sock_shutdown(struct socket *sock,
enum sock_shutdown_cmd how);
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * See comment for net_set_get_put_page_callbacks() why those functions
+ * don't need any protection.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
=== modified file 'include/linux/skbuff.h'
--- old/include/linux/skbuff.h 2012-10-01 18:39:34 +0000
+++ new/include/linux/skbuff.h 2012-10-01 21:38:56 +0000
@@ -1848,7 +1848,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -1871,7 +1871,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
=== modified file 'net/Kconfig'
--- old/net/Kconfig 2012-10-01 18:39:34 +0000
+++ new/net/Kconfig 2012-10-01 21:38:56 +0000
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'net/core/skbuff.c'
--- old/net/core/skbuff.c 2012-10-01 18:39:34 +0000
+++ new/net/core/skbuff.c 2012-10-01 21:38:56 +0000
@@ -77,13 +77,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -427,7 +427,7 @@ struct sk_buff *__netdev_alloc_skb(struc
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
@@ -480,7 +480,7 @@ static void skb_clone_fraglist(struct sk
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -813,7 +813,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1648,7 +1648,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1675,7 +1675,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1720,7 +1720,7 @@ static bool spd_fill_page(struct splice_
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
=== modified file 'net/ipv4/Makefile'
--- old/net/ipv4/Makefile 2012-10-01 18:39:34 +0000
+++ new/net/ipv4/Makefile 2012-10-01 21:38:56 +0000
@@ -51,6 +51,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
=== modified file 'net/ipv4/ip_output.c'
--- old/net/ipv4/ip_output.c 2012-10-01 18:39:34 +0000
+++ new/net/ipv4/ip_output.c 2012-10-01 21:38:56 +0000
@@ -1236,7 +1236,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'net/ipv4/tcp.c'
--- old/net/ipv4/tcp.c 2012-10-01 18:39:34 +0000
+++ new/net/ipv4/tcp.c 2012-10-01 21:38:56 +0000
@@ -890,7 +890,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1163,7 +1163,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
sk->sk_sndmsg_page = page = NULL;
off = 0;
}
@@ -1203,9 +1203,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (sk->sk_sndmsg_page) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
sk->sk_sndmsg_page = page;
}
}
=== added file 'net/ipv4/tcp_zero_copy.c'
--- old/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ new/net/ipv4/tcp_zero_copy.c 2012-10-01 21:38:56 +0000
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Caller of this function must ensure that at the moment when it's called
+ * there are no pages in the system with net_priv field set to non-zero
+ * value. Hence, this function, as well as net_get_page() and net_put_page(),
+ * don't need any protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);

View File

@@ -1,397 +0,0 @@
=== modified file 'drivers/net/macvtap.c'
--- old/drivers/net/macvtap.c 2012-12-17 19:41:04 +0000
+++ new/drivers/net/macvtap.c 2012-12-17 23:03:50 +0000
@@ -536,7 +536,7 @@ static int zerocopy_sg_from_iovec(struct
num_pages = get_user_pages_fast(base, size, 0, &page[i]);
if (num_pages != size) {
for (i = 0; i < num_pages; i++)
- put_page(page[i]);
+ net_put_page(page[i]);
return -EFAULT;
}
truesize = size * PAGE_SIZE;
=== modified file 'drivers/net/tun.c'
--- old/drivers/net/tun.c 2012-12-17 19:41:04 +0000
+++ new/drivers/net/tun.c 2012-12-17 23:03:50 +0000
@@ -660,7 +660,7 @@ static int zerocopy_sg_from_iovec(struct
num_pages = get_user_pages_fast(base, size, 0, &page[i]);
if (num_pages != size) {
for (i = 0; i < num_pages; i++)
- put_page(page[i]);
+ net_put_page(page[i]);
return -EFAULT;
}
truesize = size * PAGE_SIZE;
=== modified file 'drivers/net/vmxnet3/vmxnet3_drv.c'
--- old/drivers/net/vmxnet3/vmxnet3_drv.c 2012-12-17 19:41:04 +0000
+++ new/drivers/net/vmxnet3/vmxnet3_drv.c 2012-12-17 23:03:50 +0000
@@ -1370,7 +1370,7 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_que
rq->buf_info[ring_idx][i].page) {
pci_unmap_page(adapter->pdev, rxd->addr,
rxd->len, PCI_DMA_FROMDEVICE);
- put_page(rq->buf_info[ring_idx][i].page);
+ net_put_page(rq->buf_info[ring_idx][i].page);
rq->buf_info[ring_idx][i].page = NULL;
}
}
=== modified file 'drivers/net/xen-netback/netback.c'
--- old/drivers/net/xen-netback/netback.c 2012-12-17 19:41:04 +0000
+++ new/drivers/net/xen-netback/netback.c 2012-12-17 23:03:50 +0000
@@ -1081,7 +1081,7 @@ static void xen_netbk_fill_frags(struct
skb->truesize += txp->size;
/* Take an extra reference to offset xen_netbk_idx_release */
- get_page(netbk->mmap_pages[pending_idx]);
+ net_get_page(netbk->mmap_pages[pending_idx]);
xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
}
}
@@ -1522,7 +1522,7 @@ static void xen_netbk_idx_release(struct
xenvif_put(vif);
netbk->mmap_pages[pending_idx]->mapping = 0;
- put_page(netbk->mmap_pages[pending_idx]);
+ net_put_page(netbk->mmap_pages[pending_idx]);
netbk->mmap_pages[pending_idx] = NULL;
}
=== modified file 'include/linux/mm_types.h'
--- old/include/linux/mm_types.h 2012-12-17 19:41:04 +0000
+++ new/include/linux/mm_types.h 2012-12-17 23:03:50 +0000
@@ -175,6 +175,17 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. It might look as not good to have this field here and
+ * it's better to have it in struct sk_buff, but it would make the code
+ * much more complicated and fragile, since all skb then would have to
+ * contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
=== modified file 'include/linux/net.h'
--- old/include/linux/net.h 2012-12-17 19:41:04 +0000
+++ new/include/linux/net.h 2012-12-17 23:03:50 +0000
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/stringify.h>
+#include <linux/mm.h>
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
@@ -270,6 +271,45 @@ extern int kernel_sock_ioctl(struct sock
extern int kernel_sock_shutdown(struct socket *sock,
enum sock_shutdown_cmd how);
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * See comment for net_set_get_put_page_callbacks() why those functions
+ * don't need any protection.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
=== modified file 'include/linux/skbuff.h'
--- old/include/linux/skbuff.h 2012-12-17 19:41:04 +0000
+++ new/include/linux/skbuff.h 2012-12-17 23:03:50 +0000
@@ -1848,7 +1848,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -1871,7 +1871,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
=== modified file 'net/Kconfig'
--- old/net/Kconfig 2012-12-17 19:41:04 +0000
+++ new/net/Kconfig 2012-12-17 23:03:50 +0000
@@ -74,6 +74,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'net/core/skbuff.c'
--- old/net/core/skbuff.c 2012-12-17 19:41:04 +0000
+++ new/net/core/skbuff.c 2012-12-17 23:03:50 +0000
@@ -77,13 +77,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -441,7 +441,7 @@ struct sk_buff *__netdev_alloc_skb(struc
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
@@ -494,7 +494,7 @@ static void skb_clone_fraglist(struct sk
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -780,7 +780,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1615,7 +1615,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1668,7 +1668,7 @@ static bool spd_fill_page(struct splice_
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
=== modified file 'net/core/sock.c'
--- old/net/core/sock.c 2012-12-17 19:41:04 +0000
+++ new/net/core/sock.c 2012-12-17 23:03:50 +0000
@@ -1739,7 +1739,7 @@ bool sk_page_frag_refill(struct sock *sk
}
if (pfrag->offset < pfrag->size)
return true;
- put_page(pfrag->page);
+ net_put_page(pfrag->page);
}
/* We restrict high order allocations to users that can afford to wait */
@@ -2439,7 +2439,7 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
+ net_put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
=== modified file 'net/ipv4/Makefile'
--- old/net/ipv4/Makefile 2012-12-17 19:41:04 +0000
+++ new/net/ipv4/Makefile 2012-12-17 23:03:50 +0000
@@ -51,6 +51,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
=== modified file 'net/ipv4/ip_output.c'
--- old/net/ipv4/ip_output.c 2012-12-17 19:41:04 +0000
+++ new/net/ipv4/ip_output.c 2012-12-17 23:03:50 +0000
@@ -1002,7 +1002,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
@@ -1223,7 +1223,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'net/ipv4/tcp.c'
--- old/net/ipv4/tcp.c 2012-12-17 19:41:04 +0000
+++ new/net/ipv4/tcp.c 2012-12-17 23:03:50 +0000
@@ -891,7 +891,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1179,7 +1179,7 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
pfrag->offset += copy;
}
=== added file 'net/ipv4/tcp_zero_copy.c'
--- old/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ new/net/ipv4/tcp_zero_copy.c 2012-12-17 23:03:50 +0000
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Caller of this function must ensure that at the moment when it's called
+ * there are no pages in the system with net_priv field set to non-zero
+ * value. Hence, this function, as well as net_get_page() and net_put_page(),
+ * don't need any protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
=== modified file 'net/ipv6/ip6_output.c'
--- old/net/ipv6/ip6_output.c 2012-12-17 19:41:04 +0000
+++ new/net/ipv6/ip6_output.c 2012-12-17 23:03:50 +0000
@@ -1517,7 +1517,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,

View File

@@ -1,397 +0,0 @@
=== modified file 'drivers/net/macvtap.c'
--- old/drivers/net/macvtap.c 2012-12-17 19:41:04 +0000
+++ new/drivers/net/macvtap.c 2012-12-17 23:03:50 +0000
@@ -536,7 +536,7 @@ static int zerocopy_sg_from_iovec(struct
num_pages = get_user_pages_fast(base, size, 0, &page[i]);
if (num_pages != size) {
for (i = 0; i < num_pages; i++)
- put_page(page[i]);
+ net_put_page(page[i]);
return -EFAULT;
}
truesize = size * PAGE_SIZE;
=== modified file 'drivers/net/tun.c'
--- old/drivers/net/tun.c 2012-12-17 19:41:04 +0000
+++ new/drivers/net/tun.c 2012-12-17 23:03:50 +0000
@@ -660,7 +660,7 @@ static int zerocopy_sg_from_iovec(struct
num_pages = get_user_pages_fast(base, size, 0, &page[i]);
if (num_pages != size) {
for (i = 0; i < num_pages; i++)
- put_page(page[i]);
+ net_put_page(page[i]);
return -EFAULT;
}
truesize = size * PAGE_SIZE;
=== modified file 'drivers/net/vmxnet3/vmxnet3_drv.c'
--- old/drivers/net/vmxnet3/vmxnet3_drv.c 2012-12-17 19:41:04 +0000
+++ new/drivers/net/vmxnet3/vmxnet3_drv.c 2012-12-17 23:03:50 +0000
@@ -1370,7 +1370,7 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_que
rq->buf_info[ring_idx][i].page) {
pci_unmap_page(adapter->pdev, rxd->addr,
rxd->len, PCI_DMA_FROMDEVICE);
- put_page(rq->buf_info[ring_idx][i].page);
+ net_put_page(rq->buf_info[ring_idx][i].page);
rq->buf_info[ring_idx][i].page = NULL;
}
}
=== modified file 'drivers/net/xen-netback/netback.c'
--- old/drivers/net/xen-netback/netback.c 2012-12-17 19:41:04 +0000
+++ new/drivers/net/xen-netback/netback.c 2012-12-17 23:03:50 +0000
@@ -1081,7 +1081,7 @@ static void xen_netbk_fill_frags(struct
skb->truesize += txp->size;
/* Take an extra reference to offset xen_netbk_idx_release */
- get_page(netbk->mmap_pages[pending_idx]);
+ net_get_page(netbk->mmap_pages[pending_idx]);
xen_netbk_idx_release(netbk, pending_idx);
}
}
@@ -1522,7 +1522,7 @@ static void xen_netbk_idx_release(struct
xenvif_put(vif);
netbk->mmap_pages[pending_idx]->mapping = 0;
- put_page(netbk->mmap_pages[pending_idx]);
+ net_put_page(netbk->mmap_pages[pending_idx]);
netbk->mmap_pages[pending_idx] = NULL;
}
=== modified file 'include/linux/mm_types.h'
--- old/include/linux/mm_types.h 2012-12-17 19:41:04 +0000
+++ new/include/linux/mm_types.h 2012-12-17 23:03:50 +0000
@@ -175,6 +175,17 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. It might look as not good to have this field here and
+ * it's better to have it in struct sk_buff, but it would make the code
+ * much more complicated and fragile, since all skb then would have to
+ * contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
=== modified file 'include/linux/net.h'
--- old/include/linux/net.h 2012-12-17 19:41:04 +0000
+++ new/include/linux/net.h 2012-12-17 23:03:50 +0000
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/stringify.h>
+#include <linux/mm.h>
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
@@ -270,6 +271,45 @@ extern int kernel_sock_ioctl(struct sock
extern int kernel_sock_shutdown(struct socket *sock,
enum sock_shutdown_cmd how);
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * See comment for net_set_get_put_page_callbacks() why those functions
+ * don't need any protection.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
=== modified file 'include/linux/skbuff.h'
--- old/include/linux/skbuff.h 2012-12-17 19:41:04 +0000
+++ new/include/linux/skbuff.h 2012-12-17 23:03:50 +0000
@@ -1848,7 +1848,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -1871,7 +1871,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
=== modified file 'net/Kconfig'
--- old/net/Kconfig 2012-12-17 19:41:04 +0000
+++ new/net/Kconfig 2012-12-17 23:03:50 +0000
@@ -74,6 +74,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'net/core/skbuff.c'
--- old/net/core/skbuff.c 2012-12-17 19:41:04 +0000
+++ new/net/core/skbuff.c 2012-12-17 23:03:50 +0000
@@ -77,13 +77,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -441,7 +441,7 @@ struct sk_buff *__netdev_alloc_skb(struc
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
@@ -494,7 +494,7 @@ static void skb_clone_fraglist(struct sk
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -780,7 +780,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1615,7 +1615,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1668,7 +1668,7 @@ static bool spd_fill_page(struct splice_
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
=== modified file 'net/core/sock.c'
--- old/net/core/sock.c 2012-12-17 19:41:04 +0000
+++ new/net/core/sock.c 2012-12-17 23:03:50 +0000
@@ -1739,7 +1739,7 @@ bool sk_page_frag_refill(struct sock *sk
}
if (pfrag->offset < pfrag->size)
return true;
- put_page(pfrag->page);
+ net_put_page(pfrag->page);
}
/* We restrict high order allocations to users that can afford to wait */
@@ -2439,7 +2439,7 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
+ net_put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
=== modified file 'net/ipv4/Makefile'
--- old/net/ipv4/Makefile 2012-12-17 19:41:04 +0000
+++ new/net/ipv4/Makefile 2012-12-17 23:03:50 +0000
@@ -51,6 +51,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
=== modified file 'net/ipv4/ip_output.c'
--- old/net/ipv4/ip_output.c 2012-12-17 19:41:04 +0000
+++ new/net/ipv4/ip_output.c 2012-12-17 23:03:50 +0000
@@ -1002,7 +1002,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
@@ -1223,7 +1223,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'net/ipv4/tcp.c'
--- old/net/ipv4/tcp.c 2012-12-17 19:41:04 +0000
+++ new/net/ipv4/tcp.c 2012-12-17 23:03:50 +0000
@@ -891,7 +891,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1179,7 +1179,7 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
pfrag->offset += copy;
}
=== added file 'net/ipv4/tcp_zero_copy.c'
--- old/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ new/net/ipv4/tcp_zero_copy.c 2012-12-17 23:03:50 +0000
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Installs both callbacks and returns 0, or returns -EBUSY and changes
+ * nothing if that would replace a set callback with a different non-NULL
+ * one. The caller must ensure no page in the system has a non-zero net_priv
+ * when calling; hence this and net_get/put_page() don't need protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
=== modified file 'net/ipv6/ip6_output.c'
--- old/net/ipv6/ip6_output.c 2012-12-17 19:41:04 +0000
+++ new/net/ipv6/ip6_output.c 2012-12-17 23:03:50 +0000
@@ -1517,7 +1517,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,

View File

@@ -1,397 +0,0 @@
=== modified file 'drivers/net/macvtap.c'
--- old/drivers/net/macvtap.c 2013-02-22 21:12:31 +0000
+++ new/drivers/net/macvtap.c 2013-02-23 00:18:18 +0000
@@ -536,7 +536,7 @@ static int zerocopy_sg_from_iovec(struct
num_pages = get_user_pages_fast(base, size, 0, &page[i]);
if (num_pages != size) {
for (i = 0; i < num_pages; i++)
- put_page(page[i]);
+ net_put_page(page[i]);
return -EFAULT;
}
truesize = size * PAGE_SIZE;
=== modified file 'drivers/net/tun.c'
--- old/drivers/net/tun.c 2013-02-22 21:12:31 +0000
+++ new/drivers/net/tun.c 2013-02-23 00:18:18 +0000
@@ -1012,7 +1012,7 @@ static int zerocopy_sg_from_iovec(struct
num_pages = get_user_pages_fast(base, size, 0, &page[i]);
if (num_pages != size) {
for (i = 0; i < num_pages; i++)
- put_page(page[i]);
+ net_put_page(page[i]);
return -EFAULT;
}
truesize = size * PAGE_SIZE;
=== modified file 'drivers/net/vmxnet3/vmxnet3_drv.c'
--- old/drivers/net/vmxnet3/vmxnet3_drv.c 2013-02-22 21:12:31 +0000
+++ new/drivers/net/vmxnet3/vmxnet3_drv.c 2013-02-23 00:18:18 +0000
@@ -1368,7 +1368,7 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_que
rq->buf_info[ring_idx][i].page) {
pci_unmap_page(adapter->pdev, rxd->addr,
rxd->len, PCI_DMA_FROMDEVICE);
- put_page(rq->buf_info[ring_idx][i].page);
+ net_put_page(rq->buf_info[ring_idx][i].page);
rq->buf_info[ring_idx][i].page = NULL;
}
}
=== modified file 'drivers/net/xen-netback/netback.c'
--- old/drivers/net/xen-netback/netback.c 2013-02-22 21:12:31 +0000
+++ new/drivers/net/xen-netback/netback.c 2013-02-23 00:18:18 +0000
@@ -1090,7 +1090,7 @@ static void xen_netbk_fill_frags(struct
skb->truesize += txp->size;
/* Take an extra reference to offset xen_netbk_idx_release */
- get_page(netbk->mmap_pages[pending_idx]);
+ net_get_page(netbk->mmap_pages[pending_idx]);
xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
}
}
@@ -1549,7 +1549,7 @@ static void xen_netbk_idx_release(struct
xenvif_put(vif);
netbk->mmap_pages[pending_idx]->mapping = 0;
- put_page(netbk->mmap_pages[pending_idx]);
+ net_put_page(netbk->mmap_pages[pending_idx]);
netbk->mmap_pages[pending_idx] = NULL;
}
=== modified file 'include/linux/mm_types.h'
--- old/include/linux/mm_types.h 2013-02-22 21:12:31 +0000
+++ new/include/linux/mm_types.h 2013-02-23 00:18:18 +0000
@@ -176,6 +176,17 @@ struct page {
#ifdef CONFIG_NUMA_BALANCING
int _last_nid;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Having this field here may look wrong, and struct sk_buff
+ * may seem a better place for it, but that would make the code much
+ * more complicated and fragile, since every skb would then have to
+ * contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
=== modified file 'include/linux/net.h'
--- old/include/linux/net.h 2013-02-22 21:12:31 +0000
+++ new/include/linux/net.h 2013-02-23 00:18:18 +0000
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/stringify.h>
+#include <linux/mm.h>
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
@@ -270,6 +271,45 @@ extern int kernel_sock_ioctl(struct sock
extern int kernel_sock_shutdown(struct socket *sock,
enum sock_shutdown_cmd how);
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * Call the get/put callback if page->net_priv is set, then get/put the page.
+ * See net_set_get_put_page_callbacks() for why no protection is needed.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else /* option not set: plain get_page()/put_page() wrappers */
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
=== modified file 'include/linux/skbuff.h'
--- old/include/linux/skbuff.h 2013-02-22 21:12:31 +0000
+++ new/include/linux/skbuff.h 2013-02-23 00:18:18 +0000
@@ -1944,7 +1944,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -1967,7 +1967,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
=== modified file 'net/Kconfig'
--- old/net/Kconfig 2013-02-22 21:12:31 +0000
+++ new/net/Kconfig 2013-02-23 00:18:18 +0000
@@ -74,6 +74,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'net/core/skbuff.c'
--- old/net/core/skbuff.c 2013-02-22 21:12:31 +0000
+++ new/net/core/skbuff.c 2013-02-23 00:18:18 +0000
@@ -77,13 +77,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -441,7 +441,7 @@ struct sk_buff *__netdev_alloc_skb(struc
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
@@ -494,7 +494,7 @@ static void skb_clone_fraglist(struct sk
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -803,7 +803,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1644,7 +1644,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1697,7 +1697,7 @@ static bool spd_fill_page(struct splice_
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
=== modified file 'net/core/sock.c'
--- old/net/core/sock.c 2013-02-22 21:12:31 +0000
+++ new/net/core/sock.c 2013-02-23 00:18:18 +0000
@@ -1803,7 +1803,7 @@ bool sk_page_frag_refill(struct sock *sk
}
if (pfrag->offset < pfrag->size)
return true;
- put_page(pfrag->page);
+ net_put_page(pfrag->page);
}
/* We restrict high order allocations to users that can afford to wait */
@@ -2503,7 +2503,7 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
+ net_put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
=== modified file 'net/ipv4/Makefile'
--- old/net/ipv4/Makefile 2013-02-22 21:12:31 +0000
+++ new/net/ipv4/Makefile 2013-02-23 00:18:18 +0000
@@ -51,6 +51,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
=== modified file 'net/ipv4/ip_output.c'
--- old/net/ipv4/ip_output.c 2013-02-22 21:12:31 +0000
+++ new/net/ipv4/ip_output.c 2013-02-23 00:18:18 +0000
@@ -1006,7 +1006,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
@@ -1227,7 +1227,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'net/ipv4/tcp.c'
--- old/net/ipv4/tcp.c 2013-02-22 21:12:31 +0000
+++ new/net/ipv4/tcp.c 2013-02-23 00:18:18 +0000
@@ -892,7 +892,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1180,7 +1180,7 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
pfrag->offset += copy;
}
=== added file 'net/ipv4/tcp_zero_copy.c'
--- old/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ new/net/ipv4/tcp_zero_copy.c 2013-02-23 00:18:18 +0000
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Installs both callbacks and returns 0, or returns -EBUSY and changes
+ * nothing if that would replace a set callback with a different non-NULL
+ * one. The caller must ensure no page in the system has a non-zero net_priv
+ * when calling; hence this and net_get/put_page() don't need protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
=== modified file 'net/ipv6/ip6_output.c'
--- old/net/ipv6/ip6_output.c 2013-02-22 21:12:31 +0000
+++ new/net/ipv6/ip6_output.c 2013-02-23 00:18:18 +0000
@@ -1451,7 +1451,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,

View File

@@ -1,432 +0,0 @@
=== modified file 'drivers/block/drbd/drbd_receiver.c'
--- old/drivers/block/drbd/drbd_receiver.c 2013-05-11 05:39:14 +0000
+++ new/drivers/block/drbd/drbd_receiver.c 2013-05-11 05:53:15 +0000
@@ -130,7 +130,7 @@ static int page_chain_free(struct page *
struct page *tmp;
int i = 0;
page_chain_for_each_safe(page, tmp) {
- put_page(page);
+ net_put_page(page);
++i;
}
return i;
=== modified file 'drivers/net/macvtap.c'
--- old/drivers/net/macvtap.c 2013-05-11 05:39:14 +0000
+++ new/drivers/net/macvtap.c 2013-05-11 05:49:24 +0000
@@ -525,7 +525,7 @@ static int zerocopy_sg_from_iovec(struct
num_pages = get_user_pages_fast(base, size, 0, &page[i]);
if (num_pages != size) {
for (i = 0; i < num_pages; i++)
- put_page(page[i]);
+ net_put_page(page[i]);
return -EFAULT;
}
truesize = size * PAGE_SIZE;
=== modified file 'drivers/net/tun.c'
--- old/drivers/net/tun.c 2013-05-11 05:39:14 +0000
+++ new/drivers/net/tun.c 2013-05-11 05:49:24 +0000
@@ -1013,7 +1013,7 @@ static int zerocopy_sg_from_iovec(struct
num_pages = get_user_pages_fast(base, size, 0, &page[i]);
if (num_pages != size) {
for (i = 0; i < num_pages; i++)
- put_page(page[i]);
+ net_put_page(page[i]);
return -EFAULT;
}
truesize = size * PAGE_SIZE;
=== modified file 'drivers/net/vmxnet3/vmxnet3_drv.c'
--- old/drivers/net/vmxnet3/vmxnet3_drv.c 2013-05-11 05:39:14 +0000
+++ new/drivers/net/vmxnet3/vmxnet3_drv.c 2013-05-11 05:49:24 +0000
@@ -1360,7 +1360,7 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_que
rq->buf_info[ring_idx][i].page) {
pci_unmap_page(adapter->pdev, rxd->addr,
rxd->len, PCI_DMA_FROMDEVICE);
- put_page(rq->buf_info[ring_idx][i].page);
+ net_put_page(rq->buf_info[ring_idx][i].page);
rq->buf_info[ring_idx][i].page = NULL;
}
}
=== modified file 'drivers/net/xen-netback/netback.c'
--- old/drivers/net/xen-netback/netback.c 2013-05-11 05:39:14 +0000
+++ new/drivers/net/xen-netback/netback.c 2013-05-14 01:25:01 +0000
@@ -1090,7 +1090,7 @@ static void xen_netbk_fill_frags(struct
skb->truesize += txp->size;
/* Take an extra reference to offset xen_netbk_idx_release */
- get_page(netbk->mmap_pages[pending_idx]);
+ net_get_page(netbk->mmap_pages[pending_idx]);
xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
}
}
@@ -1549,7 +1549,7 @@ static void xen_netbk_idx_release(struct
xenvif_put(vif);
netbk->mmap_pages[pending_idx]->mapping = 0;
- put_page(netbk->mmap_pages[pending_idx]);
+ net_put_page(netbk->mmap_pages[pending_idx]);
netbk->mmap_pages[pending_idx] = NULL;
}
=== modified file 'include/linux/mm_types.h'
--- old/include/linux/mm_types.h 2013-05-11 05:39:14 +0000
+++ new/include/linux/mm_types.h 2013-05-11 05:49:24 +0000
@@ -177,6 +177,17 @@ struct page {
#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
int _last_nid;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Having this field here may look wrong, and struct sk_buff
+ * may seem a better place for it, but that would make the code much
+ * more complicated and fragile, since every skb would then have to
+ * contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
=== modified file 'include/linux/net.h'
--- old/include/linux/net.h 2013-05-11 05:39:14 +0000
+++ new/include/linux/net.h 2013-05-11 05:49:24 +0000
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/stringify.h>
+#include <linux/mm.h>
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
@@ -270,6 +271,45 @@ extern int kernel_sock_ioctl(struct sock
extern int kernel_sock_shutdown(struct socket *sock,
enum sock_shutdown_cmd how);
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * Call the get/put callback if page->net_priv is set, then get/put the page.
+ * See net_set_get_put_page_callbacks() for why no protection is needed.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else /* option not set: plain get_page()/put_page() wrappers */
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
=== modified file 'include/linux/skbuff.h'
--- old/include/linux/skbuff.h 2013-05-11 05:39:14 +0000
+++ new/include/linux/skbuff.h 2013-05-11 05:49:24 +0000
@@ -1982,7 +1982,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -2005,7 +2005,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
=== modified file 'net/Kconfig'
--- old/net/Kconfig 2013-05-11 05:39:14 +0000
+++ new/net/Kconfig 2013-05-11 05:49:24 +0000
@@ -74,6 +74,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
=== modified file 'net/ceph/pagevec.c'
--- old/net/ceph/pagevec.c 2013-05-11 05:39:14 +0000
+++ new/net/ceph/pagevec.c 2013-05-11 05:51:31 +0000
@@ -51,7 +51,7 @@ void ceph_put_page_vector(struct page **
for (i = 0; i < num_pages; i++) {
if (dirty)
set_page_dirty_lock(pages[i]);
- put_page(pages[i]);
+ net_put_page(pages[i]);
}
kfree(pages);
}
=== modified file 'net/core/skbuff.c'
--- old/net/core/skbuff.c 2013-05-11 05:39:14 +0000
+++ new/net/core/skbuff.c 2013-05-14 01:25:01 +0000
@@ -77,13 +77,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -430,7 +430,7 @@ struct sk_buff *__netdev_alloc_skb(struc
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
@@ -483,7 +483,7 @@ static void skb_clone_fraglist(struct sk
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -792,7 +792,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1633,7 +1633,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1686,7 +1686,7 @@ static bool spd_fill_page(struct splice_
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
@@ -2685,7 +2685,7 @@ int skb_append_datato_frags(struct sock
copy);
frg_cnt++;
pfrag->offset += copy;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
skb->truesize += copy;
atomic_add(copy, &sk->sk_wmem_alloc);
=== modified file 'net/core/sock.c'
--- old/net/core/sock.c 2013-05-11 05:39:14 +0000
+++ new/net/core/sock.c 2013-05-11 05:49:24 +0000
@@ -1823,7 +1823,7 @@ bool sk_page_frag_refill(struct sock *sk
}
if (pfrag->offset < pfrag->size)
return true;
- put_page(pfrag->page);
+ net_put_page(pfrag->page);
}
/* We restrict high order allocations to users that can afford to wait */
@@ -2523,7 +2523,7 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
+ net_put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
=== modified file 'net/ipv4/Makefile'
--- old/net/ipv4/Makefile 2013-05-11 05:39:14 +0000
+++ new/net/ipv4/Makefile 2013-05-11 05:49:24 +0000
@@ -51,6 +51,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
=== modified file 'net/ipv4/ip_output.c'
--- old/net/ipv4/ip_output.c 2013-05-11 05:39:14 +0000
+++ new/net/ipv4/ip_output.c 2013-05-14 01:25:01 +0000
@@ -1007,7 +1007,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
@@ -1228,7 +1228,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
=== modified file 'net/ipv4/tcp.c'
--- old/net/ipv4/tcp.c 2013-05-11 05:39:14 +0000
+++ new/net/ipv4/tcp.c 2013-05-14 01:25:01 +0000
@@ -894,7 +894,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
@@ -1183,7 +1183,7 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
pfrag->offset += copy;
}
=== added file 'net/ipv4/tcp_zero_copy.c'
--- old/net/ipv4/tcp_zero_copy.c 1970-01-01 00:00:00 +0000
+++ new/net/ipv4/tcp_zero_copy.c 2013-05-14 01:25:01 +0000
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Installs both callbacks and returns 0, or returns -EBUSY and changes
+ * nothing if that would replace a set callback with a different non-NULL
+ * one. The caller must ensure no page in the system has a non-zero net_priv
+ * when calling; hence this and net_get/put_page() don't need protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
=== modified file 'net/ipv6/ip6_output.c'
--- old/net/ipv6/ip6_output.c 2013-05-11 05:39:14 +0000
+++ new/net/ipv6/ip6_output.c 2013-05-11 05:49:24 +0000
@@ -1434,7 +1434,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,

View File

@@ -1,257 +0,0 @@
diff -upr linux-2.6.18/include/linux/mm.h linux-2.6.18/include/linux/mm.h
--- linux-2.6.18/include/linux/mm.h 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/include/linux/mm.h 2007-08-07 19:35:51.000000000 +0400
@@ -277,6 +277,15 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. It is not ideal to have this field here; struct sk_buff
+ * would be a better place for it, but that would make the code much more
+ * complicated and fragile when maintained as a separate patch, since
+ * every skb would then have to contain only pages with the same value
+ * in this field.
+ */
+ void *net_priv;
};
#define page_private(page) ((page)->private)
diff -upr linux-2.6.18/include/linux/net.h linux-2.6.18/include/linux/net.h
--- linux-2.6.18/include/linux/net.h 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/include/linux/net.h 2007-08-29 18:28:21.000000000 +0400
@@ -56,6 +56,7 @@ typedef enum {
#ifdef __KERNEL__
#include <linux/stringify.h>
+#include <linux/mm.h>
#define SOCK_ASYNC_NOSPACE 0
#define SOCK_ASYNC_WAITDATA 1
@@ -324,5 +325,30 @@ extern int net_msg_cost;
extern int net_msg_burst;
#endif
+/* Support for notification on zero-copy TCP transfer completion */
+#define CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0) /* callback only when net_priv is set */
+ net_get_page_callback(page);
+ get_page(page); /* always done, callback or not */
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0) /* callback only when net_priv is set */
+ net_put_page_callback(page);
+ put_page(page); /* always done, callback or not */
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upr linux-2.6.18/net/core/skbuff.c linux-2.6.18/net/core/skbuff.c
--- linux-2.6.18/net/core/skbuff.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/core/skbuff.c 2007-08-07 19:35:51.000000000 +0400
@@ -324,7 +324,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_shinfo(skb)->frag_list)
@@ -666,7 +666,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -720,7 +720,7 @@ int pskb_expand_head(struct sk_buff *skb
memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_clone_fraglist(skb);
@@ -902,7 +902,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_drop_fraglist(skb);
@@ -1071,7 +1071,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1653,7 +1653,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2021,7 +2021,7 @@ struct sk_buff *skb_segment(struct sk_bu
BUG_ON(i >= nfrags);
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upr linux-2.6.18/net/core/utils.c linux-2.6.18/net/core/utils.c
--- linux-2.6.18/net/core/utils.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/core/utils.c 2007-08-23 19:49:40.000000000 +0400
@@ -24,11 +24,15 @@
#include <linux/random.h>
#include <linux/percpu.h>
#include <linux/init.h>
+#include <linux/skbuff.h>
#include <asm/byteorder.h>
#include <asm/system.h>
#include <asm/uaccess.h>
+net_get_page_callback_t net_get_page_callback __read_mostly;
+net_put_page_callback_t net_put_page_callback __read_mostly;
+
/*
This is a maximally equidistributed combined Tausworthe generator
based on code from GNU Scientific Library 1.5 (30 Jun 2004)
@@ -203,3 +203,32 @@ __be32 in_aton(const char *str)
}
EXPORT_SYMBOL(in_aton);
+
+/*
+ * Install the page get/put callbacks; a NULL argument clears that slot.
+ * Returns -EBUSY if a different non-NULL callback is already set, else 0.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* A different get callback is already installed. */
+	if (net_get_page_callback && get_callback &&
+	    net_get_page_callback != get_callback)
+		return -EBUSY;
+
+	/* A different put callback is already installed. */
+	if (net_put_page_callback && put_callback &&
+	    net_put_page_callback != put_callback)
+		return -EBUSY;
+
+	/* Both checks passed: store the pair. */
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
+
+EXPORT_SYMBOL(net_get_page_callback);
+EXPORT_SYMBOL(net_put_page_callback);
diff -upr linux-2.6.18/net/ipv4/ip_output.c linux-2.6.18/net/ipv4/ip_output.c
--- linux-2.6.18/net/ipv4/ip_output.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/ipv4/ip_output.c 2007-08-07 19:37:24.000000000 +0400
@@ -1006,7 +1006,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1166,7 +1166,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upr linux-2.6.18/net/ipv4/tcp.c linux-2.6.18/net/ipv4/tcp.c
--- linux-2.6.18/net/ipv4/tcp.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/ipv4/tcp.c 2007-08-07 19:35:51.000000000 +0400
@@ -560,7 +560,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -763,7 +763,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -804,9 +804,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upr linux-2.6.18/net/ipv4/tcp_output.c linux-2.6.18/net/ipv4/tcp_output.c
--- linux-2.6.18/net/ipv4/tcp_output.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/ipv4/tcp_output.c 2007-08-07 19:35:51.000000000 +0400
@@ -659,7 +659,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upr linux-2.6.18/net/ipv6/ip6_output.c linux-2.6.18/net/ipv6/ip6_output.c
--- linux-2.6.18/net/ipv6/ip6_output.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/ipv6/ip6_output.c 2007-08-07 19:35:51.000000000 +0400
@@ -1212,7 +1212,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}

View File

@@ -1,401 +0,0 @@
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/include/linux/Kbuild linux-2.6.32-131.4.1.el6.x86_64.new/include/linux/Kbuild
--- linux-2.6.32-131.4.1.el6.x86_64.orig/include/linux/Kbuild 2011-07-09 00:47:13.884215174 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/include/linux/Kbuild 2011-07-09 00:47:47.530389221 +0200
@@ -113,6 +113,7 @@ header-y += map_to_7segment.h
header-y += matroxfb.h
header-y += meye.h
header-y += minix_fs.h
+header-y += mm.h
header-y += mmtimer.h
header-y += mqueue.h
header-y += mtio.h
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/include/linux/mm_types.h linux-2.6.32-131.4.1.el6.x86_64.new/include/linux/mm_types.h
--- linux-2.6.32-131.4.1.el6.x86_64.orig/include/linux/mm_types.h 2011-07-09 00:47:13.893191775 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/include/linux/mm_types.h 2011-07-09 00:47:47.533311169 +0200
@@ -106,6 +106,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field here may look wrong, and struct sk_buff
+ * may seem the better place for it, but that would make the code much
+ * more complicated and fragile, since every skb would then have to
+ * contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/include/linux/net.h linux-2.6.32-131.4.1.el6.x86_64.new/include/linux/net.h
--- linux-2.6.32-131.4.1.el6.x86_64.orig/include/linux/net.h 2011-07-09 00:47:13.867629724 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/include/linux/net.h 2011-07-09 00:47:47.536207157 +0200
@@ -20,6 +20,7 @@
#include <linux/socket.h>
#include <asm/socket.h>
+#include <linux/mm.h>
#define NPROTO AF_MAX
@@ -365,5 +366,44 @@ static const struct proto_ops name##_ops
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * net_get_page() and net_put_page() take no locks themselves; the comment on
+ * net_set_get_put_page_callbacks() explains why none are needed.
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv)	/* notify first, then take the reference */
+		net_get_page_callback(page);
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv)	/* notify first, then drop the reference */
+		net_put_page_callback(page);
+	put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page); /* callback support not configured: plain get_page() */
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page); /* callback support not configured: plain put_page() */
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/core/dev.c linux-2.6.32-131.4.1.el6.x86_64.new/net/core/dev.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/core/dev.c 2011-07-09 00:47:14.491417046 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/core/dev.c 2011-07-09 00:47:47.538194361 +0200
@@ -2903,7 +2903,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags);
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/core/skbuff.c linux-2.6.32-131.4.1.el6.x86_64.new/net/core/skbuff.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/core/skbuff.c 2011-07-09 00:47:14.491417046 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/core/skbuff.c 2011-07-09 00:47:47.542260687 +0200
@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -344,7 +344,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_has_frags(skb))
@@ -766,7 +766,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -832,7 +832,7 @@ int pskb_expand_head(struct sk_buff *skb
sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_clone_fraglist(skb);
@@ -1106,7 +1106,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_drop_fraglist(skb);
@@ -1275,7 +1275,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1376,7 +1376,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1400,7 +1400,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1410,7 +1410,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1431,7 +1431,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2061,7 +2061,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2183,7 +2183,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2205,7 +2205,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2600,7 +2600,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/ip_output.c linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/ip_output.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/ip_output.c 2011-07-09 00:47:14.538469946 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/ip_output.c 2011-07-09 00:47:47.544855733 +0200
@@ -981,7 +981,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1213,7 +1213,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/Makefile linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/Makefile
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/Makefile 2011-07-09 00:47:14.540204846 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/Makefile 2011-07-09 00:47:47.544855733 +0200
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/tcp.c linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/tcp.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/tcp.c 2011-07-09 00:47:14.522494769 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/tcp.c 2011-07-09 00:47:47.548279863 +0200
@@ -799,7 +799,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1007,7 +1007,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1048,9 +1048,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/tcp_output.c linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/tcp_output.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/tcp_output.c 2011-07-09 00:47:14.535532920 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/tcp_output.c 2011-07-09 00:47:47.548279863 +0200
@@ -909,7 +909,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/tcp_zero_copy.c linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/tcp_zero_copy.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/tcp_zero_copy.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/tcp_zero_copy.c 2011-07-09 00:47:47.548279863 +0200
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Install the page get/put callbacks; a NULL argument clears that slot.
+ *
+ * Returns 0 on success.  Returns -EBUSY, leaving both slots untouched, when
+ * a different non-NULL callback is already registered for either slot.
+ *
+ * The caller must ensure that no page in the system has a non-zero net_priv
+ * when this is called; because of that, neither this function nor
+ * net_get_page()/net_put_page() need any locking.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* A different get callback is already installed. */
+	if (net_get_page_callback && get_callback &&
+	    net_get_page_callback != get_callback)
+		return -EBUSY;
+
+	/* A different put callback is already installed. */
+	if (net_put_page_callback && put_callback &&
+	    net_put_page_callback != put_callback)
+		return -EBUSY;
+
+	/* Both checks passed: store the pair. */
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv6/ip6_output.c linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv6/ip6_output.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv6/ip6_output.c 2011-07-09 00:47:14.498374289 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv6/ip6_output.c 2011-07-09 00:47:47.557327813 +0200
@@ -1370,7 +1370,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/Kconfig linux-2.6.32-131.4.1.el6.x86_64.new/net/Kconfig
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/Kconfig 2011-07-09 00:47:14.452504858 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/Kconfig 2011-07-09 00:47:47.557327813 +0200
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,419 +0,0 @@
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/include/linux/Kbuild linux-2.6.32-220.2.1.el6.x86_64/include/linux/Kbuild
--- linux-2.6.32-220.2.1.el6.x86_64.orig/include/linux/Kbuild 2011-12-13 20:58:15.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/include/linux/Kbuild 2012-01-11 13:04:25.174935229 +0100
@@ -113,6 +113,7 @@ header-y += map_to_7segment.h
header-y += matroxfb.h
header-y += meye.h
header-y += minix_fs.h
+header-y += mm.h
header-y += mmtimer.h
header-y += mqueue.h
header-y += mtio.h
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/include/linux/mm_types.h linux-2.6.32-220.2.1.el6.x86_64/include/linux/mm_types.h
--- linux-2.6.32-220.2.1.el6.x86_64.orig/include/linux/mm_types.h 2011-12-13 21:05:37.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/include/linux/mm_types.h 2012-01-11 13:04:25.174935229 +0100
@@ -106,6 +106,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field here may look wrong, and struct sk_buff
+ * may seem the better place for it, but that would make the code much
+ * more complicated and fragile, since every skb would then have to
+ * contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/include/linux/net.h linux-2.6.32-220.2.1.el6.x86_64/include/linux/net.h
--- linux-2.6.32-220.2.1.el6.x86_64.orig/include/linux/net.h 2011-12-13 21:07:31.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/include/linux/net.h 2012-01-11 13:04:25.195827374 +0100
@@ -20,6 +20,7 @@
#include <linux/socket.h>
#include <asm/socket.h>
+#include <linux/mm.h>
#define NPROTO AF_MAX
@@ -366,5 +367,44 @@ static const struct proto_ops name##_ops
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * net_get_page() and net_put_page() take no locks themselves; the comment on
+ * net_set_get_put_page_callbacks() explains why none are needed.
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv)	/* notify first, then take the reference */
+		net_get_page_callback(page);
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv)	/* notify first, then drop the reference */
+		net_put_page_callback(page);
+	put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page); /* callback support not configured: plain get_page() */
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page); /* callback support not configured: plain put_page() */
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/core/dev.c linux-2.6.32-220.2.1.el6.x86_64/net/core/dev.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/core/dev.c 2011-12-13 21:08:32.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/core/dev.c 2012-01-11 13:04:25.201554763 +0100
@@ -2997,7 +2997,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags);
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/core/skbuff.c linux-2.6.32-220.2.1.el6.x86_64/net/core/skbuff.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/core/skbuff.c 2011-12-13 21:05:47.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/core/skbuff.c 2012-01-11 13:06:47.336976215 +0100
@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -344,7 +344,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
/*
@@ -648,7 +648,7 @@ static int skb_copy_ubufs(struct sk_buff
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -663,7 +663,7 @@ static int skb_copy_ubufs(struct sk_buff
/* skb frags release userspace buffers */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
uarg->callback(uarg);
@@ -840,7 +840,7 @@ struct sk_buff *pskb_copy(struct sk_buff
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -926,7 +926,7 @@ int pskb_expand_head(struct sk_buff *skb
skb_tx(skb)->dev_zerocopy = 0;
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_clone_fraglist(skb);
@@ -1202,7 +1202,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_drop_fraglist(skb);
@@ -1371,7 +1371,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1473,7 +1473,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1497,7 +1497,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1507,7 +1507,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1528,7 +1528,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2158,7 +2158,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2280,7 +2280,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2302,7 +2302,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2697,7 +2697,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/ip_output.c linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/ip_output.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/ip_output.c 2011-12-13 21:03:21.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/ip_output.c 2012-01-11 13:04:25.219980596 +0100
@@ -981,7 +981,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1213,7 +1213,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/Makefile linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/Makefile
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/Makefile 2009-12-03 04:51:21.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/Makefile 2012-01-11 13:04:25.222633491 +0100
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/tcp.c linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/tcp.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/tcp.c 2011-12-13 21:02:58.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/tcp.c 2012-01-11 13:04:25.227497748 +0100
@@ -799,7 +799,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1007,7 +1007,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1048,9 +1048,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/tcp_output.c linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/tcp_output.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/tcp_output.c 2011-12-13 21:07:40.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/tcp_output.c 2012-01-11 13:04:25.232755077 +0100
@@ -911,7 +911,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/tcp_zero_copy.c linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/tcp_zero_copy.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/tcp_zero_copy.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/tcp_zero_copy.c 2012-01-11 13:04:25.235282486 +0100
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Install the page get/put callbacks; a NULL argument clears that slot.
+ *
+ * Returns 0 on success.  Returns -EBUSY, leaving both slots untouched, when
+ * a different non-NULL callback is already registered for either slot.
+ *
+ * The caller must ensure that no page in the system has a non-zero net_priv
+ * when this is called; because of that, neither this function nor
+ * net_get_page()/net_put_page() need any locking.
+ */
+int net_set_get_put_page_callbacks(
+	net_get_page_callback_t get_callback,
+	net_put_page_callback_t put_callback)
+{
+	/* A different get callback is already installed. */
+	if (net_get_page_callback && get_callback &&
+	    net_get_page_callback != get_callback)
+		return -EBUSY;
+
+	/* A different put callback is already installed. */
+	if (net_put_page_callback && put_callback &&
+	    net_put_page_callback != put_callback)
+		return -EBUSY;
+
+	/* Both checks passed: store the pair. */
+	net_get_page_callback = get_callback;
+	net_put_page_callback = put_callback;
+
+	return 0;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv6/ip6_output.c linux-2.6.32-220.2.1.el6.x86_64/net/ipv6/ip6_output.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv6/ip6_output.c 2011-12-13 21:08:29.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/ipv6/ip6_output.c 2012-01-11 13:04:25.238620480 +0100
@@ -1400,7 +1400,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/Kconfig linux-2.6.32-220.2.1.el6.x86_64/net/Kconfig
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/Kconfig 2009-12-03 04:51:21.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/Kconfig 2012-01-11 13:04:25.241504025 +0100
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,419 +0,0 @@
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/include/linux/Kbuild linux-2.6.32-220.2.1.el6.x86_64/include/linux/Kbuild
--- linux-2.6.32-220.2.1.el6.x86_64.orig/include/linux/Kbuild 2011-12-13 20:58:15.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/include/linux/Kbuild 2012-01-11 13:04:25.174935229 +0100
@@ -113,6 +113,7 @@ header-y += map_to_7segment.h
header-y += matroxfb.h
header-y += meye.h
header-y += minix_fs.h
+header-y += mm.h
header-y += mmtimer.h
header-y += mqueue.h
header-y += mtio.h
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/include/linux/mm_types.h linux-2.6.32-220.2.1.el6.x86_64/include/linux/mm_types.h
--- linux-2.6.32-220.2.1.el6.x86_64.orig/include/linux/mm_types.h 2011-12-13 21:05:37.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/include/linux/mm_types.h 2012-01-11 13:04:25.174935229 +0100
@@ -106,6 +106,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field in struct sk_buff might look cleaner
+ * than keeping it here, but that would make the code much more
+ * complicated and fragile, since every skb would then have to contain
+ * only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/include/linux/net.h linux-2.6.32-220.2.1.el6.x86_64/include/linux/net.h
--- linux-2.6.32-220.2.1.el6.x86_64.orig/include/linux/net.h 2011-12-13 21:07:31.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/include/linux/net.h 2012-01-11 13:04:25.195827374 +0100
@@ -20,6 +20,7 @@
#include <linux/socket.h>
#include <asm/socket.h>
+#include <linux/mm.h>
#define NPROTO AF_MAX
@@ -366,5 +367,44 @@ static const struct proto_ops name##_ops
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * See the comment for net_set_get_put_page_callbacks() for why these
+ * functions don't need any protection.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != NULL)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != NULL)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/core/dev.c linux-2.6.32-220.2.1.el6.x86_64/net/core/dev.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/core/dev.c 2011-12-13 21:08:32.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/core/dev.c 2012-01-11 13:04:25.201554763 +0100
@@ -2997,7 +2997,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags);
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/core/skbuff.c linux-2.6.32-220.2.1.el6.x86_64/net/core/skbuff.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/core/skbuff.c 2011-12-13 21:05:47.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/core/skbuff.c 2012-01-11 13:06:47.336976215 +0100
@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -344,7 +344,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
/*
@@ -648,7 +648,7 @@ static int skb_copy_ubufs(struct sk_buff
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -663,7 +663,7 @@ static int skb_copy_ubufs(struct sk_buff
/* skb frags release userspace buffers */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
uarg->callback(uarg);
@@ -840,7 +840,7 @@ struct sk_buff *pskb_copy(struct sk_buff
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -926,7 +926,7 @@ int pskb_expand_head(struct sk_buff *skb
skb_tx(skb)->dev_zerocopy = 0;
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_clone_fraglist(skb);
@@ -1202,7 +1202,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_drop_fraglist(skb);
@@ -1371,7 +1371,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1473,7 +1473,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1497,7 +1497,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1507,7 +1507,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1528,7 +1528,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2158,7 +2158,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2280,7 +2280,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2302,7 +2302,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2697,7 +2697,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/ip_output.c linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/ip_output.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/ip_output.c 2011-12-13 21:03:21.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/ip_output.c 2012-01-11 13:04:25.219980596 +0100
@@ -981,7 +981,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1213,7 +1213,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/Makefile linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/Makefile
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/Makefile 2009-12-03 04:51:21.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/Makefile 2012-01-11 13:04:25.222633491 +0100
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/tcp.c linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/tcp.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/tcp.c 2011-12-13 21:02:58.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/tcp.c 2012-01-11 13:04:25.227497748 +0100
@@ -799,7 +799,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1007,7 +1007,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1048,9 +1048,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/tcp_output.c linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/tcp_output.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/tcp_output.c 2011-12-13 21:07:40.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/tcp_output.c 2012-01-11 13:04:25.232755077 +0100
@@ -911,7 +911,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/tcp_zero_copy.c linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/tcp_zero_copy.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/tcp_zero_copy.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/tcp_zero_copy.c 2012-01-11 13:04:25.235282486 +0100
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Set or (with NULL) clear the page callbacks. Returns -EBUSY if a non-NULL
+ * argument differs from an already installed callback, else 0. The caller
+ * must ensure that no page has a non-zero net_priv at the time of the call,
+ * so this function, net_get_page() and net_put_page() need no protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv6/ip6_output.c linux-2.6.32-220.2.1.el6.x86_64/net/ipv6/ip6_output.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv6/ip6_output.c 2011-12-13 21:08:29.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/ipv6/ip6_output.c 2012-01-11 13:04:25.238620480 +0100
@@ -1400,7 +1400,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/Kconfig linux-2.6.32-220.2.1.el6.x86_64/net/Kconfig
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/Kconfig 2009-12-03 04:51:21.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/Kconfig 2012-01-11 13:04:25.241504025 +0100
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,419 +0,0 @@
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/include/linux/Kbuild linux-2.6.32-220.2.1.el6.x86_64/include/linux/Kbuild
--- linux-2.6.32-220.2.1.el6.x86_64.orig/include/linux/Kbuild 2011-12-13 20:58:15.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/include/linux/Kbuild 2012-01-11 13:04:25.174935229 +0100
@@ -113,6 +113,7 @@ header-y += map_to_7segment.h
header-y += matroxfb.h
header-y += meye.h
header-y += minix_fs.h
+header-y += mm.h
header-y += mmtimer.h
header-y += mqueue.h
header-y += mtio.h
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/include/linux/mm_types.h linux-2.6.32-220.2.1.el6.x86_64/include/linux/mm_types.h
--- linux-2.6.32-220.2.1.el6.x86_64.orig/include/linux/mm_types.h 2011-12-13 21:05:37.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/include/linux/mm_types.h 2012-01-11 13:04:25.174935229 +0100
@@ -106,6 +106,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field in struct sk_buff might look cleaner
+ * than keeping it here, but that would make the code much more
+ * complicated and fragile, since every skb would then have to contain
+ * only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/include/linux/net.h linux-2.6.32-220.2.1.el6.x86_64/include/linux/net.h
--- linux-2.6.32-220.2.1.el6.x86_64.orig/include/linux/net.h 2011-12-13 21:07:31.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/include/linux/net.h 2012-01-11 13:04:25.195827374 +0100
@@ -20,6 +20,7 @@
#include <linux/socket.h>
#include <asm/socket.h>
+#include <linux/mm.h>
#define NPROTO AF_MAX
@@ -366,5 +367,44 @@ static const struct proto_ops name##_ops
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * See the comment for net_set_get_put_page_callbacks() for why these
+ * functions don't need any protection.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != NULL)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != NULL)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/core/dev.c linux-2.6.32-220.2.1.el6.x86_64/net/core/dev.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/core/dev.c 2011-12-13 21:08:32.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/core/dev.c 2012-01-11 13:04:25.201554763 +0100
@@ -2997,7 +2997,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags);
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/core/skbuff.c linux-2.6.32-220.2.1.el6.x86_64/net/core/skbuff.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/core/skbuff.c 2011-12-13 21:05:47.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/core/skbuff.c 2012-01-11 13:06:47.336976215 +0100
@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -344,7 +344,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
/*
@@ -648,7 +648,7 @@ static int skb_copy_ubufs(struct sk_buff
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -663,7 +663,7 @@ static int skb_copy_ubufs(struct sk_buff
/* skb frags release userspace buffers */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
uarg->callback(uarg);
@@ -840,7 +840,7 @@ struct sk_buff *pskb_copy(struct sk_buff
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -926,7 +926,7 @@ int pskb_expand_head(struct sk_buff *skb
skb_tx(skb)->dev_zerocopy = 0;
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_clone_fraglist(skb);
@@ -1202,7 +1202,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_drop_fraglist(skb);
@@ -1371,7 +1371,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1473,7 +1473,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1497,7 +1497,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1507,7 +1507,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1528,7 +1528,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2158,7 +2158,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2280,7 +2280,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2302,7 +2302,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2697,7 +2697,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/ip_output.c linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/ip_output.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/ip_output.c 2011-12-13 21:03:21.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/ip_output.c 2012-01-11 13:04:25.219980596 +0100
@@ -981,7 +981,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1213,7 +1213,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/Makefile linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/Makefile
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/Makefile 2009-12-03 04:51:21.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/Makefile 2012-01-11 13:04:25.222633491 +0100
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/tcp.c linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/tcp.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/tcp.c 2011-12-13 21:02:58.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/tcp.c 2012-01-11 13:04:25.227497748 +0100
@@ -799,7 +799,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1007,7 +1007,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1048,9 +1048,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/tcp_output.c linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/tcp_output.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/tcp_output.c 2011-12-13 21:07:40.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/tcp_output.c 2012-01-11 13:04:25.232755077 +0100
@@ -911,7 +911,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/tcp_zero_copy.c linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/tcp_zero_copy.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv4/tcp_zero_copy.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/ipv4/tcp_zero_copy.c 2012-01-11 13:04:25.235282486 +0100
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Set or (with NULL) clear the page callbacks. Returns -EBUSY if a non-NULL
+ * argument differs from an already installed callback, else 0. The caller
+ * must ensure that no page has a non-zero net_priv at the time of the call,
+ * so this function, net_get_page() and net_put_page() need no protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv6/ip6_output.c linux-2.6.32-220.2.1.el6.x86_64/net/ipv6/ip6_output.c
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/ipv6/ip6_output.c 2011-12-13 21:08:29.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/ipv6/ip6_output.c 2012-01-11 13:04:25.238620480 +0100
@@ -1400,7 +1400,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -uNrp linux-2.6.32-220.2.1.el6.x86_64.orig/net/Kconfig linux-2.6.32-220.2.1.el6.x86_64/net/Kconfig
--- linux-2.6.32-220.2.1.el6.x86_64.orig/net/Kconfig 2009-12-03 04:51:21.000000000 +0100
+++ linux-2.6.32-220.2.1.el6.x86_64/net/Kconfig 2012-01-11 13:04:25.241504025 +0100
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,453 +0,0 @@
[PATCH] put_page_callback-2.6.32-431.patch
---
include/linux/Kbuild | 1 +
include/linux/mm_types.h | 12 ++++++++++++
include/linux/net.h | 40 +++++++++++++++++++++++++++++++++++++++
net/Kconfig | 12 ++++++++++++
net/core/dev.c | 2 +-
net/core/skbuff.c | 34 ++++++++++++++++-----------------
net/ipv4/Makefile | 1 +
net/ipv4/ip_output.c | 4 ++--
net/ipv4/tcp.c | 8 ++++----
net/ipv4/tcp_output.c | 2 +-
net/ipv4/tcp_zero_copy.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++
net/ipv6/ip6_output.c | 2 +-
12 files changed, 141 insertions(+), 26 deletions(-)
create mode 100644 net/ipv4/tcp_zero_copy.c
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 1ac23c9..352c604 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -113,6 +113,7 @@ header-y += map_to_7segment.h
header-y += matroxfb.h
header-y += meye.h
header-y += minix_fs.h
+header-y += mm.h
header-y += mmtimer.h
header-y += mqueue.h
header-y += mtio.h
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 645f205..7b6de1f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -106,6 +106,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field in struct sk_buff might look cleaner
+ * than keeping it here, but that would make the code much more
+ * complicated and fragile, since every skb would then have to contain
+ * only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff --git a/include/linux/net.h b/include/linux/net.h
index 58ada6b..b0adbdc 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -20,6 +20,7 @@
#include <linux/socket.h>
#include <asm/socket.h>
+#include <linux/mm.h>
#define NPROTO AF_MAX
@@ -389,5 +390,44 @@ static const struct proto_ops name##_ops = { \
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * See the comment for net_set_get_put_page_callbacks() for why these
+ * functions don't need any protection.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != NULL)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != NULL)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff --git a/net/Kconfig b/net/Kconfig
index de77d58..58d7398 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
diff --git a/net/core/dev.c b/net/core/dev.c
index da35549..57f0a77 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3622,7 +3622,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 361f45a..3b78821 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_cache __read_mostly;
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -394,7 +394,7 @@ static void skb_release_data(struct sk_buff *skb)
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
/*
@@ -699,7 +699,7 @@ static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -714,7 +714,7 @@ static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
/* skb frags release userspace buffers */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
uarg->callback(uarg);
@@ -880,7 +880,7 @@ struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -961,7 +961,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
skb_tx(skb)->dev_zerocopy = 0;
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);
@@ -1240,7 +1240,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frag_list(skb))
skb_drop_fraglist(skb);
@@ -1409,7 +1409,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1511,7 +1511,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1535,7 +1535,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1545,7 +1545,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1566,7 +1566,7 @@ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2196,7 +2196,7 @@ static inline void skb_split_no_header(struct sk_buff *skb,
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2319,7 +2319,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2341,7 +2341,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2753,7 +2753,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index e18daba..65f5371 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -51,6 +51,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 200cd2a..a9673b2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -991,7 +991,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1223,7 +1223,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3396457..d469b0e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -826,7 +826,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
skb_tx(skb)->shared_frag = 1;
@@ -1035,7 +1035,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1076,9 +1076,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 566d73d..e05f0b8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1061,7 +1061,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff --git a/net/ipv4/tcp_zero_copy.c b/net/ipv4/tcp_zero_copy.c
new file mode 100644
index 0000000..9cd990c
--- /dev/null
+++ b/net/ipv4/tcp_zero_copy.c
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Install the page get/put callbacks. Returns -EBUSY, changing nothing, if a
+ * different non-NULL callback is already installed; returns 0 otherwise. The
+ * caller must ensure no page in the system has a non-zero net_priv when this
+ * is called; hence it, net_get_page() and net_put_page() need no protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 26e1dda..4655fdf 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1353,7 +1353,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
--
1.8.1.4

View File

@@ -1,453 +0,0 @@
[PATCH] put_page_callback-2.6.32-504
---
include/linux/Kbuild | 1 +
include/linux/mm_types.h | 12 +++++++++++
include/linux/net.h | 40 +++++++++++++++++++++++++++++++++++++
net/Kconfig | 12 +++++++++++
net/core/dev.c | 2 +-
net/core/skbuff.c | 34 ++++++++++++++++----------------
net/ipv4/Makefile | 1 +
net/ipv4/ip_output.c | 4 +-
net/ipv4/tcp.c | 8 +++---
net/ipv4/tcp_output.c | 2 +-
net/ipv4/tcp_zero_copy.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++
net/ipv6/ip6_output.c | 2 +-
12 files changed, 141 insertions(+), 26 deletions(-)
create mode 100644 net/ipv4/tcp_zero_copy.c
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 9301043..2870f1a 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -113,6 +113,7 @@ header-y += map_to_7segment.h
header-y += matroxfb.h
header-y += meye.h
header-y += minix_fs.h
+header-y += mm.h
header-y += mmtimer.h
header-y += mqueue.h
header-y += mtio.h
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 645f205..7b6de1f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -106,6 +106,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field here may look wrong, and struct
+ * sk_buff may seem a better home for it, but that would make the code
+ * much more complicated and fragile, since every skb would then have
+ * to contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff --git a/include/linux/net.h b/include/linux/net.h
index 58ada6b..b0adbdc 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -20,6 +20,7 @@
#include <linux/socket.h>
#include <asm/socket.h>
+#include <linux/mm.h>
#define NPROTO AF_MAX
@@ -389,5 +390,44 @@ static const struct proto_ops name##_ops = { \
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * Get/put a page, first notifying the callback if net_priv is set. See
+ * net_set_get_put_page_callbacks() for why no protection is needed here.
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_get_page_callback(page);
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_put_page_callback(page);
+	put_page(page);
+}
+#else /* !CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+static inline void net_get_page(struct page *page)
+{
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff --git a/net/Kconfig b/net/Kconfig
index 1d9b405..eedbed6 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
diff --git a/net/core/dev.c b/net/core/dev.c
index 61dce2f..25d0826 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3655,7 +3655,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 157dc11..74ac749 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -78,13 +78,13 @@ static struct kmem_cache *skbuff_fclone_cache __read_mostly;
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -396,7 +396,7 @@ static void skb_release_data(struct sk_buff *skb)
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
/*
@@ -705,7 +705,7 @@ static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -720,7 +720,7 @@ static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
/* skb frags release userspace buffers */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
uarg->callback(uarg);
@@ -886,7 +886,7 @@ struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -967,7 +967,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
skb_tx(skb)->dev_zerocopy = 0;
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);
@@ -1246,7 +1246,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frag_list(skb))
skb_drop_fraglist(skb);
@@ -1415,7 +1415,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1517,7 +1517,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1541,7 +1541,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1551,7 +1551,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1572,7 +1572,7 @@ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2202,7 +2202,7 @@ static inline void skb_split_no_header(struct sk_buff *skb,
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2325,7 +2325,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2347,7 +2347,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2760,7 +2760,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index e18daba..65f5371 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -51,6 +51,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7ac7cfa..8cda5cc 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1000,7 +1000,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1239,7 +1239,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 18d22cf..b5c12fa 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -821,7 +821,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
skb_tx(skb)->shared_frag = 1;
@@ -1030,7 +1030,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1071,9 +1071,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 255e6e3..5c48819 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1071,7 +1071,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff --git a/net/ipv4/tcp_zero_copy.c b/net/ipv4/tcp_zero_copy.c
new file mode 100644
index 0000000..9cd990c
--- /dev/null
+++ b/net/ipv4/tcp_zero_copy.c
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Install the page get/put callbacks. Returns -EBUSY, changing nothing, if a
+ * different non-NULL callback is already installed; returns 0 otherwise. The
+ * caller must ensure no page in the system has a non-zero net_priv when this
+ * is called; hence it, net_get_page() and net_put_page() need no protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0e985f7..22b4529 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1375,7 +1375,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
--
1.7.1

View File

@@ -1,401 +0,0 @@
diff -upkr linux-2.6.32.1/include/linux/mm_types.h linux-2.6.32.1/include/linux/mm_types.h
--- linux-2.6.32.1/include/linux/mm_types.h 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/include/linux/mm_types.h 2009-12-16 15:22:16.000000000 +0300
@@ -106,6 +106,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. Keeping this field here may look wrong, and struct
+ * sk_buff may seem a better home for it, but that would make the code
+ * much more complicated and fragile, since every skb would then have
+ * to contain only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -upkr linux-2.6.32.1/include/linux/net.h linux-2.6.32.1/include/linux/net.h
--- linux-2.6.32.1/include/linux/net.h 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/include/linux/net.h 2009-12-16 15:23:08.000000000 +0300
@@ -20,6 +20,7 @@
#include <linux/socket.h>
#include <asm/socket.h>
+#include <linux/mm.h>
#define NPROTO AF_MAX
@@ -361,5 +362,44 @@ static const struct proto_ops name##_ops
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * Get/put a page, first notifying the callback if net_priv is set. See
+ * net_set_get_put_page_callbacks() for why no protection is needed here.
+ */
+static inline void net_get_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_get_page_callback(page);
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	if (page->net_priv != NULL)
+		net_put_page_callback(page);
+	put_page(page);
+}
+#else /* !CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+static inline void net_get_page(struct page *page)
+{
+	get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+	put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upkr linux-2.6.32.1/net/core/dev.c linux-2.6.32.1/net/core/dev.c
--- linux-2.6.32.1/net/core/dev.c 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/net/core/dev.c 2009-12-16 15:22:16.000000000 +0300
@@ -2516,7 +2516,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags);
diff -upkr linux-2.6.32.1/net/core/skbuff.c linux-2.6.32.1/net/core/skbuff.c
--- linux-2.6.32.1/net/core/skbuff.c 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/net/core/skbuff.c 2009-12-16 15:22:16.000000000 +0300
@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -344,7 +344,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_has_frags(skb))
@@ -762,7 +762,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -828,7 +828,7 @@ int pskb_expand_head(struct sk_buff *skb
sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_clone_fraglist(skb);
@@ -1102,7 +1102,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_drop_fraglist(skb);
@@ -1271,7 +1271,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1372,7 +1372,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1396,7 +1396,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1406,7 +1406,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1427,7 +1427,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2057,7 +2057,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2179,7 +2179,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2201,7 +2201,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2599,7 +2599,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upkr linux-2.6.32.1/net/ipv4/ip_output.c linux-2.6.32.1/net/ipv4/ip_output.c
--- linux-2.6.32.1/net/ipv4/ip_output.c 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/net/ipv4/ip_output.c 2009-12-16 15:22:16.000000000 +0300
@@ -1020,7 +1020,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1178,7 +1178,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upkr linux-2.6.32.1/net/ipv4/Makefile linux-2.6.32.1/net/ipv4/Makefile
--- linux-2.6.32.1/net/ipv4/Makefile 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/net/ipv4/Makefile 2009-12-16 15:22:16.000000000 +0300
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -upkr linux-2.6.32.1/net/ipv4/tcp.c linux-2.6.32.1/net/ipv4/tcp.c
--- linux-2.6.32.1/net/ipv4/tcp.c 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/net/ipv4/tcp.c 2009-12-16 15:22:16.000000000 +0300
@@ -799,7 +799,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1007,7 +1007,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1048,9 +1048,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upkr linux-2.6.32.1/net/ipv4/tcp_output.c linux-2.6.32.1/net/ipv4/tcp_output.c
--- linux-2.6.32.1/net/ipv4/tcp_output.c 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/net/ipv4/tcp_output.c 2009-12-16 15:22:16.000000000 +0300
@@ -909,7 +909,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upkr linux-2.6.32.1/net/ipv4/tcp_zero_copy.c linux-2.6.32.1/net/ipv4/tcp_zero_copy.c
--- linux-2.6.32.1/net/ipv4/tcp_zero_copy.c 2009-12-08 17:41:11.000000000 +0300
+++ linux-2.6.32.1/net/ipv4/tcp_zero_copy.c 2009-12-16 15:22:16.000000000 +0300
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Set (or, with NULL, clear) the page callbacks. Returns 0, or -EBUSY when a
+ * non-NULL argument differs from the callback already installed. The caller
+ * must ensure no page has a non-zero net_priv field at call time; hence this
+ * function, net_get_page() and net_put_page() don't need any protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -upkr linux-2.6.32.1/net/ipv6/ip6_output.c linux-2.6.32.1/net/ipv6/ip6_output.c
--- linux-2.6.32.1/net/ipv6/ip6_output.c 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/net/ipv6/ip6_output.c 2009-12-16 15:22:16.000000000 +0300
@@ -1379,7 +1379,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -upkr linux-2.6.32.1/net/Kconfig linux-2.6.32.1/net/Kconfig
--- linux-2.6.32.1/net/Kconfig 2009-12-03 06:51:21.000000000 +0300
+++ linux-2.6.32.1/net/Kconfig 2009-12-16 15:22:16.000000000 +0300
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
diff -upkr linux-2.6.32.x86_64/include/linux/Kbuild linux-2.6.32.x86_64/include/linux/Kbuild
--- linux-2.6.32.x86_64/include/linux/Kbuild 2009-12-03 04:51:21.000000000 +0100
+++ linux-2.6.32.x86_64/include/linux/Kbuild 2010-08-27 13:14:53.767830262 +0200
@@ -112,6 +112,7 @@ header-y += map_to_7segment.h
header-y += matroxfb.h
header-y += meye.h
header-y += minix_fs.h
+header-y += mm.h
header-y += mmtimer.h
header-y += mqueue.h
header-y += mtio.h

View File

@@ -1,399 +0,0 @@
diff -Nrup ../../centos-7-orig/linux-3.10.0-123.6.3.el7/drivers/block/drbd/drbd_receiver.c ./drivers/block/drbd/drbd_receiver.c
--- ../../centos-7-orig/linux-3.10.0-123.6.3.el7/drivers/block/drbd/drbd_receiver.c 2014-07-16 20:25:31.000000000 +0200
+++ ./drivers/block/drbd/drbd_receiver.c 2014-08-07 09:14:18.606676153 +0200
@@ -130,7 +130,7 @@ static int page_chain_free(struct page *
struct page *tmp;
int i = 0;
page_chain_for_each_safe(page, tmp) {
- put_page(page);
+ net_put_page(page);
++i;
}
return i;
diff -Nrup ../../centos-7-orig/linux-3.10.0-123.6.3.el7/drivers/net/macvtap.c ./drivers/net/macvtap.c
--- ../../centos-7-orig/linux-3.10.0-123.6.3.el7/drivers/net/macvtap.c 2014-07-16 20:25:31.000000000 +0200
+++ ./drivers/net/macvtap.c 2014-08-07 09:14:18.606676153 +0200
@@ -597,7 +597,7 @@ static int zerocopy_sg_from_iovec(struct
int j;
for (j = 0; j < num_pages; j++)
- put_page(page[i + j]);
+ net_put_page(page[i + j]);
return -EFAULT;
}
truesize = size * PAGE_SIZE;
diff -Nrup ../../centos-7-orig/linux-3.10.0-123.6.3.el7/drivers/net/tun.c ./drivers/net/tun.c
--- ../../centos-7-orig/linux-3.10.0-123.6.3.el7/drivers/net/tun.c 2014-07-16 20:25:31.000000000 +0200
+++ ./drivers/net/tun.c 2014-08-07 09:14:18.606676153 +0200
@@ -1015,7 +1015,7 @@ static int zerocopy_sg_from_iovec(struct
int j;
for (j = 0; j < num_pages; j++)
- put_page(page[i + j]);
+ net_put_page(page[i + j]);
return -EFAULT;
}
truesize = size * PAGE_SIZE;
diff -Nrup ../../centos-7-orig/linux-3.10.0-123.6.3.el7/drivers/net/vmxnet3/vmxnet3_drv.c ./drivers/net/vmxnet3/vmxnet3_drv.c
--- ../../centos-7-orig/linux-3.10.0-123.6.3.el7/drivers/net/vmxnet3/vmxnet3_drv.c 2014-07-16 20:25:31.000000000 +0200
+++ ./drivers/net/vmxnet3/vmxnet3_drv.c 2014-08-07 09:14:18.606676153 +0200
@@ -1360,7 +1360,7 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_que
rq->buf_info[ring_idx][i].page) {
pci_unmap_page(adapter->pdev, rxd->addr,
rxd->len, PCI_DMA_FROMDEVICE);
- put_page(rq->buf_info[ring_idx][i].page);
+ net_put_page(rq->buf_info[ring_idx][i].page);
rq->buf_info[ring_idx][i].page = NULL;
}
}
diff -Nrup ../../centos-7-orig/linux-3.10.0-123.6.3.el7/drivers/net/xen-netback/netback.c ./drivers/net/xen-netback/netback.c
--- ../../centos-7-orig/linux-3.10.0-123.6.3.el7/drivers/net/xen-netback/netback.c 2014-07-16 20:25:31.000000000 +0200
+++ ./drivers/net/xen-netback/netback.c 2014-08-07 09:14:18.606676153 +0200
@@ -1292,7 +1292,7 @@ static void xen_netbk_fill_frags(struct
skb->truesize += txp->size;
/* Take an extra reference to offset xen_netbk_idx_release */
- get_page(netbk->mmap_pages[pending_idx]);
+ net_get_page(netbk->mmap_pages[pending_idx]);
xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
}
}
@@ -1774,7 +1774,7 @@ static void xen_netbk_idx_release(struct
} while (!pending_tx_is_head(netbk, peek));
netbk->mmap_pages[pending_idx]->mapping = 0;
- put_page(netbk->mmap_pages[pending_idx]);
+ net_put_page(netbk->mmap_pages[pending_idx]);
netbk->mmap_pages[pending_idx] = NULL;
}
diff -Nrup ../../centos-7-orig/linux-3.10.0-123.6.3.el7/include/linux/mm_types.h ./include/linux/mm_types.h
--- ../../centos-7-orig/linux-3.10.0-123.6.3.el7/include/linux/mm_types.h 2014-07-16 20:25:31.000000000 +0200
+++ ./include/linux/mm_types.h 2014-08-07 09:14:18.606676153 +0200
@@ -186,6 +186,17 @@ struct page {
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
int _last_cpupid;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. This field may look misplaced here and better suited to
+ * struct sk_buff, but putting it there would make the code much more
+ * complicated and fragile, since every skb would then have to contain
+ * only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
diff -Nrup ../../centos-7-orig/linux-3.10.0-123.6.3.el7/include/linux/net.h ./include/linux/net.h
--- ../../centos-7-orig/linux-3.10.0-123.6.3.el7/include/linux/net.h 2014-07-16 20:25:31.000000000 +0200
+++ ./include/linux/net.h 2014-08-07 09:14:18.606676153 +0200
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/stringify.h>
+#include <linux/mm.h>
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
@@ -278,6 +279,45 @@ extern int kernel_sock_ioctl(struct sock
extern int kernel_sock_shutdown(struct socket *sock,
enum sock_shutdown_cmd how);
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * For a page with non-zero net_priv, call the registered callback first. See
+ * the comment for net_set_get_put_page_callbacks() on why this is unprotected.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
diff -Nrup ../../centos-7-orig/linux-3.10.0-123.6.3.el7/include/linux/skbuff.h ./include/linux/skbuff.h
--- ../../centos-7-orig/linux-3.10.0-123.6.3.el7/include/linux/skbuff.h 2014-07-16 20:25:31.000000000 +0200
+++ ./include/linux/skbuff.h 2014-08-07 09:14:18.606676153 +0200
@@ -2036,7 +2036,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -2059,7 +2059,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
diff -Nrup ../../centos-7-orig/linux-3.10.0-123.6.3.el7/net/Kconfig ./net/Kconfig
--- ../../centos-7-orig/linux-3.10.0-123.6.3.el7/net/Kconfig 2014-07-16 20:25:31.000000000 +0200
+++ ./net/Kconfig 2014-08-07 09:14:18.606676153 +0200
@@ -74,6 +74,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
diff -Nrup ../../centos-7-orig/linux-3.10.0-123.6.3.el7/net/ceph/pagevec.c ./net/ceph/pagevec.c
--- ../../centos-7-orig/linux-3.10.0-123.6.3.el7/net/ceph/pagevec.c 2014-07-16 20:25:31.000000000 +0200
+++ ./net/ceph/pagevec.c 2014-08-07 09:14:18.606676153 +0200
@@ -51,7 +51,7 @@ void ceph_put_page_vector(struct page **
for (i = 0; i < num_pages; i++) {
if (dirty)
set_page_dirty_lock(pages[i]);
- put_page(pages[i]);
+ net_put_page(pages[i]);
}
kfree(pages);
}
diff -Nrup ../../centos-7-orig/linux-3.10.0-123.6.3.el7/net/core/skbuff.c ./net/core/skbuff.c
--- ../../centos-7-orig/linux-3.10.0-123.6.3.el7/net/core/skbuff.c 2014-07-16 20:25:31.000000000 +0200
+++ ./net/core/skbuff.c 2014-08-07 09:14:18.606676153 +0200
@@ -453,7 +453,7 @@ struct sk_buff *__netdev_alloc_skb(struc
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
@@ -499,7 +499,7 @@ static void skb_clone_fraglist(struct sk
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -824,7 +824,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1644,7 +1644,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1697,7 +1697,7 @@ static bool spd_fill_page(struct splice_
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
@@ -2794,7 +2794,7 @@ int skb_append_datato_frags(struct sock
copy);
frg_cnt++;
pfrag->offset += copy;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
skb->truesize += copy;
atomic_add(copy, &sk->sk_wmem_alloc);
diff -Nrup ../../centos-7-orig/linux-3.10.0-123.6.3.el7/net/core/sock.c ./net/core/sock.c
--- ../../centos-7-orig/linux-3.10.0-123.6.3.el7/net/core/sock.c 2014-07-16 20:25:31.000000000 +0200
+++ ./net/core/sock.c 2014-08-07 09:14:18.606676153 +0200
@@ -1829,7 +1829,7 @@ bool sk_page_frag_refill(struct sock *sk
}
if (pfrag->offset < pfrag->size)
return true;
- put_page(pfrag->page);
+ net_put_page(pfrag->page);
}
/* We restrict high order allocations to users that can afford to wait */
@@ -2534,7 +2534,7 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
+ net_put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
diff -Nrup ../../centos-7-orig/linux-3.10.0-123.6.3.el7/net/ipv4/Makefile ./net/ipv4/Makefile
--- ../../centos-7-orig/linux-3.10.0-123.6.3.el7/net/ipv4/Makefile 2014-07-16 20:25:31.000000000 +0200
+++ ./net/ipv4/Makefile 2014-08-07 09:14:18.606676153 +0200
@@ -53,6 +53,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -Nrup ../../centos-7-orig/linux-3.10.0-123.6.3.el7/net/ipv4/ip_output.c ./net/ipv4/ip_output.c
--- ../../centos-7-orig/linux-3.10.0-123.6.3.el7/net/ipv4/ip_output.c 2014-07-16 20:25:31.000000000 +0200
+++ ./net/ipv4/ip_output.c 2014-08-07 09:14:18.606676153 +0200
@@ -1011,7 +1011,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
@@ -1235,7 +1235,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -Nrup ../../centos-7-orig/linux-3.10.0-123.6.3.el7/net/ipv4/tcp.c ./net/ipv4/tcp.c
--- ../../centos-7-orig/linux-3.10.0-123.6.3.el7/net/ipv4/tcp.c 2014-07-16 20:25:31.000000000 +0200
+++ ./net/ipv4/tcp.c 2014-08-07 09:14:18.606676153 +0200
@@ -898,7 +898,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
@@ -1194,7 +1194,7 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
pfrag->offset += copy;
}
diff -Nrup ../../centos-7-orig/linux-3.10.0-123.6.3.el7/net/ipv4/tcp_zero_copy.c ./net/ipv4/tcp_zero_copy.c
--- ../../centos-7-orig/linux-3.10.0-123.6.3.el7/net/ipv4/tcp_zero_copy.c 1970-01-01 01:00:00.000000000 +0100
+++ ./net/ipv4/tcp_zero_copy.c 2014-08-07 09:14:18.606676153 +0200
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Set (or, with NULL, clear) the page callbacks. Returns 0, or -EBUSY when a
+ * non-NULL argument differs from the callback already installed. The caller
+ * must ensure no page has a non-zero net_priv field at call time; hence this
+ * function, net_get_page() and net_put_page() don't need any protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
diff -Nrup ../../centos-7-orig/linux-3.10.0-123.6.3.el7/net/ipv6/ip6_output.c ./net/ipv6/ip6_output.c
--- ../../centos-7-orig/linux-3.10.0-123.6.3.el7/net/ipv6/ip6_output.c 2014-07-16 20:25:31.000000000 +0200
+++ ./net/ipv6/ip6_output.c 2014-08-07 09:14:18.606676153 +0200
@@ -1421,7 +1421,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,

View File

@@ -1,424 +0,0 @@
diff -uprN linux-3.10.0-229.el7/drivers/block/drbd/drbd_receiver.c linux-3.10.0-229.el7-patched/drivers/block/drbd/drbd_receiver.c
--- linux-3.10.0-229.el7/drivers/block/drbd/drbd_receiver.c 2015-01-30 00:15:53.000000000 +0100
+++ linux-3.10.0-229.el7-patched/drivers/block/drbd/drbd_receiver.c 2015-04-21 10:15:55.717008940 +0200
@@ -130,7 +130,7 @@ static int page_chain_free(struct page *
struct page *tmp;
int i = 0;
page_chain_for_each_safe(page, tmp) {
- put_page(page);
+ net_put_page(page);
++i;
}
return i;
diff -uprN linux-3.10.0-229.el7/drivers/net/macvtap.c linux-3.10.0-229.el7-patched/drivers/net/macvtap.c
--- linux-3.10.0-229.el7/drivers/net/macvtap.c 2015-01-30 00:15:53.000000000 +0100
+++ linux-3.10.0-229.el7-patched/drivers/net/macvtap.c 2015-04-21 10:15:55.718008916 +0200
@@ -604,7 +604,7 @@ static int zerocopy_sg_from_iovec(struct
int j;
for (j = 0; j < num_pages; j++)
- put_page(page[i + j]);
+ net_put_page(page[i + j]);
return -EFAULT;
}
truesize = size * PAGE_SIZE;
diff -uprN linux-3.10.0-229.el7/drivers/net/tun.c linux-3.10.0-229.el7-patched/drivers/net/tun.c
--- linux-3.10.0-229.el7/drivers/net/tun.c 2015-01-30 00:15:53.000000000 +0100
+++ linux-3.10.0-229.el7-patched/drivers/net/tun.c 2015-04-21 10:15:55.719008893 +0200
@@ -1015,7 +1015,7 @@ static int zerocopy_sg_from_iovec(struct
int j;
for (j = 0; j < num_pages; j++)
- put_page(page[i + j]);
+ net_put_page(page[i + j]);
return -EFAULT;
}
truesize = size * PAGE_SIZE;
diff -uprN linux-3.10.0-229.el7/drivers/net/vmxnet3/vmxnet3_drv.c linux-3.10.0-229.el7-patched/drivers/net/vmxnet3/vmxnet3_drv.c
--- linux-3.10.0-229.el7/drivers/net/vmxnet3/vmxnet3_drv.c 2015-01-30 00:15:53.000000000 +0100
+++ linux-3.10.0-229.el7-patched/drivers/net/vmxnet3/vmxnet3_drv.c 2015-04-21 10:15:55.720008872 +0200
@@ -1360,7 +1360,7 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_que
rq->buf_info[ring_idx][i].page) {
pci_unmap_page(adapter->pdev, rxd->addr,
rxd->len, PCI_DMA_FROMDEVICE);
- put_page(rq->buf_info[ring_idx][i].page);
+ net_put_page(rq->buf_info[ring_idx][i].page);
rq->buf_info[ring_idx][i].page = NULL;
}
}
diff -uprN linux-3.10.0-229.el7/drivers/net/xen-netback/netback.c linux-3.10.0-229.el7-patched/drivers/net/xen-netback/netback.c
--- linux-3.10.0-229.el7/drivers/net/xen-netback/netback.c 2015-01-30 00:15:53.000000000 +0100
+++ linux-3.10.0-229.el7-patched/drivers/net/xen-netback/netback.c 2015-04-21 10:15:55.720008872 +0200
@@ -1292,7 +1292,7 @@ static void xen_netbk_fill_frags(struct
skb->truesize += txp->size;
/* Take an extra reference to offset xen_netbk_idx_release */
- get_page(netbk->mmap_pages[pending_idx]);
+ net_get_page(netbk->mmap_pages[pending_idx]);
xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
}
}
@@ -1774,7 +1774,7 @@ static void xen_netbk_idx_release(struct
} while (!pending_tx_is_head(netbk, peek));
netbk->mmap_pages[pending_idx]->mapping = 0;
- put_page(netbk->mmap_pages[pending_idx]);
+ net_put_page(netbk->mmap_pages[pending_idx]);
netbk->mmap_pages[pending_idx] = NULL;
}
diff -uprN linux-3.10.0-229.el7/include/linux/mm_types.h linux-3.10.0-229.el7-patched/include/linux/mm_types.h
--- linux-3.10.0-229.el7/include/linux/mm_types.h 2015-01-30 00:15:53.000000000 +0100
+++ linux-3.10.0-229.el7-patched/include/linux/mm_types.h 2015-04-21 10:15:55.721008852 +0200
@@ -188,6 +188,17 @@ struct page {
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
int _last_cpupid;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. This field may look misplaced here and better suited to
+ * struct sk_buff, but putting it there would make the code much more
+ * complicated and fragile, since every skb would then have to contain
+ * only pages with the same value in this field.
+ */
+ void *net_priv;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
diff -uprN linux-3.10.0-229.el7/include/linux/net.h linux-3.10.0-229.el7-patched/include/linux/net.h
--- linux-3.10.0-229.el7/include/linux/net.h 2015-01-30 00:15:53.000000000 +0100
+++ linux-3.10.0-229.el7-patched/include/linux/net.h 2015-04-21 10:15:55.722008834 +0200
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/stringify.h>
+#include <linux/mm.h>
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
@@ -281,6 +282,45 @@ extern int kernel_sock_ioctl(struct sock
extern int kernel_sock_shutdown(struct socket *sock,
enum sock_shutdown_cmd how);
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * For a page with non-zero net_priv, call the registered callback first. See
+ * the comment for net_set_get_put_page_callbacks() on why this is unprotected.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
diff -uprN linux-3.10.0-229.el7/include/linux/skbuff.h linux-3.10.0-229.el7-patched/include/linux/skbuff.h
--- linux-3.10.0-229.el7/include/linux/skbuff.h 2015-01-30 00:15:53.000000000 +0100
+++ linux-3.10.0-229.el7-patched/include/linux/skbuff.h 2015-04-21 10:15:55.724008801 +0200
@@ -2101,7 +2101,7 @@ static inline struct page *skb_frag_page
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ net_get_page(skb_frag_page(frag));
}
/**
@@ -2124,7 +2124,7 @@ static inline void skb_frag_ref(struct s
*/
static inline void __skb_frag_unref(skb_frag_t *frag)
{
- put_page(skb_frag_page(frag));
+ net_put_page(skb_frag_page(frag));
}
/**
diff -uprN linux-3.10.0-229.el7/net/Kconfig linux-3.10.0-229.el7-patched/net/Kconfig
--- linux-3.10.0-229.el7/net/Kconfig 2015-01-30 00:15:53.000000000 +0100
+++ linux-3.10.0-229.el7-patched/net/Kconfig 2015-04-21 10:15:55.725008788 +0200
@@ -74,6 +74,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
diff -uprN linux-3.10.0-229.el7/net/ceph/pagevec.c linux-3.10.0-229.el7-patched/net/ceph/pagevec.c
--- linux-3.10.0-229.el7/net/ceph/pagevec.c 2015-01-30 00:15:53.000000000 +0100
+++ linux-3.10.0-229.el7-patched/net/ceph/pagevec.c 2015-04-21 10:15:55.726008775 +0200
@@ -51,7 +51,7 @@ void ceph_put_page_vector(struct page **
for (i = 0; i < num_pages; i++) {
if (dirty)
set_page_dirty_lock(pages[i]);
- put_page(pages[i]);
+ net_put_page(pages[i]);
}
kfree(pages);
}
diff -uprN linux-3.10.0-229.el7/net/core/skbuff.c linux-3.10.0-229.el7-patched/net/core/skbuff.c
--- linux-3.10.0-229.el7/net/core/skbuff.c 2015-01-30 00:15:53.000000000 +0100
+++ linux-3.10.0-229.el7-patched/net/core/skbuff.c 2015-04-21 10:34:20.113935390 +0200
@@ -79,13 +79,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -453,7 +453,7 @@ struct sk_buff *__netdev_alloc_skb(struc
if (likely(data)) {
skb = build_skb(data, fragsz);
if (unlikely(!skb))
- put_page(virt_to_head_page(data));
+ net_put_page(virt_to_head_page(data));
}
} else {
skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
@@ -499,7 +499,7 @@ static void skb_clone_fraglist(struct sk
static void skb_free_head(struct sk_buff *skb)
{
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ net_put_page(virt_to_head_page(skb->head));
else
kfree(skb->head);
}
@@ -826,7 +826,7 @@ int skb_copy_ubufs(struct sk_buff *skb,
if (!page) {
while (head) {
struct page *next = (struct page *)head->private;
- put_page(head);
+ net_put_page(head);
head = next;
}
return -ENOMEM;
@@ -1646,7 +1646,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1699,7 +1699,7 @@ static bool spd_fill_page(struct splice_
spd->partial[spd->nr_pages - 1].len += *len;
return false;
}
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
@@ -2158,7 +2158,7 @@ skb_zerocopy(struct sk_buff *to, struct
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
__skb_fill_page_desc(to, 0, page, offset, plen);
- get_page(page);
+ net_get_page(page);
j = 1;
len -= plen;
}
@@ -2807,7 +2807,7 @@ int skb_append_datato_frags(struct sock
copy);
frg_cnt++;
pfrag->offset += copy;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
skb->truesize += copy;
atomic_add(copy, &sk->sk_wmem_alloc);
diff -uprN linux-3.10.0-229.el7/net/core/sock.c linux-3.10.0-229.el7-patched/net/core/sock.c
--- linux-3.10.0-229.el7/net/core/sock.c 2015-01-30 00:15:53.000000000 +0100
+++ linux-3.10.0-229.el7-patched/net/core/sock.c 2015-04-21 10:15:55.728008756 +0200
@@ -1826,7 +1826,7 @@ bool sk_page_frag_refill(struct sock *sk
}
if (pfrag->offset < pfrag->size)
return true;
- put_page(pfrag->page);
+ net_put_page(pfrag->page);
}
/* We restrict high order allocations to users that can afford to wait */
@@ -2528,7 +2528,7 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
+ net_put_page(sk->sk_frag.page);
sk->sk_frag.page = NULL;
}
diff -uprN linux-3.10.0-229.el7/net/ipv4/Makefile linux-3.10.0-229.el7-patched/net/ipv4/Makefile
--- linux-3.10.0-229.el7/net/ipv4/Makefile 2015-01-30 00:15:53.000000000 +0100
+++ linux-3.10.0-229.el7-patched/net/ipv4/Makefile 2015-04-21 10:15:55.728008756 +0200
@@ -53,6 +53,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o xfrm4_protocol.o
diff -uprN linux-3.10.0-229.el7/net/ipv4/ip_output.c linux-3.10.0-229.el7-patched/net/ipv4/ip_output.c
--- linux-3.10.0-229.el7/net/ipv4/ip_output.c 2015-01-30 00:15:53.000000000 +0100
+++ linux-3.10.0-229.el7-patched/net/ipv4/ip_output.c 2015-04-21 10:15:55.729008747 +0200
@@ -1047,7 +1047,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
@@ -1271,7 +1271,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -uprN linux-3.10.0-229.el7/net/ipv4/tcp.c linux-3.10.0-229.el7-patched/net/ipv4/tcp.c
--- linux-3.10.0-229.el7/net/ipv4/tcp.c 2015-01-30 00:15:53.000000000 +0100
+++ linux-3.10.0-229.el7-patched/net/ipv4/tcp.c 2015-04-21 10:15:55.730008741 +0200
@@ -897,7 +897,7 @@ new_segment:
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
@@ -1193,7 +1193,7 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
pfrag->offset += copy;
}
diff -uprN linux-3.10.0-229.el7/net/ipv4/tcp_zero_copy.c linux-3.10.0-229.el7-patched/net/ipv4/tcp_zero_copy.c
--- linux-3.10.0-229.el7/net/ipv4/tcp_zero_copy.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-3.10.0-229.el7-patched/net/ipv4/tcp_zero_copy.c 2015-04-21 10:15:55.731008735 +0200
@@ -0,0 +1,50 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL_GPL(net_put_page_callback);
+
+/*
+ * Set (or, with NULL, clear) the page callbacks. Returns 0, or -EBUSY when a
+ * non-NULL argument differs from the callback already installed. The caller
+ * must ensure no page has a non-zero net_priv field at call time; hence this
+ * function, net_get_page() and net_put_page() don't need any protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL_GPL(net_set_get_put_page_callbacks);
diff -uprN linux-3.10.0-229.el7/net/ipv6/ip6_output.c linux-3.10.0-229.el7-patched/net/ipv6/ip6_output.c
--- linux-3.10.0-229.el7/net/ipv6/ip6_output.c 2015-01-30 00:15:53.000000000 +0100
+++ linux-3.10.0-229.el7-patched/net/ipv6/ip6_output.c 2015-04-21 10:15:55.731008735 +0200
@@ -1445,7 +1445,7 @@ alloc_new_skb:
__skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, 0);
skb_shinfo(skb)->nr_frags = ++i;
- get_page(pfrag->page);
+ net_get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,

View File

@@ -1,257 +0,0 @@
diff -upr linux-2.6.18/include/linux/mm.h linux-2.6.18/include/linux/mm.h
--- linux-2.6.18/include/linux/mm.h 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/include/linux/mm.h 2007-08-07 19:35:51.000000000 +0400
@@ -277,6 +277,15 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. It is not ideal to have this field here; struct sk_buff
+ * would be a better place, but that would make the code much more
+ * complicated and fragile when maintained as a separate patch, since
+ * every skb would then have to contain only pages with the same value
+ * in this field.
+ */
+ void *net_priv;
};
#define page_private(page) ((page)->private)
diff -upr linux-2.6.18/include/linux/net.h linux-2.6.18/include/linux/net.h
--- linux-2.6.18/include/linux/net.h 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/include/linux/net.h 2007-08-29 18:28:21.000000000 +0400
@@ -56,6 +56,7 @@ typedef enum {
#ifdef __KERNEL__
#include <linux/stringify.h>
+#include <linux/mm.h>
#define SOCK_ASYNC_NOSPACE 0
#define SOCK_ASYNC_WAITDATA 1
@@ -324,5 +325,30 @@ extern int net_msg_cost;
extern int net_msg_burst;
#endif
+/* Support for notification on zero-copy TCP transfer completion */
+#define CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION /* defined unconditionally by this patch */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+/* get_page()/put_page() wrappers; the callback runs first if net_priv is set. */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page); /* NOTE(review): callback not NULL-checked; confirm it is always set here */
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page); /* NOTE(review): callback not NULL-checked; confirm it is always set here */
+ put_page(page);
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -upr linux-2.6.18/net/core/skbuff.c linux-2.6.18/net/core/skbuff.c
--- linux-2.6.18/net/core/skbuff.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/core/skbuff.c 2007-08-07 19:35:51.000000000 +0400
@@ -324,7 +324,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_shinfo(skb)->frag_list)
@@ -666,7 +666,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -720,7 +720,7 @@ int pskb_expand_head(struct sk_buff *skb
memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_clone_fraglist(skb);
@@ -902,7 +902,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_shinfo(skb)->frag_list)
skb_drop_fraglist(skb);
@@ -1071,7 +1071,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1653,7 +1653,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2021,7 +2021,7 @@ struct sk_buff *skb_segment(struct sk_bu
BUG_ON(i >= nfrags);
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -upr linux-2.6.18/net/core/utils.c linux-2.6.18/net/core/utils.c
--- linux-2.6.18/net/core/utils.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/core/utils.c 2007-08-23 19:49:40.000000000 +0400
@@ -24,11 +24,15 @@
#include <linux/random.h>
#include <linux/percpu.h>
#include <linux/init.h>
+#include <linux/skbuff.h>
#include <asm/byteorder.h>
#include <asm/system.h>
#include <asm/uaccess.h>
+net_get_page_callback_t net_get_page_callback __read_mostly;
+net_put_page_callback_t net_put_page_callback __read_mostly;
+
/*
This is a maximally equidistributed combined Tausworthe generator
based on code from GNU Scientific Library 1.5 (30 Jun 2004)
@@ -203,3 +203,32 @@ __be32 in_aton(const char *str)
}
EXPORT_SYMBOL(in_aton);
+/* Set (or, given NULL, clear) the page callbacks: 0 on success, -EBUSY on conflict. */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY; /* a different get callback is already installed */
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY; /* a different put callback is already installed */
+ goto out;
+ }
+
+ net_get_page_callback = get_callback; /* both checks passed: store both, NULL clears */
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+
+EXPORT_SYMBOL(net_get_page_callback);
+EXPORT_SYMBOL(net_put_page_callback);
diff -upr linux-2.6.18/net/ipv4/ip_output.c linux-2.6.18/net/ipv4/ip_output.c
--- linux-2.6.18/net/ipv4/ip_output.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/ipv4/ip_output.c 2007-08-07 19:37:24.000000000 +0400
@@ -1006,7 +1006,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1166,7 +1166,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -upr linux-2.6.18/net/ipv4/tcp.c linux-2.6.18/net/ipv4/tcp.c
--- linux-2.6.18/net/ipv4/tcp.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/ipv4/tcp.c 2007-08-07 19:35:51.000000000 +0400
@@ -560,7 +560,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -763,7 +763,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -804,9 +804,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -upr linux-2.6.18/net/ipv4/tcp_output.c linux-2.6.18/net/ipv4/tcp_output.c
--- linux-2.6.18/net/ipv4/tcp_output.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/ipv4/tcp_output.c 2007-08-07 19:35:51.000000000 +0400
@@ -659,7 +659,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -upr linux-2.6.18/net/ipv6/ip6_output.c linux-2.6.18/net/ipv6/ip6_output.c
--- linux-2.6.18/net/ipv6/ip6_output.c 2006-09-20 07:42:06.000000000 +0400
+++ linux-2.6.18/net/ipv6/ip6_output.c 2007-08-07 19:35:51.000000000 +0400
@@ -1212,7 +1212,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}

View File

@@ -1,401 +0,0 @@
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/include/linux/Kbuild linux-2.6.32-131.4.1.el6.x86_64.new/include/linux/Kbuild
--- linux-2.6.32-131.4.1.el6.x86_64.orig/include/linux/Kbuild 2011-07-09 00:47:13.884215174 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/include/linux/Kbuild 2011-07-09 00:47:47.530389221 +0200
@@ -113,6 +113,7 @@ header-y += map_to_7segment.h
header-y += matroxfb.h
header-y += meye.h
header-y += minix_fs.h
+header-y += mm.h
header-y += mmtimer.h
header-y += mqueue.h
header-y += mtio.h
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/include/linux/mm_types.h linux-2.6.32-131.4.1.el6.x86_64.new/include/linux/mm_types.h
--- linux-2.6.32-131.4.1.el6.x86_64.orig/include/linux/mm_types.h 2011-07-09 00:47:13.893191775 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/include/linux/mm_types.h 2011-07-09 00:47:47.533311169 +0200
@@ -106,6 +106,18 @@ struct page {
*/
void *shadow;
#endif
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. It may look wrong to have this field here rather than in
+ * struct sk_buff, but that would make the code much more complicated
+ * and fragile, since every skb would then have to contain only pages
+ * with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
};
/*
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/include/linux/net.h linux-2.6.32-131.4.1.el6.x86_64.new/include/linux/net.h
--- linux-2.6.32-131.4.1.el6.x86_64.orig/include/linux/net.h 2011-07-09 00:47:13.867629724 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/include/linux/net.h 2011-07-09 00:47:47.536207157 +0200
@@ -20,6 +20,7 @@
#include <linux/socket.h>
#include <asm/socket.h>
+#include <linux/mm.h>
#define NPROTO AF_MAX
@@ -365,5 +366,44 @@ static const struct proto_ops name##_ops
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * get_page()/put_page() wrappers that first call the callback if net_priv is
+ * set. See net_set_get_put_page_callbacks() on why no protection is needed.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else /* option disabled: plain get_page()/put_page() wrappers */
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/core/dev.c linux-2.6.32-131.4.1.el6.x86_64.new/net/core/dev.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/core/dev.c 2011-07-09 00:47:14.491417046 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/core/dev.c 2011-07-09 00:47:47.538194361 +0200
@@ -2903,7 +2903,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ net_put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags);
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/core/skbuff.c linux-2.6.32-131.4.1.el6.x86_64.new/net/core/skbuff.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/core/skbuff.c 2011-07-09 00:47:14.491417046 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/core/skbuff.c 2011-07-09 00:47:47.542260687 +0200
@@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_
static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- put_page(buf->page);
+ net_put_page(buf->page);
}
static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- get_page(buf->page);
+ net_get_page(buf->page);
}
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -344,7 +344,7 @@ static void skb_release_data(struct sk_b
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
}
if (skb_has_frags(skb))
@@ -766,7 +766,7 @@ struct sk_buff *pskb_copy(struct sk_buff
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ net_get_page(skb_shinfo(n)->frags[i].page);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -832,7 +832,7 @@ int pskb_expand_head(struct sk_buff *skb
sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_clone_fraglist(skb);
@@ -1106,7 +1106,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
if (skb_has_frags(skb))
skb_drop_fraglist(skb);
@@ -1275,7 +1275,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1376,7 +1376,7 @@ EXPORT_SYMBOL(skb_copy_bits);
*/
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
- put_page(spd->pages[i]);
+ net_put_page(spd->pages[i]);
}
static inline struct page *linear_to_page(struct page *page, unsigned int *len,
@@ -1400,7 +1400,7 @@ new_page:
off = sk->sk_sndmsg_off;
mlen = PAGE_SIZE - off;
if (mlen < 64 && mlen < *len) {
- put_page(p);
+ net_put_page(p);
goto new_page;
}
@@ -1410,7 +1410,7 @@ new_page:
memcpy(page_address(p) + off, page_address(page) + *offset, *len);
sk->sk_sndmsg_off += *len;
*offset = off;
- get_page(p);
+ net_get_page(p);
return p;
}
@@ -1431,7 +1431,7 @@ static inline int spd_fill_page(struct s
if (!page)
return 1;
} else
- get_page(page);
+ net_get_page(page);
spd->pages[spd->nr_pages] = page;
spd->partial[spd->nr_pages].len = *len;
@@ -2061,7 +2061,7 @@ static inline void skb_split_no_header(s
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ net_get_page(skb_shinfo(skb)->frags[i].page);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2183,7 +2183,7 @@ int skb_shift(struct sk_buff *tgt, struc
to++;
} else {
- get_page(fragfrom->page);
+ net_get_page(fragfrom->page);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2205,7 +2205,7 @@ int skb_shift(struct sk_buff *tgt, struc
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ net_put_page(fragfrom->page);
}
/* Reposition in the original skb */
@@ -2600,7 +2600,7 @@ struct sk_buff *skb_segment(struct sk_bu
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ net_get_page(frag->page);
size = frag->size;
if (pos < offset) {
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/ip_output.c linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/ip_output.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/ip_output.c 2011-07-09 00:47:14.538469946 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/ip_output.c 2011-07-09 00:47:47.544855733 +0200
@@ -981,7 +981,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1213,7 +1213,7 @@ ssize_t ip_append_page(struct sock *sk,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/Makefile linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/Makefile
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/Makefile 2011-07-09 00:47:14.540204846 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/Makefile 2011-07-09 00:47:47.544855733 +0200
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/tcp.c linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/tcp.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/tcp.c 2011-07-09 00:47:14.522494769 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/tcp.c 2011-07-09 00:47:47.548279863 +0200
@@ -799,7 +799,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -1007,7 +1007,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -1048,9 +1048,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/tcp_output.c linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/tcp_output.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/tcp_output.c 2011-07-09 00:47:14.535532920 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/tcp_output.c 2011-07-09 00:47:47.548279863 +0200
@@ -909,7 +909,7 @@ static void __pskb_trim_head(struct sk_b
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/tcp_zero_copy.c linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/tcp_zero_copy.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv4/tcp_zero_copy.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv4/tcp_zero_copy.c 2011-07-09 00:47:47.548279863 +0200
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Sets (or, given NULL, clears) the page callbacks; returns 0, or -EBUSY if
+ * a different callback is already installed. The caller must ensure no page
+ * in the system has a non-zero net_priv field when this is called; hence
+ * this function, net_get_page() and net_put_page() need no protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv6/ip6_output.c linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv6/ip6_output.c
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/ipv6/ip6_output.c 2011-07-09 00:47:14.498374289 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/ipv6/ip6_output.c 2011-07-09 00:47:47.557327813 +0200
@@ -1370,7 +1370,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
diff -uNrp linux-2.6.32-131.4.1.el6.x86_64.orig/net/Kconfig linux-2.6.32-131.4.1.el6.x86_64.new/net/Kconfig
--- linux-2.6.32-131.4.1.el6.x86_64.orig/net/Kconfig 2011-07-09 00:47:14.452504858 +0200
+++ linux-2.6.32-131.4.1.el6.x86_64.new/net/Kconfig 2011-07-09 00:47:47.557327813 +0200
@@ -72,6 +72,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"

View File

@@ -1,274 +0,0 @@
diff --git a/include/linux/mm_types.h linux-2.6.27.7-9-scst/include/linux/mm_types.h
index c8d3f6f..936b610 100644
--- linux-2.6.27.7-9/include/linux/mm_types.h
+++ linux-2.6.27.7-9-scst/include/linux/mm_types.h
@@ -94,6 +94,18 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement support for notification on zero-copy TCP transfer
+ * completion. It may look wrong to have this field here rather than in
+ * struct sk_buff, but that would make the code much more complicated
+ * and fragile, since every skb would then have to contain only pages
+ * with the same value in this field.
+ */
+ void *net_priv;
+#endif
+
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
unsigned long page_cgroup;
#endif
diff --git linux-2.6.27.7-9/include/linux/net.h linux-2.6.27.7-9-scst/include/linux/net.h
index 6dc14a2..992273a 100644
--- linux-2.6.27.7-9/include/linux/net.h
+++ linux-2.6.27.7-9-scst/include/linux/net.h
@@ -57,6 +57,7 @@ typedef enum {
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -354,5 +355,44 @@ static const struct proto_ops name##_ops = { \
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * get_page()/put_page() wrappers that first call the callback if net_priv is
+ * set. See net_set_get_put_page_callbacks() on why no protection is needed.
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != 0)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else /* option disabled: plain get_page()/put_page() wrappers */
+static inline void net_get_page(struct page *page)
+{
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page);
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff --git linux-2.6.27.7-9/net/Kconfig linux-2.6.27.7-9-scst/net/Kconfig
index c2407ef..48bdfb0 100644
--- linux-2.6.27.7-9/net/Kconfig
+++ linux-2.6.27.7-9-scst/net/Kconfig
@@ -59,6 +59,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
diff --git linux-2.6.27.7-9/net/core/skbuff.c linux-2.6.27.7-9-scst/net/core/skbuff.c
index e274ebb..eb1e60a 100644
--- linux-2.6.27.7-9/net/core/skbuff.c
+++ linux-2.6.27.7-9-scst/net/core/skbuff.c
@@ -374,7 +374,7 @@ static void skb_clone_fraglist(struct sk_buff *skb)
static void skb_get_page(struct sk_buff *skb, struct page *page)
{
- get_page(page);
+ net_get_page(page);
if (skb_emergency(skb))
atomic_inc(&page->frag_count);
}
@@ -383,7 +383,7 @@ static void skb_put_page(struct sk_buff *skb, struct page *page)
{
if (skb_emergency(skb) && atomic_dec_and_test(&page->frag_count))
mem_reserve_pages_charge(&net_skb_reserve, -1);
- put_page(page);
+ net_put_page(page);
}
static void skb_release_data(struct sk_buff *skb)
diff --git linux-2.6.27.7-9/net/ipv4/Makefile linux-2.6.27.7-9-scst/net/ipv4/Makefile
index ad40ef3..4af3a4b 100644
--- linux-2.6.27.7-9/net/ipv4/Makefile
+++ linux-2.6.27.7-9-scst/net/ipv4/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff --git linux-2.6.27.7-9/net/ipv4/ip_output.c linux-2.6.27.7-9-scst/net/ipv4/ip_output.c
index d533a89..93d3039 100644
--- linux-2.6.27.7-9/net/ipv4/ip_output.c
+++ linux-2.6.27.7-9-scst/net/ipv4/ip_output.c
@@ -1007,7 +1007,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1165,7 +1165,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff --git linux-2.6.27.7-9/net/ipv4/tcp.c linux-2.6.27.7-9-scst/net/ipv4/tcp.c
index dd01424..05256eb 100644
--- linux-2.6.27.7-9/net/ipv4/tcp.c
+++ linux-2.6.27.7-9-scst/net/ipv4/tcp.c
@@ -713,7 +713,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -918,7 +918,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -959,9 +959,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff --git linux-2.6.27.7-9/net/ipv4/tcp_output.c linux-2.6.27.7-9-scst/net/ipv4/tcp_output.c
index c251805..b2b5d2f 100644
--- linux-2.6.27.7-9/net/ipv4/tcp_output.c
+++ linux-2.6.27.7-9-scst/net/ipv4/tcp_output.c
@@ -865,7 +865,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff --git linux-2.6.27.7-9/net/ipv4/tcp_zero_copy.c linux-2.6.27.7-9-scst/net/ipv4/tcp_zero_copy.c
new file mode 100644
index 0000000..9cd990c
--- /dev/null
+++ linux-2.6.27.7-9-scst/net/ipv4/tcp_zero_copy.c
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Sets (or, given NULL, clears) the page callbacks; returns 0, or -EBUSY if
+ * a different callback is already installed. The caller must ensure no page
+ * in the system has a non-zero net_priv field when this is called; hence
+ * this function, net_get_page() and net_put_page() need no protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY;
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff --git linux-2.6.27.7-9/net/ipv6/ip6_output.c linux-2.6.27.7-9-scst/net/ipv6/ip6_output.c
index 3df2c44..4242123 100644
--- linux-2.6.27.7-9/net/ipv6/ip6_output.c
+++ linux-2.6.27.7-9-scst/net/ipv6/ip6_output.c
@@ -1349,7 +1349,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}