Unsupported patch from Krzysztof Błaszkowski <kb@sysmikro.com.pl>:

I prepared it for kernel 2.6.27.7-9 which is used by openSuSE11.1-0 but it can 
also be applied to 2.6.27.19-3.2.1 (with some offset/fuzz) which is going to 
be deployed in SLE11.



git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@711 d57e44dd-8a1f-0410-8b47-8ef2f437770f
This commit is contained in:
Vladislav Bolkhovitin
2009-03-23 19:25:38 +00:00
parent acbfc9a0d2
commit 177824fe91

View File

@@ -0,0 +1,274 @@
diff --git a/include/linux/mm_types.h linux-2.6.27.7-9-scst/include/linux/mm_types.h
index c8d3f6f..936b610 100644
--- linux-2.6.27.7-9/include/linux/mm_types.h
+++ linux-2.6.27.7-9-scst/include/linux/mm_types.h
@@ -94,6 +94,18 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
+
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+ /*
+ * Used to implement notification on zero-copy TCP transfer completion.
+ * Keeping this field in struct sk_buff instead might look cleaner, but
+ * it would make the code much more complicated and fragile, since
+ * every skb would then have to contain only pages with the same value
+ * in this field.
+ */
+ void *net_priv;
+#endif
+
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
unsigned long page_cgroup;
#endif
diff --git linux-2.6.27.7-9/include/linux/net.h linux-2.6.27.7-9-scst/include/linux/net.h
index 6dc14a2..992273a 100644
--- linux-2.6.27.7-9/include/linux/net.h
+++ linux-2.6.27.7-9-scst/include/linux/net.h
@@ -57,6 +57,7 @@ typedef enum {
#include <linux/random.h>
#include <linux/wait.h>
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
+#include <linux/mm.h>
struct poll_table_struct;
struct pipe_inode_info;
@@ -354,5 +355,44 @@ static const struct proto_ops name##_ops = { \
extern struct ratelimit_state net_ratelimit_state;
#endif
+#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION)
+/* Support for notification on zero-copy TCP transfer completion */
+typedef void (*net_get_page_callback_t)(struct page *page);
+typedef void (*net_put_page_callback_t)(struct page *page);
+
+extern net_get_page_callback_t net_get_page_callback;
+extern net_put_page_callback_t net_put_page_callback;
+
+extern int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback);
+
+/*
+ * Notify the registered callback if page->net_priv is set, then get/put the
+ * page. No locking: see the comment for net_set_get_put_page_callbacks().
+ */
+static inline void net_get_page(struct page *page)
+{
+ if (page->net_priv != NULL)
+ net_get_page_callback(page);
+ get_page(page);
+}
+static inline void net_put_page(struct page *page)
+{
+ if (page->net_priv != NULL)
+ net_put_page_callback(page);
+ put_page(page);
+}
+#else
+static inline void net_get_page(struct page *page)
+{
+ get_page(page); /* notification support compiled out: no callback */
+}
+static inline void net_put_page(struct page *page)
+{
+ put_page(page); /* notification support compiled out: no callback */
+}
+#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff --git linux-2.6.27.7-9/net/Kconfig linux-2.6.27.7-9-scst/net/Kconfig
index c2407ef..48bdfb0 100644
--- linux-2.6.27.7-9/net/Kconfig
+++ linux-2.6.27.7-9-scst/net/Kconfig
@@ -59,6 +59,18 @@ config INET
Short answer: say Y.
+config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION
+ bool "TCP/IP zero-copy transfer completion notification"
+ depends on INET
+ default SCST_ISCSI
+ ---help---
+ Adds support for sending a notification upon completion of a
+ zero-copy TCP/IP transfer. This can speed up certain TCP/IP
+ software. Currently this is only used by the iSCSI target driver
+ iSCSI-SCST.
+
+ If unsure, say N.
+
if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
diff --git linux-2.6.27.7-9/net/core/skbuff.c linux-2.6.27.7-9-scst/net/core/skbuff.c
index e274ebb..eb1e60a 100644
--- linux-2.6.27.7-9/net/core/skbuff.c
+++ linux-2.6.27.7-9-scst/net/core/skbuff.c
@@ -374,7 +374,7 @@ static void skb_clone_fraglist(struct sk_buff *skb)
static void skb_get_page(struct sk_buff *skb, struct page *page)
{
- get_page(page);
+ net_get_page(page);
if (skb_emergency(skb))
atomic_inc(&page->frag_count);
}
@@ -383,7 +383,7 @@ static void skb_put_page(struct sk_buff *skb, struct page *page)
{
if (skb_emergency(skb) && atomic_dec_and_test(&page->frag_count))
mem_reserve_pages_charge(&net_skb_reserve, -1);
- put_page(page);
+ net_put_page(page);
}
static void skb_release_data(struct sk_buff *skb)
diff --git linux-2.6.27.7-9/net/ipv4/Makefile linux-2.6.27.7-9-scst/net/ipv4/Makefile
index ad40ef3..4af3a4b 100644
--- linux-2.6.27.7-9/net/ipv4/Makefile
+++ linux-2.6.27.7-9-scst/net/ipv4/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o
diff --git linux-2.6.27.7-9/net/ipv4/ip_output.c linux-2.6.27.7-9-scst/net/ipv4/ip_output.c
index d533a89..93d3039 100644
--- linux-2.6.27.7-9/net/ipv4/ip_output.c
+++ linux-2.6.27.7-9-scst/net/ipv4/ip_output.c
@@ -1007,7 +1007,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}
@@ -1165,7 +1165,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_shinfo(skb)->frags[i-1].size += len;
} else if (i < MAX_SKB_FRAGS) {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {
err = -EMSGSIZE;
diff --git linux-2.6.27.7-9/net/ipv4/tcp.c linux-2.6.27.7-9-scst/net/ipv4/tcp.c
index dd01424..05256eb 100644
--- linux-2.6.27.7-9/net/ipv4/tcp.c
+++ linux-2.6.27.7-9-scst/net/ipv4/tcp.c
@@ -713,7 +713,7 @@ new_segment:
if (can_coalesce) {
skb_shinfo(skb)->frags[i - 1].size += copy;
} else {
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
@@ -918,7 +918,7 @@ new_segment:
goto new_segment;
} else if (page) {
if (off == PAGE_SIZE) {
- put_page(page);
+ net_put_page(page);
TCP_PAGE(sk) = page = NULL;
off = 0;
}
@@ -959,9 +959,9 @@ new_segment:
} else {
skb_fill_page_desc(skb, i, page, off, copy);
if (TCP_PAGE(sk)) {
- get_page(page);
+ net_get_page(page);
} else if (off + copy < PAGE_SIZE) {
- get_page(page);
+ net_get_page(page);
TCP_PAGE(sk) = page;
}
}
diff --git linux-2.6.27.7-9/net/ipv4/tcp_output.c linux-2.6.27.7-9-scst/net/ipv4/tcp_output.c
index c251805..b2b5d2f 100644
--- linux-2.6.27.7-9/net/ipv4/tcp_output.c
+++ linux-2.6.27.7-9-scst/net/ipv4/tcp_output.c
@@ -865,7 +865,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ net_put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
diff --git linux-2.6.27.7-9/net/ipv4/tcp_zero_copy.c linux-2.6.27.7-9-scst/net/ipv4/tcp_zero_copy.c
new file mode 100644
index 0000000..9cd990c
--- /dev/null
+++ linux-2.6.27.7-9-scst/net/ipv4/tcp_zero_copy.c
@@ -0,0 +1,49 @@
+/*
+ * Support routines for TCP zero copy transmit
+ *
+ * Created by Vladislav Bolkhovitin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/skbuff.h>
+
+net_get_page_callback_t net_get_page_callback __read_mostly;
+EXPORT_SYMBOL(net_get_page_callback);
+
+net_put_page_callback_t net_put_page_callback __read_mostly;
+EXPORT_SYMBOL(net_put_page_callback);
+
+/*
+ * Sets both callbacks (NULL clears); returns 0, or -EBUSY, changing nothing,
+ * if one is already set to a different non-NULL value. The caller must
+ * ensure no page in the system has a non-zero net_priv when this is called.
+ * Hence this and net_get_page()/net_put_page() don't need any protection.
+ */
+int net_set_get_put_page_callbacks(
+ net_get_page_callback_t get_callback,
+ net_put_page_callback_t put_callback)
+{
+ int res = 0;
+
+ if ((net_get_page_callback != NULL) && (get_callback != NULL) &&
+ (net_get_page_callback != get_callback)) {
+ res = -EBUSY; /* a different get callback is already registered */
+ goto out;
+ }
+
+ if ((net_put_page_callback != NULL) && (put_callback != NULL) &&
+ (net_put_page_callback != put_callback)) {
+ res = -EBUSY; /* a different put callback is already registered */
+ goto out;
+ }
+
+ net_get_page_callback = get_callback;
+ net_put_page_callback = put_callback;
+
+out:
+ return res;
+}
+EXPORT_SYMBOL(net_set_get_put_page_callbacks);
diff --git linux-2.6.27.7-9/net/ipv6/ip6_output.c linux-2.6.27.7-9-scst/net/ipv6/ip6_output.c
index 3df2c44..4242123 100644
--- linux-2.6.27.7-9/net/ipv6/ip6_output.c
+++ linux-2.6.27.7-9-scst/net/ipv6/ip6_output.c
@@ -1349,7 +1349,7 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- get_page(page);
+ net_get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
frag = &skb_shinfo(skb)->frags[i];
}