From 10168b84c65f0d993296c9fa865c8694b9dcbad7 Mon Sep 17 00:00:00 2001 From: Nils Faerber Date: Tue, 11 Jan 2011 11:24:21 +0100 Subject: [PATCH] Apply put page callback patch --- include/linux/mm_types.h | 12 ++++++++++++ include/linux/net.h | 40 ++++++++++++++++++++++++++++++++++++++++ net/Kconfig | 12 ++++++++++++ net/core/dev.c | 2 +- net/core/skbuff.c | 30 +++++++++++++++--------------- net/ipv4/Makefile | 1 + net/ipv4/ip_output.c | 4 ++-- net/ipv4/tcp.c | 8 ++++---- net/ipv4/tcp_output.c | 2 +- net/ipv6/ip6_output.c | 2 +- 10 files changed, 89 insertions(+), 24 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index bb7288a782f..9fa6a60fdab 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -100,6 +100,18 @@ struct page { */ void *shadow; #endif + +#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) + /* + * Used to implement support for notification on zero-copy TCP transfer + * completion. It might look as not good to have this field here and + * it's better to have it in struct sk_buff, but it would make the code + * much more complicated and fragile, since all skb then would have to + * contain only pages with the same value in this field. + */ + void *net_priv; +#endif + }; /* diff --git a/include/linux/net.h b/include/linux/net.h index 16faa130088..4c7c564bf55 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -20,6 +20,7 @@ #include #include +#include #define NPROTO AF_MAX @@ -293,5 +294,44 @@ extern int kernel_sock_shutdown(struct socket *sock, extern struct ratelimit_state net_ratelimit_state; #endif +#if defined(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) +/* Support for notification on zero-copy TCP transfer completion */ +typedef void (*net_get_page_callback_t)(struct page *page); +typedef void (*net_put_page_callback_t)(struct page *page); + +extern net_get_page_callback_t net_get_page_callback; +extern net_put_page_callback_t net_put_page_callback; + +extern int net_set_get_put_page_callbacks( + net_get_page_callback_t get_callback, + net_put_page_callback_t put_callback); + +/* + * See comment for net_set_get_put_page_callbacks() why those functions + * don't need any protection. + */ +static inline void net_get_page(struct page *page) +{ + if (page->net_priv != 0) + net_get_page_callback(page); + get_page(page); +} +static inline void net_put_page(struct page *page) +{ + if (page->net_priv != 0) + net_put_page_callback(page); + put_page(page); +} +#else +static inline void net_get_page(struct page *page) +{ + get_page(page); +} +static inline void net_put_page(struct page *page) +{ + put_page(page); +} +#endif /* CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION */ + #endif /* __KERNEL__ */ #endif /* _LINUX_NET_H */ diff --git a/net/Kconfig b/net/Kconfig index 55fd82e9ffd..3bc798bbadb 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -72,6 +72,18 @@ config INET Short answer: say Y. +config TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION + bool "TCP/IP zero-copy transfer completion notification" + depends on INET + default SCST_ISCSI + ---help--- + Adds support for sending a notification upon completion of a + zero-copy TCP/IP transfer. This can speed up certain TCP/IP + software. Currently this is only used by the iSCSI target driver + iSCSI-SCST. + + If unsure, say N. + if INET source "net/ipv4/Kconfig" source "net/ipv6/Kconfig" diff --git a/net/core/dev.c b/net/core/dev.c index 0dd54a69dac..23ea0f18a5a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3225,7 +3225,7 @@ pull: skb_shinfo(skb)->frags[0].size -= grow; if (unlikely(!skb_shinfo(skb)->frags[0].size)) { - put_page(skb_shinfo(skb)->frags[0].page); + net_put_page(skb_shinfo(skb)->frags[0].page); memmove(skb_shinfo(skb)->frags, skb_shinfo(skb)->frags + 1, --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 104f8444754..c2b64fbee58 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -76,13 +76,13 @@ static struct kmem_cache *skbuff_fclone_cache __read_mostly; static void sock_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - put_page(buf->page); + net_put_page(buf->page); } static void sock_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - get_page(buf->page); + net_get_page(buf->page); } static int sock_pipe_buf_steal(struct pipe_inode_info *pipe, @@ -324,7 +324,7 @@ static void skb_release_data(struct sk_buff *skb) if (skb_shinfo(skb)->nr_frags) { int i; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - put_page(skb_shinfo(skb)->frags[i].page); + net_put_page(skb_shinfo(skb)->frags[i].page); } if (skb_has_frag_list(skb)) @@ -730,7 +730,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; - get_page(skb_shinfo(n)->frags[i].page); + net_get_page(skb_shinfo(n)->frags[i].page); } skb_shinfo(n)->nr_frags = i; } @@ -806,7 +806,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, kfree(skb->head); } else { for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - get_page(skb_shinfo(skb)->frags[i].page); + net_get_page(skb_shinfo(skb)->frags[i].page); if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); @@ -1083,7 +1083,7 @@ drop_pages: skb_shinfo(skb)->nr_frags = i; for (; i < nfrags; i++) - put_page(skb_shinfo(skb)->frags[i].page); + net_put_page(skb_shinfo(skb)->frags[i].page); if (skb_has_frag_list(skb)) skb_drop_fraglist(skb); @@ -1252,7 +1252,7 @@ pull_pages: k = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { if (skb_shinfo(skb)->frags[i].size <= eat) { - put_page(skb_shinfo(skb)->frags[i].page); + net_put_page(skb_shinfo(skb)->frags[i].page); eat -= skb_shinfo(skb)->frags[i].size; } else { skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; @@ -1353,7 +1353,7 @@ EXPORT_SYMBOL(skb_copy_bits); */ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) { - put_page(spd->pages[i]); + net_put_page(spd->pages[i]); } static inline struct page *linear_to_page(struct page *page, unsigned int *len, @@ -1377,7 +1377,7 @@ new_page: off = sk->sk_sndmsg_off; mlen = PAGE_SIZE - off; if (mlen < 64 && mlen < *len) { - put_page(p); + net_put_page(p); goto new_page; } @@ -1387,7 +1387,7 @@ new_page: memcpy(page_address(p) + off, page_address(page) + *offset, *len); sk->sk_sndmsg_off += *len; *offset = off; - get_page(p); + net_get_page(p); return p; } @@ -1409,7 +1409,7 @@ static inline int spd_fill_page(struct splice_pipe_desc *spd, if (!page) return 1; } else - get_page(page); + net_get_page(page); spd->pages[spd->nr_pages] = page; spd->partial[spd->nr_pages].len = *len; @@ -2042,7 +2042,7 @@ static inline void skb_split_no_header(struct sk_buff *skb, * where splitting is expensive. * 2. Split is accurately. We make this. */ - get_page(skb_shinfo(skb)->frags[i].page); + net_get_page(skb_shinfo(skb)->frags[i].page); skb_shinfo(skb1)->frags[0].page_offset += len - pos; skb_shinfo(skb1)->frags[0].size -= len - pos; skb_shinfo(skb)->frags[i].size = len - pos; @@ -2164,7 +2164,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) to++; } else { - get_page(fragfrom->page); + net_get_page(fragfrom->page); fragto->page = fragfrom->page; fragto->page_offset = fragfrom->page_offset; fragto->size = todo; @@ -2186,7 +2186,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) fragto = &skb_shinfo(tgt)->frags[merge]; fragto->size += fragfrom->size; - put_page(fragfrom->page); + net_put_page(fragfrom->page); } /* Reposition in the original skb */ @@ -2587,7 +2587,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) while (pos < offset + len && i < nfrags) { *frag = skb_shinfo(skb)->frags[i]; - get_page(frag->page); + net_get_page(frag->page); size = frag->size; if (pos < offset) { diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 4978d22f9a7..dee3df7c97e 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o +obj-$(CONFIG_TCP_ZERO_COPY_TRANSFER_COMPLETION_NOTIFICATION) += tcp_zero_copy.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ xfrm4_output.o diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 439d2a34ee4..07965860fa9 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1042,7 +1042,7 @@ alloc_new_skb: err = -EMSGSIZE; goto error; } - get_page(page); + net_get_page(page); skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); frag = &skb_shinfo(skb)->frags[i]; } @@ -1201,7 +1201,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, if (skb_can_coalesce(skb, i, page, offset)) { skb_shinfo(skb)->frags[i-1].size += len; } else if (i < MAX_SKB_FRAGS) { - get_page(page); + net_get_page(page); skb_fill_page_desc(skb, i, page, offset, len); } else { err = -EMSGSIZE; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f15c36a706e..053d21ca2cd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -806,7 +806,7 @@ new_segment: if (can_coalesce) { skb_shinfo(skb)->frags[i - 1].size += copy; } else { - get_page(page); + net_get_page(page); skb_fill_page_desc(skb, i, page, offset, copy); } @@ -1015,7 +1015,7 @@ new_segment: goto new_segment; } else if (page) { if (off == PAGE_SIZE) { - put_page(page); + net_put_page(page); TCP_PAGE(sk) = page = NULL; off = 0; } @@ -1056,9 +1056,9 @@ new_segment: } else { skb_fill_page_desc(skb, i, page, off, copy); if (TCP_PAGE(sk)) { - get_page(page); + net_get_page(page); } else if (off + copy < PAGE_SIZE) { - get_page(page); + net_get_page(page); TCP_PAGE(sk) = page; } } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 61c2463e275..143811e7862 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1082,7 +1082,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) k = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { if (skb_shinfo(skb)->frags[i].size <= eat) { - put_page(skb_shinfo(skb)->frags[i].page); + net_put_page(skb_shinfo(skb)->frags[i].page); eat -= skb_shinfo(skb)->frags[i].size; } else { skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 94b5bf132b2..9b579635b73 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1383,7 +1383,7 @@ alloc_new_skb: err = -EMSGSIZE; goto error; } - get_page(page); + net_get_page(page); skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); frag = &skb_shinfo(skb)->frags[i]; } -- 2.39.2