git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - net/core/skbuff.c
Merge tag 'for-3.5' of git://openrisc.net/jonas/linux
[karo-tx-linux.git] / net / core / skbuff.c
index 52ba2b5e803d89906ab598bcf5462eb48dca4193..016694d624843c8ca1df3013639ffd4f6ae75f39 100644 (file)
@@ -36,6 +36,8 @@
  *     The functions in this file will not compile correctly with gcc 2.4.x
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
@@ -118,11 +120,10 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = {
  */
 static void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 {
-       printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
-                         "data:%p tail:%#lx end:%#lx dev:%s\n",
-              here, skb->len, sz, skb->head, skb->data,
-              (unsigned long)skb->tail, (unsigned long)skb->end,
-              skb->dev ? skb->dev->name : "<NULL>");
+       pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
+                __func__, here, skb->len, sz, skb->head, skb->data,
+                (unsigned long)skb->tail, (unsigned long)skb->end,
+                skb->dev ? skb->dev->name : "<NULL>");
        BUG();
 }
 
@@ -137,11 +138,10 @@ static void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 
 static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 {
-       printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
-                         "data:%p tail:%#lx end:%#lx dev:%s\n",
-              here, skb->len, sz, skb->head, skb->data,
-              (unsigned long)skb->tail, (unsigned long)skb->end,
-              skb->dev ? skb->dev->name : "<NULL>");
+       pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
+                __func__, here, skb->len, sz, skb->head, skb->data,
+                (unsigned long)skb->tail, (unsigned long)skb->end,
+                skb->dev ? skb->dev->name : "<NULL>");
        BUG();
 }
 
@@ -293,6 +293,46 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 }
 EXPORT_SYMBOL(build_skb);
 
+struct netdev_alloc_cache {
+       struct page *page;
+       unsigned int offset;
+};
+static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
+
+/**
+ * netdev_alloc_frag - allocate a page fragment
+ * @fragsz: fragment size
+ *
+ * Allocates a frag from a page for receive buffer.
+ * Uses GFP_ATOMIC allocations.
+ */
+void *netdev_alloc_frag(unsigned int fragsz)
+{
+       struct netdev_alloc_cache *nc;
+       void *data = NULL;
+       unsigned long flags;
+
+       local_irq_save(flags);
+       nc = &__get_cpu_var(netdev_alloc_cache);
+       if (unlikely(!nc->page)) {
+refill:
+               nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+               nc->offset = 0;
+       }
+       if (likely(nc->page)) {
+               if (nc->offset + fragsz > PAGE_SIZE) {
+                       put_page(nc->page);
+                       goto refill;
+               }
+               data = page_address(nc->page) + nc->offset;
+               nc->offset += fragsz;
+               get_page(nc->page);
+       }
+       local_irq_restore(flags);
+       return data;
+}
+EXPORT_SYMBOL(netdev_alloc_frag);
+
 /**
  *     __netdev_alloc_skb - allocate an skbuff for rx on a specific device
  *     @dev: network device to receive on
@@ -307,11 +347,23 @@ EXPORT_SYMBOL(build_skb);
  *     %NULL is returned if there is no free memory.
  */
 struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
-               unsigned int length, gfp_t gfp_mask)
+                                  unsigned int length, gfp_t gfp_mask)
 {
-       struct sk_buff *skb;
+       struct sk_buff *skb = NULL;
+       unsigned int fragsz = SKB_DATA_ALIGN(length + NET_SKB_PAD) +
+                             SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+       if (fragsz <= PAGE_SIZE && !(gfp_mask & __GFP_WAIT)) {
+               void *data = netdev_alloc_frag(fragsz);
 
-       skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
+               if (likely(data)) {
+                       skb = build_skb(data, fragsz);
+                       if (unlikely(!skb))
+                               put_page(virt_to_head_page(data));
+               }
+       } else {
+               skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
+       }
        if (likely(skb)) {
                skb_reserve(skb, NET_SKB_PAD);
                skb->dev = dev;
@@ -330,28 +382,6 @@ void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
 }
 EXPORT_SYMBOL(skb_add_rx_frag);
 
-/**
- *     dev_alloc_skb - allocate an skbuff for receiving
- *     @length: length to allocate
- *
- *     Allocate a new &sk_buff and assign it a usage count of one. The
- *     buffer has unspecified headroom built in. Users should allocate
- *     the headroom they think they need without accounting for the
- *     built in space. The built in space is used for optimisations.
- *
- *     %NULL is returned if there is no free memory. Although this function
- *     allocates memory it can be called from an interrupt.
- */
-struct sk_buff *dev_alloc_skb(unsigned int length)
-{
-       /*
-        * There is more code here than it seems:
-        * __dev_alloc_skb is an inline
-        */
-       return __dev_alloc_skb(length, GFP_ATOMIC);
-}
-EXPORT_SYMBOL(dev_alloc_skb);
-
 static void skb_drop_list(struct sk_buff **listp)
 {
        struct sk_buff *list = *listp;
@@ -829,7 +859,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 {
        int headerlen = skb_headroom(skb);
-       unsigned int size = (skb_end_pointer(skb) - skb->head) + skb->data_len;
+       unsigned int size = skb_end_offset(skb) + skb->data_len;
        struct sk_buff *n = alloc_skb(size, gfp_mask);
 
        if (!n)
@@ -930,9 +960,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 {
        int i;
        u8 *data;
-       int size = nhead + (skb_end_pointer(skb) - skb->head) + ntail;
+       int size = nhead + skb_end_offset(skb) + ntail;
        long off;
-       bool fastpath;
 
        BUG_ON(nhead < 0);
 
@@ -941,27 +970,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
        size = SKB_DATA_ALIGN(size);
 
-       /* Check if we can avoid taking references on fragments if we own
-        * the last reference on skb->head. (see skb_release_data())
-        */
-       if (!skb->cloned)
-               fastpath = true;
-       else {
-               int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
-               fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
-       }
-
-       if (fastpath && !skb->head_frag &&
-           size + sizeof(struct skb_shared_info) <= ksize(skb->head)) {
-               memmove(skb->head + size, skb_shinfo(skb),
-                       offsetof(struct skb_shared_info,
-                                frags[skb_shinfo(skb)->nr_frags]));
-               memmove(skb->head + nhead, skb->head,
-                       skb_tail_pointer(skb) - skb->head);
-               off = nhead;
-               goto adjust_others;
-       }
-
        data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
                       gfp_mask);
        if (!data)
@@ -977,9 +985,12 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
               skb_shinfo(skb),
               offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
 
-       if (fastpath) {
-               skb_free_head(skb);
-       } else {
+       /*
+        * if shinfo is shared we must drop the old head gracefully, but if it
+        * is not we can just drop the old head and let the existing refcount
+        * be since all we did is relocate the values
+        */
+       if (skb_cloned(skb)) {
                /* copy this zero copy skb frags */
                if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
                        if (skb_copy_ubufs(skb, gfp_mask))
@@ -992,12 +1003,13 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
                        skb_clone_fraglist(skb);
 
                skb_release_data(skb);
+       } else {
+               skb_free_head(skb);
        }
        off = (data + nhead) - skb->head;
 
        skb->head     = data;
        skb->head_frag = 0;
-adjust_others:
        skb->data    += off;
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
        skb->end      = size;
@@ -1699,17 +1711,17 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
                              struct splice_pipe_desc *spd, struct sock *sk)
 {
        int seg;
-       bool head_is_linear = !skb->head_frag;
 
        /* map the linear part :
-        * If skb->head_frag is set, this 'linear' part is backed
-        * by a fragment, and we can avoid a copy.
+        * If skb->head_frag is set, this 'linear' part is backed by a
+        * fragment, and if the head is not shared with any clones then
+        * we can avoid a copy since we own the head portion of this page.
         */
        if (__splice_segment(virt_to_page(skb->data),
                             (unsigned long) skb->data & (PAGE_SIZE - 1),
                             skb_headlen(skb),
                             offset, len, skb, spd,
-                            head_is_linear,
+                            skb_head_is_locked(skb),
                             sk, pipe))
                return true;
 
@@ -2745,14 +2757,13 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
                        if (unlikely(!nskb))
                                goto err;
 
-                       hsize = skb_end_pointer(nskb) - nskb->head;
+                       hsize = skb_end_offset(nskb);
                        if (skb_cow_head(nskb, doffset + headroom)) {
                                kfree_skb(nskb);
                                goto err;
                        }
 
-                       nskb->truesize += skb_end_pointer(nskb) - nskb->head -
-                                         hsize;
+                       nskb->truesize += skb_end_offset(nskb) - hsize;
                        skb_release_head_state(nskb);
                        __skb_push(nskb, doffset);
                } else {
@@ -2870,6 +2881,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
        unsigned int len = skb_gro_len(skb);
        unsigned int offset = skb_gro_offset(skb);
        unsigned int headlen = skb_headlen(skb);
+       unsigned int delta_truesize;
 
        if (p->len + len >= 65536)
                return -E2BIG;
@@ -2899,11 +2911,15 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
                frag->page_offset += offset;
                skb_frag_size_sub(frag, offset);
 
+               /* all fragments truesize : remove (head size + sk_buff) */
+               delta_truesize = skb->truesize -
+                                SKB_TRUESIZE(skb_end_offset(skb));
+
                skb->truesize -= skb->data_len;
                skb->len -= skb->data_len;
                skb->data_len = 0;
 
-               NAPI_GRO_CB(skb)->free = 1;
+               NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE;
                goto done;
        } else if (skb->head_frag) {
                int nr_frags = pinfo->nr_frags;
@@ -2928,6 +2944,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
                memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
                /* We dont need to clear skbinfo->nr_frags here */
 
+               delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
                NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
                goto done;
        } else if (skb_gro_len(p) != pinfo->gso_size)
@@ -2970,7 +2987,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
        p = nskb;
 
 merge:
-       p->truesize += skb->truesize - len;
+       delta_truesize = skb->truesize;
        if (offset > headlen) {
                unsigned int eat = offset - headlen;
 
@@ -2990,7 +3007,7 @@ merge:
 done:
        NAPI_GRO_CB(p)->count++;
        p->data_len += len;
-       p->truesize += len;
+       p->truesize += delta_truesize;
        p->len += len;
 
        NAPI_GRO_CB(skb)->same_flow = 1;
@@ -3312,10 +3329,8 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
 {
        if (unlikely(start > skb_headlen(skb)) ||
            unlikely((int)start + off > skb_headlen(skb) - 2)) {
-               if (net_ratelimit())
-                       printk(KERN_WARNING
-                              "bad partial csum: csum=%u/%u len=%u\n",
-                              start, off, skb_headlen(skb));
+               net_warn_ratelimited("bad partial csum: csum=%u/%u len=%u\n",
+                                    start, off, skb_headlen(skb));
                return false;
        }
        skb->ip_summed = CHECKSUM_PARTIAL;
@@ -3327,8 +3342,93 @@ EXPORT_SYMBOL_GPL(skb_partial_csum_set);
 
 void __skb_warn_lro_forwarding(const struct sk_buff *skb)
 {
-       if (net_ratelimit())
-               pr_warning("%s: received packets cannot be forwarded"
-                          " while LRO is enabled\n", skb->dev->name);
+       net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n",
+                            skb->dev->name);
 }
 EXPORT_SYMBOL(__skb_warn_lro_forwarding);
+
+void kfree_skb_partial(struct sk_buff *skb, bool head_stolen)
+{
+       if (head_stolen)
+               kmem_cache_free(skbuff_head_cache, skb);
+       else
+               __kfree_skb(skb);
+}
+EXPORT_SYMBOL(kfree_skb_partial);
+
+/**
+ * skb_try_coalesce - try to merge skb to prior one
+ * @to: prior buffer
+ * @from: buffer to add
+ * @fragstolen: pointer to boolean
+ *
+ */
+bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
+                     bool *fragstolen, int *delta_truesize)
+{
+       int i, delta, len = from->len;
+
+       *fragstolen = false;
+
+       if (skb_cloned(to))
+               return false;
+
+       if (len <= skb_tailroom(to)) {
+               BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
+               *delta_truesize = 0;
+               return true;
+       }
+
+       if (skb_has_frag_list(to) || skb_has_frag_list(from))
+               return false;
+
+       if (skb_headlen(from) != 0) {
+               struct page *page;
+               unsigned int offset;
+
+               if (skb_shinfo(to)->nr_frags +
+                   skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
+                       return false;
+
+               if (skb_head_is_locked(from))
+                       return false;
+
+               delta = from->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
+
+               page = virt_to_head_page(from->head);
+               offset = from->data - (unsigned char *)page_address(page);
+
+               skb_fill_page_desc(to, skb_shinfo(to)->nr_frags,
+                                  page, offset, skb_headlen(from));
+               *fragstolen = true;
+       } else {
+               if (skb_shinfo(to)->nr_frags +
+                   skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS)
+                       return false;
+
+               delta = from->truesize -
+                       SKB_TRUESIZE(skb_end_pointer(from) - from->head);
+       }
+
+       WARN_ON_ONCE(delta < len);
+
+       memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags,
+              skb_shinfo(from)->frags,
+              skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
+       skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags;
+
+       if (!skb_cloned(from))
+               skb_shinfo(from)->nr_frags = 0;
+
+       /* if the skb is cloned this does nothing since we set nr_frags to 0 */
+       for (i = 0; i < skb_shinfo(from)->nr_frags; i++)
+               skb_frag_ref(from, i);
+
+       to->truesize += delta;
+       to->len += len;
+       to->data_len += len;
+
+       *delta_truesize = delta;
+       return true;
+}
+EXPORT_SYMBOL(skb_try_coalesce);