/*
 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/mlx4/cq.h>
#include <linux/slab.h>
#include <linux/mlx4/qp.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>

#include "mlx4_en.h"

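/* Attach a fresh receive fragment to slot 'i' of an RX descriptor:
 * either start a new page (when the per-ring allocator reached its
 * last usable offset) or take another reference on the current page,
 * then DMA-map the fragment and write its address into the WQE.
 */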
static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv,
                              struct mlx4_en_rx_desc *rx_desc,
                              struct page_frag *skb_frags,
                              struct mlx4_en_rx_alloc *ring_alloc,
                              int i)
{
        struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
        struct mlx4_en_rx_alloc *page_alloc = &ring_alloc[i];
        struct page *page;
        dma_addr_t dma;

        if (page_alloc->offset == frag_info->last_offset) {
                /* Allocate new page */
                page = alloc_pages(GFP_ATOMIC | __GFP_COMP, MLX4_EN_ALLOC_ORDER);
                if (!page)
                        return -ENOMEM;

                skb_frags[i].page = page_alloc->page;
                skb_frags[i].offset = page_alloc->offset;
                page_alloc->page = page;
                page_alloc->offset = frag_info->frag_align;
        } else {
                page = page_alloc->page;
                get_page(page);

                skb_frags[i].page = page;
                skb_frags[i].offset = page_alloc->offset;
                page_alloc->offset += frag_info->frag_stride;
        }
        dma = dma_map_single(priv->ddev, page_address(skb_frags[i].page) +
                             skb_frags[i].offset, frag_info->frag_size,
                             PCI_DMA_FROMDEVICE);
        rx_desc->data[i].addr = cpu_to_be64(dma);
        return 0;
}

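/* Pre-allocate one page per fragment type for this ring's page
 * allocator; on failure, release the pages already allocated.
 */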
static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
                                  struct mlx4_en_rx_ring *ring)
{
        struct mlx4_en_rx_alloc *page_alloc;
        int i;

        for (i = 0; i < priv->num_frags; i++) {
                page_alloc = &ring->page_alloc[i];
                page_alloc->page = alloc_pages(GFP_ATOMIC | __GFP_COMP,
                                               MLX4_EN_ALLOC_ORDER);
                if (!page_alloc->page)
                        goto out;

                page_alloc->offset = priv->frag_info[i].frag_align;
                en_dbg(DRV, priv, "Initialized allocator:%d with page:%p\n",
                       i, page_alloc->page);
        }
        return 0;

out:
        while (i--) {
                page_alloc = &ring->page_alloc[i];
                put_page(page_alloc->page);
                page_alloc->page = NULL;
        }
        return -ENOMEM;
}

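/* Drop the page allocator's remaining page references when the ring is
 * torn down.
 */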
static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv,
                                      struct mlx4_en_rx_ring *ring)
{
        struct mlx4_en_rx_alloc *page_alloc;
        int i;

        for (i = 0; i < priv->num_frags; i++) {
                page_alloc = &ring->page_alloc[i];
                en_dbg(DRV, priv, "Freeing allocator:%d count:%d\n",
                       i, page_count(page_alloc->page));

                put_page(page_alloc->page);
                page_alloc->page = NULL;
        }
}

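/* Write the static part of an RX descriptor: the size and memory key
 * of every fragment used, padding any remaining data segments.
 */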
static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv,
                                 struct mlx4_en_rx_ring *ring, int index)
{
        struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index;
        struct skb_frag_struct *skb_frags = ring->rx_info +
                                            (index << priv->log_rx_info);
        int possible_frags;
        int i;

        /* Set size and memtype fields */
        for (i = 0; i < priv->num_frags; i++) {
                skb_frag_size_set(&skb_frags[i], priv->frag_info[i].frag_size);
                rx_desc->data[i].byte_count =
                        cpu_to_be32(priv->frag_info[i].frag_size);
                rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key);
        }

        /* If the number of used fragments does not fill up the ring stride,
         * remaining (unused) fragments must be padded with null address/size
         * and a special memory key */
        possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE;
        for (i = priv->num_frags; i < possible_frags; i++) {
                rx_desc->data[i].byte_count = 0;
                rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
                rx_desc->data[i].addr = 0;
        }
}

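/* Allocate and map the data fragments of a single RX descriptor.
 * On failure, unmap and release the fragments attached so far.
 */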
static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
                                   struct mlx4_en_rx_ring *ring, int index)
{
        struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride);
        struct page_frag *skb_frags = ring->rx_info +
                                      (index << priv->log_rx_info);
        int i;

        for (i = 0; i < priv->num_frags; i++)
                if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, ring->page_alloc, i))
                        goto err;

        return 0;

err:
        while (i--) {
                dma_addr_t dma = be64_to_cpu(rx_desc->data[i].addr);
                pci_unmap_single(priv->mdev->pdev, dma, skb_frags[i].size,
                                 PCI_DMA_FROMDEVICE);
                put_page(skb_frags[i].page);
        }
        return -ENOMEM;
}

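/* Publish the ring's producer index (low 16 bits) in the doorbell
 * record so the HW knows how many receive buffers are posted.
 */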
static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
{
        *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff);
}

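/* Unmap and release all fragments of one RX descriptor. */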
static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
                                 struct mlx4_en_rx_ring *ring,
                                 int index)
{
        struct page_frag *skb_frags;
        struct mlx4_en_rx_desc *rx_desc = ring->buf + (index << ring->log_stride);
        dma_addr_t dma;
        int nr;

        skb_frags = ring->rx_info + (index << priv->log_rx_info);
        for (nr = 0; nr < priv->num_frags; nr++) {
                en_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
                dma = be64_to_cpu(rx_desc->data[nr].addr);

                en_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma);
                dma_unmap_single(priv->ddev, dma, skb_frags[nr].size,
                                 PCI_DMA_FROMDEVICE);
                put_page(skb_frags[nr].page);
        }
}

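/* Populate every RX ring with receive buffers.  If allocation fails
 * after each ring already holds at least MLX4_EN_MIN_RX_SIZE buffers,
 * shrink all rings to the largest power of two that was successfully
 * filled instead of failing.
 */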
static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
{
        struct mlx4_en_rx_ring *ring;
        int ring_ind;
        int buf_ind;
        int new_size;

        for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) {
                for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
                        ring = &priv->rx_ring[ring_ind];

                        if (mlx4_en_prepare_rx_desc(priv, ring,
                                                    ring->actual_size)) {
                                if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
                                        en_err(priv, "Failed to allocate "
                                                     "enough rx buffers\n");
                                        return -ENOMEM;
                                } else {
                                        new_size = rounddown_pow_of_two(ring->actual_size);
                                        en_warn(priv, "Only %d buffers allocated "
                                                      "reducing ring size to %d",
                                                ring->actual_size, new_size);
                                        goto reduce_rings;
                                }
                        }
                        ring->actual_size++;
                        ring->prod++;
                }
        }
        return 0;

reduce_rings:
        for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
                ring = &priv->rx_ring[ring_ind];
                while (ring->actual_size > new_size) {
                        ring->actual_size--;
                        ring->prod--;
                        mlx4_en_free_rx_desc(priv, ring, ring->actual_size);
                }
        }

        return 0;
}

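/* Release every buffer still owned by the ring (between the consumer
 * and producer indexes).
 */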
static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv,
                                struct mlx4_en_rx_ring *ring)
{
        int index;

        en_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n",
               ring->cons, ring->prod);

        /* Unmap and free Rx buffers */
        BUG_ON((u32) (ring->prod - ring->cons) > ring->actual_size);
        while (ring->cons != ring->prod) {
                index = ring->cons & ring->size_mask;
                en_dbg(DRV, priv, "Processing descriptor:%d\n", index);
                mlx4_en_free_rx_desc(priv, ring, index);
                ++ring->cons;
        }
}

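/* Allocate the software and hardware resources of one RX ring: the
 * rx_info array that tracks page fragments, the HW queue memory and
 * its kernel mapping.
 */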
int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
                           struct mlx4_en_rx_ring *ring, u32 size, u16 stride)
{
        struct mlx4_en_dev *mdev = priv->mdev;
        int err;
        int tmp;

        ring->prod = 0;
        ring->cons = 0;
        ring->size = size;
        ring->size_mask = size - 1;
        ring->stride = stride;
        ring->log_stride = ffs(ring->stride) - 1;
        ring->buf_size = ring->size * ring->stride + TXBB_SIZE;

        tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
                                        sizeof(struct skb_frag_struct));
        ring->rx_info = vmalloc(tmp);
        if (!ring->rx_info)
                return -ENOMEM;

        en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
               ring->rx_info, tmp);

        err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
                                 ring->buf_size, 2 * PAGE_SIZE);
        if (err)
                goto err_ring;

        err = mlx4_en_map_buffer(&ring->wqres.buf);
        if (err) {
                en_err(priv, "Failed to map RX buffer\n");
                goto err_hwq;
        }
        ring->buf = ring->wqres.buf.direct.buf;

        return 0;

err_hwq:
        mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
err_ring:
        vfree(ring->rx_info);
        ring->rx_info = NULL;
        return err;
}

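/* Bring all RX rings to an operational state: initialize descriptors
 * and page allocators, fill the rings with buffers and publish the
 * producer index to the HW.
 */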
int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
{
        struct mlx4_en_rx_ring *ring;
        int i;
        int ring_ind;
        int err;
        int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
                                        DS_SIZE * priv->num_frags);

        for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
                ring = &priv->rx_ring[ring_ind];

                ring->prod = 0;
                ring->cons = 0;
                ring->actual_size = 0;
                ring->cqn = priv->rx_cq[ring_ind].mcq.cqn;

                ring->stride = stride;
                if (ring->stride <= TXBB_SIZE)
                        ring->buf += TXBB_SIZE;

                ring->log_stride = ffs(ring->stride) - 1;
                ring->buf_size = ring->size * ring->stride;

                memset(ring->buf, 0, ring->buf_size);
                mlx4_en_update_rx_prod_db(ring);

                /* Initialize all descriptors */
                for (i = 0; i < ring->size; i++)
                        mlx4_en_init_rx_desc(priv, ring, i);

                /* Initialize page allocators */
                err = mlx4_en_init_allocator(priv, ring);
                if (err) {
                        en_err(priv, "Failed initializing ring allocator\n");
                        if (ring->stride <= TXBB_SIZE)
                                ring->buf -= TXBB_SIZE;
                        ring_ind--;
                        goto err_allocator;
                }
        }
        err = mlx4_en_fill_rx_buffers(priv);
        if (err)
                goto err_buffers;

        for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
                ring = &priv->rx_ring[ring_ind];

                ring->size_mask = ring->actual_size - 1;
                mlx4_en_update_rx_prod_db(ring);
        }

        return 0;

err_buffers:
        for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++)
                mlx4_en_free_rx_buf(priv, &priv->rx_ring[ring_ind]);

        ring_ind = priv->rx_ring_num - 1;
err_allocator:
        while (ring_ind >= 0) {
                if (priv->rx_ring[ring_ind].stride <= TXBB_SIZE)
                        priv->rx_ring[ring_ind].buf -= TXBB_SIZE;
                mlx4_en_destroy_allocator(priv, &priv->rx_ring[ring_ind]);
                ring_ind--;
        }
        return err;
}

void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
                             struct mlx4_en_rx_ring *ring, u32 size, u16 stride)
{
        struct mlx4_en_dev *mdev = priv->mdev;

        mlx4_en_unmap_buffer(&ring->wqres.buf);
        mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
        vfree(ring->rx_info);
        ring->rx_info = NULL;
}

void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
                                struct mlx4_en_rx_ring *ring)
{
        mlx4_en_free_rx_buf(priv, ring);
        if (ring->stride <= TXBB_SIZE)
                ring->buf -= TXBB_SIZE;
        mlx4_en_destroy_allocator(priv, ring);
}

/* Unmap a completed descriptor and free unused pages */
static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
                                    struct mlx4_en_rx_desc *rx_desc,
                                    struct page_frag *skb_frags,
                                    struct sk_buff *skb,
                                    struct mlx4_en_rx_alloc *page_alloc,
                                    int length)
{
        struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags;
        struct mlx4_en_frag_info *frag_info;
        int nr;
        dma_addr_t dma;

        /* Collect used fragments while replacing them in the HW descriptors */
        for (nr = 0; nr < priv->num_frags; nr++) {
                frag_info = &priv->frag_info[nr];
                if (length <= frag_info->frag_prefix_size)
                        break;

                /* Save page reference in skb */
                __skb_frag_set_page(&skb_frags_rx[nr], skb_frags[nr].page);
                skb_frag_size_set(&skb_frags_rx[nr], skb_frags[nr].size);
                skb_frags_rx[nr].page_offset = skb_frags[nr].offset;
                skb->truesize += frag_info->frag_stride;
                dma = be64_to_cpu(rx_desc->data[nr].addr);

                /* Allocate a replacement page */
                if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, page_alloc, nr))
                        goto fail;

                /* Unmap buffer */
                dma_unmap_single(priv->ddev, dma, skb_frag_size(&skb_frags_rx[nr]),
                                 PCI_DMA_FROMDEVICE);
        }
        /* Adjust size of last fragment to match actual length */
        if (nr > 0)
                skb_frag_size_set(&skb_frags_rx[nr - 1],
                        length - priv->frag_info[nr - 1].frag_prefix_size);
        return nr;

fail:
        /* Drop all accumulated fragments (which have already been replaced in
         * the descriptor) of this packet; remaining fragments are reused... */
        while (nr > 0) {
                nr--;
                __skb_frag_unref(&skb_frags_rx[nr]);
        }
        return 0;
}

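/* Build an skb for a received packet.  Short packets are copied in
 * full into the skb's linear buffer; longer ones get their headers
 * copied and the rest of the data attached as page fragments.
 */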
static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
                                      struct mlx4_en_rx_desc *rx_desc,
                                      struct page_frag *skb_frags,
                                      struct mlx4_en_rx_alloc *page_alloc,
                                      unsigned int length)
{
        struct sk_buff *skb;
        void *va;
        int used_frags;
        dma_addr_t dma;

        skb = netdev_alloc_skb(priv->dev, SMALL_PACKET_SIZE + NET_IP_ALIGN);
        if (!skb) {
                en_dbg(RX_ERR, priv, "Failed allocating skb\n");
                return NULL;
        }
        skb_reserve(skb, NET_IP_ALIGN);
        skb->len = length;

        /* Get a pointer to the first fragment so we can copy the headers into
         * the (linear part of the) skb */
        va = page_address(skb_frags[0].page) + skb_frags[0].offset;

        if (length <= SMALL_PACKET_SIZE) {
                /* We are copying all relevant data to the skb - temporarily
                 * sync buffers for the copy */
                dma = be64_to_cpu(rx_desc->data[0].addr);
                dma_sync_single_for_cpu(priv->ddev, dma, length,
                                        DMA_FROM_DEVICE);
                skb_copy_to_linear_data(skb, va, length);
                dma_sync_single_for_device(priv->ddev, dma, length,
                                           DMA_FROM_DEVICE);
                skb->tail += length;
        } else {

                /* Move relevant fragments to skb */
                used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, skb_frags,
                                                      skb, page_alloc, length);
                if (unlikely(!used_frags)) {
                        kfree_skb(skb);
                        return NULL;
                }
                skb_shinfo(skb)->nr_frags = used_frags;

                /* Copy headers into the skb linear buffer */
                memcpy(skb->data, va, HEADER_COPY_SIZE);
                skb->tail += HEADER_COPY_SIZE;

                /* Skip headers in first fragment */
                skb_shinfo(skb)->frags[0].page_offset += HEADER_COPY_SIZE;

                /* Adjust size of first fragment */
                skb_frag_size_sub(&skb_shinfo(skb)->frags[0], HEADER_COPY_SIZE);
                skb->data_len = length - HEADER_COPY_SIZE;
        }
        return skb;
}

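/* Check a received frame against the loopback selftest pattern and
 * flag success on a match; the frame itself is always dropped.
 */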
static void validate_loopback(struct mlx4_en_priv *priv, struct sk_buff *skb)
{
        int i;
        int offset = ETH_HLEN;

        for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) {
                if (*(skb->data + offset) != (unsigned char) (i & 0xff))
                        goto out_loopback;
        }
        /* Loopback found */
        priv->loopback_ok = 1;

out_loopback:
        dev_kfree_skb_any(skb);
}

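/* Main RX completion processing: walk the CQ, validate each completed
 * frame, hand good frames to the stack (via GRO when possible) while
 * replenishing the consumed buffers, and stop once the NAPI budget is
 * exhausted.  Returns the number of completions processed.
 */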
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
        struct mlx4_en_priv *priv = netdev_priv(dev);
        struct mlx4_cqe *cqe;
        struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring];
        struct page_frag *skb_frags;
        struct mlx4_en_rx_desc *rx_desc;
        struct sk_buff *skb;
        int index;
        int nr;
        unsigned int length;
        int polled = 0;
        int ip_summed;
        struct ethhdr *ethh;
        u64 s_mac;

        if (!priv->port_up)
                return 0;

        /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
         * descriptor offset can be deduced from the CQE index instead of
         * reading 'cqe->index' */
        index = cq->mcq.cons_index & ring->size_mask;
        cqe = &cq->buf[index];

        /* Process all completed CQEs */
        while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
                    cq->mcq.cons_index & cq->size)) {

                skb_frags = ring->rx_info + (index << priv->log_rx_info);
                rx_desc = ring->buf + (index << ring->log_stride);

                /*
                 * make sure we read the CQE after we read the ownership bit
                 */
                rmb();

                /* Drop packet on bad receive or bad checksum */
                if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
                                                MLX4_CQE_OPCODE_ERROR)) {
                        en_err(priv, "CQE completed in error - vendor "
                                  "syndrome:%d syndrome:%d\n",
                                  ((struct mlx4_err_cqe *) cqe)->vendor_err_syndrome,
                                  ((struct mlx4_err_cqe *) cqe)->syndrome);
                        goto next;
                }
                if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) {
                        en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n");
                        goto next;
                }

                /* Get a pointer to the first fragment, since we don't have an
                 * skb yet, and cast it to an ethhdr struct */
                ethh = (struct ethhdr *)(page_address(skb_frags[0].page) +
                                         skb_frags[0].offset);
                s_mac = mlx4_en_mac_to_u64(ethh->h_source);

                /* Drop the packet if its source MAC equals our own MAC, unless
                 * we are running the loopback selftest with HW loopback (flb)
                 * enabled */
                if (s_mac == priv->mac &&
                        (!(dev->features & NETIF_F_LOOPBACK) ||
                         !priv->validate_loopback))
                        goto next;

                /*
                 * Packet is OK - process it.
                 */
                length = be32_to_cpu(cqe->byte_cnt);
                length -= ring->fcs_del;
                ring->bytes += length;
                ring->packets++;

                if (likely(dev->features & NETIF_F_RXCSUM)) {
                        if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
                            (cqe->checksum == cpu_to_be16(0xffff))) {
                                ring->csum_ok++;
                                /* This packet is eligible for LRO if it is:
                                 * - DIX Ethernet (type interpretation)
                                 * - TCP/IP (v4)
                                 * - without IP options
                                 * - not an IP fragment */
                                if (dev->features & NETIF_F_GRO) {
                                        struct sk_buff *gro_skb = napi_get_frags(&cq->napi);
                                        if (!gro_skb)
                                                goto next;

                                        nr = mlx4_en_complete_rx_desc(
                                                priv, rx_desc,
                                                skb_frags, gro_skb,
                                                ring->page_alloc, length);
                                        if (!nr)
                                                goto next;

                                        skb_shinfo(gro_skb)->nr_frags = nr;
                                        gro_skb->len = length;
                                        gro_skb->data_len = length;
                                        gro_skb->ip_summed = CHECKSUM_UNNECESSARY;

                                        if (cqe->vlan_my_qpn &
                                            cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) {
                                                u16 vid = be16_to_cpu(cqe->sl_vid);

                                                __vlan_hwaccel_put_tag(gro_skb, vid);
                                        }

                                        if (dev->features & NETIF_F_RXHASH)
                                                gro_skb->rxhash = be32_to_cpu(cqe->immed_rss_invalid);

                                        skb_record_rx_queue(gro_skb, cq->ring);
                                        napi_gro_frags(&cq->napi);

                                        goto next;
                                }

                                /* LRO not possible, complete processing here */
                                ip_summed = CHECKSUM_UNNECESSARY;
                        } else {
                                ip_summed = CHECKSUM_NONE;
                                ring->csum_none++;
                        }
                } else {
                        ip_summed = CHECKSUM_NONE;
                        ring->csum_none++;
                }

                skb = mlx4_en_rx_skb(priv, rx_desc, skb_frags,
                                     ring->page_alloc, length);
                if (!skb) {
                        priv->stats.rx_dropped++;
                        goto next;
                }

                if (unlikely(priv->validate_loopback)) {
                        validate_loopback(priv, skb);
                        goto next;
                }

                skb->ip_summed = ip_summed;
                skb->protocol = eth_type_trans(skb, dev);
                skb_record_rx_queue(skb, cq->ring);

                if (dev->features & NETIF_F_RXHASH)
                        skb->rxhash = be32_to_cpu(cqe->immed_rss_invalid);

                if (be32_to_cpu(cqe->vlan_my_qpn) &
                    MLX4_CQE_VLAN_PRESENT_MASK)
                        __vlan_hwaccel_put_tag(skb, be16_to_cpu(cqe->sl_vid));

                /* Push it up the stack */
                netif_receive_skb(skb);

next:
                ++cq->mcq.cons_index;
                index = (cq->mcq.cons_index) & ring->size_mask;
                cqe = &cq->buf[index];
                if (++polled == budget) {
                        /* We reached the NAPI budget - stop processing and
                         * let NAPI poll us again */
                        goto out;
                }
        }

out:
        AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
        mlx4_cq_set_ci(&cq->mcq);
        wmb(); /* ensure HW sees CQ consumer before we post new buffers */
        ring->cons = cq->mcq.cons_index;
        ring->prod += polled; /* Polled descriptors were reallocated in place */
        mlx4_en_update_rx_prod_db(ring);
        return polled;
}

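/* RX CQ interrupt handler: kick NAPI while the port is up, otherwise
 * just re-arm the CQ.
 */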
void mlx4_en_rx_irq(struct mlx4_cq *mcq)
{
        struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
        struct mlx4_en_priv *priv = netdev_priv(cq->dev);

        if (priv->port_up)
                napi_schedule(&cq->napi);
        else
                mlx4_en_arm_cq(priv, cq);
}

/* Rx CQ polling - called by NAPI */
int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
{
        struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
        struct net_device *dev = cq->dev;
        struct mlx4_en_priv *priv = netdev_priv(dev);
        int done;

        done = mlx4_en_process_rx_cq(dev, cq, budget);

        /* If we used up all the quota - we're probably not done yet... */
        if (done == budget)
                INC_PERF_COUNTER(priv->pstats.napi_quota);
        else {
                /* Done for now */
                napi_complete(napi);
                mlx4_en_arm_cq(priv, cq);
        }
        return done;
}

/* Calculate the last offset position that accommodates a full fragment
 * (assuming fragment size = stride - align) */
static int mlx4_en_last_alloc_offset(struct mlx4_en_priv *priv, u16 stride, u16 align)
{
        u16 res = MLX4_EN_ALLOC_SIZE % stride;
        u16 offset = MLX4_EN_ALLOC_SIZE - stride - res + align;

        en_dbg(DRV, priv, "Calculated last offset for stride:%d align:%d "
                            "res:%d offset:%d\n", stride, align, res, offset);
        return offset;
}

static int frag_sizes[] = {
        FRAG_SZ0,
        FRAG_SZ1,
        FRAG_SZ2,
        FRAG_SZ3
};

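/* Split the effective MTU (MTU plus Ethernet/VLAN/LLC-SNAP overhead)
 * over up to MLX4_EN_MAX_RX_FRAGS receive fragments and record each
 * fragment's size, prefix, alignment, stride and allocator last_offset.
 */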
void mlx4_en_calc_rx_buf(struct net_device *dev)
{
        struct mlx4_en_priv *priv = netdev_priv(dev);
        int eff_mtu = dev->mtu + ETH_HLEN + VLAN_HLEN + ETH_LLC_SNAP_SIZE;
        int buf_size = 0;
        int i = 0;

        while (buf_size < eff_mtu) {
                priv->frag_info[i].frag_size =
                        (eff_mtu > buf_size + frag_sizes[i]) ?
                                frag_sizes[i] : eff_mtu - buf_size;
                priv->frag_info[i].frag_prefix_size = buf_size;
                if (!i) {
                        priv->frag_info[i].frag_align = NET_IP_ALIGN;
                        priv->frag_info[i].frag_stride =
                                ALIGN(frag_sizes[i] + NET_IP_ALIGN, SMP_CACHE_BYTES);
                } else {
                        priv->frag_info[i].frag_align = 0;
                        priv->frag_info[i].frag_stride =
                                ALIGN(frag_sizes[i], SMP_CACHE_BYTES);
                }
                priv->frag_info[i].last_offset = mlx4_en_last_alloc_offset(
                                                priv, priv->frag_info[i].frag_stride,
                                                priv->frag_info[i].frag_align);
                buf_size += priv->frag_info[i].frag_size;
                i++;
        }

        priv->num_frags = i;
        priv->rx_skb_size = eff_mtu;
        priv->log_rx_info = ROUNDUP_LOG2(i * sizeof(struct skb_frag_struct));

        en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d "
                  "num_frags:%d):\n", eff_mtu, priv->num_frags);
        for (i = 0; i < priv->num_frags; i++) {
                en_dbg(DRV, priv, "  frag:%d - size:%d prefix:%d align:%d "
                                "stride:%d last_offset:%d\n", i,
                                priv->frag_info[i].frag_size,
                                priv->frag_info[i].frag_prefix_size,
                                priv->frag_info[i].frag_align,
                                priv->frag_info[i].frag_stride,
                                priv->frag_info[i].last_offset);
        }
}

/* RSS related functions */

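/* Create one RX QP for a ring, fill its context (CQ, doorbell record,
 * FCS handling) and bring it to the ready state.
 */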
static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
                                 struct mlx4_en_rx_ring *ring,
                                 enum mlx4_qp_state *state,
                                 struct mlx4_qp *qp)
{
        struct mlx4_en_dev *mdev = priv->mdev;
        struct mlx4_qp_context *context;
        int err = 0;

        context = kmalloc(sizeof *context, GFP_KERNEL);
        if (!context) {
                en_err(priv, "Failed to allocate qp context\n");
                return -ENOMEM;
        }

        err = mlx4_qp_alloc(mdev->dev, qpn, qp);
        if (err) {
                en_err(priv, "Failed to allocate qp #%x\n", qpn);
                goto out;
        }
        qp->event = mlx4_en_sqp_event;

        memset(context, 0, sizeof *context);
        mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0,
                                qpn, ring->cqn, context);
        context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma);

        /* Cancel FCS removal if FW allows */
        if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) {
                context->param3 |= cpu_to_be32(1 << 29);
                ring->fcs_del = ETH_FCS_LEN;
        } else
                ring->fcs_del = 0;

        err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, context, qp, state);
        if (err) {
                mlx4_qp_remove(mdev->dev, qp);
                mlx4_qp_free(mdev->dev, qp);
        }
        mlx4_en_update_rx_prod_db(ring);
out:
        kfree(context);
        return err;
}

/* Allocate RX QPs and configure them according to the RSS map */
int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
{
        struct mlx4_en_dev *mdev = priv->mdev;
        struct mlx4_en_rss_map *rss_map = &priv->rss_map;
        struct mlx4_qp_context context;
        struct mlx4_rss_context *rss_context;
        int rss_rings;
        void *ptr;
        u8 rss_mask = (MLX4_RSS_IPV4 | MLX4_RSS_TCP_IPV4 | MLX4_RSS_IPV6 |
                        MLX4_RSS_TCP_IPV6);
        int i, qpn;
        int err = 0;
        int good_qps = 0;
        static const u32 rsskey[10] = { 0xD181C62C, 0xF7F4DB5B, 0x1983A2FC,
                                0x943E1ADB, 0xD9389E6B, 0xD1039C2C, 0xA74499AD,
                                0x593D56D9, 0xF3253C06, 0x2ADC1FFC};

        en_dbg(DRV, priv, "Configuring rss steering\n");
        err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num,
                                    priv->rx_ring_num,
                                    &rss_map->base_qpn);
        if (err) {
                en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num);
                return err;
        }

        for (i = 0; i < priv->rx_ring_num; i++) {
                qpn = rss_map->base_qpn + i;
                err = mlx4_en_config_rss_qp(priv, qpn, &priv->rx_ring[i],
                                            &rss_map->state[i],
                                            &rss_map->qps[i]);
                if (err)
                        goto rss_err;

                ++good_qps;
        }

        /* Configure RSS indirection qp */
        err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp);
        if (err) {
                en_err(priv, "Failed to allocate RSS indirection QP\n");
                goto rss_err;
        }
        rss_map->indir_qp.event = mlx4_en_sqp_event;
        mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
                                priv->rx_ring[0].cqn, &context);

        if (!priv->prof->rss_rings || priv->prof->rss_rings > priv->rx_ring_num)
                rss_rings = priv->rx_ring_num;
        else
                rss_rings = priv->prof->rss_rings;

        ptr = ((void *) &context) + offsetof(struct mlx4_qp_context, pri_path)
                                        + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH;
        rss_context = ptr;
        rss_context->base_qpn = cpu_to_be32(ilog2(rss_rings) << 24 |
                                            (rss_map->base_qpn));
        rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn);
        if (priv->mdev->profile.udp_rss) {
                rss_mask |= MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6;
                rss_context->base_qpn_udp = rss_context->default_qpn;
        }
        rss_context->flags = rss_mask;
        rss_context->hash_fn = MLX4_RSS_HASH_TOP;
        for (i = 0; i < 10; i++)
                rss_context->rss_key[i] = rsskey[i];

        err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context,
                               &rss_map->indir_qp, &rss_map->indir_state);
        if (err)
                goto indir_err;

        return 0;

indir_err:
        mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
                       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
        mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
        mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
rss_err:
        for (i = 0; i < good_qps; i++) {
                mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
                               MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
                mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
                mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
        }
        mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
        return err;
}

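/* Tear down the RSS steering state: reset and free the indirection QP
 * and all per-ring QPs, then release the reserved QP range.
 */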
void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv)
{
        struct mlx4_en_dev *mdev = priv->mdev;
        struct mlx4_en_rss_map *rss_map = &priv->rss_map;
        int i;

        mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
                       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
        mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
        mlx4_qp_free(mdev->dev, &rss_map->indir_qp);

        for (i = 0; i < priv->rx_ring_num; i++) {
                mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
                               MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
                mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
                mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
        }
        mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
}