drivers/net/ethernet/emulex/benet/be_main.c
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
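/* e.g.: echo <num_vfs> > /sys/bus/pci/devices/<domain:bus:dev.fn>/sriov_numvfs */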
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
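/* e.g.: modprobe be2net rx_frag_size=4096 */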
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58         { 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67         "CEV",
68         "CTX",
69         "DBUF",
70         "ERX",
71         "Host",
72         "MPU",
73         "NDMA",
74         "PTC ",
75         "RDMA ",
76         "RXF ",
77         "RXIPS ",
78         "RXULP0 ",
79         "RXULP1 ",
80         "RXULP2 ",
81         "TIM ",
82         "TPOST ",
83         "TPRE ",
84         "TXIPS ",
85         "TXULP0 ",
86         "TXULP1 ",
87         "UC ",
88         "WDMA ",
89         "TXULP2 ",
90         "HOST1 ",
91         "P0_OB_LINK ",
92         "P1_OB_LINK ",
93         "HOST_GPIO ",
94         "MBOX ",
95         "ERX2 ",
96         "SPARE ",
97         "JTAG ",
98         "MPU_INTPEND "
99 };
100
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103         "LPCMEMHOST",
104         "MGMT_MAC",
105         "PCS0ONLINE",
106         "MPU_IRAM",
107         "PCS1ONLINE",
108         "PCTL0",
109         "PCTL1",
110         "PMEM",
111         "RR",
112         "TXPB",
113         "RXPP",
114         "XAUI",
115         "TXP",
116         "ARM",
117         "IPC",
118         "HOST2",
119         "HOST3",
120         "HOST4",
121         "HOST5",
122         "HOST6",
123         "HOST7",
124         "ECRC",
125         "Poison TLP",
126         "NETC",
127         "PERIPH",
128         "LLTXULP",
129         "D2P",
130         "RCON",
131         "LDMA",
132         "LLTXP",
133         "LLTXPB",
134         "Unknown"
135 };
136
137 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
138                                  BE_IF_FLAGS_BROADCAST | \
139                                  BE_IF_FLAGS_MULTICAST | \
140                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
141
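/* Free the DMA-coherent memory backing a queue's ring, if it was allocated */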
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144         struct be_dma_mem *mem = &q->dma_mem;
145
146         if (mem->va) {
147                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148                                   mem->dma);
149                 mem->va = NULL;
150         }
151 }
152
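/* Allocate zeroed, DMA-coherent memory for a ring of 'len' entries,
 * each 'entry_size' bytes long
 */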
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154                           u16 len, u16 entry_size)
155 {
156         struct be_dma_mem *mem = &q->dma_mem;
157
158         memset(q, 0, sizeof(*q));
159         q->len = len;
160         q->entry_size = entry_size;
161         mem->size = len * entry_size;
162         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163                                       GFP_KERNEL);
164         if (!mem->va)
165                 return -ENOMEM;
166         return 0;
167 }
168
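/* Enable/disable host interrupts by toggling the HOSTINTR bit of the
 * MEMBAR interrupt-control register in PCI config space
 */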
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171         u32 reg, enabled;
172
173         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174                               &reg);
175         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176
177         if (!enabled && enable)
178                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179         else if (enabled && !enable)
180                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else
182                 return;
183
184         pci_write_config_dword(adapter->pdev,
185                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190         int status = 0;
191
192         /* On Lancer, interrupts can't be controlled via this register */
193         if (lancer_chip(adapter))
194                 return;
195
196         if (be_check_error(adapter, BE_ERROR_EEH))
197                 return;
198
199         status = be_cmd_intr_set(adapter, enable);
200         if (status)
201                 be_reg_intr_set(adapter, enable);
202 }
203
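/* Ring the RX queue doorbell to notify HW of newly posted rx buffers */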
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206         u32 val = 0;
207
208         if (be_check_error(adapter, BE_ERROR_HW))
209                 return;
210
211         val |= qid & DB_RQ_RING_ID_MASK;
212         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213
214         wmb();
215         iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219                           u16 posted)
220 {
221         u32 val = 0;
222
223         if (be_check_error(adapter, BE_ERROR_HW))
224                 return;
225
226         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228
229         wmb();
230         iowrite32(val, adapter->db + txo->db_offset);
231 }
232
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234                          bool arm, bool clear_int, u16 num_popped,
235                          u32 eq_delay_mult_enc)
236 {
237         u32 val = 0;
238
239         val |= qid & DB_EQ_RING_ID_MASK;
240         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241
242         if (be_check_error(adapter, BE_ERROR_HW))
243                 return;
244
245         if (arm)
246                 val |= 1 << DB_EQ_REARM_SHIFT;
247         if (clear_int)
248                 val |= 1 << DB_EQ_CLR_SHIFT;
249         val |= 1 << DB_EQ_EVNT_SHIFT;
250         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252         iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257         u32 val = 0;
258
259         val |= qid & DB_CQ_RING_ID_MASK;
260         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
262
263         if (be_check_error(adapter, BE_ERROR_HW))
264                 return;
265
266         if (arm)
267                 val |= 1 << DB_CQ_REARM_SHIFT;
268         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269         iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274         int i;
275
276         /* Check if mac has already been added as part of uc-list */
277         for (i = 0; i < adapter->uc_macs; i++) {
278                 if (ether_addr_equal((u8 *)&adapter->uc_list[i * ETH_ALEN],
279                                      mac)) {
280                         /* mac already added, skip addition */
281                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
282                         return 0;
283                 }
284         }
285
286         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
287                                &adapter->pmac_id[0], 0);
288 }
289
290 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
291 {
292         int i;
293
294         /* Skip deletion if the programmed mac is
295          * being used in uc-list
296          */
297         for (i = 0; i < adapter->uc_macs; i++) {
298                 if (adapter->pmac_id[i + 1] == pmac_id)
299                         return;
300         }
301         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
302 }
303
304 static int be_mac_addr_set(struct net_device *netdev, void *p)
305 {
306         struct be_adapter *adapter = netdev_priv(netdev);
307         struct device *dev = &adapter->pdev->dev;
308         struct sockaddr *addr = p;
309         int status;
310         u8 mac[ETH_ALEN];
311         u32 old_pmac_id = adapter->pmac_id[0];
312
313         if (!is_valid_ether_addr(addr->sa_data))
314                 return -EADDRNOTAVAIL;
315
316         /* Proceed further only if the user-provided MAC is different
317          * from the active MAC
318          */
319         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
320                 return 0;
321
322         /* if device is not running, copy MAC to netdev->dev_addr */
323         if (!netif_running(netdev))
324                 goto done;
325
326         /* The PMAC_ADD cmd may fail if the VF doesn't have the FILTMGMT
327          * privilege or if the PF did not provision the new MAC address.
328          * On BE3, this cmd will always fail if the VF doesn't have the
329          * FILTMGMT privilege. This failure is OK only if the PF programmed
330          * the MAC for the VF.
331          */
332         mutex_lock(&adapter->rx_filter_lock);
333         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
334         if (!status) {
335
336                 /* Delete the old programmed MAC. This call may fail if the
337                  * old MAC was already deleted by the PF driver.
338                  */
339                 if (adapter->pmac_id[0] != old_pmac_id)
340                         be_dev_mac_del(adapter, old_pmac_id);
341         }
342
343         mutex_unlock(&adapter->rx_filter_lock);
344         /* Decide if the new MAC is successfully activated only after
345          * querying the FW
346          */
347         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
348                                        adapter->if_handle, true, 0);
349         if (status)
350                 goto err;
351
352         /* The MAC change did not happen, either due to lack of privilege
353          * or because the PF didn't pre-provision the new MAC.
354          */
355         if (!ether_addr_equal(addr->sa_data, mac)) {
356                 status = -EPERM;
357                 goto err;
358         }
359 done:
360         ether_addr_copy(adapter->dev_mac, addr->sa_data);
361         ether_addr_copy(netdev->dev_addr, addr->sa_data);
362         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
363         return 0;
364 err:
365         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
366         return status;
367 }
368
369 /* BE2 supports only v0 cmd */
370 static void *hw_stats_from_cmd(struct be_adapter *adapter)
371 {
372         if (BE2_chip(adapter)) {
373                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
374
375                 return &cmd->hw_stats;
376         } else if (BE3_chip(adapter)) {
377                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
378
379                 return &cmd->hw_stats;
380         } else {
381                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
382
383                 return &cmd->hw_stats;
384         }
385 }
386
387 /* BE2 supports only v0 cmd */
388 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
389 {
390         if (BE2_chip(adapter)) {
391                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
392
393                 return &hw_stats->erx;
394         } else if (BE3_chip(adapter)) {
395                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
396
397                 return &hw_stats->erx;
398         } else {
399                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
400
401                 return &hw_stats->erx;
402         }
403 }
404
405 static void populate_be_v0_stats(struct be_adapter *adapter)
406 {
407         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
408         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
409         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
410         struct be_port_rxf_stats_v0 *port_stats =
411                                         &rxf_stats->port[adapter->port_num];
412         struct be_drv_stats *drvs = &adapter->drv_stats;
413
414         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
415         drvs->rx_pause_frames = port_stats->rx_pause_frames;
416         drvs->rx_crc_errors = port_stats->rx_crc_errors;
417         drvs->rx_control_frames = port_stats->rx_control_frames;
418         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
419         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
420         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
421         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
422         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
423         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
424         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
425         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
426         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
427         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
428         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
429         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
430         drvs->rx_dropped_header_too_small =
431                 port_stats->rx_dropped_header_too_small;
432         drvs->rx_address_filtered =
433                                         port_stats->rx_address_filtered +
434                                         port_stats->rx_vlan_filtered;
435         drvs->rx_alignment_symbol_errors =
436                 port_stats->rx_alignment_symbol_errors;
437
438         drvs->tx_pauseframes = port_stats->tx_pauseframes;
439         drvs->tx_controlframes = port_stats->tx_controlframes;
440
441         if (adapter->port_num)
442                 drvs->jabber_events = rxf_stats->port1_jabber_events;
443         else
444                 drvs->jabber_events = rxf_stats->port0_jabber_events;
445         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
446         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
447         drvs->forwarded_packets = rxf_stats->forwarded_packets;
448         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
449         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
450         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
451         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
452 }
453
454 static void populate_be_v1_stats(struct be_adapter *adapter)
455 {
456         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
457         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
458         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
459         struct be_port_rxf_stats_v1 *port_stats =
460                                         &rxf_stats->port[adapter->port_num];
461         struct be_drv_stats *drvs = &adapter->drv_stats;
462
463         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
464         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
465         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
466         drvs->rx_pause_frames = port_stats->rx_pause_frames;
467         drvs->rx_crc_errors = port_stats->rx_crc_errors;
468         drvs->rx_control_frames = port_stats->rx_control_frames;
469         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
470         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
471         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
472         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
473         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
474         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
475         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
476         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
477         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
478         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
479         drvs->rx_dropped_header_too_small =
480                 port_stats->rx_dropped_header_too_small;
481         drvs->rx_input_fifo_overflow_drop =
482                 port_stats->rx_input_fifo_overflow_drop;
483         drvs->rx_address_filtered = port_stats->rx_address_filtered;
484         drvs->rx_alignment_symbol_errors =
485                 port_stats->rx_alignment_symbol_errors;
486         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
487         drvs->tx_pauseframes = port_stats->tx_pauseframes;
488         drvs->tx_controlframes = port_stats->tx_controlframes;
489         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
490         drvs->jabber_events = port_stats->jabber_events;
491         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
492         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
493         drvs->forwarded_packets = rxf_stats->forwarded_packets;
494         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
495         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
496         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
497         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
498 }
499
500 static void populate_be_v2_stats(struct be_adapter *adapter)
501 {
502         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
503         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
504         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
505         struct be_port_rxf_stats_v2 *port_stats =
506                                         &rxf_stats->port[adapter->port_num];
507         struct be_drv_stats *drvs = &adapter->drv_stats;
508
509         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
510         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
511         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
512         drvs->rx_pause_frames = port_stats->rx_pause_frames;
513         drvs->rx_crc_errors = port_stats->rx_crc_errors;
514         drvs->rx_control_frames = port_stats->rx_control_frames;
515         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
516         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
517         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
518         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
519         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
520         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
521         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
522         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
523         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
524         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
525         drvs->rx_dropped_header_too_small =
526                 port_stats->rx_dropped_header_too_small;
527         drvs->rx_input_fifo_overflow_drop =
528                 port_stats->rx_input_fifo_overflow_drop;
529         drvs->rx_address_filtered = port_stats->rx_address_filtered;
530         drvs->rx_alignment_symbol_errors =
531                 port_stats->rx_alignment_symbol_errors;
532         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
533         drvs->tx_pauseframes = port_stats->tx_pauseframes;
534         drvs->tx_controlframes = port_stats->tx_controlframes;
535         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
536         drvs->jabber_events = port_stats->jabber_events;
537         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
538         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
539         drvs->forwarded_packets = rxf_stats->forwarded_packets;
540         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
541         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
542         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
543         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
544         if (be_roce_supported(adapter)) {
545                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
546                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
547                 drvs->rx_roce_frames = port_stats->roce_frames_received;
548                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
549                 drvs->roce_drops_payload_len =
550                         port_stats->roce_drops_payload_len;
551         }
552 }
553
554 static void populate_lancer_stats(struct be_adapter *adapter)
555 {
556         struct be_drv_stats *drvs = &adapter->drv_stats;
557         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
558
559         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
560         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
561         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
562         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
563         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
564         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
565         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
566         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
567         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
568         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
569         drvs->rx_dropped_tcp_length =
570                                 pport_stats->rx_dropped_invalid_tcp_length;
571         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
572         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
573         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
574         drvs->rx_dropped_header_too_small =
575                                 pport_stats->rx_dropped_header_too_small;
576         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
577         drvs->rx_address_filtered =
578                                         pport_stats->rx_address_filtered +
579                                         pport_stats->rx_vlan_filtered;
580         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
581         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
582         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
583         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
584         drvs->jabber_events = pport_stats->rx_jabbers;
585         drvs->forwarded_packets = pport_stats->num_forwards_lo;
586         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
587         drvs->rx_drops_too_many_frags =
588                                 pport_stats->rx_drops_too_many_frags_lo;
589 }
590
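/* Fold a wrapping 16-bit HW counter into a 32-bit accumulator.
 * E.g., *acc = 0x0001fff0 and val = 0x0005 means the HW counter wrapped,
 * so the accumulated value becomes 0x00020005.
 */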
591 static void accumulate_16bit_val(u32 *acc, u16 val)
592 {
593 #define lo(x)                   (x & 0xFFFF)
594 #define hi(x)                   (x & 0xFFFF0000)
595         bool wrapped = val < lo(*acc);
596         u32 newacc = hi(*acc) + val;
597
598         if (wrapped)
599                 newacc += 65536;
600         ACCESS_ONCE(*acc) = newacc;
601 }
602
603 static void populate_erx_stats(struct be_adapter *adapter,
604                                struct be_rx_obj *rxo, u32 erx_stat)
605 {
606         if (!BEx_chip(adapter))
607                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
608         else
609                 /* the erx HW counter below can wrap around after
610                  * 65535; the driver accumulates it into a 32-bit value
611                  */
612                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
613                                      (u16)erx_stat);
614 }
615
616 void be_parse_stats(struct be_adapter *adapter)
617 {
618         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
619         struct be_rx_obj *rxo;
620         int i;
621         u32 erx_stat;
622
623         if (lancer_chip(adapter)) {
624                 populate_lancer_stats(adapter);
625         } else {
626                 if (BE2_chip(adapter))
627                         populate_be_v0_stats(adapter);
628                 else if (BE3_chip(adapter))
629                         /* for BE3 */
630                         populate_be_v1_stats(adapter);
631                 else
632                         populate_be_v2_stats(adapter);
633
634                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
635                 for_all_rx_queues(adapter, rxo, i) {
636                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
637                         populate_erx_stats(adapter, rxo, erx_stat);
638                 }
639         }
640 }
641
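/* ndo_get_stats64 handler: aggregate the per-queue SW counters and the
 * FW-reported error counters into the rtnl_link_stats64 structure
 */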
642 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
643                                                 struct rtnl_link_stats64 *stats)
644 {
645         struct be_adapter *adapter = netdev_priv(netdev);
646         struct be_drv_stats *drvs = &adapter->drv_stats;
647         struct be_rx_obj *rxo;
648         struct be_tx_obj *txo;
649         u64 pkts, bytes;
650         unsigned int start;
651         int i;
652
653         for_all_rx_queues(adapter, rxo, i) {
654                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
655
656                 do {
657                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
658                         pkts = rx_stats(rxo)->rx_pkts;
659                         bytes = rx_stats(rxo)->rx_bytes;
660                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
661                 stats->rx_packets += pkts;
662                 stats->rx_bytes += bytes;
663                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
664                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
665                                         rx_stats(rxo)->rx_drops_no_frags;
666         }
667
668         for_all_tx_queues(adapter, txo, i) {
669                 const struct be_tx_stats *tx_stats = tx_stats(txo);
670
671                 do {
672                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
673                         pkts = tx_stats(txo)->tx_pkts;
674                         bytes = tx_stats(txo)->tx_bytes;
675                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
676                 stats->tx_packets += pkts;
677                 stats->tx_bytes += bytes;
678         }
679
680         /* bad pkts received */
681         stats->rx_errors = drvs->rx_crc_errors +
682                 drvs->rx_alignment_symbol_errors +
683                 drvs->rx_in_range_errors +
684                 drvs->rx_out_range_errors +
685                 drvs->rx_frame_too_long +
686                 drvs->rx_dropped_too_small +
687                 drvs->rx_dropped_too_short +
688                 drvs->rx_dropped_header_too_small +
689                 drvs->rx_dropped_tcp_length +
690                 drvs->rx_dropped_runt;
691
692         /* detailed rx errors */
693         stats->rx_length_errors = drvs->rx_in_range_errors +
694                 drvs->rx_out_range_errors +
695                 drvs->rx_frame_too_long;
696
697         stats->rx_crc_errors = drvs->rx_crc_errors;
698
699         /* frame alignment errors */
700         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
701
702         /* receiver fifo overrun */
703         /* drops_no_pbuf is not per i/f, it's per BE card */
704         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
705                                 drvs->rx_input_fifo_overflow_drop +
706                                 drvs->rx_drops_no_pbuf;
707         return stats;
708 }
709
710 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
711 {
712         struct net_device *netdev = adapter->netdev;
713
714         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
715                 netif_carrier_off(netdev);
716                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
717         }
718
719         if (link_status)
720                 netif_carrier_on(netdev);
721         else
722                 netif_carrier_off(netdev);
723
724         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
725 }
726
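/* Length of the headers (up to and including TCP) that are replicated
 * in each segment of a GSO/TSO pkt
 */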
727 static int be_gso_hdr_len(struct sk_buff *skb)
728 {
729         if (skb->encapsulation)
730                 return skb_inner_transport_offset(skb) +
731                        inner_tcp_hdrlen(skb);
732         return skb_transport_offset(skb) + tcp_hdrlen(skb);
733 }
734
735 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
736 {
737         struct be_tx_stats *stats = tx_stats(txo);
738         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
739         /* Account for headers which get duplicated in TSO pkt */
740         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
741
742         u64_stats_update_begin(&stats->sync);
743         stats->tx_reqs++;
744         stats->tx_bytes += skb->len + dup_hdr_len;
745         stats->tx_pkts += tx_pkts;
746         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
747                 stats->tx_vxlan_offload_pkts += tx_pkts;
748         u64_stats_update_end(&stats->sync);
749 }
750
751 /* Returns number of WRBs needed for the skb */
752 static u32 skb_wrb_cnt(struct sk_buff *skb)
753 {
754         /* +1 for the header wrb */
755         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
756 }
757
758 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
759 {
760         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
761         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
762         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
763         wrb->rsvd0 = 0;
764 }
765
766 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
767  * to avoid the swap and shift/mask operations in wrb_fill().
768  */
769 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
770 {
771         wrb->frag_pa_hi = 0;
772         wrb->frag_pa_lo = 0;
773         wrb->frag_len = 0;
774         wrb->rsvd0 = 0;
775 }
776
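/* Return the vlan tag to place in the TX WRB; if the skb's priority is
 * not in the available priority bmap, substitute the recommended priority
 */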
777 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
778                                      struct sk_buff *skb)
779 {
780         u8 vlan_prio;
781         u16 vlan_tag;
782
783         vlan_tag = skb_vlan_tag_get(skb);
784         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
785         /* If vlan priority provided by OS is NOT in available bmap */
786         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
787                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
788                                 adapter->recommended_prio_bits;
789
790         return vlan_tag;
791 }
792
793 /* Used only for IP tunnel packets */
794 static u16 skb_inner_ip_proto(struct sk_buff *skb)
795 {
796         return (inner_ip_hdr(skb)->version == 4) ?
797                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
798 }
799
800 static u16 skb_ip_proto(struct sk_buff *skb)
801 {
802         return (ip_hdr(skb)->version == 4) ?
803                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
804 }
805
806 static inline bool be_is_txq_full(struct be_tx_obj *txo)
807 {
808         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
809 }
810
811 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
812 {
813         return atomic_read(&txo->q.used) < txo->q.len / 2;
814 }
815
816 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
817 {
818         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
819 }
820
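/* Derive the WRB feature flags (LSO/LSO6, IP/TCP/UDP csum offload, vlan)
 * for this skb
 */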
821 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
822                                        struct sk_buff *skb,
823                                        struct be_wrb_params *wrb_params)
824 {
825         u16 proto;
826
827         if (skb_is_gso(skb)) {
828                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
829                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
830                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
831                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
832         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
833                 if (skb->encapsulation) {
834                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
835                         proto = skb_inner_ip_proto(skb);
836                 } else {
837                         proto = skb_ip_proto(skb);
838                 }
839                 if (proto == IPPROTO_TCP)
840                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
841                 else if (proto == IPPROTO_UDP)
842                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
843         }
844
845         if (skb_vlan_tag_present(skb)) {
846                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
847                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
848         }
849
850         BE_WRB_F_SET(wrb_params->features, CRC, 1);
851 }
852
853 static void wrb_fill_hdr(struct be_adapter *adapter,
854                          struct be_eth_hdr_wrb *hdr,
855                          struct be_wrb_params *wrb_params,
856                          struct sk_buff *skb)
857 {
858         memset(hdr, 0, sizeof(*hdr));
859
860         SET_TX_WRB_HDR_BITS(crc, hdr,
861                             BE_WRB_F_GET(wrb_params->features, CRC));
862         SET_TX_WRB_HDR_BITS(ipcs, hdr,
863                             BE_WRB_F_GET(wrb_params->features, IPCS));
864         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
865                             BE_WRB_F_GET(wrb_params->features, TCPCS));
866         SET_TX_WRB_HDR_BITS(udpcs, hdr,
867                             BE_WRB_F_GET(wrb_params->features, UDPCS));
868
869         SET_TX_WRB_HDR_BITS(lso, hdr,
870                             BE_WRB_F_GET(wrb_params->features, LSO));
871         SET_TX_WRB_HDR_BITS(lso6, hdr,
872                             BE_WRB_F_GET(wrb_params->features, LSO6));
873         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
874
875         /* The hack to skip HW VLAN tagging needs evt = 1, compl = 0. When
876          * this hack is not needed, the evt bit is set while ringing the DB.
877          */
878         SET_TX_WRB_HDR_BITS(event, hdr,
879                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
880         SET_TX_WRB_HDR_BITS(vlan, hdr,
881                             BE_WRB_F_GET(wrb_params->features, VLAN));
882         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
883
884         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
885         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
886         SET_TX_WRB_HDR_BITS(mgmt, hdr,
887                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
888 }
889
890 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
891                           bool unmap_single)
892 {
893         dma_addr_t dma;
894         u32 frag_len = le32_to_cpu(wrb->frag_len);
895
896
897         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
898                 (u64)le32_to_cpu(wrb->frag_pa_lo);
899         if (frag_len) {
900                 if (unmap_single)
901                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
902                 else
903                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
904         }
905 }
906
907 /* Grab a WRB header for xmit */
908 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
909 {
910         u32 head = txo->q.head;
911
912         queue_head_inc(&txo->q);
913         return head;
914 }
915
916 /* Set up the WRB header for xmit */
917 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
918                                 struct be_tx_obj *txo,
919                                 struct be_wrb_params *wrb_params,
920                                 struct sk_buff *skb, u16 head)
921 {
922         u32 num_frags = skb_wrb_cnt(skb);
923         struct be_queue_info *txq = &txo->q;
924         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
925
926         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
927         be_dws_cpu_to_le(hdr, sizeof(*hdr));
928
929         BUG_ON(txo->sent_skb_list[head]);
930         txo->sent_skb_list[head] = skb;
931         txo->last_req_hdr = head;
932         atomic_add(num_frags, &txq->used);
933         txo->last_req_wrb_cnt = num_frags;
934         txo->pend_wrb_cnt += num_frags;
935 }
936
937 /* Setup a WRB fragment (buffer descriptor) for xmit */
938 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
939                                  int len)
940 {
941         struct be_eth_wrb *wrb;
942         struct be_queue_info *txq = &txo->q;
943
944         wrb = queue_head_node(txq);
945         wrb_fill(wrb, busaddr, len);
946         queue_head_inc(txq);
947 }
948
949 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
950  * was invoked. The producer index is restored to the previous packet and the
951  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
952  */
953 static void be_xmit_restore(struct be_adapter *adapter,
954                             struct be_tx_obj *txo, u32 head, bool map_single,
955                             u32 copied)
956 {
957         struct device *dev;
958         struct be_eth_wrb *wrb;
959         struct be_queue_info *txq = &txo->q;
960
961         dev = &adapter->pdev->dev;
962         txq->head = head;
963
964         /* skip the first wrb (hdr); it's not mapped */
965         queue_head_inc(txq);
966         while (copied) {
967                 wrb = queue_head_node(txq);
968                 unmap_tx_frag(dev, wrb, map_single);
969                 map_single = false;
970                 copied -= le32_to_cpu(wrb->frag_len);
971                 queue_head_inc(txq);
972         }
973
974         txq->head = head;
975 }
976
977 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
978  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
979  * of WRBs used up by the packet.
980  */
981 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
982                            struct sk_buff *skb,
983                            struct be_wrb_params *wrb_params)
984 {
985         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
986         struct device *dev = &adapter->pdev->dev;
987         struct be_queue_info *txq = &txo->q;
988         bool map_single = false;
989         u32 head = txq->head;
990         dma_addr_t busaddr;
991         int len;
992
993         head = be_tx_get_wrb_hdr(txo);
994
995         if (skb->len > skb->data_len) {
996                 len = skb_headlen(skb);
997
998                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
999                 if (dma_mapping_error(dev, busaddr))
1000                         goto dma_err;
1001                 map_single = true;
1002                 be_tx_setup_wrb_frag(txo, busaddr, len);
1003                 copied += len;
1004         }
1005
1006         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1007                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1008                 len = skb_frag_size(frag);
1009
1010                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1011                 if (dma_mapping_error(dev, busaddr))
1012                         goto dma_err;
1013                 be_tx_setup_wrb_frag(txo, busaddr, len);
1014                 copied += len;
1015         }
1016
1017         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1018
1019         be_tx_stats_update(txo, skb);
1020         return wrb_cnt;
1021
1022 dma_err:
1023         adapter->drv_stats.dma_map_errors++;
1024         be_xmit_restore(adapter, txo, head, map_single, copied);
1025         return 0;
1026 }
1027
1028 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1029 {
1030         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1031 }
1032
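/* Insert the vlan tag (and the outer QnQ vlan, if configured) directly
 * into the packet data so that HW vlan tagging can be skipped
 */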
1033 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1034                                              struct sk_buff *skb,
1035                                              struct be_wrb_params
1036                                              *wrb_params)
1037 {
1038         u16 vlan_tag = 0;
1039
1040         skb = skb_share_check(skb, GFP_ATOMIC);
1041         if (unlikely(!skb))
1042                 return skb;
1043
1044         if (skb_vlan_tag_present(skb))
1045                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1046
1047         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1048                 if (!vlan_tag)
1049                         vlan_tag = adapter->pvid;
1050                 /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W
1051                  * to skip VLAN insertion
1052                  */
1053                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1054         }
1055
1056         if (vlan_tag) {
1057                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1058                                                 vlan_tag);
1059                 if (unlikely(!skb))
1060                         return skb;
1061                 skb->vlan_tci = 0;
1062         }
1063
1064         /* Insert the outer VLAN, if any */
1065         if (adapter->qnq_vid) {
1066                 vlan_tag = adapter->qnq_vid;
1067                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1068                                                 vlan_tag);
1069                 if (unlikely(!skb))
1070                         return skb;
1071                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1072         }
1073
1074         return skb;
1075 }
1076
1077 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1078 {
1079         struct ethhdr *eh = (struct ethhdr *)skb->data;
1080         u16 offset = ETH_HLEN;
1081
1082         if (eh->h_proto == htons(ETH_P_IPV6)) {
1083                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1084
1085                 offset += sizeof(struct ipv6hdr);
1086                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1087                     ip6h->nexthdr != NEXTHDR_UDP) {
1088                         struct ipv6_opt_hdr *ehdr =
1089                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1090
1091                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1092                         if (ehdr->hdrlen == 0xff)
1093                                 return true;
1094                 }
1095         }
1096         return false;
1097 }
1098
1099 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1100 {
1101         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1102 }
1103
1104 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1105 {
1106         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1107 }
1108
1109 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1110                                                   struct sk_buff *skb,
1111                                                   struct be_wrb_params
1112                                                   *wrb_params)
1113 {
1114         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1115         unsigned int eth_hdr_len;
1116         struct iphdr *ip;
1117
1118         /* For padded packets, BE HW modifies the tot_len field in the IP
1119          * header incorrectly when the VLAN tag is inserted by HW.
1120          * For padded packets, Lancer computes an incorrect checksum.
1121          */
1122         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1123                                                 VLAN_ETH_HLEN : ETH_HLEN;
1124         if (skb->len <= 60 &&
1125             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1126             is_ipv4_pkt(skb)) {
1127                 ip = (struct iphdr *)ip_hdr(skb);
1128                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1129         }
1130
1131         /* If vlan tag is already inlined in the packet, skip HW VLAN
1132          * tagging in pvid-tagging mode
1133          */
1134         if (be_pvid_tagging_enabled(adapter) &&
1135             veh->h_vlan_proto == htons(ETH_P_8021Q))
1136                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1137
1138         /* HW has a bug wherein it will calculate CSUM for VLAN
1139          * pkts even though CSUM offload is disabled.
1140          * Manually insert the VLAN in the pkt.
1141          */
1142         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1143             skb_vlan_tag_present(skb)) {
1144                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1145                 if (unlikely(!skb))
1146                         goto err;
1147         }
1148
1149         /* HW may lockup when VLAN HW tagging is requested on
1150          * certain ipv6 packets. Drop such pkts if the HW workaround to
1151          * skip HW tagging is not enabled by FW.
1152          */
1153         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1154                      (adapter->pvid || adapter->qnq_vid) &&
1155                      !qnq_async_evt_rcvd(adapter)))
1156                 goto tx_drop;
1157
1158         /* Manual VLAN tag insertion to prevent:
1159          * ASIC lockup when the ASIC inserts VLAN tag into
1160          * certain ipv6 packets. Insert VLAN tags in driver,
1161          * and set event, completion, vlan bits accordingly
1162          * in the Tx WRB.
1163          */
1164         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1165             be_vlan_tag_tx_chk(adapter, skb)) {
1166                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1167                 if (unlikely(!skb))
1168                         goto err;
1169         }
1170
1171         return skb;
1172 tx_drop:
1173         dev_kfree_skb_any(skb);
1174 err:
1175         return NULL;
1176 }
1177
1178 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1179                                            struct sk_buff *skb,
1180                                            struct be_wrb_params *wrb_params)
1181 {
1182         int err;
1183
1184         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1185          * packets that are 32b or less may cause a transmit stall
1186          * on that port. The workaround is to pad such packets
1187          * (len <= 32 bytes) to a minimum length of 36b.
1188          */
1189         if (skb->len <= 32) {
1190                 if (skb_put_padto(skb, 36))
1191                         return NULL;
1192         }
1193
1194         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1195                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1196                 if (!skb)
1197                         return NULL;
1198         }
1199
1200         /* The stack can send us skbs with length greater than
1201          * what the HW can handle. Trim the extra bytes.
1202          */
1203         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1204         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1205         WARN_ON(err);
1206
1207         return skb;
1208 }
1209
1210 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1211 {
1212         struct be_queue_info *txq = &txo->q;
1213         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1214
1215         /* Mark the last request eventable if it hasn't been marked already */
1216         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1217                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1218
1219         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1220         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1221                 wrb_fill_dummy(queue_head_node(txq));
1222                 queue_head_inc(txq);
1223                 atomic_inc(&txq->used);
1224                 txo->pend_wrb_cnt++;
1225                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1226                                            TX_HDR_WRB_NUM_SHIFT);
1227                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1228                                           TX_HDR_WRB_NUM_SHIFT);
1229         }
1230         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1231         txo->pend_wrb_cnt = 0;
1232 }
1233
1234 /* OS2BMC related */
1235
1236 #define DHCP_CLIENT_PORT        68
1237 #define DHCP_SERVER_PORT        67
1238 #define NET_BIOS_PORT1          137
1239 #define NET_BIOS_PORT2          138
1240 #define DHCPV6_RAS_PORT         547
1241
1242 #define is_mc_allowed_on_bmc(adapter, eh)       \
1243         (!is_multicast_filt_enabled(adapter) && \
1244          is_multicast_ether_addr(eh->h_dest) && \
1245          !is_broadcast_ether_addr(eh->h_dest))
1246
1247 #define is_bc_allowed_on_bmc(adapter, eh)       \
1248         (!is_broadcast_filt_enabled(adapter) && \
1249          is_broadcast_ether_addr(eh->h_dest))
1250
1251 #define is_arp_allowed_on_bmc(adapter, skb)     \
1252         (is_arp(skb) && is_arp_filt_enabled(adapter))
1253
1254 #define is_broadcast_packet(eh, adapter)        \
1255                 (is_multicast_ether_addr(eh->h_dest) && \
1256                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1257
1258 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1259
1260 #define is_arp_filt_enabled(adapter)    \
1261                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1262
1263 #define is_dhcp_client_filt_enabled(adapter)    \
1264                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1265
1266 #define is_dhcp_srvr_filt_enabled(adapter)      \
1267                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1268
1269 #define is_nbios_filt_enabled(adapter)  \
1270                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1271
1272 #define is_ipv6_na_filt_enabled(adapter)        \
1273                 (adapter->bmc_filt_mask &       \
1274                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1275
1276 #define is_ipv6_ra_filt_enabled(adapter)        \
1277                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1278
1279 #define is_ipv6_ras_filt_enabled(adapter)       \
1280                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1281
1282 #define is_broadcast_filt_enabled(adapter)      \
1283                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1284
1285 #define is_multicast_filt_enabled(adapter)      \
1286                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1287
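/* Decide whether this TX pkt must also be sent to the BMC, based on the
 * OS2BMC filters configured in FW
 */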
1288 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1289                                struct sk_buff **skb)
1290 {
1291         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1292         bool os2bmc = false;
1293
1294         if (!be_is_os2bmc_enabled(adapter))
1295                 goto done;
1296
1297         if (!is_multicast_ether_addr(eh->h_dest))
1298                 goto done;
1299
1300         if (is_mc_allowed_on_bmc(adapter, eh) ||
1301             is_bc_allowed_on_bmc(adapter, eh) ||
1302             is_arp_allowed_on_bmc(adapter, (*skb))) {
1303                 os2bmc = true;
1304                 goto done;
1305         }
1306
1307         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1308                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1309                 u8 nexthdr = hdr->nexthdr;
1310
1311                 if (nexthdr == IPPROTO_ICMPV6) {
1312                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1313
1314                         switch (icmp6->icmp6_type) {
1315                         case NDISC_ROUTER_ADVERTISEMENT:
1316                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1317                                 goto done;
1318                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1319                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1320                                 goto done;
1321                         default:
1322                                 break;
1323                         }
1324                 }
1325         }
1326
1327         if (is_udp_pkt((*skb))) {
1328                 struct udphdr *udp = udp_hdr((*skb));
1329
1330                 switch (ntohs(udp->dest)) {
1331                 case DHCP_CLIENT_PORT:
1332                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1333                         goto done;
1334                 case DHCP_SERVER_PORT:
1335                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1336                         goto done;
1337                 case NET_BIOS_PORT1:
1338                 case NET_BIOS_PORT2:
1339                         os2bmc = is_nbios_filt_enabled(adapter);
1340                         goto done;
1341                 case DHCPV6_RAS_PORT:
1342                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1343                         goto done;
1344                 default:
1345                         break;
1346                 }
1347         }
1348 done:
1349         /* For vlan packets that are destined to the BMC, the asic
1350          * expects the vlan tag to be inline in the packet.
1351          */
1352         if (os2bmc)
1353                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1354
1355         return os2bmc;
1356 }
1357
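/* Transmit (ndo_start_xmit) handler: apply HW workarounds, map and enqueue
 * the WRBs for the pkt and ring the TX doorbell when flushing
 */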
1358 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1359 {
1360         struct be_adapter *adapter = netdev_priv(netdev);
1361         u16 q_idx = skb_get_queue_mapping(skb);
1362         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1363         struct be_wrb_params wrb_params = { 0 };
1364         bool flush = !skb->xmit_more;
1365         u16 wrb_cnt;
1366
1367         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1368         if (unlikely(!skb))
1369                 goto drop;
1370
1371         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1372
1373         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1374         if (unlikely(!wrb_cnt)) {
1375                 dev_kfree_skb_any(skb);
1376                 goto drop;
1377         }
1378
1379         /* If OS2BMC is enabled and the pkt is destined to the BMC,
1380          * enqueue the pkt a second time with the mgmt bit set.
1381          */
1382         if (be_send_pkt_to_bmc(adapter, &skb)) {
1383                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1384                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1385                 if (unlikely(!wrb_cnt))
1386                         goto drop;
1387                 else
1388                         skb_get(skb);
1389         }
1390
1391         if (be_is_txq_full(txo)) {
1392                 netif_stop_subqueue(netdev, q_idx);
1393                 tx_stats(txo)->tx_stops++;
1394         }
1395
1396         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1397                 be_xmit_flush(adapter, txo);
1398
1399         return NETDEV_TX_OK;
1400 drop:
1401         tx_stats(txo)->tx_drv_drops++;
1402         /* Flush the already enqueued tx requests */
1403         if (flush && txo->pend_wrb_cnt)
1404                 be_xmit_flush(adapter, txo);
1405
1406         return NETDEV_TX_OK;
1407 }
1408
1409 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1410 {
1411         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1412                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1413 }
1414
1415 static int be_set_vlan_promisc(struct be_adapter *adapter)
1416 {
1417         struct device *dev = &adapter->pdev->dev;
1418         int status;
1419
1420         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1421                 return 0;
1422
1423         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1424         if (!status) {
1425                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1426                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1427         } else {
1428                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1429         }
1430         return status;
1431 }
1432
1433 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1434 {
1435         struct device *dev = &adapter->pdev->dev;
1436         int status;
1437
1438         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1439         if (!status) {
1440                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1441                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1442         }
1443         return status;
1444 }
1445
1446 /*
1447  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1448  * If the user configures more, place BE in vlan promiscuous mode.
1449  */
1450 static int be_vid_config(struct be_adapter *adapter)
1451 {
1452         struct device *dev = &adapter->pdev->dev;
1453         u16 vids[BE_NUM_VLANS_SUPPORTED];
1454         u16 num = 0, i = 0;
1455         int status = 0;
1456
1457         /* No need to change the VLAN state if the I/F is in promiscuous */
1458         if (adapter->netdev->flags & IFF_PROMISC)
1459                 return 0;
1460
1461         if (adapter->vlans_added > be_max_vlans(adapter))
1462                 return be_set_vlan_promisc(adapter);
1463
1464         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1465                 status = be_clear_vlan_promisc(adapter);
1466                 if (status)
1467                         return status;
1468         }
1469         /* Construct VLAN Table to give to HW */
1470         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1471                 vids[num++] = cpu_to_le16(i);
1472
1473         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1474         if (status) {
1475                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1476                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1477                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1478                     addl_status(status) ==
1479                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1480                         return be_set_vlan_promisc(adapter);
1481         }
1482         return status;
1483 }
1484
1485 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1486 {
1487         struct be_adapter *adapter = netdev_priv(netdev);
1488         int status = 0;
1489
1490         mutex_lock(&adapter->rx_filter_lock);
1491
1492         /* Packets with VID 0 are always received by Lancer by default */
1493         if (lancer_chip(adapter) && vid == 0)
1494                 goto done;
1495
1496         if (test_bit(vid, adapter->vids))
1497                 goto done;
1498
1499         set_bit(vid, adapter->vids);
1500         adapter->vlans_added++;
1501
1502         status = be_vid_config(adapter);
1503 done:
1504         mutex_unlock(&adapter->rx_filter_lock);
1505         return status;
1506 }
1507
1508 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1509 {
1510         struct be_adapter *adapter = netdev_priv(netdev);
1511         int status = 0;
1512
1513         mutex_lock(&adapter->rx_filter_lock);
1514
1515         /* Packets with VID 0 are always received by Lancer by default */
1516         if (lancer_chip(adapter) && vid == 0)
1517                 goto done;
1518
1519         if (!test_bit(vid, adapter->vids))
1520                 goto done;
1521
1522         clear_bit(vid, adapter->vids);
1523         adapter->vlans_added--;
1524
1525         status = be_vid_config(adapter);
1526 done:
1527         mutex_unlock(&adapter->rx_filter_lock);
1528         return status;
1529 }
1530
1531 static void be_set_all_promisc(struct be_adapter *adapter)
1532 {
1533         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1534         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1535 }
1536
1537 static void be_set_mc_promisc(struct be_adapter *adapter)
1538 {
1539         int status;
1540
1541         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1542                 return;
1543
1544         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1545         if (!status)
1546                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1547 }
1548
1549 static void be_set_uc_promisc(struct be_adapter *adapter)
1550 {
1551         int status;
1552
1553         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1554                 return;
1555
1556         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1557         if (!status)
1558                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1559 }
1560
1561 static void be_clear_uc_promisc(struct be_adapter *adapter)
1562 {
1563         int status;
1564
1565         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1566                 return;
1567
1568         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1569         if (!status)
1570                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1571 }
1572
1573 /* The two functions below are the callback args for __dev_mc_sync()/__dev_uc_sync().
1574  * We use a single callback function for both sync and unsync. We don't really
1575  * add/remove addresses through this callback; we only use it to detect changes
1576  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1577  */
1578 static int be_uc_list_update(struct net_device *netdev,
1579                              const unsigned char *addr)
1580 {
1581         struct be_adapter *adapter = netdev_priv(netdev);
1582
1583         adapter->update_uc_list = true;
1584         return 0;
1585 }
1586
1587 static int be_mc_list_update(struct net_device *netdev,
1588                              const unsigned char *addr)
1589 {
1590         struct be_adapter *adapter = netdev_priv(netdev);
1591
1592         adapter->update_mc_list = true;
1593         return 0;
1594 }
1595
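/* Program the multicast filter. The mc-list is snapshotted into
 * adapter->mc_list under the netdev addr lock; the actual filter update
 * (or the fallback to mcast-promisc when IFF_ALLMULTI is set or the list
 * exceeds be_max_mc()) is issued outside the lock.
 */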
1596 static void be_set_mc_list(struct be_adapter *adapter)
1597 {
1598         struct net_device *netdev = adapter->netdev;
1599         struct netdev_hw_addr *ha;
1600         bool mc_promisc = false;
1601         int status;
1602
1603         netif_addr_lock_bh(netdev);
1604         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1605
1606         if (netdev->flags & IFF_PROMISC) {
1607                 adapter->update_mc_list = false;
1608         } else if (netdev->flags & IFF_ALLMULTI ||
1609                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1610                 /* Enable multicast promisc if the number of addresses
1611                  * configured exceeds what we support
1612                  */
1613                 mc_promisc = true;
1614                 adapter->update_mc_list = false;
1615         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1616                 /* Update mc-list unconditionally if the iface was previously
1617                  * in mc-promisc mode and now is out of that mode.
1618                  */
1619                 adapter->update_mc_list = true;
1620         }
1621
1622         if (adapter->update_mc_list) {
1623                 int i = 0;
1624
1625                 /* cache the mc-list in adapter */
1626                 netdev_for_each_mc_addr(ha, netdev) {
1627                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1628                         i++;
1629                 }
1630                 adapter->mc_count = netdev_mc_count(netdev);
1631         }
1632         netif_addr_unlock_bh(netdev);
1633
1634         if (mc_promisc) {
1635                 be_set_mc_promisc(adapter);
1636         } else if (adapter->update_mc_list) {
1637                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1638                 if (!status)
1639                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1640                 else
1641                         be_set_mc_promisc(adapter);
1642
1643                 adapter->update_mc_list = false;
1644         }
1645 }
1646
1647 static void be_clear_mc_list(struct be_adapter *adapter)
1648 {
1649         struct net_device *netdev = adapter->netdev;
1650
1651         __dev_mc_unsync(netdev, NULL);
1652         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1653         adapter->mc_count = 0;
1654 }
1655
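/* Add the uc-list entry at uc_idx as a pmac. If the address matches the
 * primary MAC (dev_mac), reuse pmac_id[0] instead of consuming another
 * pmac entry; otherwise program a new pmac on the interface.
 */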
1656 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1657 {
1658         if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
1659                              adapter->dev_mac)) {
1660                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1661                 return 0;
1662         }
1663
1664         return be_cmd_pmac_add(adapter,
1665                                (u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
1666                                adapter->if_handle,
1667                                &adapter->pmac_id[uc_idx + 1], 0);
1668 }
1669
1670 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1671 {
1672         if (pmac_id == adapter->pmac_id[0])
1673                 return;
1674
1675         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1676 }
1677
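/* Program the unicast filter. Slot 0 of uc_list/pmac_id is reserved for
 * the primary MAC, so only be_max_uc() - 1 additional addresses fit;
 * beyond that the interface falls back to uc-promisc mode. As with the
 * mc path, the list is cached under the addr lock and programmed outside it.
 */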
1678 static void be_set_uc_list(struct be_adapter *adapter)
1679 {
1680         struct net_device *netdev = adapter->netdev;
1681         struct netdev_hw_addr *ha;
1682         bool uc_promisc = false;
1683         int curr_uc_macs = 0, i;
1684
1685         netif_addr_lock_bh(netdev);
1686         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1687
1688         if (netdev->flags & IFF_PROMISC) {
1689                 adapter->update_uc_list = false;
1690         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1691                 uc_promisc = true;
1692                 adapter->update_uc_list = false;
1693         } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1694                 /* Update uc-list unconditionally if the iface was previously
1695                  * in uc-promisc mode and now is out of that mode.
1696                  */
1697                 adapter->update_uc_list = true;
1698         }
1699
1700         if (adapter->update_uc_list) {
1701                 i = 1; /* First slot is claimed by the Primary MAC */
1702
1703                 /* cache the uc-list in adapter array */
1704                 netdev_for_each_uc_addr(ha, netdev) {
1705                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1706                         i++;
1707                 }
1708                 curr_uc_macs = netdev_uc_count(netdev);
1709         }
1710         netif_addr_unlock_bh(netdev);
1711
1712         if (uc_promisc) {
1713                 be_set_uc_promisc(adapter);
1714         } else if (adapter->update_uc_list) {
1715                 be_clear_uc_promisc(adapter);
1716
1717                 for (i = 0; i < adapter->uc_macs; i++)
1718                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1719
1720                 for (i = 0; i < curr_uc_macs; i++)
1721                         be_uc_mac_add(adapter, i);
1722                 adapter->uc_macs = curr_uc_macs;
1723                 adapter->update_uc_list = false;
1724         }
1725 }
1726
1727 static void be_clear_uc_list(struct be_adapter *adapter)
1728 {
1729         struct net_device *netdev = adapter->netdev;
1730         int i;
1731
1732         __dev_uc_unsync(netdev, NULL);
1733         for (i = 0; i < adapter->uc_macs; i++)
1734                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1735
1736         adapter->uc_macs = 0;
1737 }
1738
1739 static void __be_set_rx_mode(struct be_adapter *adapter)
1740 {
1741         struct net_device *netdev = adapter->netdev;
1742
1743         mutex_lock(&adapter->rx_filter_lock);
1744
1745         if (netdev->flags & IFF_PROMISC) {
1746                 if (!be_in_all_promisc(adapter))
1747                         be_set_all_promisc(adapter);
1748         } else if (be_in_all_promisc(adapter)) {
1749                 /* We need to re-program the vlan-list or clear
1750                  * vlan-promisc mode (if needed) when the interface
1751                  * comes out of promisc mode.
1752                  */
1753                 be_vid_config(adapter);
1754         }
1755
1756         be_set_uc_list(adapter);
1757         be_set_mc_list(adapter);
1758
1759         mutex_unlock(&adapter->rx_filter_lock);
1760 }
1761
1762 static void be_work_set_rx_mode(struct work_struct *work)
1763 {
1764         struct be_cmd_work *cmd_work =
1765                                 container_of(work, struct be_cmd_work, work);
1766
1767         __be_set_rx_mode(cmd_work->adapter);
1768         kfree(cmd_work);
1769 }
1770
1771 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1772 {
1773         struct be_adapter *adapter = netdev_priv(netdev);
1774         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1775         int status;
1776
1777         if (!sriov_enabled(adapter))
1778                 return -EPERM;
1779
1780         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1781                 return -EINVAL;
1782
1783         /* Proceed further only if user provided MAC is different
1784          * from active MAC
1785          */
1786         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1787                 return 0;
1788
1789         if (BEx_chip(adapter)) {
1790                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1791                                 vf + 1);
1792
1793                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1794                                          &vf_cfg->pmac_id, vf + 1);
1795         } else {
1796                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1797                                         vf + 1);
1798         }
1799
1800         if (status) {
1801                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1802                         mac, vf, status);
1803                 return be_cmd_status(status);
1804         }
1805
1806         ether_addr_copy(vf_cfg->mac_addr, mac);
1807
1808         return 0;
1809 }
1810
1811 static int be_get_vf_config(struct net_device *netdev, int vf,
1812                             struct ifla_vf_info *vi)
1813 {
1814         struct be_adapter *adapter = netdev_priv(netdev);
1815         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1816
1817         if (!sriov_enabled(adapter))
1818                 return -EPERM;
1819
1820         if (vf >= adapter->num_vfs)
1821                 return -EINVAL;
1822
1823         vi->vf = vf;
1824         vi->max_tx_rate = vf_cfg->tx_rate;
1825         vi->min_tx_rate = 0;
1826         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1827         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1828         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1829         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1830         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1831
1832         return 0;
1833 }
1834
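/* Enable Transparent VLAN Tagging (TVT) on a VF: program the tag via
 * hsw-config, clear any guest VLAN filters the VF may have programmed,
 * and revoke the VF's FILTMGMT privilege so it cannot override the tag
 * with its own VLAN filters.
 */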
1835 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1836 {
1837         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1838         u16 vids[BE_NUM_VLANS_SUPPORTED];
1839         int vf_if_id = vf_cfg->if_handle;
1840         int status;
1841
1842         /* Enable Transparent VLAN Tagging */
1843         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1844         if (status)
1845                 return status;
1846
1847         /* Clear any pre-programmed VLAN filters on the VF when TVT is enabled */
1848         vids[0] = 0;
1849         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1850         if (!status)
1851                 dev_info(&adapter->pdev->dev,
1852                          "Cleared guest VLANs on VF%d", vf);
1853
1854         /* After TVT is enabled, disallow VFs from programming VLAN filters */
1855         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1856                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1857                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1858                 if (!status)
1859                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1860         }
1861         return 0;
1862 }
1863
1864 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1865 {
1866         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1867         struct device *dev = &adapter->pdev->dev;
1868         int status;
1869
1870         /* Reset Transparent VLAN Tagging. */
1871         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1872                                        vf_cfg->if_handle, 0, 0);
1873         if (status)
1874                 return status;
1875
1876         /* Allow VFs to program VLAN filtering */
1877         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1878                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1879                                                   BE_PRIV_FILTMGMT, vf + 1);
1880                 if (!status) {
1881                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1882                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1883                 }
1884         }
1885
1886         dev_info(dev,
1887                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1888         return 0;
1889 }
1890
1891 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1892                           __be16 vlan_proto)
1893 {
1894         struct be_adapter *adapter = netdev_priv(netdev);
1895         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1896         int status;
1897
1898         if (!sriov_enabled(adapter))
1899                 return -EPERM;
1900
1901         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1902                 return -EINVAL;
1903
1904         if (vlan_proto != htons(ETH_P_8021Q))
1905                 return -EPROTONOSUPPORT;
1906
1907         if (vlan || qos) {
1908                 vlan |= qos << VLAN_PRIO_SHIFT;
1909                 status = be_set_vf_tvt(adapter, vf, vlan);
1910         } else {
1911                 status = be_clear_vf_tvt(adapter, vf);
1912         }
1913
1914         if (status) {
1915                 dev_err(&adapter->pdev->dev,
1916                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1917                         status);
1918                 return be_cmd_status(status);
1919         }
1920
1921         vf_cfg->vlan_tag = vlan;
1922         return 0;
1923 }
1924
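/* Set a VF TX rate limit. min_tx_rate is not supported and a max_tx_rate
 * of 0 clears the limit. The rate must lie between 100 Mbps and the current
 * link speed; on Skyhawk it must also be a multiple of 1% of the link speed
 * because the QoS value is programmed as a percentage.
 */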
1925 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1926                              int min_tx_rate, int max_tx_rate)
1927 {
1928         struct be_adapter *adapter = netdev_priv(netdev);
1929         struct device *dev = &adapter->pdev->dev;
1930         int percent_rate, status = 0;
1931         u16 link_speed = 0;
1932         u8 link_status;
1933
1934         if (!sriov_enabled(adapter))
1935                 return -EPERM;
1936
1937         if (vf >= adapter->num_vfs)
1938                 return -EINVAL;
1939
1940         if (min_tx_rate)
1941                 return -EINVAL;
1942
1943         if (!max_tx_rate)
1944                 goto config_qos;
1945
1946         status = be_cmd_link_status_query(adapter, &link_speed,
1947                                           &link_status, 0);
1948         if (status)
1949                 goto err;
1950
1951         if (!link_status) {
1952                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1953                 status = -ENETDOWN;
1954                 goto err;
1955         }
1956
1957         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1958                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1959                         link_speed);
1960                 status = -EINVAL;
1961                 goto err;
1962         }
1963
1964         /* On Skyhawk the QOS setting must be done only as a % value */
1965         percent_rate = link_speed / 100;
1966         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1967                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1968                         percent_rate);
1969                 status = -EINVAL;
1970                 goto err;
1971         }
1972
1973 config_qos:
1974         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1975         if (status)
1976                 goto err;
1977
1978         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1979         return 0;
1980
1981 err:
1982         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1983                 max_tx_rate, vf);
1984         return be_cmd_status(status);
1985 }
1986
1987 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1988                                 int link_state)
1989 {
1990         struct be_adapter *adapter = netdev_priv(netdev);
1991         int status;
1992
1993         if (!sriov_enabled(adapter))
1994                 return -EPERM;
1995
1996         if (vf >= adapter->num_vfs)
1997                 return -EINVAL;
1998
1999         status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
2000         if (status) {
2001                 dev_err(&adapter->pdev->dev,
2002                         "Link state change on VF %d failed: %#x\n", vf, status);
2003                 return be_cmd_status(status);
2004         }
2005
2006         adapter->vf_cfg[vf].plink_tracking = link_state;
2007
2008         return 0;
2009 }
2010
2011 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2012 {
2013         struct be_adapter *adapter = netdev_priv(netdev);
2014         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2015         u8 spoofchk;
2016         int status;
2017
2018         if (!sriov_enabled(adapter))
2019                 return -EPERM;
2020
2021         if (vf >= adapter->num_vfs)
2022                 return -EINVAL;
2023
2024         if (BEx_chip(adapter))
2025                 return -EOPNOTSUPP;
2026
2027         if (enable == vf_cfg->spoofchk)
2028                 return 0;
2029
2030         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2031
2032         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2033                                        0, spoofchk);
2034         if (status) {
2035                 dev_err(&adapter->pdev->dev,
2036                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2037                 return be_cmd_status(status);
2038         }
2039
2040         vf_cfg->spoofchk = enable;
2041         return 0;
2042 }
2043
2044 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2045                           ulong now)
2046 {
2047         aic->rx_pkts_prev = rx_pkts;
2048         aic->tx_reqs_prev = tx_pkts;
2049         aic->jiffies = now;
2050 }
2051
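/* Compute a new adaptive interrupt-coalescing delay (EQD) for this EQ from
 * the RX + TX packet rate seen since the last update:
 *
 *   pps = rx_delta and tx_delta scaled to packets/sec and summed
 *   eqd = (pps / 15000) << 2, forced to 0 below 8 and clamped to
 *         [aic->min_eqd, aic->max_eqd]
 *
 * For example, at a combined 120,000 pkts/s: eqd = (120000 / 15000) << 2 = 32.
 * (The value presumably corresponds to usecs of delay; see the conversion
 * to a delay multiplier in be_eqd_update().)
 */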
2052 static int be_get_new_eqd(struct be_eq_obj *eqo)
2053 {
2054         struct be_adapter *adapter = eqo->adapter;
2055         int eqd, start;
2056         struct be_aic_obj *aic;
2057         struct be_rx_obj *rxo;
2058         struct be_tx_obj *txo;
2059         u64 rx_pkts = 0, tx_pkts = 0;
2060         ulong now;
2061         u32 pps, delta;
2062         int i;
2063
2064         aic = &adapter->aic_obj[eqo->idx];
2065         if (!aic->enable) {
2066                 if (aic->jiffies)
2067                         aic->jiffies = 0;
2068                 eqd = aic->et_eqd;
2069                 return eqd;
2070         }
2071
2072         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2073                 do {
2074                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2075                         rx_pkts += rxo->stats.rx_pkts;
2076                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2077         }
2078
2079         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2080                 do {
2081                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2082                         tx_pkts += txo->stats.tx_reqs;
2083                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2084         }
2085
2086         /* Skip if the counters wrapped around or this is the first calculation */
2087         now = jiffies;
2088         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2089             rx_pkts < aic->rx_pkts_prev ||
2090             tx_pkts < aic->tx_reqs_prev) {
2091                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2092                 return aic->prev_eqd;
2093         }
2094
2095         delta = jiffies_to_msecs(now - aic->jiffies);
2096         if (delta == 0)
2097                 return aic->prev_eqd;
2098
2099         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2100                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2101         eqd = (pps / 15000) << 2;
2102
2103         if (eqd < 8)
2104                 eqd = 0;
2105         eqd = min_t(u32, eqd, aic->max_eqd);
2106         eqd = max_t(u32, eqd, aic->min_eqd);
2107
2108         be_aic_update(aic, rx_pkts, tx_pkts, now);
2109
2110         return eqd;
2111 }
2112
2113 /* For Skyhawk-R only */
2114 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2115 {
2116         struct be_adapter *adapter = eqo->adapter;
2117         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2118         ulong now = jiffies;
2119         int eqd;
2120         u32 mult_enc;
2121
2122         if (!aic->enable)
2123                 return 0;
2124
2125         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2126                 eqd = aic->prev_eqd;
2127         else
2128                 eqd = be_get_new_eqd(eqo);
2129
2130         if (eqd > 100)
2131                 mult_enc = R2I_DLY_ENC_1;
2132         else if (eqd > 60)
2133                 mult_enc = R2I_DLY_ENC_2;
2134         else if (eqd > 20)
2135                 mult_enc = R2I_DLY_ENC_3;
2136         else
2137                 mult_enc = R2I_DLY_ENC_0;
2138
2139         aic->prev_eqd = eqd;
2140
2141         return mult_enc;
2142 }
2143
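/* Push updated EQ delays to the adapter. The firmware takes the delay as a
 * multiplier computed as (eqd * 65) / 100 (e.g. eqd 32 -> multiplier 20);
 * only EQs whose delay actually changed (or all of them when force_update
 * is set) are included in the modify-EQD command.
 */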
2144 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2145 {
2146         struct be_set_eqd set_eqd[MAX_EVT_QS];
2147         struct be_aic_obj *aic;
2148         struct be_eq_obj *eqo;
2149         int i, num = 0, eqd;
2150
2151         for_all_evt_queues(adapter, eqo, i) {
2152                 aic = &adapter->aic_obj[eqo->idx];
2153                 eqd = be_get_new_eqd(eqo);
2154                 if (force_update || eqd != aic->prev_eqd) {
2155                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2156                         set_eqd[num].eq_id = eqo->q.id;
2157                         aic->prev_eqd = eqd;
2158                         num++;
2159                 }
2160         }
2161
2162         if (num)
2163                 be_cmd_modify_eqd(adapter, set_eqd, num);
2164 }
2165
2166 static void be_rx_stats_update(struct be_rx_obj *rxo,
2167                                struct be_rx_compl_info *rxcp)
2168 {
2169         struct be_rx_stats *stats = rx_stats(rxo);
2170
2171         u64_stats_update_begin(&stats->sync);
2172         stats->rx_compl++;
2173         stats->rx_bytes += rxcp->pkt_size;
2174         stats->rx_pkts++;
2175         if (rxcp->tunneled)
2176                 stats->rx_vxlan_offload_pkts++;
2177         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2178                 stats->rx_mcast_pkts++;
2179         if (rxcp->err)
2180                 stats->rx_compl_err++;
2181         u64_stats_update_end(&stats->sync);
2182 }
2183
2184 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2185 {
2186         /* L4 checksum is not reliable for non-TCP/UDP packets.
2187          * Also ignore ipcksm for IPv6 pkts.
2188          */
2189         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2190                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2191 }
2192
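/* Pop the page_info for the RX fragment at the queue tail. Intermediate
 * fragments of a big page only need a dma_sync for the CPU; the fragment
 * marked last_frag unmaps the whole big page in one go.
 */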
2193 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2194 {
2195         struct be_adapter *adapter = rxo->adapter;
2196         struct be_rx_page_info *rx_page_info;
2197         struct be_queue_info *rxq = &rxo->q;
2198         u32 frag_idx = rxq->tail;
2199
2200         rx_page_info = &rxo->page_info_tbl[frag_idx];
2201         BUG_ON(!rx_page_info->page);
2202
2203         if (rx_page_info->last_frag) {
2204                 dma_unmap_page(&adapter->pdev->dev,
2205                                dma_unmap_addr(rx_page_info, bus),
2206                                adapter->big_page_size, DMA_FROM_DEVICE);
2207                 rx_page_info->last_frag = false;
2208         } else {
2209                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2210                                         dma_unmap_addr(rx_page_info, bus),
2211                                         rx_frag_size, DMA_FROM_DEVICE);
2212         }
2213
2214         queue_tail_inc(rxq);
2215         atomic_dec(&rxq->used);
2216         return rx_page_info;
2217 }
2218
2219 /* Throw away the data in the Rx completion */
2220 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2221                                 struct be_rx_compl_info *rxcp)
2222 {
2223         struct be_rx_page_info *page_info;
2224         u16 i, num_rcvd = rxcp->num_rcvd;
2225
2226         for (i = 0; i < num_rcvd; i++) {
2227                 page_info = get_rx_page_info(rxo);
2228                 put_page(page_info->page);
2229                 memset(page_info, 0, sizeof(*page_info));
2230         }
2231 }
2232
2233 /*
2234  * skb_fill_rx_data forms a complete skb for an ether frame
2235  * indicated by rxcp.
2236  */
2237 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2238                              struct be_rx_compl_info *rxcp)
2239 {
2240         struct be_rx_page_info *page_info;
2241         u16 i, j;
2242         u16 hdr_len, curr_frag_len, remaining;
2243         u8 *start;
2244
2245         page_info = get_rx_page_info(rxo);
2246         start = page_address(page_info->page) + page_info->page_offset;
2247         prefetch(start);
2248
2249         /* Copy data in the first descriptor of this completion */
2250         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2251
2252         skb->len = curr_frag_len;
2253         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2254                 memcpy(skb->data, start, curr_frag_len);
2255                 /* Complete packet has now been moved to data */
2256                 put_page(page_info->page);
2257                 skb->data_len = 0;
2258                 skb->tail += curr_frag_len;
2259         } else {
2260                 hdr_len = ETH_HLEN;
2261                 memcpy(skb->data, start, hdr_len);
2262                 skb_shinfo(skb)->nr_frags = 1;
2263                 skb_frag_set_page(skb, 0, page_info->page);
2264                 skb_shinfo(skb)->frags[0].page_offset =
2265                                         page_info->page_offset + hdr_len;
2266                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2267                                   curr_frag_len - hdr_len);
2268                 skb->data_len = curr_frag_len - hdr_len;
2269                 skb->truesize += rx_frag_size;
2270                 skb->tail += hdr_len;
2271         }
2272         page_info->page = NULL;
2273
2274         if (rxcp->pkt_size <= rx_frag_size) {
2275                 BUG_ON(rxcp->num_rcvd != 1);
2276                 return;
2277         }
2278
2279         /* More frags present for this completion */
2280         remaining = rxcp->pkt_size - curr_frag_len;
2281         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2282                 page_info = get_rx_page_info(rxo);
2283                 curr_frag_len = min(remaining, rx_frag_size);
2284
2285                 /* Coalesce all frags from the same physical page in one slot */
2286                 if (page_info->page_offset == 0) {
2287                         /* Fresh page */
2288                         j++;
2289                         skb_frag_set_page(skb, j, page_info->page);
2290                         skb_shinfo(skb)->frags[j].page_offset =
2291                                                         page_info->page_offset;
2292                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2293                         skb_shinfo(skb)->nr_frags++;
2294                 } else {
2295                         put_page(page_info->page);
2296                 }
2297
2298                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2299                 skb->len += curr_frag_len;
2300                 skb->data_len += curr_frag_len;
2301                 skb->truesize += rx_frag_size;
2302                 remaining -= curr_frag_len;
2303                 page_info->page = NULL;
2304         }
2305         BUG_ON(j > MAX_SKB_FRAGS);
2306 }
2307
2308 /* Process the RX completion indicated by rxcp when GRO is disabled */
2309 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2310                                 struct be_rx_compl_info *rxcp)
2311 {
2312         struct be_adapter *adapter = rxo->adapter;
2313         struct net_device *netdev = adapter->netdev;
2314         struct sk_buff *skb;
2315
2316         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2317         if (unlikely(!skb)) {
2318                 rx_stats(rxo)->rx_drops_no_skbs++;
2319                 be_rx_compl_discard(rxo, rxcp);
2320                 return;
2321         }
2322
2323         skb_fill_rx_data(rxo, skb, rxcp);
2324
2325         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2326                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2327         else
2328                 skb_checksum_none_assert(skb);
2329
2330         skb->protocol = eth_type_trans(skb, netdev);
2331         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2332         if (netdev->features & NETIF_F_RXHASH)
2333                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2334
2335         skb->csum_level = rxcp->tunneled;
2336         skb_mark_napi_id(skb, napi);
2337
2338         if (rxcp->vlanf)
2339                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2340
2341         netif_receive_skb(skb);
2342 }
2343
2344 /* Process the RX completion indicated by rxcp when GRO is enabled */
2345 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2346                                     struct napi_struct *napi,
2347                                     struct be_rx_compl_info *rxcp)
2348 {
2349         struct be_adapter *adapter = rxo->adapter;
2350         struct be_rx_page_info *page_info;
2351         struct sk_buff *skb = NULL;
2352         u16 remaining, curr_frag_len;
2353         u16 i, j;
2354
2355         skb = napi_get_frags(napi);
2356         if (!skb) {
2357                 be_rx_compl_discard(rxo, rxcp);
2358                 return;
2359         }
2360
2361         remaining = rxcp->pkt_size;
2362         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2363                 page_info = get_rx_page_info(rxo);
2364
2365                 curr_frag_len = min(remaining, rx_frag_size);
2366
2367                 /* Coalesce all frags from the same physical page in one slot */
2368                 if (i == 0 || page_info->page_offset == 0) {
2369                         /* First frag or Fresh page */
2370                         j++;
2371                         skb_frag_set_page(skb, j, page_info->page);
2372                         skb_shinfo(skb)->frags[j].page_offset =
2373                                                         page_info->page_offset;
2374                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2375                 } else {
2376                         put_page(page_info->page);
2377                 }
2378                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2379                 skb->truesize += rx_frag_size;
2380                 remaining -= curr_frag_len;
2381                 memset(page_info, 0, sizeof(*page_info));
2382         }
2383         BUG_ON(j > MAX_SKB_FRAGS);
2384
2385         skb_shinfo(skb)->nr_frags = j + 1;
2386         skb->len = rxcp->pkt_size;
2387         skb->data_len = rxcp->pkt_size;
2388         skb->ip_summed = CHECKSUM_UNNECESSARY;
2389         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2390         if (adapter->netdev->features & NETIF_F_RXHASH)
2391                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2392
2393         skb->csum_level = rxcp->tunneled;
2394
2395         if (rxcp->vlanf)
2396                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2397
2398         napi_gro_frags(napi);
2399 }
2400
2401 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2402                                  struct be_rx_compl_info *rxcp)
2403 {
2404         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2405         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2406         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2407         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2408         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2409         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2410         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2411         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2412         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2413         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2414         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2415         if (rxcp->vlanf) {
2416                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2417                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2418         }
2419         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2420         rxcp->tunneled =
2421                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2422 }
2423
2424 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2425                                  struct be_rx_compl_info *rxcp)
2426 {
2427         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2428         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2429         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2430         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2431         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2432         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2433         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2434         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2435         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2436         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2437         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2438         if (rxcp->vlanf) {
2439                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2440                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2441         }
2442         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2443         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2444 }
2445
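/* Fetch the next RX completion, if any. The entry is converted to CPU
 * endianness and parsed using the v1 layout on BE3-native setups and the
 * v0 layout otherwise. vlanf is suppressed for transparently tagged frames
 * in QnQ mode and for the port's pvid when that VID was not configured by
 * the stack.
 */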
2446 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2447 {
2448         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2449         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2450         struct be_adapter *adapter = rxo->adapter;
2451
2452         /* For checking the valid bit it is Ok to use either definition as the
2453          * valid bit is at the same position in both v0 and v1 Rx compl */
2454         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2455                 return NULL;
2456
2457         rmb();
2458         be_dws_le_to_cpu(compl, sizeof(*compl));
2459
2460         if (adapter->be3_native)
2461                 be_parse_rx_compl_v1(compl, rxcp);
2462         else
2463                 be_parse_rx_compl_v0(compl, rxcp);
2464
2465         if (rxcp->ip_frag)
2466                 rxcp->l4_csum = 0;
2467
2468         if (rxcp->vlanf) {
2469                 /* In QNQ modes, if qnq bit is not set, then the packet was
2470                  * tagged only with the transparent outer vlan-tag and must
2471                  * not be treated as a vlan packet by host
2472                  */
2473                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2474                         rxcp->vlanf = 0;
2475
2476                 if (!lancer_chip(adapter))
2477                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2478
2479                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2480                     !test_bit(rxcp->vlan_tag, adapter->vids))
2481                         rxcp->vlanf = 0;
2482         }
2483
2484         /* As the compl has been parsed, reset it; we won't touch it again */
2485         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2486
2487         queue_tail_inc(&rxo->cq);
2488         return rxcp;
2489 }
2490
2491 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2492 {
2493         u32 order = get_order(size);
2494
2495         if (order > 0)
2496                 gfp |= __GFP_COMP;
2497         return alloc_pages(gfp, order);
2498 }
2499
2500 /*
2501  * Allocate a page, split it to fragments of size rx_frag_size and post as
2502  * receive buffers to BE
2503  */
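/* Each big page is carved into big_page_size / rx_frag_size fragments;
 * page_offset tracks the fragment currently being posted and last_frag
 * marks the final fragment of a page so the whole page can be unmapped
 * in one go once all its fragments are consumed.
 */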
2504 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2505 {
2506         struct be_adapter *adapter = rxo->adapter;
2507         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2508         struct be_queue_info *rxq = &rxo->q;
2509         struct page *pagep = NULL;
2510         struct device *dev = &adapter->pdev->dev;
2511         struct be_eth_rx_d *rxd;
2512         u64 page_dmaaddr = 0, frag_dmaaddr;
2513         u32 posted, page_offset = 0, notify = 0;
2514
2515         page_info = &rxo->page_info_tbl[rxq->head];
2516         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2517                 if (!pagep) {
2518                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2519                         if (unlikely(!pagep)) {
2520                                 rx_stats(rxo)->rx_post_fail++;
2521                                 break;
2522                         }
2523                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2524                                                     adapter->big_page_size,
2525                                                     DMA_FROM_DEVICE);
2526                         if (dma_mapping_error(dev, page_dmaaddr)) {
2527                                 put_page(pagep);
2528                                 pagep = NULL;
2529                                 adapter->drv_stats.dma_map_errors++;
2530                                 break;
2531                         }
2532                         page_offset = 0;
2533                 } else {
2534                         get_page(pagep);
2535                         page_offset += rx_frag_size;
2536                 }
2537                 page_info->page_offset = page_offset;
2538                 page_info->page = pagep;
2539
2540                 rxd = queue_head_node(rxq);
2541                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2542                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2543                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2544
2545                 /* Any space left in the current big page for another frag? */
2546                 if ((page_offset + rx_frag_size + rx_frag_size) >
2547                                         adapter->big_page_size) {
2548                         pagep = NULL;
2549                         page_info->last_frag = true;
2550                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2551                 } else {
2552                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2553                 }
2554
2555                 prev_page_info = page_info;
2556                 queue_head_inc(rxq);
2557                 page_info = &rxo->page_info_tbl[rxq->head];
2558         }
2559
2560         /* Mark the last frag of a page when we break out of the above loop
2561          * with no more slots available in the RXQ
2562          */
2563         if (pagep) {
2564                 prev_page_info->last_frag = true;
2565                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2566         }
2567
2568         if (posted) {
2569                 atomic_add(posted, &rxq->used);
2570                 if (rxo->rx_post_starved)
2571                         rxo->rx_post_starved = false;
2572                 do {
2573                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2574                         be_rxq_notify(adapter, rxq->id, notify);
2575                         posted -= notify;
2576                 } while (posted);
2577         } else if (atomic_read(&rxq->used) == 0) {
2578                 /* Let be_worker replenish when memory is available */
2579                 rxo->rx_post_starved = true;
2580         }
2581 }
2582
2583 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2584 {
2585         struct be_queue_info *tx_cq = &txo->cq;
2586         struct be_tx_compl_info *txcp = &txo->txcp;
2587         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2588
2589         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2590                 return NULL;
2591
2592         /* Ensure load ordering of valid bit dword and other dwords below */
2593         rmb();
2594         be_dws_le_to_cpu(compl, sizeof(*compl));
2595
2596         txcp->status = GET_TX_COMPL_BITS(status, compl);
2597         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2598
2599         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2600         queue_tail_inc(tx_cq);
2601         return txcp;
2602 }
2603
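/* Reclaim TX WRBs up to last_index (taken from a TX completion): free each
 * completed skb, unmap its header and fragment WRBs, and advance the queue
 * tail. Returns the number of WRBs consumed so the caller can credit them
 * back to the TX queue.
 */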
2604 static u16 be_tx_compl_process(struct be_adapter *adapter,
2605                                struct be_tx_obj *txo, u16 last_index)
2606 {
2607         struct sk_buff **sent_skbs = txo->sent_skb_list;
2608         struct be_queue_info *txq = &txo->q;
2609         struct sk_buff *skb = NULL;
2610         bool unmap_skb_hdr = false;
2611         struct be_eth_wrb *wrb;
2612         u16 num_wrbs = 0;
2613         u32 frag_index;
2614
2615         do {
2616                 if (sent_skbs[txq->tail]) {
2617                         /* Free skb from prev req */
2618                         if (skb)
2619                                 dev_consume_skb_any(skb);
2620                         skb = sent_skbs[txq->tail];
2621                         sent_skbs[txq->tail] = NULL;
2622                         queue_tail_inc(txq);  /* skip hdr wrb */
2623                         num_wrbs++;
2624                         unmap_skb_hdr = true;
2625                 }
2626                 wrb = queue_tail_node(txq);
2627                 frag_index = txq->tail;
2628                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2629                               (unmap_skb_hdr && skb_headlen(skb)));
2630                 unmap_skb_hdr = false;
2631                 queue_tail_inc(txq);
2632                 num_wrbs++;
2633         } while (frag_index != last_index);
2634         dev_consume_skb_any(skb);
2635
2636         return num_wrbs;
2637 }
2638
2639 /* Return the number of events in the event queue */
2640 static inline int events_get(struct be_eq_obj *eqo)
2641 {
2642         struct be_eq_entry *eqe;
2643         int num = 0;
2644
2645         do {
2646                 eqe = queue_tail_node(&eqo->q);
2647                 if (eqe->evt == 0)
2648                         break;
2649
2650                 rmb();
2651                 eqe->evt = 0;
2652                 num++;
2653                 queue_tail_inc(&eqo->q);
2654         } while (true);
2655
2656         return num;
2657 }
2658
2659 /* Leaves the EQ in disarmed state */
2660 static void be_eq_clean(struct be_eq_obj *eqo)
2661 {
2662         int num = events_get(eqo);
2663
2664         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2665 }
2666
2667 /* Free posted rx buffers that were not used */
2668 static void be_rxq_clean(struct be_rx_obj *rxo)
2669 {
2670         struct be_queue_info *rxq = &rxo->q;
2671         struct be_rx_page_info *page_info;
2672
2673         while (atomic_read(&rxq->used) > 0) {
2674                 page_info = get_rx_page_info(rxo);
2675                 put_page(page_info->page);
2676                 memset(page_info, 0, sizeof(*page_info));
2677         }
2678         BUG_ON(atomic_read(&rxq->used));
2679         rxq->tail = 0;
2680         rxq->head = 0;
2681 }
2682
2683 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2684 {
2685         struct be_queue_info *rx_cq = &rxo->cq;
2686         struct be_rx_compl_info *rxcp;
2687         struct be_adapter *adapter = rxo->adapter;
2688         int flush_wait = 0;
2689
2690         /* Consume pending rx completions.
2691          * Wait for the flush completion (identified by zero num_rcvd)
2692          * to arrive. Notify CQ even when there are no more CQ entries
2693          * for HW to flush partially coalesced CQ entries.
2694          * In Lancer, there is no need to wait for flush compl.
2695          */
2696         for (;;) {
2697                 rxcp = be_rx_compl_get(rxo);
2698                 if (!rxcp) {
2699                         if (lancer_chip(adapter))
2700                                 break;
2701
2702                         if (flush_wait++ > 50 ||
2703                             be_check_error(adapter,
2704                                            BE_ERROR_HW)) {
2705                                 dev_warn(&adapter->pdev->dev,
2706                                          "did not receive flush compl\n");
2707                                 break;
2708                         }
2709                         be_cq_notify(adapter, rx_cq->id, true, 0);
2710                         mdelay(1);
2711                 } else {
2712                         be_rx_compl_discard(rxo, rxcp);
2713                         be_cq_notify(adapter, rx_cq->id, false, 1);
2714                         if (rxcp->num_rcvd == 0)
2715                                 break;
2716                 }
2717         }
2718
2719         /* After cleanup, leave the CQ in unarmed state */
2720         be_cq_notify(adapter, rx_cq->id, false, 0);
2721 }
2722
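/* Drain all TX queues before teardown. First poll for completions until
 * every queue is idle, the HW has been silent for ~10ms, or a HW error is
 * detected; then free any WRBs that were enqueued but never notified to
 * the HW and rewind the queue indices accordingly.
 */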
2723 static void be_tx_compl_clean(struct be_adapter *adapter)
2724 {
2725         struct device *dev = &adapter->pdev->dev;
2726         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2727         struct be_tx_compl_info *txcp;
2728         struct be_queue_info *txq;
2729         u32 end_idx, notified_idx;
2730         struct be_tx_obj *txo;
2731         int i, pending_txqs;
2732
2733         /* Stop polling for compls when HW has been silent for 10ms */
2734         do {
2735                 pending_txqs = adapter->num_tx_qs;
2736
2737                 for_all_tx_queues(adapter, txo, i) {
2738                         cmpl = 0;
2739                         num_wrbs = 0;
2740                         txq = &txo->q;
2741                         while ((txcp = be_tx_compl_get(txo))) {
2742                                 num_wrbs +=
2743                                         be_tx_compl_process(adapter, txo,
2744                                                             txcp->end_index);
2745                                 cmpl++;
2746                         }
2747                         if (cmpl) {
2748                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2749                                 atomic_sub(num_wrbs, &txq->used);
2750                                 timeo = 0;
2751                         }
2752                         if (!be_is_tx_compl_pending(txo))
2753                                 pending_txqs--;
2754                 }
2755
2756                 if (pending_txqs == 0 || ++timeo > 10 ||
2757                     be_check_error(adapter, BE_ERROR_HW))
2758                         break;
2759
2760                 mdelay(1);
2761         } while (true);
2762
2763         /* Free enqueued TX that was never notified to HW */
2764         for_all_tx_queues(adapter, txo, i) {
2765                 txq = &txo->q;
2766
2767                 if (atomic_read(&txq->used)) {
2768                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2769                                  i, atomic_read(&txq->used));
2770                         notified_idx = txq->tail;
2771                         end_idx = txq->tail;
2772                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2773                                   txq->len);
2774                         /* Use the tx-compl process logic to handle requests
2775                          * that were not sent to the HW.
2776                          */
2777                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2778                         atomic_sub(num_wrbs, &txq->used);
2779                         BUG_ON(atomic_read(&txq->used));
2780                         txo->pend_wrb_cnt = 0;
2781                         /* Since hw was never notified of these requests,
2782                          * reset TXQ indices
2783                          */
2784                         txq->head = notified_idx;
2785                         txq->tail = notified_idx;
2786                 }
2787         }
2788 }
2789
2790 static void be_evt_queues_destroy(struct be_adapter *adapter)
2791 {
2792         struct be_eq_obj *eqo;
2793         int i;
2794
2795         for_all_evt_queues(adapter, eqo, i) {
2796                 if (eqo->q.created) {
2797                         be_eq_clean(eqo);
2798                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2799                         netif_napi_del(&eqo->napi);
2800                         free_cpumask_var(eqo->affinity_mask);
2801                 }
2802                 be_queue_free(adapter, &eqo->q);
2803         }
2804 }
2805
2806 static int be_evt_queues_create(struct be_adapter *adapter)
2807 {
2808         struct be_queue_info *eq;
2809         struct be_eq_obj *eqo;
2810         struct be_aic_obj *aic;
2811         int i, rc;
2812
2813         /* need enough EQs to service both RX and TX queues */
2814         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2815                                     max(adapter->cfg_num_rx_irqs,
2816                                         adapter->cfg_num_tx_irqs));
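             /* Illustrative numbers (not from any real config): with 8 MSI-x
              * vectors, cfg_num_rx_irqs = 6 and cfg_num_tx_irqs = 4, this
              * evaluates to min(8, max(6, 4)) = 6 event queues.
              */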
2817
2818         for_all_evt_queues(adapter, eqo, i) {
2819                 int numa_node = dev_to_node(&adapter->pdev->dev);
2820
2821                 aic = &adapter->aic_obj[i];
2822                 eqo->adapter = adapter;
2823                 eqo->idx = i;
2824                 aic->max_eqd = BE_MAX_EQD;
2825                 aic->enable = true;
2826
2827                 eq = &eqo->q;
2828                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2829                                     sizeof(struct be_eq_entry));
2830                 if (rc)
2831                         return rc;
2832
2833                 rc = be_cmd_eq_create(adapter, eqo);
2834                 if (rc)
2835                         return rc;
2836
2837                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2838                         return -ENOMEM;
2839                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2840                                 eqo->affinity_mask);
2841                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2842                                BE_NAPI_WEIGHT);
2843         }
2844         return 0;
2845 }
2846
2847 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2848 {
2849         struct be_queue_info *q;
2850
2851         q = &adapter->mcc_obj.q;
2852         if (q->created)
2853                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2854         be_queue_free(adapter, q);
2855
2856         q = &adapter->mcc_obj.cq;
2857         if (q->created)
2858                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2859         be_queue_free(adapter, q);
2860 }
2861
2862 /* Must be called only after TX qs are created as MCC shares TX EQ */
2863 static int be_mcc_queues_create(struct be_adapter *adapter)
2864 {
2865         struct be_queue_info *q, *cq;
2866
2867         cq = &adapter->mcc_obj.cq;
2868         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2869                            sizeof(struct be_mcc_compl)))
2870                 goto err;
2871
2872         /* Use the default EQ for MCC completions */
2873         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2874                 goto mcc_cq_free;
2875
2876         q = &adapter->mcc_obj.q;
2877         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2878                 goto mcc_cq_destroy;
2879
2880         if (be_cmd_mccq_create(adapter, q, cq))
2881                 goto mcc_q_free;
2882
2883         return 0;
2884
2885 mcc_q_free:
2886         be_queue_free(adapter, q);
2887 mcc_cq_destroy:
2888         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2889 mcc_cq_free:
2890         be_queue_free(adapter, cq);
2891 err:
2892         return -1;
2893 }
2894
2895 static void be_tx_queues_destroy(struct be_adapter *adapter)
2896 {
2897         struct be_queue_info *q;
2898         struct be_tx_obj *txo;
2899         u8 i;
2900
2901         for_all_tx_queues(adapter, txo, i) {
2902                 q = &txo->q;
2903                 if (q->created)
2904                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2905                 be_queue_free(adapter, q);
2906
2907                 q = &txo->cq;
2908                 if (q->created)
2909                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2910                 be_queue_free(adapter, q);
2911         }
2912 }
2913
2914 static int be_tx_qs_create(struct be_adapter *adapter)
2915 {
2916         struct be_queue_info *cq;
2917         struct be_tx_obj *txo;
2918         struct be_eq_obj *eqo;
2919         int status, i;
2920
2921         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2922
2923         for_all_tx_queues(adapter, txo, i) {
2924                 cq = &txo->cq;
2925                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2926                                         sizeof(struct be_eth_tx_compl));
2927                 if (status)
2928                         return status;
2929
2930                 u64_stats_init(&txo->stats.sync);
2931                 u64_stats_init(&txo->stats.sync_compl);
2932
2933                 /* If num_evt_qs is less than num_tx_qs, then more than
2934                  * one TXQ shares an EQ
2935                  */
2936                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
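                     /* Sketch of the mapping: txo[i] is tied to
                      * eq_obj[i % num_evt_qs]; e.g. with 2 EQs, txq0/txq2 would
                      * land on eq0 and txq1/txq3 on eq1 (only possible when
                      * num_tx_qs exceeds num_evt_qs, which the min() above
                      * normally prevents).
                      */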
2937                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2938                 if (status)
2939                         return status;
2940
2941                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2942                                         sizeof(struct be_eth_wrb));
2943                 if (status)
2944                         return status;
2945
2946                 status = be_cmd_txq_create(adapter, txo);
2947                 if (status)
2948                         return status;
2949
2950                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2951                                     eqo->idx);
2952         }
2953
2954         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2955                  adapter->num_tx_qs);
2956         return 0;
2957 }
2958
2959 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2960 {
2961         struct be_queue_info *q;
2962         struct be_rx_obj *rxo;
2963         int i;
2964
2965         for_all_rx_queues(adapter, rxo, i) {
2966                 q = &rxo->cq;
2967                 if (q->created)
2968                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2969                 be_queue_free(adapter, q);
2970         }
2971 }
2972
2973 static int be_rx_cqs_create(struct be_adapter *adapter)
2974 {
2975         struct be_queue_info *eq, *cq;
2976         struct be_rx_obj *rxo;
2977         int rc, i;
2978
2979         adapter->num_rss_qs =
2980                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2981
2982         /* We'll use RSS only if at least 2 RSS rings are supported. */
2983         if (adapter->num_rss_qs < 2)
2984                 adapter->num_rss_qs = 0;
2985
2986         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2987
2988         /* When the interface is not capable of RSS rings (and there is no
2989          * need to create a default RXQ) we'll still need one RXQ
2990          */
2991         if (adapter->num_rx_qs == 0)
2992                 adapter->num_rx_qs = 1;
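             /* Worked example (hypothetical config): with a single EQ,
              * num_rss_qs = min(1, cfg_num_rx_irqs) = 1, which is below the
              * 2-ring minimum, so RSS is disabled and exactly one RXQ is
              * created.
              */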
2993
2994         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2995         for_all_rx_queues(adapter, rxo, i) {
2996                 rxo->adapter = adapter;
2997                 cq = &rxo->cq;
2998                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
2999                                     sizeof(struct be_eth_rx_compl));
3000                 if (rc)
3001                         return rc;
3002
3003                 u64_stats_init(&rxo->stats.sync);
3004                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3005                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3006                 if (rc)
3007                         return rc;
3008         }
3009
3010         dev_info(&adapter->pdev->dev,
3011                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3012         return 0;
3013 }
3014
3015 static irqreturn_t be_intx(int irq, void *dev)
3016 {
3017         struct be_eq_obj *eqo = dev;
3018         struct be_adapter *adapter = eqo->adapter;
3019         int num_evts = 0;
3020
3021         /* IRQ is not expected when NAPI is scheduled as the EQ
3022          * will not be armed.
3023          * But, this can happen on Lancer INTx where it takes
3024          * a while to de-assert INTx or in BE2 where occasionally
3025          * an interrupt may be raised even when EQ is unarmed.
3026          * If NAPI is already scheduled, then counting & notifying
3027          * events will orphan them.
3028          */
3029         if (napi_schedule_prep(&eqo->napi)) {
3030                 num_evts = events_get(eqo);
3031                 __napi_schedule(&eqo->napi);
3032                 if (num_evts)
3033                         eqo->spurious_intr = 0;
3034         }
3035         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3036
3037         /* Return IRQ_HANDLED only for the first spurious intr
3038          * after a valid intr to stop the kernel from branding
3039          * this irq as a bad one!
3040          */
3041         if (num_evts || eqo->spurious_intr++ == 0)
3042                 return IRQ_HANDLED;
3043         else
3044                 return IRQ_NONE;
3045 }
3046
3047 static irqreturn_t be_msix(int irq, void *dev)
3048 {
3049         struct be_eq_obj *eqo = dev;
3050
3051         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3052         napi_schedule(&eqo->napi);
3053         return IRQ_HANDLED;
3054 }
3055
3056 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3057 {
3058         return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3059 }
3060
3061 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3062                          int budget, int polling)
3063 {
3064         struct be_adapter *adapter = rxo->adapter;
3065         struct be_queue_info *rx_cq = &rxo->cq;
3066         struct be_rx_compl_info *rxcp;
3067         u32 work_done;
3068         u32 frags_consumed = 0;
3069
3070         for (work_done = 0; work_done < budget; work_done++) {
3071                 rxcp = be_rx_compl_get(rxo);
3072                 if (!rxcp)
3073                         break;
3074
3075                 /* Is it a flush compl that has no data? */
3076                 if (unlikely(rxcp->num_rcvd == 0))
3077                         goto loop_continue;
3078
3079                 /* Discard compl with partial DMA Lancer B0 */
3080                 if (unlikely(!rxcp->pkt_size)) {
3081                         be_rx_compl_discard(rxo, rxcp);
3082                         goto loop_continue;
3083                 }
3084
3085                 /* On BE drop pkts that arrive due to imperfect filtering in
3086                  * promiscuous mode on some SKUs
3087                  */
3088                 if (unlikely(rxcp->port != adapter->port_num &&
3089                              !lancer_chip(adapter))) {
3090                         be_rx_compl_discard(rxo, rxcp);
3091                         goto loop_continue;
3092                 }
3093
3094                 /* Don't do gro when we're busy_polling */
3095                 if (do_gro(rxcp) && polling != BUSY_POLLING)
3096                         be_rx_compl_process_gro(rxo, napi, rxcp);
3097                 else
3098                         be_rx_compl_process(rxo, napi, rxcp);
3099
3100 loop_continue:
3101                 frags_consumed += rxcp->num_rcvd;
3102                 be_rx_stats_update(rxo, rxcp);
3103         }
3104
3105         if (work_done) {
3106                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3107
3108                 /* When an rx-obj gets into post_starved state, just
3109                  * let be_worker do the posting.
3110                  */
3111                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3112                     !rxo->rx_post_starved)
3113                         be_post_rx_frags(rxo, GFP_ATOMIC,
3114                                          max_t(u32, MAX_RX_POST,
3115                                                frags_consumed));
3116         }
3117
3118         return work_done;
3119 }
3120
3121 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3122 {
3123         switch (status) {
3124         case BE_TX_COMP_HDR_PARSE_ERR:
3125                 tx_stats(txo)->tx_hdr_parse_err++;
3126                 break;
3127         case BE_TX_COMP_NDMA_ERR:
3128                 tx_stats(txo)->tx_dma_err++;
3129                 break;
3130         case BE_TX_COMP_ACL_ERR:
3131                 tx_stats(txo)->tx_spoof_check_err++;
3132                 break;
3133         }
3134 }
3135
3136 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3137 {
3138         switch (status) {
3139         case LANCER_TX_COMP_LSO_ERR:
3140                 tx_stats(txo)->tx_tso_err++;
3141                 break;
3142         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3143         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3144                 tx_stats(txo)->tx_spoof_check_err++;
3145                 break;
3146         case LANCER_TX_COMP_QINQ_ERR:
3147                 tx_stats(txo)->tx_qinq_err++;
3148                 break;
3149         case LANCER_TX_COMP_PARITY_ERR:
3150                 tx_stats(txo)->tx_internal_parity_err++;
3151                 break;
3152         case LANCER_TX_COMP_DMA_ERR:
3153                 tx_stats(txo)->tx_dma_err++;
3154                 break;
3155         }
3156 }
3157
3158 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3159                           int idx)
3160 {
3161         int num_wrbs = 0, work_done = 0;
3162         struct be_tx_compl_info *txcp;
3163
3164         while ((txcp = be_tx_compl_get(txo))) {
3165                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3166                 work_done++;
3167
3168                 if (txcp->status) {
3169                         if (lancer_chip(adapter))
3170                                 lancer_update_tx_err(txo, txcp->status);
3171                         else
3172                                 be_update_tx_err(txo, txcp->status);
3173                 }
3174         }
3175
3176         if (work_done) {
3177                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3178                 atomic_sub(num_wrbs, &txo->q.used);
3179
3180                 /* As Tx wrbs have been freed up, wake up netdev queue
3181                  * if it was stopped due to lack of tx wrbs.  */
3182                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3183                     be_can_txq_wake(txo)) {
3184                         netif_wake_subqueue(adapter->netdev, idx);
3185                 }
3186
3187                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3188                 tx_stats(txo)->tx_compl += work_done;
3189                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3190         }
3191 }
3192
3193 #ifdef CONFIG_NET_RX_BUSY_POLL
3194 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3195 {
3196         bool status = true;
3197
3198         spin_lock(&eqo->lock); /* BH is already disabled */
3199         if (eqo->state & BE_EQ_LOCKED) {
3200                 WARN_ON(eqo->state & BE_EQ_NAPI);
3201                 eqo->state |= BE_EQ_NAPI_YIELD;
3202                 status = false;
3203         } else {
3204                 eqo->state = BE_EQ_NAPI;
3205         }
3206         spin_unlock(&eqo->lock);
3207         return status;
3208 }
3209
3210 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3211 {
3212         spin_lock(&eqo->lock); /* BH is already disabled */
3213
3214         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3215         eqo->state = BE_EQ_IDLE;
3216
3217         spin_unlock(&eqo->lock);
3218 }
3219
3220 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3221 {
3222         bool status = true;
3223
3224         spin_lock_bh(&eqo->lock);
3225         if (eqo->state & BE_EQ_LOCKED) {
3226                 eqo->state |= BE_EQ_POLL_YIELD;
3227                 status = false;
3228         } else {
3229                 eqo->state |= BE_EQ_POLL;
3230         }
3231         spin_unlock_bh(&eqo->lock);
3232         return status;
3233 }
3234
3235 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3236 {
3237         spin_lock_bh(&eqo->lock);
3238
3239         WARN_ON(eqo->state & (BE_EQ_NAPI));
3240         eqo->state = BE_EQ_IDLE;
3241
3242         spin_unlock_bh(&eqo->lock);
3243 }
3244
3245 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3246 {
3247         spin_lock_init(&eqo->lock);
3248         eqo->state = BE_EQ_IDLE;
3249 }
3250
3251 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3252 {
3253         local_bh_disable();
3254
3255         /* It's enough to just acquire napi lock on the eqo to stop
3256          * be_busy_poll() from processing any queues.
3257          */
3258         while (!be_lock_napi(eqo))
3259                 mdelay(1);
3260
3261         local_bh_enable();
3262 }
3263
3264 #else /* CONFIG_NET_RX_BUSY_POLL */
3265
3266 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3267 {
3268         return true;
3269 }
3270
3271 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3272 {
3273 }
3274
3275 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3276 {
3277         return false;
3278 }
3279
3280 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3281 {
3282 }
3283
3284 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3285 {
3286 }
3287
3288 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3289 {
3290 }
3291 #endif /* CONFIG_NET_RX_BUSY_POLL */
3292
3293 int be_poll(struct napi_struct *napi, int budget)
3294 {
3295         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3296         struct be_adapter *adapter = eqo->adapter;
3297         int max_work = 0, work, i, num_evts;
3298         struct be_rx_obj *rxo;
3299         struct be_tx_obj *txo;
3300         u32 mult_enc = 0;
3301
3302         num_evts = events_get(eqo);
3303
3304         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3305                 be_process_tx(adapter, txo, i);
3306
3307         if (be_lock_napi(eqo)) {
3308                 /* This loop will iterate twice for EQ0 in which
3309                  * completions of the last RXQ (default one) are also processed.
3310                  * For other EQs the loop iterates only once.
3311                  */
3312                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3313                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3314                         max_work = max(work, max_work);
3315                 }
3316                 be_unlock_napi(eqo);
3317         } else {
3318                 max_work = budget;
3319         }
3320
3321         if (is_mcc_eqo(eqo))
3322                 be_process_mcc(adapter);
3323
3324         if (max_work < budget) {
3325                 napi_complete(napi);
3326
3327                 /* Skyhawk EQ_DB allows the re-arm-to-interrupt delay to be
3328                  * set via a delay multiplier encoding value
3329                  */
3330                 if (skyhawk_chip(adapter))
3331                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3332
3333                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3334                              mult_enc);
3335         } else {
3336                 /* As we'll continue in polling mode, count and clear events */
3337                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3338         }
3339         return max_work;
3340 }
3341
3342 #ifdef CONFIG_NET_RX_BUSY_POLL
3343 static int be_busy_poll(struct napi_struct *napi)
3344 {
3345         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3346         struct be_adapter *adapter = eqo->adapter;
3347         struct be_rx_obj *rxo;
3348         int i, work = 0;
3349
3350         if (!be_lock_busy_poll(eqo))
3351                 return LL_FLUSH_BUSY;
3352
3353         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3354                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3355                 if (work)
3356                         break;
3357         }
3358
3359         be_unlock_busy_poll(eqo);
3360         return work;
3361 }
3362 #endif
3363
3364 void be_detect_error(struct be_adapter *adapter)
3365 {
3366         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3367         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3368         u32 i;
3369         struct device *dev = &adapter->pdev->dev;
3370
3371         if (be_check_error(adapter, BE_ERROR_HW))
3372                 return;
3373
3374         if (lancer_chip(adapter)) {
3375                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3376                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3377                         be_set_error(adapter, BE_ERROR_UE);
3378                         sliport_err1 = ioread32(adapter->db +
3379                                                 SLIPORT_ERROR1_OFFSET);
3380                         sliport_err2 = ioread32(adapter->db +
3381                                                 SLIPORT_ERROR2_OFFSET);
3382                         /* Do not log error messages if it's a FW reset */
3383                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3384                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3385                                 dev_info(dev, "Firmware update in progress\n");
3386                         } else {
3387                                 dev_err(dev, "Error detected in the card\n");
3388                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3389                                         sliport_status);
3390                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3391                                         sliport_err1);
3392                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3393                                         sliport_err2);
3394                         }
3395                 }
3396         } else {
3397                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3398                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3399                 ue_lo_mask = ioread32(adapter->pcicfg +
3400                                       PCICFG_UE_STATUS_LOW_MASK);
3401                 ue_hi_mask = ioread32(adapter->pcicfg +
3402                                       PCICFG_UE_STATUS_HI_MASK);
3403
3404                 ue_lo = (ue_lo & ~ue_lo_mask);
3405                 ue_hi = (ue_hi & ~ue_hi_mask);
3406
3407                 /* On certain platforms BE hardware can indicate spurious UEs.
3408                  * Allow HW to stop working completely in case of a real UE.
3409                  * Hence not setting the hw_error for UE detection.
3410                  */
3411
3412                 if (ue_lo || ue_hi) {
3413                         dev_err(dev, "Error detected in the adapter\n");
3414                         if (skyhawk_chip(adapter))
3415                                 be_set_error(adapter, BE_ERROR_UE);
3416
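                             /* Each set bit selects an entry from the matching
                              * description table; e.g. ue_lo = 0x5 would log
                              * ue_status_low_desc[0] and ue_status_low_desc[2].
                              */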
3417                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3418                                 if (ue_lo & 1)
3419                                         dev_err(dev, "UE: %s bit set\n",
3420                                                 ue_status_low_desc[i]);
3421                         }
3422                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3423                                 if (ue_hi & 1)
3424                                         dev_err(dev, "UE: %s bit set\n",
3425                                                 ue_status_hi_desc[i]);
3426                         }
3427                 }
3428         }
3429 }
3430
3431 static void be_msix_disable(struct be_adapter *adapter)
3432 {
3433         if (msix_enabled(adapter)) {
3434                 pci_disable_msix(adapter->pdev);
3435                 adapter->num_msix_vec = 0;
3436                 adapter->num_msix_roce_vec = 0;
3437         }
3438 }
3439
3440 static int be_msix_enable(struct be_adapter *adapter)
3441 {
3442         unsigned int i, max_roce_eqs;
3443         struct device *dev = &adapter->pdev->dev;
3444         int num_vec;
3445
3446         /* If RoCE is supported, program the max number of vectors that
3447          * could be used for NIC and RoCE, else, just program the number
3448          * we'll use initially.
3449          */
3450         if (be_roce_supported(adapter)) {
3451                 max_roce_eqs =
3452                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3453                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3454                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3455         } else {
3456                 num_vec = max(adapter->cfg_num_rx_irqs,
3457                               adapter->cfg_num_tx_irqs);
3458         }
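             /* Rough example with made-up limits: if be_max_func_eqs() = 32,
              * be_max_nic_eqs() = 16 and 8 CPUs are online, then
              * max_roce_eqs = min(32 - 16, 8) = 8 extra vectors are requested
              * on top of the NIC IRQs.
              */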
3459
3460         for (i = 0; i < num_vec; i++)
3461                 adapter->msix_entries[i].entry = i;
3462
3463         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3464                                         MIN_MSIX_VECTORS, num_vec);
3465         if (num_vec < 0)
3466                 goto fail;
3467
3468         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3469                 adapter->num_msix_roce_vec = num_vec / 2;
3470                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3471                          adapter->num_msix_roce_vec);
3472         }
3473
3474         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3475
3476         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3477                  adapter->num_msix_vec);
3478         return 0;
3479
3480 fail:
3481         dev_warn(dev, "MSIx enable failed\n");
3482
3483         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3484         if (be_virtfn(adapter))
3485                 return num_vec;
3486         return 0;
3487 }
3488
3489 static inline int be_msix_vec_get(struct be_adapter *adapter,
3490                                   struct be_eq_obj *eqo)
3491 {
3492         return adapter->msix_entries[eqo->msix_idx].vector;
3493 }
3494
3495 static int be_msix_register(struct be_adapter *adapter)
3496 {
3497         struct net_device *netdev = adapter->netdev;
3498         struct be_eq_obj *eqo;
3499         int status, i, vec;
3500
3501         for_all_evt_queues(adapter, eqo, i) {
3502                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3503                 vec = be_msix_vec_get(adapter, eqo);
3504                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3505                 if (status)
3506                         goto err_msix;
3507
3508                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3509         }
3510
3511         return 0;
3512 err_msix:
3513         for (i--; i >= 0; i--) {
3514                 eqo = &adapter->eq_obj[i];
3515                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3516         }
3517         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3518                  status);
3519         be_msix_disable(adapter);
3520         return status;
3521 }
3522
3523 static int be_irq_register(struct be_adapter *adapter)
3524 {
3525         struct net_device *netdev = adapter->netdev;
3526         int status;
3527
3528         if (msix_enabled(adapter)) {
3529                 status = be_msix_register(adapter);
3530                 if (status == 0)
3531                         goto done;
3532                 /* INTx is not supported for VF */
3533                 if (be_virtfn(adapter))
3534                         return status;
3535         }
3536
3537         /* INTx: only the first EQ is used */
3538         netdev->irq = adapter->pdev->irq;
3539         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3540                              &adapter->eq_obj[0]);
3541         if (status) {
3542                 dev_err(&adapter->pdev->dev,
3543                         "INTx request IRQ failed - err %d\n", status);
3544                 return status;
3545         }
3546 done:
3547         adapter->isr_registered = true;
3548         return 0;
3549 }
3550
3551 static void be_irq_unregister(struct be_adapter *adapter)
3552 {
3553         struct net_device *netdev = adapter->netdev;
3554         struct be_eq_obj *eqo;
3555         int i, vec;
3556
3557         if (!adapter->isr_registered)
3558                 return;
3559
3560         /* INTx */
3561         if (!msix_enabled(adapter)) {
3562                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3563                 goto done;
3564         }
3565
3566         /* MSIx */
3567         for_all_evt_queues(adapter, eqo, i) {
3568                 vec = be_msix_vec_get(adapter, eqo);
3569                 irq_set_affinity_hint(vec, NULL);
3570                 free_irq(vec, eqo);
3571         }
3572
3573 done:
3574         adapter->isr_registered = false;
3575 }
3576
3577 static void be_rx_qs_destroy(struct be_adapter *adapter)
3578 {
3579         struct rss_info *rss = &adapter->rss_info;
3580         struct be_queue_info *q;
3581         struct be_rx_obj *rxo;
3582         int i;
3583
3584         for_all_rx_queues(adapter, rxo, i) {
3585                 q = &rxo->q;
3586                 if (q->created) {
3587                         /* If RXQs are destroyed while in an "out of buffer"
3588                          * state, there is a possibility of an HW stall on
3589                          * Lancer. So, post 64 buffers to each queue to relieve
3590                          * the "out of buffer" condition.
3591                          * Make sure there's space in the RXQ before posting.
3592                          */
3593                         if (lancer_chip(adapter)) {
3594                                 be_rx_cq_clean(rxo);
3595                                 if (atomic_read(&q->used) == 0)
3596                                         be_post_rx_frags(rxo, GFP_KERNEL,
3597                                                          MAX_RX_POST);
3598                         }
3599
3600                         be_cmd_rxq_destroy(adapter, q);
3601                         be_rx_cq_clean(rxo);
3602                         be_rxq_clean(rxo);
3603                 }
3604                 be_queue_free(adapter, q);
3605         }
3606
3607         if (rss->rss_flags) {
3608                 rss->rss_flags = RSS_ENABLE_NONE;
3609                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3610                                   128, rss->rss_hkey);
3611         }
3612 }
3613
3614 static void be_disable_if_filters(struct be_adapter *adapter)
3615 {
3616         be_dev_mac_del(adapter, adapter->pmac_id[0]);
3617         be_clear_uc_list(adapter);
3618         be_clear_mc_list(adapter);
3619
3620         /* The IFACE flags are enabled in the open path and cleared
3621          * in the close path. When a VF gets detached from the host and
3622          * assigned to a VM the following happens:
3623          *      - VF's IFACE flags get cleared in the detach path
3624          *      - IFACE create is issued by the VF in the attach path
3625          * Due to a bug in the BE3/Skyhawk-R FW
3626          * (Lancer FW doesn't have the bug), the IFACE capability flags
3627          * specified along with the IFACE create cmd issued by a VF are not
3628          * honoured by FW.  As a consequence, if a *new* driver
3629          * (that enables/disables IFACE flags in open/close)
3630          * is loaded in the host and an *old* driver is used by a VM/VF,
3631          * the IFACE gets created *without* the needed flags.
3632          * To avoid this, disable RX-filter flags only for Lancer.
3633          */
3634         if (lancer_chip(adapter)) {
3635                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3636                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3637         }
3638 }
3639
3640 static int be_close(struct net_device *netdev)
3641 {
3642         struct be_adapter *adapter = netdev_priv(netdev);
3643         struct be_eq_obj *eqo;
3644         int i;
3645
3646         /* This protection is needed as be_close() may be called even when the
3647          * adapter is in cleared state (after eeh perm failure)
3648          */
3649         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3650                 return 0;
3651
3652         /* Before attempting cleanup ensure all the pending cmds in the
3653          * config_wq have finished execution
3654          */
3655         flush_workqueue(be_wq);
3656
3657         be_disable_if_filters(adapter);
3658
3659         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3660                 for_all_evt_queues(adapter, eqo, i) {
3661                         napi_disable(&eqo->napi);
3662                         be_disable_busy_poll(eqo);
3663                 }
3664                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3665         }
3666
3667         be_async_mcc_disable(adapter);
3668
3669         /* Wait for all pending tx completions to arrive so that
3670          * all tx skbs are freed.
3671          */
3672         netif_tx_disable(netdev);
3673         be_tx_compl_clean(adapter);
3674
3675         be_rx_qs_destroy(adapter);
3676
3677         for_all_evt_queues(adapter, eqo, i) {
3678                 if (msix_enabled(adapter))
3679                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3680                 else
3681                         synchronize_irq(netdev->irq);
3682                 be_eq_clean(eqo);
3683         }
3684
3685         be_irq_unregister(adapter);
3686
3687         return 0;
3688 }
3689
3690 static int be_rx_qs_create(struct be_adapter *adapter)
3691 {
3692         struct rss_info *rss = &adapter->rss_info;
3693         u8 rss_key[RSS_HASH_KEY_LEN];
3694         struct be_rx_obj *rxo;
3695         int rc, i, j;
3696
3697         for_all_rx_queues(adapter, rxo, i) {
3698                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3699                                     sizeof(struct be_eth_rx_d));
3700                 if (rc)
3701                         return rc;
3702         }
3703
3704         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3705                 rxo = default_rxo(adapter);
3706                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3707                                        rx_frag_size, adapter->if_handle,
3708                                        false, &rxo->rss_id);
3709                 if (rc)
3710                         return rc;
3711         }
3712
3713         for_all_rss_queues(adapter, rxo, i) {
3714                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3715                                        rx_frag_size, adapter->if_handle,
3716                                        true, &rxo->rss_id);
3717                 if (rc)
3718                         return rc;
3719         }
3720
3721         if (be_multi_rxq(adapter)) {
3722                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3723                         for_all_rss_queues(adapter, rxo, i) {
3724                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3725                                         break;
3726                                 rss->rsstable[j + i] = rxo->rss_id;
3727                                 rss->rss_queue[j + i] = i;
3728                         }
3729                 }
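                     /* The indirection table ends up as a round-robin
                      * repetition of the RSS queue ids; e.g. with 4 RSS rings
                      * the pattern q0,q1,q2,q3 repeats across all
                      * RSS_INDIR_TABLE_LEN entries.
                      */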
3730                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3731                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3732
3733                 if (!BEx_chip(adapter))
3734                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3735                                 RSS_ENABLE_UDP_IPV6;
3736
3737                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3738                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3739                                        RSS_INDIR_TABLE_LEN, rss_key);
3740                 if (rc) {
3741                         rss->rss_flags = RSS_ENABLE_NONE;
3742                         return rc;
3743                 }
3744
3745                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3746         } else {
3747                 /* Disable RSS, if only default RX Q is created */
3748                 rss->rss_flags = RSS_ENABLE_NONE;
3749         }
3750
3751
3752         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3753          * which is a queue empty condition
3754          */
3755         for_all_rx_queues(adapter, rxo, i)
3756                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
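             /* For instance, if RX_Q_LEN were 1024 (illustrative value only),
              * at most 1023 buffers would be posted, so a completely full ring
              * is never mistaken for an empty one (both would have
              * head == tail).
              */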
3757
3758         return 0;
3759 }
3760
3761 static int be_enable_if_filters(struct be_adapter *adapter)
3762 {
3763         int status;
3764
3765         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3766         if (status)
3767                 return status;
3768
3769         /* For BE3 VFs, the PF programs the initial MAC address */
3770         if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3771                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3772                 if (status)
3773                         return status;
3774                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3775         }
3776
3777         if (adapter->vlans_added)
3778                 be_vid_config(adapter);
3779
3780         __be_set_rx_mode(adapter);
3781
3782         return 0;
3783 }
3784
3785 static int be_open(struct net_device *netdev)
3786 {
3787         struct be_adapter *adapter = netdev_priv(netdev);
3788         struct be_eq_obj *eqo;
3789         struct be_rx_obj *rxo;
3790         struct be_tx_obj *txo;
3791         u8 link_status;
3792         int status, i;
3793
3794         status = be_rx_qs_create(adapter);
3795         if (status)
3796                 goto err;
3797
3798         status = be_enable_if_filters(adapter);
3799         if (status)
3800                 goto err;
3801
3802         status = be_irq_register(adapter);
3803         if (status)
3804                 goto err;
3805
3806         for_all_rx_queues(adapter, rxo, i)
3807                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3808
3809         for_all_tx_queues(adapter, txo, i)
3810                 be_cq_notify(adapter, txo->cq.id, true, 0);
3811
3812         be_async_mcc_enable(adapter);
3813
3814         for_all_evt_queues(adapter, eqo, i) {
3815                 napi_enable(&eqo->napi);
3816                 be_enable_busy_poll(eqo);
3817                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3818         }
3819         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3820
3821         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3822         if (!status)
3823                 be_link_status_update(adapter, link_status);
3824
3825         netif_tx_start_all_queues(netdev);
3826         if (skyhawk_chip(adapter))
3827                 udp_tunnel_get_rx_info(netdev);
3828
3829         return 0;
3830 err:
3831         be_close(adapter->netdev);
3832         return -EIO;
3833 }
3834
3835 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3836 {
3837         u32 addr;
3838
3839         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3840
3841         mac[5] = (u8)(addr & 0xFF);
3842         mac[4] = (u8)((addr >> 8) & 0xFF);
3843         mac[3] = (u8)((addr >> 16) & 0xFF);
3844         /* Use the OUI from the current MAC address */
3845         memcpy(mac, adapter->netdev->dev_addr, 3);
3846 }
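 /* Sketch of the derivation with hypothetical values: for a PF MAC of
  * 00:90:fa:12:34:56 and jhash() returning 0x00aabbcc, the seed VF MAC becomes
  * 00:90:fa:aa:bb:cc; be_vf_eth_addr_config() below then hands out
  * 00:90:fa:aa:bb:cc, 00:90:fa:aa:bb:cd, ... to successive VFs.
  */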
3847
3848 /*
3849  * Generate a seed MAC address from the PF MAC Address using jhash.
3850  * MAC addresses for VFs are assigned incrementally starting from the seed.
3851  * These addresses are programmed in the ASIC by the PF and the VF driver
3852  * queries for the MAC address during its probe.
3853  */
3854 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3855 {
3856         u32 vf;
3857         int status = 0;
3858         u8 mac[ETH_ALEN];
3859         struct be_vf_cfg *vf_cfg;
3860
3861         be_vf_eth_addr_generate(adapter, mac);
3862
3863         for_all_vfs(adapter, vf_cfg, vf) {
3864                 if (BEx_chip(adapter))
3865                         status = be_cmd_pmac_add(adapter, mac,
3866                                                  vf_cfg->if_handle,
3867                                                  &vf_cfg->pmac_id, vf + 1);
3868                 else
3869                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3870                                                 vf + 1);
3871
3872                 if (status)
3873                         dev_err(&adapter->pdev->dev,
3874                                 "Mac address assignment failed for VF %d\n",
3875                                 vf);
3876                 else
3877                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3878
3879                 mac[5] += 1;
3880         }
3881         return status;
3882 }
3883
3884 static int be_vfs_mac_query(struct be_adapter *adapter)
3885 {
3886         int status, vf;
3887         u8 mac[ETH_ALEN];
3888         struct be_vf_cfg *vf_cfg;
3889
3890         for_all_vfs(adapter, vf_cfg, vf) {
3891                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3892                                                mac, vf_cfg->if_handle,
3893                                                false, vf+1);
3894                 if (status)
3895                         return status;
3896                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3897         }
3898         return 0;
3899 }
3900
3901 static void be_vf_clear(struct be_adapter *adapter)
3902 {
3903         struct be_vf_cfg *vf_cfg;
3904         u32 vf;
3905
3906         if (pci_vfs_assigned(adapter->pdev)) {
3907                 dev_warn(&adapter->pdev->dev,
3908                          "VFs are assigned to VMs: not disabling VFs\n");
3909                 goto done;
3910         }
3911
3912         pci_disable_sriov(adapter->pdev);
3913
3914         for_all_vfs(adapter, vf_cfg, vf) {
3915                 if (BEx_chip(adapter))
3916                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3917                                         vf_cfg->pmac_id, vf + 1);
3918                 else
3919                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3920                                        vf + 1);
3921
3922                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3923         }
3924
3925         if (BE3_chip(adapter))
3926                 be_cmd_set_hsw_config(adapter, 0, 0,
3927                                       adapter->if_handle,
3928                                       PORT_FWD_TYPE_PASSTHRU, 0);
3929 done:
3930         kfree(adapter->vf_cfg);
3931         adapter->num_vfs = 0;
3932         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3933 }
3934
3935 static void be_clear_queues(struct be_adapter *adapter)
3936 {
3937         be_mcc_queues_destroy(adapter);
3938         be_rx_cqs_destroy(adapter);
3939         be_tx_queues_destroy(adapter);
3940         be_evt_queues_destroy(adapter);
3941 }
3942
3943 static void be_cancel_worker(struct be_adapter *adapter)
3944 {
3945         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3946                 cancel_delayed_work_sync(&adapter->work);
3947                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3948         }
3949 }
3950
3951 static void be_cancel_err_detection(struct be_adapter *adapter)
3952 {
3953         struct be_error_recovery *err_rec = &adapter->error_recovery;
3954
3955         if (!be_err_recovery_workq)
3956                 return;
3957
3958         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3959                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3960                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3961         }
3962 }
3963
3964 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3965 {
3966         struct net_device *netdev = adapter->netdev;
3967
3968         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3969                 be_cmd_manage_iface(adapter, adapter->if_handle,
3970                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3971
3972         if (adapter->vxlan_port)
3973                 be_cmd_set_vxlan_port(adapter, 0);
3974
3975         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3976         adapter->vxlan_port = 0;
3977
3978         netdev->hw_enc_features = 0;
3979         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3980         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3981 }
3982
3983 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3984                                 struct be_resources *vft_res)
3985 {
3986         struct be_resources res = adapter->pool_res;
3987         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3988         struct be_resources res_mod = {0};
3989         u16 num_vf_qs = 1;
3990
3991         /* Distribute the queue resources among the PF and its VFs */
3992         if (num_vfs) {
3993                 /* Divide the rx queues evenly among the VFs and the PF, capped
3994                  * at VF-EQ-count. Any remainder queues belong to the PF.
3995                  */
3996                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3997                                 res.max_rss_qs / (num_vfs + 1));
3998
3999                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4000                  * RSS Tables per port. Provide RSS on VFs only if the number
4001                  * of VFs requested is less than its PF pool's RSS tables limit.
4002                  */
4003                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4004                         num_vf_qs = 1;
4005         }
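             /* Illustrative split (made-up numbers): with res.max_rss_qs = 64
              * and num_vfs = 15, each function gets 64 / (15 + 1) = 4 RSS
              * queues, provided SH_VF_MAX_NIC_EQS and the PF pool's RSS table
              * limit do not cap it further.
              */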
4006
4007         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4008          * which are modifiable using SET_PROFILE_CONFIG cmd.
4009          */
4010         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4011                                   RESOURCE_MODIFIABLE, 0);
4012
4013         /* If RSS IFACE capability flags are modifiable for a VF, set the
4014          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4015          * more than 1 RSSQ is available for a VF.
4016          * Otherwise, provision only 1 queue pair for VF.
4017          */
4018         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4019                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4020                 if (num_vf_qs > 1) {
4021                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4022                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4023                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4024                 } else {
4025                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4026                                              BE_IF_FLAGS_DEFQ_RSS);
4027                 }
4028         } else {
4029                 num_vf_qs = 1;
4030         }
4031
4032         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4033                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4034                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4035         }
4036
4037         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4038         vft_res->max_rx_qs = num_vf_qs;
4039         vft_res->max_rss_qs = num_vf_qs;
4040         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4041         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4042
4043         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4044          * among the PF and its VFs, if the fields are changeable
4045          */
4046         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4047                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4048
4049         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4050                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4051
4052         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4053                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4054
4055         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4056                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4057 }
4058
4059 static void be_if_destroy(struct be_adapter *adapter)
4060 {
4061         be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4062
4063         kfree(adapter->pmac_id);
4064         adapter->pmac_id = NULL;
4065
4066         kfree(adapter->mc_list);
4067         adapter->mc_list = NULL;
4068
4069         kfree(adapter->uc_list);
4070         adapter->uc_list = NULL;
4071 }
4072
4073 static int be_clear(struct be_adapter *adapter)
4074 {
4075         struct pci_dev *pdev = adapter->pdev;
4076         struct be_resources vft_res = {0};
4077
4078         be_cancel_worker(adapter);
4079
4080         flush_workqueue(be_wq);
4081
4082         if (sriov_enabled(adapter))
4083                 be_vf_clear(adapter);
4084
4085         /* Re-configure FW to distribute resources evenly across max-supported
4086          * number of VFs, only when VFs are not already enabled.
4087          */
4088         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4089             !pci_vfs_assigned(pdev)) {
4090                 be_calculate_vf_res(adapter,
4091                                     pci_sriov_get_totalvfs(pdev),
4092                                     &vft_res);
4093                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4094                                         pci_sriov_get_totalvfs(pdev),
4095                                         &vft_res);
4096         }
4097
4098         be_disable_vxlan_offloads(adapter);
4099
4100         be_if_destroy(adapter);
4101
4102         be_clear_queues(adapter);
4103
4104         be_msix_disable(adapter);
4105         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4106         return 0;
4107 }
4108
4109 static int be_vfs_if_create(struct be_adapter *adapter)
4110 {
4111         struct be_resources res = {0};
4112         u32 cap_flags, en_flags, vf;
4113         struct be_vf_cfg *vf_cfg;
4114         int status;
4115
4116         /* If a FW profile exists, then cap_flags are updated */
4117         cap_flags = BE_VF_IF_EN_FLAGS;
4118
4119         for_all_vfs(adapter, vf_cfg, vf) {
4120                 if (!BE3_chip(adapter)) {
4121                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4122                                                            ACTIVE_PROFILE_TYPE,
4123                                                            RESOURCE_LIMITS,
4124                                                            vf + 1);
4125                         if (!status) {
4126                                 cap_flags = res.if_cap_flags;
4127                                 /* Prevent VFs from enabling VLAN promiscuous
4128                                  * mode
4129                                  */
4130                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4131                         }
4132                 }
4133
4134                 /* PF should enable IF flags during proxy if_create call */
4135                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4136                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4137                                           &vf_cfg->if_handle, vf + 1);
4138                 if (status)
4139                         return status;
4140         }
4141
4142         return 0;
4143 }
4144
4145 static int be_vf_setup_init(struct be_adapter *adapter)
4146 {
4147         struct be_vf_cfg *vf_cfg;
4148         int vf;
4149
4150         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4151                                   GFP_KERNEL);
4152         if (!adapter->vf_cfg)
4153                 return -ENOMEM;
4154
4155         for_all_vfs(adapter, vf_cfg, vf) {
4156                 vf_cfg->if_handle = -1;
4157                 vf_cfg->pmac_id = -1;
4158         }
4159         return 0;
4160 }
4161
4162 static int be_vf_setup(struct be_adapter *adapter)
4163 {
4164         struct device *dev = &adapter->pdev->dev;
4165         struct be_vf_cfg *vf_cfg;
4166         int status, old_vfs, vf;
4167         bool spoofchk;
4168
4169         old_vfs = pci_num_vf(adapter->pdev);
4170
4171         status = be_vf_setup_init(adapter);
4172         if (status)
4173                 goto err;
4174
4175         if (old_vfs) {
4176                 for_all_vfs(adapter, vf_cfg, vf) {
4177                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4178                         if (status)
4179                                 goto err;
4180                 }
4181
4182                 status = be_vfs_mac_query(adapter);
4183                 if (status)
4184                         goto err;
4185         } else {
4186                 status = be_vfs_if_create(adapter);
4187                 if (status)
4188                         goto err;
4189
4190                 status = be_vf_eth_addr_config(adapter);
4191                 if (status)
4192                         goto err;
4193         }
4194
4195         for_all_vfs(adapter, vf_cfg, vf) {
4196                 /* Allow VFs to program MAC/VLAN filters */
4197                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4198                                                   vf + 1);
4199                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4200                         status = be_cmd_set_fn_privileges(adapter,
4201                                                           vf_cfg->privileges |
4202                                                           BE_PRIV_FILTMGMT,
4203                                                           vf + 1);
4204                         if (!status) {
4205                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4206                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4207                                          vf);
4208                         }
4209                 }
4210
4211                 /* Allow full available bandwidth */
4212                 if (!old_vfs)
4213                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4214
4215                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4216                                                vf_cfg->if_handle, NULL,
4217                                                &spoofchk);
4218                 if (!status)
4219                         vf_cfg->spoofchk = spoofchk;
4220
4221                 if (!old_vfs) {
4222                         be_cmd_enable_vf(adapter, vf + 1);
4223                         be_cmd_set_logical_link_config(adapter,
4224                                                        IFLA_VF_LINK_STATE_AUTO,
4225                                                        vf+1);
4226                 }
4227         }
4228
4229         if (!old_vfs) {
4230                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4231                 if (status) {
4232                         dev_err(dev, "SRIOV enable failed\n");
4233                         adapter->num_vfs = 0;
4234                         goto err;
4235                 }
4236         }
4237
4238         if (BE3_chip(adapter)) {
4239                 /* On BE3, enable VEB only when SRIOV is enabled */
4240                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4241                                                adapter->if_handle,
4242                                                PORT_FWD_TYPE_VEB, 0);
4243                 if (status)
4244                         goto err;
4245         }
4246
4247         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4248         return 0;
4249 err:
4250         dev_err(dev, "VF setup failed\n");
4251         be_vf_clear(adapter);
4252         return status;
4253 }
4254
4255 /* Converting function_mode bits on BE3 to SH mc_type enums */
4256
4257 static u8 be_convert_mc_type(u32 function_mode)
4258 {
4259         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4260                 return vNIC1;
4261         else if (function_mode & QNQ_MODE)
4262                 return FLEX10;
4263         else if (function_mode & VNIC_MODE)
4264                 return vNIC2;
4265         else if (function_mode & UMC_ENABLED)
4266                 return UMC;
4267         else
4268                 return MC_NONE;
4269 }
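 /* For example, a function_mode with both VNIC_MODE and QNQ_MODE set is
  * reported as vNIC1, QNQ_MODE alone as FLEX10, VNIC_MODE alone as vNIC2,
  * and UMC_ENABLED alone as UMC.
  */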
4270
4271 /* On BE2/BE3 FW does not suggest the supported limits */
4272 static void BEx_get_resources(struct be_adapter *adapter,
4273                               struct be_resources *res)
4274 {
4275         bool use_sriov = adapter->num_vfs ? 1 : 0;
4276
4277         if (be_physfn(adapter))
4278                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4279         else
4280                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4281
4282         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4283
4284         if (be_is_mc(adapter)) {
4285                 /* Assuming that there are 4 channels per port
4286                  * when multi-channel is enabled
4287                  */
4288                 if (be_is_qnq_mode(adapter))
4289                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4290                 else
4291                         /* In a non-qnq multichannel mode, the pvid
4292                          * takes up one vlan entry
4293                          */
4294                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4295         } else {
4296                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4297         }
4298
4299         res->max_mcast_mac = BE_MAX_MC;
4300
4301         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4302          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4303          *    *only* if it is RSS-capable.
4304          */
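        /* For example (illustrative): a BE2 chip, an SRIOV config, a
         * multi-port adapter, a VF, or a non-RSS-capable multi-channel
         * function all fall into the single-TXQ branch below.
         */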
4305         if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4306             be_virtfn(adapter) ||
4307             (be_is_mc(adapter) &&
4308              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4309                 res->max_tx_qs = 1;
4310         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4311                 struct be_resources super_nic_res = {0};
4312
4313                 /* On a SuperNIC profile, the driver needs to use the
4314                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4315                  */
4316                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4317                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4318                                           0);
4319                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4320                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4321         } else {
4322                 res->max_tx_qs = BE3_MAX_TX_QS;
4323         }
4324
4325         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4326             !use_sriov && be_physfn(adapter))
4327                 res->max_rss_qs = (adapter->be3_native) ?
4328                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4329         res->max_rx_qs = res->max_rss_qs + 1;
4330
4331         if (be_physfn(adapter))
4332                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4333                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4334         else
4335                 res->max_evt_qs = 1;
4336
4337         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4338         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4339         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4340                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4341 }
4342
4343 static void be_setup_init(struct be_adapter *adapter)
4344 {
4345         adapter->vlan_prio_bmap = 0xff;
4346         adapter->phy.link_speed = -1;
4347         adapter->if_handle = -1;
4348         adapter->be3_native = false;
4349         adapter->if_flags = 0;
4350         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4351         if (be_physfn(adapter))
4352                 adapter->cmd_privileges = MAX_PRIVILEGES;
4353         else
4354                 adapter->cmd_privileges = MIN_PRIVILEGES;
4355 }
4356
4357 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4358  * However, this HW limitation is not exposed to the host via any SLI cmd.
4359  * As a result, in the case of SRIOV and in particular multi-partition configs
4360  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4361  * for distribution between the VFs. This self-imposed limit determines the
4362  * number of VFs for which RSS can be enabled.
4363  */
4364 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4365 {
4366         struct be_port_resources port_res = {0};
4367         u8 rss_tables_on_port;
4368         u16 max_vfs = be_max_vfs(adapter);
4369
4370         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4371                                   RESOURCE_LIMITS, 0);
4372
4373         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4374
4375         /* Each PF Pool's RSS Tables limit =
4376          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4377          */
4378         adapter->pool_res.max_rss_tables =
4379                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4380 }
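/* Worked example (hypothetical numbers, for illustration only): with 15 RSS
 * Policy Tables on the port and 3 NIC PFs, rss_tables_on_port = 12. A PF
 * whose pool allows 32 VFs out of a port-wide max of 64 VFs would then get
 * 32 * 12 / 64 = 6 RSS tables for its VFs.
 */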
4381
4382 static int be_get_sriov_config(struct be_adapter *adapter)
4383 {
4384         struct be_resources res = {0};
4385         int max_vfs, old_vfs;
4386
4387         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4388                                   RESOURCE_LIMITS, 0);
4389
4390         /* Some old versions of BE3 FW don't report max_vfs value */
4391         if (BE3_chip(adapter) && !res.max_vfs) {
4392                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4393                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4394         }
4395
4396         adapter->pool_res = res;
4397
4398         /* If the VFs were not disabled during the previous unload of the
4399          * driver, then we cannot rely on the PF-pool limits for the TotalVFs
4400          * value. Instead, use the TotalVFs value stored in the pci-dev struct.
4401          */
4402         old_vfs = pci_num_vf(adapter->pdev);
4403         if (old_vfs) {
4404                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4405                          old_vfs);
4406
4407                 adapter->pool_res.max_vfs =
4408                         pci_sriov_get_totalvfs(adapter->pdev);
4409                 adapter->num_vfs = old_vfs;
4410         }
4411
4412         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4413                 be_calculate_pf_pool_rss_tables(adapter);
4414                 dev_info(&adapter->pdev->dev,
4415                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4416                          be_max_pf_pool_rss_tables(adapter));
4417         }
4418         return 0;
4419 }
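/* Illustrative scenario (paths and counts are examples, not taken from this
 * file): if an admin enables VFs via sysfs, e.g.
 *     echo 4 > /sys/bus/pci/devices/<BDF>/sriov_numvfs
 * and then reloads the driver without disabling them, pci_num_vf() returns 4
 * above and TotalVFs is taken from the pci-dev struct instead of the PF pool.
 */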
4420
4421 static void be_alloc_sriov_res(struct be_adapter *adapter)
4422 {
4423         int old_vfs = pci_num_vf(adapter->pdev);
4424         struct  be_resources vft_res = {0};
4425         int status;
4426
4427         be_get_sriov_config(adapter);
4428
4429         if (!old_vfs)
4430                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4431
4432         /* When the HW is in an SRIOV-capable configuration, the PF-pool
4433          * resources are given to the PF during driver load, if there are no
4434          * old VFs. This facility is not available in BE3 FW.
4435          * On Lancer chips, this is done by the FW itself.
4436          */
4437         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4438                 be_calculate_vf_res(adapter, 0, &vft_res);
4439                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4440                                                  &vft_res);
4441                 if (status)
4442                         dev_err(&adapter->pdev->dev,
4443                                 "Failed to optimize SRIOV resources\n");
4444         }
4445 }
4446
4447 static int be_get_resources(struct be_adapter *adapter)
4448 {
4449         struct device *dev = &adapter->pdev->dev;
4450         struct be_resources res = {0};
4451         int status;
4452
4453         /* For Lancer, SH etc., read per-function resource limits from FW.
4454          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4455          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4456          */
4457         if (BEx_chip(adapter)) {
4458                 BEx_get_resources(adapter, &res);
4459         } else {
4460                 status = be_cmd_get_func_config(adapter, &res);
4461                 if (status)
4462                         return status;
4463
4464                 /* If a default RXQ must be created, we'll use up one RSSQ */
4465                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4466                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4467                         res.max_rss_qs -= 1;
4468         }
4469
4470         /* If RoCE is supported stash away half the EQs for RoCE */
4471         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4472                                 res.max_evt_qs / 2 : res.max_evt_qs;
4473         adapter->res = res;
4474
4475         /* If FW supports RSS default queue, then skip creating non-RSS
4476          * queue for non-IP traffic.
4477          */
4478         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4479                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4480
4481         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4482                  be_max_txqs(adapter), be_max_rxqs(adapter),
4483                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4484                  be_max_vfs(adapter));
4485         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4486                  be_max_uc(adapter), be_max_mc(adapter),
4487                  be_max_vlans(adapter));
4488
4489         /* Ensure RX and TX queues are created in pairs at init time */
4490         adapter->cfg_num_rx_irqs =
4491                                 min_t(u16, netif_get_num_default_rss_queues(),
4492                                       be_max_qp_irqs(adapter));
4493         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4494         return 0;
4495 }
4496
4497 static int be_get_config(struct be_adapter *adapter)
4498 {
4499         int status, level;
4500         u16 profile_id;
4501
4502         status = be_cmd_get_cntl_attributes(adapter);
4503         if (status)
4504                 return status;
4505
4506         status = be_cmd_query_fw_cfg(adapter);
4507         if (status)
4508                 return status;
4509
4510         if (!lancer_chip(adapter) && be_physfn(adapter))
4511                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4512
4513         if (BEx_chip(adapter)) {
4514                 level = be_cmd_get_fw_log_level(adapter);
4515                 adapter->msg_enable =
4516                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4517         }
4518
4519         be_cmd_get_acpi_wol_cap(adapter);
4520         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4521         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4522
4523         be_cmd_query_port_name(adapter);
4524
4525         if (be_physfn(adapter)) {
4526                 status = be_cmd_get_active_profile(adapter, &profile_id);
4527                 if (!status)
4528                         dev_info(&adapter->pdev->dev,
4529                                  "Using profile 0x%x\n", profile_id);
4530         }
4531
4532         return 0;
4533 }
4534
4535 static int be_mac_setup(struct be_adapter *adapter)
4536 {
4537         u8 mac[ETH_ALEN];
4538         int status;
4539
4540         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4541                 status = be_cmd_get_perm_mac(adapter, mac);
4542                 if (status)
4543                         return status;
4544
4545                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4546                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4547         }
4548
4549         return 0;
4550 }
4551
4552 static void be_schedule_worker(struct be_adapter *adapter)
4553 {
4554         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4555         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4556 }
4557
4558 static void be_destroy_err_recovery_workq(void)
4559 {
4560         if (!be_err_recovery_workq)
4561                 return;
4562
4563         flush_workqueue(be_err_recovery_workq);
4564         destroy_workqueue(be_err_recovery_workq);
4565         be_err_recovery_workq = NULL;
4566 }
4567
4568 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4569 {
4570         struct be_error_recovery *err_rec = &adapter->error_recovery;
4571
4572         if (!be_err_recovery_workq)
4573                 return;
4574
4575         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4576                            msecs_to_jiffies(delay));
4577         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4578 }
4579
4580 static int be_setup_queues(struct be_adapter *adapter)
4581 {
4582         struct net_device *netdev = adapter->netdev;
4583         int status;
4584
4585         status = be_evt_queues_create(adapter);
4586         if (status)
4587                 goto err;
4588
4589         status = be_tx_qs_create(adapter);
4590         if (status)
4591                 goto err;
4592
4593         status = be_rx_cqs_create(adapter);
4594         if (status)
4595                 goto err;
4596
4597         status = be_mcc_queues_create(adapter);
4598         if (status)
4599                 goto err;
4600
4601         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4602         if (status)
4603                 goto err;
4604
4605         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4606         if (status)
4607                 goto err;
4608
4609         return 0;
4610 err:
4611         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4612         return status;
4613 }
4614
4615 static int be_if_create(struct be_adapter *adapter)
4616 {
4617         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4618         u32 cap_flags = be_if_cap_flags(adapter);
4619         int status;
4620
4621         /* alloc required memory for other filtering fields */
4622         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4623                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4624         if (!adapter->pmac_id)
4625                 return -ENOMEM;
4626
4627         adapter->mc_list = kcalloc(be_max_mc(adapter),
4628                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4629         if (!adapter->mc_list)
4630                 return -ENOMEM;
4631
4632         adapter->uc_list = kcalloc(be_max_uc(adapter),
4633                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4634         if (!adapter->uc_list)
4635                 return -ENOMEM;
4636
4637         if (adapter->cfg_num_rx_irqs == 1)
4638                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4639
4640         en_flags &= cap_flags;
4641         /* will enable all the needed filter flags in be_open() */
4642         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4643                                   &adapter->if_handle, 0);
4644
4645         if (status)
4646                 return status;
4647
4648         return 0;
4649 }
4650
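/* Tears down and re-creates the interface and all queues, e.g. after a change
 * in the number of channels. MSI-X is re-programmed only when no vectors are
 * shared with RoCE; the netdev is re-opened if it was running.
 */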
4651 int be_update_queues(struct be_adapter *adapter)
4652 {
4653         struct net_device *netdev = adapter->netdev;
4654         int status;
4655
4656         if (netif_running(netdev))
4657                 be_close(netdev);
4658
4659         be_cancel_worker(adapter);
4660
4661         /* If any vectors have been shared with RoCE we cannot re-program
4662          * the MSIx table.
4663          */
4664         if (!adapter->num_msix_roce_vec)
4665                 be_msix_disable(adapter);
4666
4667         be_clear_queues(adapter);
4668         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4669         if (status)
4670                 return status;
4671
4672         if (!msix_enabled(adapter)) {
4673                 status = be_msix_enable(adapter);
4674                 if (status)
4675                         return status;
4676         }
4677
4678         status = be_if_create(adapter);
4679         if (status)
4680                 return status;
4681
4682         status = be_setup_queues(adapter);
4683         if (status)
4684                 return status;
4685
4686         be_schedule_worker(adapter);
4687
4688         if (netif_running(netdev))
4689                 status = be_open(netdev);
4690
4691         return status;
4692 }
4693
4694 static inline int fw_major_num(const char *fw_ver)
4695 {
4696         int fw_major = 0, i;
4697
4698         i = sscanf(fw_ver, "%d.", &fw_major);
4699         if (i != 1)
4700                 return 0;
4701
4702         return fw_major;
4703 }
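/* Example (illustrative): a fw_ver string of "11.2.1153.23" yields a major
 * number of 11; an unparsable string yields 0.
 */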
4704
4705 /* If it is error recovery, FLR the PF.
4706  * Otherwise, if any VFs are already enabled, don't FLR the PF.
4707  */
4708 static bool be_reset_required(struct be_adapter *adapter)
4709 {
4710         if (be_error_recovering(adapter))
4711                 return true;
4712         else
4713                 return pci_num_vf(adapter->pdev) == 0;
4714 }
4715
4716 /* Wait for the FW to be ready and perform the required initialization */
4717 static int be_func_init(struct be_adapter *adapter)
4718 {
4719         int status;
4720
4721         status = be_fw_wait_ready(adapter);
4722         if (status)
4723                 return status;
4724
4725         /* FW is now ready; clear errors to allow cmds/doorbell */
4726         be_clear_error(adapter, BE_CLEAR_ALL);
4727
4728         if (be_reset_required(adapter)) {
4729                 status = be_cmd_reset_function(adapter);
4730                 if (status)
4731                         return status;
4732
4733                 /* Wait for interrupts to quiesce after an FLR */
4734                 msleep(100);
4735         }
4736
4737         /* Tell FW we're ready to fire cmds */
4738         status = be_cmd_fw_init(adapter);
4739         if (status)
4740                 return status;
4741
4742         /* Allow interrupts for other ULPs running on NIC function */
4743         be_intr_set(adapter, true);
4744
4745         return 0;
4746 }
4747
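/* One-time function setup, roughly in order (summary derived from the code
 * below): wait for FW readiness, optionally FLR the function, query
 * configuration and resource limits, enable MSI-X, create the interface and
 * queues, program the MAC, set flow control and link state, configure the BE3
 * EVB mode, set up VFs if requested, and finally schedule the periodic worker.
 */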
4748 static int be_setup(struct be_adapter *adapter)
4749 {
4750         struct device *dev = &adapter->pdev->dev;
4751         int status;
4752
4753         status = be_func_init(adapter);
4754         if (status)
4755                 return status;
4756
4757         be_setup_init(adapter);
4758
4759         if (!lancer_chip(adapter))
4760                 be_cmd_req_native_mode(adapter);
4761
4762         /* invoke this cmd first to get pf_num and vf_num which are needed
4763          * for issuing profile related cmds
4764          */
4765         if (!BEx_chip(adapter)) {
4766                 status = be_cmd_get_func_config(adapter, NULL);
4767                 if (status)
4768                         return status;
4769         }
4770
4771         status = be_get_config(adapter);
4772         if (status)
4773                 goto err;
4774
4775         if (!BE2_chip(adapter) && be_physfn(adapter))
4776                 be_alloc_sriov_res(adapter);
4777
4778         status = be_get_resources(adapter);
4779         if (status)
4780                 goto err;
4781
4782         status = be_msix_enable(adapter);
4783         if (status)
4784                 goto err;
4785
4786         /* will enable all the needed filter flags in be_open() */
4787         status = be_if_create(adapter);
4788         if (status)
4789                 goto err;
4790
4791         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4792         rtnl_lock();
4793         status = be_setup_queues(adapter);
4794         rtnl_unlock();
4795         if (status)
4796                 goto err;
4797
4798         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4799
4800         status = be_mac_setup(adapter);
4801         if (status)
4802                 goto err;
4803
4804         be_cmd_get_fw_ver(adapter);
4805         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4806
4807         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4808                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work",
4809                         adapter->fw_ver);
4810                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4811         }
4812
4813         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4814                                          adapter->rx_fc);
4815         if (status)
4816                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4817                                         &adapter->rx_fc);
4818
4819         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4820                  adapter->tx_fc, adapter->rx_fc);
4821
4822         if (be_physfn(adapter))
4823                 be_cmd_set_logical_link_config(adapter,
4824                                                IFLA_VF_LINK_STATE_AUTO, 0);
4825
4826         /* The BE3 EVB echoes broadcast/multicast packets back to the PF's
4827          * vport, confusing any Linux bridge or OVS it may be connected to.
4828          * When SRIOV is not enabled, set the EVB to PASSTHRU mode, which
4829          * effectively disables it.
4830          */
4831         if (BE3_chip(adapter))
4832                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4833                                       PORT_FWD_TYPE_PASSTHRU, 0);
4834
4835         if (adapter->num_vfs)
4836                 be_vf_setup(adapter);
4837
4838         status = be_cmd_get_phy_info(adapter);
4839         if (!status && be_pause_supported(adapter))
4840                 adapter->phy.fc_autoneg = 1;
4841
4842         if (be_physfn(adapter) && !lancer_chip(adapter))
4843                 be_cmd_set_features(adapter);
4844
4845         be_schedule_worker(adapter);
4846         adapter->flags |= BE_FLAGS_SETUP_DONE;
4847         return 0;
4848 err:
4849         be_clear(adapter);
4850         return status;
4851 }
4852
4853 #ifdef CONFIG_NET_POLL_CONTROLLER
4854 static void be_netpoll(struct net_device *netdev)
4855 {
4856         struct be_adapter *adapter = netdev_priv(netdev);
4857         struct be_eq_obj *eqo;
4858         int i;
4859
4860         for_all_evt_queues(adapter, eqo, i) {
4861                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4862                 napi_schedule(&eqo->napi);
4863         }
4864 }
4865 #endif
4866
4867 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4868 {
4869         const struct firmware *fw;
4870         int status;
4871
4872         if (!netif_running(adapter->netdev)) {
4873                 dev_err(&adapter->pdev->dev,
4874                         "Firmware load not allowed (interface is down)\n");
4875                 return -ENETDOWN;
4876         }
4877
4878         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4879         if (status)
4880                 goto fw_exit;
4881
4882         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4883
4884         if (lancer_chip(adapter))
4885                 status = lancer_fw_download(adapter, fw);
4886         else
4887                 status = be_fw_download(adapter, fw);
4888
4889         if (!status)
4890                 be_cmd_get_fw_ver(adapter);
4891
4892 fw_exit:
4893         release_firmware(fw);
4894         return status;
4895 }
4896
4897 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4898                                  u16 flags)
4899 {
4900         struct be_adapter *adapter = netdev_priv(dev);
4901         struct nlattr *attr, *br_spec;
4902         int rem;
4903         int status = 0;
4904         u16 mode = 0;
4905
4906         if (!sriov_enabled(adapter))
4907                 return -EOPNOTSUPP;
4908
4909         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4910         if (!br_spec)
4911                 return -EINVAL;
4912
4913         nla_for_each_nested(attr, br_spec, rem) {
4914                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4915                         continue;
4916
4917                 if (nla_len(attr) < sizeof(mode))
4918                         return -EINVAL;
4919
4920                 mode = nla_get_u16(attr);
4921                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4922                         return -EOPNOTSUPP;
4923
4924                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4925                         return -EINVAL;
4926
4927                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4928                                                adapter->if_handle,
4929                                                mode == BRIDGE_MODE_VEPA ?
4930                                                PORT_FWD_TYPE_VEPA :
4931                                                PORT_FWD_TYPE_VEB, 0);
4932                 if (status)
4933                         goto err;
4934
4935                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4936                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4937
4938                 return status;
4939         }
4940 err:
4941         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4942                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4943
4944         return status;
4945 }
4946
4947 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4948                                  struct net_device *dev, u32 filter_mask,
4949                                  int nlflags)
4950 {
4951         struct be_adapter *adapter = netdev_priv(dev);
4952         int status = 0;
4953         u8 hsw_mode;
4954
4955         /* BE and Lancer chips support VEB mode only */
4956         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4957                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4958                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4959                         return 0;
4960                 hsw_mode = PORT_FWD_TYPE_VEB;
4961         } else {
4962                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4963                                                adapter->if_handle, &hsw_mode,
4964                                                NULL);
4965                 if (status)
4966                         return 0;
4967
4968                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4969                         return 0;
4970         }
4971
4972         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4973                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4974                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4975                                        0, 0, nlflags, filter_mask, NULL);
4976 }
4977
4978 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4979                                          void (*func)(struct work_struct *))
4980 {
4981         struct be_cmd_work *work;
4982
4983         work = kzalloc(sizeof(*work), GFP_ATOMIC);
4984         if (!work) {
4985                 dev_err(&adapter->pdev->dev,
4986                         "be_work memory allocation failed\n");
4987                 return NULL;
4988         }
4989
4990         INIT_WORK(&work->work, func);
4991         work->adapter = adapter;
4992         return work;
4993 }
4994
4995 /* VxLAN offload Notes:
4996  *
4997  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4998  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4999  * is expected to work across all types of IP tunnels once exported. Skyhawk
5000  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5001  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5002  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5003  * those other tunnels are unexported on the fly through ndo_features_check().
5004  *
5005  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5006  * adds more than one port, disable offloads and don't re-enable them again
5007  * until after all the tunnels are removed.
5008  */
5009 static void be_work_add_vxlan_port(struct work_struct *work)
5010 {
5011         struct be_cmd_work *cmd_work =
5012                                 container_of(work, struct be_cmd_work, work);
5013         struct be_adapter *adapter = cmd_work->adapter;
5014         struct net_device *netdev = adapter->netdev;
5015         struct device *dev = &adapter->pdev->dev;
5016         __be16 port = cmd_work->info.vxlan_port;
5017         int status;
5018
5019         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5020                 adapter->vxlan_port_aliases++;
5021                 goto done;
5022         }
5023
5024         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5025                 dev_info(dev,
5026                          "Only one UDP port supported for VxLAN offloads\n");
5027                 dev_info(dev, "Disabling VxLAN offloads\n");
5028                 adapter->vxlan_port_count++;
5029                 goto err;
5030         }
5031
5032         if (adapter->vxlan_port_count++ >= 1)
5033                 goto done;
5034
5035         status = be_cmd_manage_iface(adapter, adapter->if_handle,
5036                                      OP_CONVERT_NORMAL_TO_TUNNEL);
5037         if (status) {
5038                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5039                 goto err;
5040         }
5041
5042         status = be_cmd_set_vxlan_port(adapter, port);
5043         if (status) {
5044                 dev_warn(dev, "Failed to add VxLAN port\n");
5045                 goto err;
5046         }
5047         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5048         adapter->vxlan_port = port;
5049
5050         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5051                                    NETIF_F_TSO | NETIF_F_TSO6 |
5052                                    NETIF_F_GSO_UDP_TUNNEL;
5053         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5054         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5055
5056         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5057                  be16_to_cpu(port));
5058         goto done;
5059 err:
5060         be_disable_vxlan_offloads(adapter);
5061 done:
5062         kfree(cmd_work);
5063 }
5064
5065 static void be_work_del_vxlan_port(struct work_struct *work)
5066 {
5067         struct be_cmd_work *cmd_work =
5068                                 container_of(work, struct be_cmd_work, work);
5069         struct be_adapter *adapter = cmd_work->adapter;
5070         __be16 port = cmd_work->info.vxlan_port;
5071
5072         if (adapter->vxlan_port != port)
5073                 goto done;
5074
5075         if (adapter->vxlan_port_aliases) {
5076                 adapter->vxlan_port_aliases--;
5077                 goto out;
5078         }
5079
5080         be_disable_vxlan_offloads(adapter);
5081
5082         dev_info(&adapter->pdev->dev,
5083                  "Disabled VxLAN offloads for UDP port %d\n",
5084                  be16_to_cpu(port));
5085 done:
5086         adapter->vxlan_port_count--;
5087 out:
5088         kfree(cmd_work);
5089 }
5090
5091 static void be_cfg_vxlan_port(struct net_device *netdev,
5092                               struct udp_tunnel_info *ti,
5093                               void (*func)(struct work_struct *))
5094 {
5095         struct be_adapter *adapter = netdev_priv(netdev);
5096         struct be_cmd_work *cmd_work;
5097
5098         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5099                 return;
5100
5101         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5102                 return;
5103
5104         cmd_work = be_alloc_work(adapter, func);
5105         if (cmd_work) {
5106                 cmd_work->info.vxlan_port = ti->port;
5107                 queue_work(be_wq, &cmd_work->work);
5108         }
5109 }
5110
5111 static void be_del_vxlan_port(struct net_device *netdev,
5112                               struct udp_tunnel_info *ti)
5113 {
5114         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5115 }
5116
5117 static void be_add_vxlan_port(struct net_device *netdev,
5118                               struct udp_tunnel_info *ti)
5119 {
5120         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5121 }
5122
5123 static netdev_features_t be_features_check(struct sk_buff *skb,
5124                                            struct net_device *dev,
5125                                            netdev_features_t features)
5126 {
5127         struct be_adapter *adapter = netdev_priv(dev);
5128         u8 l4_hdr = 0;
5129
5130         /* The code below restricts offload features for some tunneled packets.
5131          * Offload features for normal (non tunnel) packets are unchanged.
5132          */
5133         if (!skb->encapsulation ||
5134             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5135                 return features;
5136
5137         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5138          * should disable tunnel offload features if it's not a VxLAN packet,
5139          * as tunnel offloads have been enabled only for VxLAN. This is done to
5140          * allow other tunneled traffic like GRE to work fine while VxLAN
5141          * offloads are configured in Skyhawk-R.
5142          */
5143         switch (vlan_get_protocol(skb)) {
5144         case htons(ETH_P_IP):
5145                 l4_hdr = ip_hdr(skb)->protocol;
5146                 break;
5147         case htons(ETH_P_IPV6):
5148                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5149                 break;
5150         default:
5151                 return features;
5152         }
5153
5154         if (l4_hdr != IPPROTO_UDP ||
5155             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5156             skb->inner_protocol != htons(ETH_P_TEB) ||
5157             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5158                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5159             !adapter->vxlan_port ||
5160             udp_hdr(skb)->dest != adapter->vxlan_port)
5161                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5162
5163         return features;
5164 }
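/* Example (illustrative): with VxLAN offloads enabled, a GRE-encapsulated skb
 * (l4_hdr != IPPROTO_UDP) has its checksum/GSO features cleared above so the
 * stack handles it in software, while VxLAN traffic on the configured UDP
 * dport keeps the full feature set.
 */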
5165
5166 static int be_get_phys_port_id(struct net_device *dev,
5167                                struct netdev_phys_item_id *ppid)
5168 {
5169         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5170         struct be_adapter *adapter = netdev_priv(dev);
5171         u8 *id;
5172
5173         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5174                 return -ENOSPC;
5175
5176         ppid->id[0] = adapter->hba_port_num + 1;
5177         id = &ppid->id[1];
5178         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5179              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5180                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5181
5182         ppid->id_len = id_len;
5183
5184         return 0;
5185 }
5186
5187 static void be_set_rx_mode(struct net_device *dev)
5188 {
5189         struct be_adapter *adapter = netdev_priv(dev);
5190         struct be_cmd_work *work;
5191
5192         work = be_alloc_work(adapter, be_work_set_rx_mode);
5193         if (work)
5194                 queue_work(be_wq, &work->work);
5195 }
5196
5197 static const struct net_device_ops be_netdev_ops = {
5198         .ndo_open               = be_open,
5199         .ndo_stop               = be_close,
5200         .ndo_start_xmit         = be_xmit,
5201         .ndo_set_rx_mode        = be_set_rx_mode,
5202         .ndo_set_mac_address    = be_mac_addr_set,
5203         .ndo_get_stats64        = be_get_stats64,
5204         .ndo_validate_addr      = eth_validate_addr,
5205         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5206         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5207         .ndo_set_vf_mac         = be_set_vf_mac,
5208         .ndo_set_vf_vlan        = be_set_vf_vlan,
5209         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5210         .ndo_get_vf_config      = be_get_vf_config,
5211         .ndo_set_vf_link_state  = be_set_vf_link_state,
5212         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5213 #ifdef CONFIG_NET_POLL_CONTROLLER
5214         .ndo_poll_controller    = be_netpoll,
5215 #endif
5216         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5217         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5218 #ifdef CONFIG_NET_RX_BUSY_POLL
5219         .ndo_busy_poll          = be_busy_poll,
5220 #endif
5221         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5222         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5223         .ndo_features_check     = be_features_check,
5224         .ndo_get_phys_port_id   = be_get_phys_port_id,
5225 };
5226
5227 static void be_netdev_init(struct net_device *netdev)
5228 {
5229         struct be_adapter *adapter = netdev_priv(netdev);
5230
5231         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5232                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5233                 NETIF_F_HW_VLAN_CTAG_TX;
5234         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5235                 netdev->hw_features |= NETIF_F_RXHASH;
5236
5237         netdev->features |= netdev->hw_features |
5238                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5239
5240         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5241                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5242
5243         netdev->priv_flags |= IFF_UNICAST_FLT;
5244
5245         netdev->flags |= IFF_MULTICAST;
5246
5247         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5248
5249         netdev->netdev_ops = &be_netdev_ops;
5250
5251         netdev->ethtool_ops = &be_ethtool_ops;
5252
5253         /* MTU range: 256 - 9000 */
5254         netdev->min_mtu = BE_MIN_MTU;
5255         netdev->max_mtu = BE_MAX_MTU;
5256 }
5257
5258 static void be_cleanup(struct be_adapter *adapter)
5259 {
5260         struct net_device *netdev = adapter->netdev;
5261
5262         rtnl_lock();
5263         netif_device_detach(netdev);
5264         if (netif_running(netdev))
5265                 be_close(netdev);
5266         rtnl_unlock();
5267
5268         be_clear(adapter);
5269 }
5270
5271 static int be_resume(struct be_adapter *adapter)
5272 {
5273         struct net_device *netdev = adapter->netdev;
5274         int status;
5275
5276         status = be_setup(adapter);
5277         if (status)
5278                 return status;
5279
5280         rtnl_lock();
5281         if (netif_running(netdev))
5282                 status = be_open(netdev);
5283         rtnl_unlock();
5284
5285         if (status)
5286                 return status;
5287
5288         netif_device_attach(netdev);
5289
5290         return 0;
5291 }
5292
5293 static void be_soft_reset(struct be_adapter *adapter)
5294 {
5295         u32 val;
5296
5297         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5298         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5299         val |= SLIPORT_SOFTRESET_SR_MASK;
5300         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5301 }
5302
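/* Recovery is attempted only when all of the following hold (summary of the
 * checks below): the POST stage reports a recoverable error with a non-zero
 * error code, enough time has passed since driver load and since the last
 * recovery, and the error code differs from the previous one (i.e. it is not
 * a consecutive TPE error).
 */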
5303 static bool be_err_is_recoverable(struct be_adapter *adapter)
5304 {
5305         struct be_error_recovery *err_rec = &adapter->error_recovery;
5306         unsigned long initial_idle_time =
5307                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5308         unsigned long recovery_interval =
5309                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5310         u16 ue_err_code;
5311         u32 val;
5312
5313         val = be_POST_stage_get(adapter);
5314         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5315                 return false;
5316         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5317         if (ue_err_code == 0)
5318                 return false;
5319
5320         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5321                 ue_err_code);
5322
5323         if (jiffies - err_rec->probe_time <= initial_idle_time) {
5324                 dev_err(&adapter->pdev->dev,
5325                         "Cannot recover within %lu sec from driver load\n",
5326                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5327                 return false;
5328         }
5329
5330         if (err_rec->last_recovery_time &&
5331             (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5332                 dev_err(&adapter->pdev->dev,
5333                         "Cannot recover within %lu sec from last recovery\n",
5334                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5335                 return false;
5336         }
5337
5338         if (ue_err_code == err_rec->last_err_code) {
5339                 dev_err(&adapter->pdev->dev,
5340                         "Cannot recover from a consecutive TPE error\n");
5341                 return false;
5342         }
5343
5344         err_rec->last_recovery_time = jiffies;
5345         err_rec->last_err_code = ue_err_code;
5346         return true;
5347 }
5348
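/* TPE recovery state machine (summary derived from the code below):
 *   NONE -> DETECT:     arm a UE-detect delay
 *   DETECT -> RESET:    PF0 only; wait ue_to_reset_time before the soft reset
 *   DETECT -> PRE_POLL: other PFs wait ue_to_poll_time
 *   RESET -> PRE_POLL:  PF0 issues the chip soft reset, then waits
 *   PRE_POLL -> REINIT: recovery step complete (returns 0)
 * -EAGAIN is returned while the machine is still progressing.
 */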
5349 static int be_tpe_recover(struct be_adapter *adapter)
5350 {
5351         struct be_error_recovery *err_rec = &adapter->error_recovery;
5352         int status = -EAGAIN;
5353         u32 val;
5354
5355         switch (err_rec->recovery_state) {
5356         case ERR_RECOVERY_ST_NONE:
5357                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5358                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5359                 break;
5360
5361         case ERR_RECOVERY_ST_DETECT:
5362                 val = be_POST_stage_get(adapter);
5363                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5364                     POST_STAGE_RECOVERABLE_ERR) {
5365                         dev_err(&adapter->pdev->dev,
5366                                 "Unrecoverable HW error detected: 0x%x\n", val);
5367                         status = -EINVAL;
5368                         err_rec->resched_delay = 0;
5369                         break;
5370                 }
5371
5372                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5373
5374                 /* Only PF0 initiates a Chip Soft Reset. But PF0 must wait UE2SR
5375                  * milliseconds before it checks the final error status in
5376                  * SLIPORT_SEMAPHORE to determine if the recovery criteria are
5377                  * met. If they are, PF0 initiates a Soft Reset.
5378                  */
5379                 if (adapter->pf_num == 0) {
5380                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5381                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5382                                         ERR_RECOVERY_UE_DETECT_DURATION;
5383                         break;
5384                 }
5385
5386                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5387                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5388                                         ERR_RECOVERY_UE_DETECT_DURATION;
5389                 break;
5390
5391         case ERR_RECOVERY_ST_RESET:
5392                 if (!be_err_is_recoverable(adapter)) {
5393                         dev_err(&adapter->pdev->dev,
5394                                 "Failed to meet recovery criteria\n");
5395                         status = -EIO;
5396                         err_rec->resched_delay = 0;
5397                         break;
5398                 }
5399                 be_soft_reset(adapter);
5400                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5401                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5402                                         err_rec->ue_to_reset_time;
5403                 break;
5404
5405         case ERR_RECOVERY_ST_PRE_POLL:
5406                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5407                 err_rec->resched_delay = 0;
5408                 status = 0;                     /* done */
5409                 break;
5410
5411         default:
5412                 status = -EINVAL;
5413                 err_rec->resched_delay = 0;
5414                 break;
5415         }
5416
5417         return status;
5418 }
5419
5420 static int be_err_recover(struct be_adapter *adapter)
5421 {
5422         int status;
5423
5424         if (!lancer_chip(adapter)) {
5425                 if (!adapter->error_recovery.recovery_supported ||
5426                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5427                         return -EIO;
5428                 status = be_tpe_recover(adapter);
5429                 if (status)
5430                         goto err;
5431         }
5432
5433         /* Wait for adapter to reach quiescent state before
5434          * destroying queues
5435          */
5436         status = be_fw_wait_ready(adapter);
5437         if (status)
5438                 goto err;
5439
5440         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5441
5442         be_cleanup(adapter);
5443
5444         status = be_resume(adapter);
5445         if (status)
5446                 goto err;
5447
5448         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5449
5450 err:
5451         return status;
5452 }
5453
5454 static void be_err_detection_task(struct work_struct *work)
5455 {
5456         struct be_error_recovery *err_rec =
5457                         container_of(work, struct be_error_recovery,
5458                                      err_detection_work.work);
5459         struct be_adapter *adapter =
5460                         container_of(err_rec, struct be_adapter,
5461                                      error_recovery);
5462         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5463         struct device *dev = &adapter->pdev->dev;
5464         int recovery_status;
5465
5466         be_detect_error(adapter);
5467         if (!be_check_error(adapter, BE_ERROR_HW))
5468                 goto reschedule_task;
5469
5470         recovery_status = be_err_recover(adapter);
5471         if (!recovery_status) {
5472                 err_rec->recovery_retries = 0;
5473                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5474                 dev_info(dev, "Adapter recovery successful\n");
5475                 goto reschedule_task;
5476         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5477                 /* BEx/SH recovery state machine */
5478                 if (adapter->pf_num == 0 &&
5479                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5480                         dev_err(&adapter->pdev->dev,
5481                                 "Adapter recovery in progress\n");
5482                 resched_delay = err_rec->resched_delay;
5483                 goto reschedule_task;
5484         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5485                 /* For VFs, check every second whether the PF has
5486                  * allocated resources.
5487                  */
5488                 dev_err(dev, "Re-trying adapter recovery\n");
5489                 goto reschedule_task;
5490         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5491                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5492                 /* In case of another error during recovery, it takes 30 sec
5493                  * for adapter to come out of error. Retry error recovery after
5494                  * this time interval.
5495                  */
5496                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5497                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5498                 goto reschedule_task;
5499         } else {
5500                 dev_err(dev, "Adapter recovery failed\n");
5501                 dev_err(dev, "Please reboot server to recover\n");
5502         }
5503
5504         return;
5505
5506 reschedule_task:
5507         be_schedule_err_detection(adapter, resched_delay);
5508 }
5509
5510 static void be_log_sfp_info(struct be_adapter *adapter)
5511 {
5512         int status;
5513
5514         status = be_cmd_query_sfp_info(adapter);
5515         if (!status) {
5516                 dev_err(&adapter->pdev->dev,
5517                         "Port %c: %s Vendor: %s part no: %s",
5518                         adapter->port_name,
5519                         be_misconfig_evt_port_state[adapter->phy_state],
5520                         adapter->phy.vendor_name,
5521                         adapter->phy.vendor_pn);
5522         }
5523         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5524 }
5525
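/* Periodic (1 second) housekeeping: reads the die temperature on the PF every
 * be_get_temp_freq iterations, reaps MCC completions while the interface is
 * down, refreshes stats, replenishes RX queues starved by allocation
 * failures, updates EQ delay on non-Skyhawk chips, and logs SFP info when a
 * PHY misconfiguration has been flagged.
 */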
5526 static void be_worker(struct work_struct *work)
5527 {
5528         struct be_adapter *adapter =
5529                 container_of(work, struct be_adapter, work.work);
5530         struct be_rx_obj *rxo;
5531         int i;
5532
5533         if (be_physfn(adapter) &&
5534             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5535                 be_cmd_get_die_temperature(adapter);
5536
5537         /* when interrupts are not yet enabled, just reap any pending
5538          * mcc completions
5539          */
5540         if (!netif_running(adapter->netdev)) {
5541                 local_bh_disable();
5542                 be_process_mcc(adapter);
5543                 local_bh_enable();
5544                 goto reschedule;
5545         }
5546
5547         if (!adapter->stats_cmd_sent) {
5548                 if (lancer_chip(adapter))
5549                         lancer_cmd_get_pport_stats(adapter,
5550                                                    &adapter->stats_cmd);
5551                 else
5552                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5553         }
5554
5555         for_all_rx_queues(adapter, rxo, i) {
5556                 /* Replenish RX-queues starved due to memory
5557                  * allocation failures.
5558                  */
5559                 if (rxo->rx_post_starved)
5560                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5561         }
5562
5563         /* EQ-delay update for Skyhawk is done while notifying EQ */
5564         if (!skyhawk_chip(adapter))
5565                 be_eqd_update(adapter, false);
5566
5567         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5568                 be_log_sfp_info(adapter);
5569
5570 reschedule:
5571         adapter->work_counter++;
5572         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5573 }
5574
5575 static void be_unmap_pci_bars(struct be_adapter *adapter)
5576 {
5577         if (adapter->csr)
5578                 pci_iounmap(adapter->pdev, adapter->csr);
5579         if (adapter->db)
5580                 pci_iounmap(adapter->pdev, adapter->db);
5581         if (adapter->pcicfg && adapter->pcicfg_mapped)
5582                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5583 }
5584
5585 static int db_bar(struct be_adapter *adapter)
5586 {
5587         if (lancer_chip(adapter) || be_virtfn(adapter))
5588                 return 0;
5589         else
5590                 return 4;
5591 }
5592
5593 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5594 {
5595         if (skyhawk_chip(adapter)) {
5596                 adapter->roce_db.size = 4096;
5597                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5598                                                               db_bar(adapter));
5599                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5600                                                                db_bar(adapter));
5601         }
5602         return 0;
5603 }
5604
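/* BAR layout implied by the code below: CSR is BAR 2 on BEx PFs; the doorbell
 * BAR is 0 on Lancer and on VFs, 4 otherwise; PCICFG is BAR 1 on BE2 and
 * BAR 0 on BE3/Skyhawk PFs, while VFs reach it at a fixed offset within the
 * doorbell BAR.
 */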
5605 static int be_map_pci_bars(struct be_adapter *adapter)
5606 {
5607         struct pci_dev *pdev = adapter->pdev;
5608         u8 __iomem *addr;
5609         u32 sli_intf;
5610
5611         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5612         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5613                                 SLI_INTF_FAMILY_SHIFT;
5614         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5615
5616         if (BEx_chip(adapter) && be_physfn(adapter)) {
5617                 adapter->csr = pci_iomap(pdev, 2, 0);
5618                 if (!adapter->csr)
5619                         return -ENOMEM;
5620         }
5621
5622         addr = pci_iomap(pdev, db_bar(adapter), 0);
5623         if (!addr)
5624                 goto pci_map_err;
5625         adapter->db = addr;
5626
5627         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5628                 if (be_physfn(adapter)) {
5629                         /* PCICFG is the 2nd BAR in BE2 */
5630                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5631                         if (!addr)
5632                                 goto pci_map_err;
5633                         adapter->pcicfg = addr;
5634                         adapter->pcicfg_mapped = true;
5635                 } else {
5636                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5637                         adapter->pcicfg_mapped = false;
5638                 }
5639         }
5640
5641         be_roce_map_pci_bars(adapter);
5642         return 0;
5643
5644 pci_map_err:
5645         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5646         be_unmap_pci_bars(adapter);
5647         return -ENOMEM;
5648 }
5649
5650 static void be_drv_cleanup(struct be_adapter *adapter)
5651 {
5652         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5653         struct device *dev = &adapter->pdev->dev;
5654
5655         if (mem->va)
5656                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5657
5658         mem = &adapter->rx_filter;
5659         if (mem->va)
5660                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5661
5662         mem = &adapter->stats_cmd;
5663         if (mem->va)
5664                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5665 }
5666
5667 /* Allocate and initialize various fields in be_adapter struct */
5668 static int be_drv_init(struct be_adapter *adapter)
5669 {
5670         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5671         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5672         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5673         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5674         struct device *dev = &adapter->pdev->dev;
5675         int status = 0;
5676
5677         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5678         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5679                                                  &mbox_mem_alloc->dma,
5680                                                  GFP_KERNEL);
5681         if (!mbox_mem_alloc->va)
5682                 return -ENOMEM;
5683
5684         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5685         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5686         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
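        /* The mailbox itself must be 16-byte aligned, so 16 extra bytes are
         * allocated above and both the CPU and DMA addresses are aligned into
         * the padded buffer.
         */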
5687
5688         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5689         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5690                                             &rx_filter->dma, GFP_KERNEL);
5691         if (!rx_filter->va) {
5692                 status = -ENOMEM;
5693                 goto free_mbox;
5694         }
5695
5696         if (lancer_chip(adapter))
5697                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5698         else if (BE2_chip(adapter))
5699                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5700         else if (BE3_chip(adapter))
5701                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5702         else
5703                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5704         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5705                                             &stats_cmd->dma, GFP_KERNEL);
5706         if (!stats_cmd->va) {
5707                 status = -ENOMEM;
5708                 goto free_rx_filter;
5709         }
5710
5711         mutex_init(&adapter->mbox_lock);
5712         mutex_init(&adapter->mcc_lock);
5713         mutex_init(&adapter->rx_filter_lock);
5714         spin_lock_init(&adapter->mcc_cq_lock);
5715         init_completion(&adapter->et_cmd_compl);
5716
5717         pci_save_state(adapter->pdev);
5718
5719         INIT_DELAYED_WORK(&adapter->work, be_worker);
5720
5721         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5722         adapter->error_recovery.resched_delay = 0;
5723         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5724                           be_err_detection_task);
5725
5726         adapter->rx_fc = true;
5727         adapter->tx_fc = true;
5728
5729         /* Must be a power of 2 or else MODULO will BUG_ON */
5730         adapter->be_get_temp_freq = 64;
5731
5732         return 0;
5733
5734 free_rx_filter:
5735         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5736 free_mbox:
5737         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5738                           mbox_mem_alloc->dma);
5739         return status;
5740 }
5741
5742 static void be_remove(struct pci_dev *pdev)
5743 {
5744         struct be_adapter *adapter = pci_get_drvdata(pdev);
5745
5746         if (!adapter)
5747                 return;
5748
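        /* Tear down in roughly the reverse order of probe: detach the RoCE
         * child device, mask interrupts, stop error recovery and unregister
         * the netdev before releasing FW, BARs and DMA memory.
         */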
5749         be_roce_dev_remove(adapter);
5750         be_intr_set(adapter, false);
5751
5752         be_cancel_err_detection(adapter);
5753
5754         unregister_netdev(adapter->netdev);
5755
5756         be_clear(adapter);
5757
5758         if (!pci_vfs_assigned(adapter->pdev))
5759                 be_cmd_reset_function(adapter);
5760
5761         /* Tell FW we are done issuing cmds */
5762         be_cmd_fw_clean(adapter);
5763
5764         be_unmap_pci_bars(adapter);
5765         be_drv_cleanup(adapter);
5766
5767         pci_disable_pcie_error_reporting(pdev);
5768
5769         pci_release_regions(pdev);
5770         pci_disable_device(pdev);
5771
5772         free_netdev(adapter->netdev);
5773 }
5774
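/* hwmon "temp1_input" callback: reports the last on-die temperature sampled
 * by the driver, in millidegrees Celsius as the hwmon ABI expects.
 */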
5775 static ssize_t be_hwmon_show_temp(struct device *dev,
5776                                   struct device_attribute *dev_attr,
5777                                   char *buf)
5778 {
5779         struct be_adapter *adapter = dev_get_drvdata(dev);
5780
5781         /* Unit: millidegree Celsius */
5782         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5783                 return -EIO;
5784         else
5785                 return sprintf(buf, "%u\n",
5786                                adapter->hwmon_info.be_on_die_temp * 1000);
5787 }
5788
5789 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5790                           be_hwmon_show_temp, NULL, 1);
5791
5792 static struct attribute *be_hwmon_attrs[] = {
5793         &sensor_dev_attr_temp1_input.dev_attr.attr,
5794         NULL
5795 };
5796
5797 ATTRIBUTE_GROUPS(be_hwmon);
5798
5799 static char *mc_name(struct be_adapter *adapter)
5800 {
5801         char *str = ""; /* default */
5802
5803         switch (adapter->mc_type) {
5804         case UMC:
5805                 str = "UMC";
5806                 break;
5807         case FLEX10:
5808                 str = "FLEX10";
5809                 break;
5810         case vNIC1:
5811                 str = "vNIC-1";
5812                 break;
5813         case nPAR:
5814                 str = "nPAR";
5815                 break;
5816         case UFP:
5817                 str = "UFP";
5818                 break;
5819         case vNIC2:
5820                 str = "vNIC-2";
5821                 break;
5822         default:
5823                 str = "";
5824         }
5825
5826         return str;
5827 }
5828
5829 static inline char *func_name(struct be_adapter *adapter)
5830 {
5831         return be_physfn(adapter) ? "PF" : "VF";
5832 }
5833
5834 static inline char *nic_name(struct pci_dev *pdev)
5835 {
5836         switch (pdev->device) {
5837         case OC_DEVICE_ID1:
5838                 return OC_NAME;
5839         case OC_DEVICE_ID2:
5840                 return OC_NAME_BE;
5841         case OC_DEVICE_ID3:
5842         case OC_DEVICE_ID4:
5843                 return OC_NAME_LANCER;
5844         case BE_DEVICE_ID2:
5845                 return BE3_NAME;
5846         case OC_DEVICE_ID5:
5847         case OC_DEVICE_ID6:
5848                 return OC_NAME_SH;
5849         default:
5850                 return BE_NAME;
5851         }
5852 }
5853
5854 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5855 {
5856         struct be_adapter *adapter;
5857         struct net_device *netdev;
5858         int status = 0;
5859
5860         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5861
5862         status = pci_enable_device(pdev);
5863         if (status)
5864                 goto do_none;
5865
5866         status = pci_request_regions(pdev, DRV_NAME);
5867         if (status)
5868                 goto disable_dev;
5869         pci_set_master(pdev);
5870
5871         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5872         if (!netdev) {
5873                 status = -ENOMEM;
5874                 goto rel_reg;
5875         }
5876         adapter = netdev_priv(netdev);
5877         adapter->pdev = pdev;
5878         pci_set_drvdata(pdev, adapter);
5879         adapter->netdev = netdev;
5880         SET_NETDEV_DEV(netdev, &pdev->dev);
5881
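        /* Prefer a 64-bit DMA mask; if the platform cannot support it, fall
         * back to 32-bit addressing before giving up on the device.
         */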
5882         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5883         if (!status) {
5884                 netdev->features |= NETIF_F_HIGHDMA;
5885         } else {
5886                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5887                 if (status) {
5888                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5889                         goto free_netdev;
5890                 }
5891         }
5892
5893         status = pci_enable_pcie_error_reporting(pdev);
5894         if (!status)
5895                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5896
5897         status = be_map_pci_bars(adapter);
5898         if (status)
5899                 goto free_netdev;
5900
5901         status = be_drv_init(adapter);
5902         if (status)
5903                 goto unmap_bars;
5904
5905         status = be_setup(adapter);
5906         if (status)
5907                 goto drv_cleanup;
5908
5909         be_netdev_init(netdev);
5910         status = register_netdev(netdev);
5911         if (status != 0)
5912                 goto unsetup;
5913
5914         be_roce_dev_add(adapter);
5915
5916         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5917         adapter->error_recovery.probe_time = jiffies;
5918
5919         /* On-die temperature is not supported on VFs. */
5920         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5921                 adapter->hwmon_info.hwmon_dev =
5922                         devm_hwmon_device_register_with_groups(&pdev->dev,
5923                                                                DRV_NAME,
5924                                                                adapter,
5925                                                                be_hwmon_groups);
5926                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5927         }
5928
5929         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5930                  func_name(adapter), mc_name(adapter), adapter->port_name);
5931
5932         return 0;
5933
5934 unsetup:
5935         be_clear(adapter);
5936 drv_cleanup:
5937         be_drv_cleanup(adapter);
5938 unmap_bars:
5939         be_unmap_pci_bars(adapter);
5940 free_netdev:
5941         free_netdev(netdev);
5942 rel_reg:
5943         pci_release_regions(pdev);
5944 disable_dev:
5945         pci_disable_device(pdev);
5946 do_none:
5947         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5948         return status;
5949 }
5950
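/* Legacy PCI PM hooks: quiesce the NIC and enter the requested power state on
 * suspend; re-enable the device and rebuild queues (be_resume()) on resume.
 */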
5951 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5952 {
5953         struct be_adapter *adapter = pci_get_drvdata(pdev);
5954
5955         be_intr_set(adapter, false);
5956         be_cancel_err_detection(adapter);
5957
5958         be_cleanup(adapter);
5959
5960         pci_save_state(pdev);
5961         pci_disable_device(pdev);
5962         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5963         return 0;
5964 }
5965
5966 static int be_pci_resume(struct pci_dev *pdev)
5967 {
5968         struct be_adapter *adapter = pci_get_drvdata(pdev);
5969         int status = 0;
5970
5971         status = pci_enable_device(pdev);
5972         if (status)
5973                 return status;
5974
5975         pci_restore_state(pdev);
5976
5977         status = be_resume(adapter);
5978         if (status)
5979                 return status;
5980
5981         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5982
5983         return 0;
5984 }
5985
5986 /*
5987  * An FLR (Function Level Reset) will stop BE from DMAing any data.
5988  */
5989 static void be_shutdown(struct pci_dev *pdev)
5990 {
5991         struct be_adapter *adapter = pci_get_drvdata(pdev);
5992
5993         if (!adapter)
5994                 return;
5995
5996         be_roce_dev_shutdown(adapter);
5997         cancel_delayed_work_sync(&adapter->work);
5998         be_cancel_err_detection(adapter);
5999
6000         netif_device_detach(adapter->netdev);
6001
6002         be_cmd_reset_function(adapter);
6003
6004         pci_disable_device(pdev);
6005 }
6006
6007 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6008                                             pci_channel_state_t state)
6009 {
6010         struct be_adapter *adapter = pci_get_drvdata(pdev);
6011
6012         dev_err(&adapter->pdev->dev, "EEH error detected\n");
6013
6014         be_roce_dev_remove(adapter);
6015
6016         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6017                 be_set_error(adapter, BE_ERROR_EEH);
6018
6019                 be_cancel_err_detection(adapter);
6020
6021                 be_cleanup(adapter);
6022         }
6023
6024         if (state == pci_channel_io_perm_failure)
6025                 return PCI_ERS_RESULT_DISCONNECT;
6026
6027         pci_disable_device(pdev);
6028
6029         /* The error could cause the FW to trigger a flash debug dump.
6030          * Resetting the card while a flash dump is in progress can
6031          * prevent it from recovering, so wait for the dump to finish.
6032          * Wait only on the first function, as the wait is needed only
6033          * once per adapter.
6034          */
6035         if (pdev->devfn == 0)
6036                 ssleep(30);
6037
6038         return PCI_ERS_RESULT_NEED_RESET;
6039 }
6040
6041 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6042 {
6043         struct be_adapter *adapter = pci_get_drvdata(pdev);
6044         int status;
6045
6046         dev_info(&adapter->pdev->dev, "EEH reset\n");
6047
6048         status = pci_enable_device(pdev);
6049         if (status)
6050                 return PCI_ERS_RESULT_DISCONNECT;
6051
6052         pci_set_master(pdev);
6053         pci_restore_state(pdev);
6054
6055         /* Check if card is ok and fw is ready */
6056         dev_info(&adapter->pdev->dev,
6057                  "Waiting for FW to be ready after EEH reset\n");
6058         status = be_fw_wait_ready(adapter);
6059         if (status)
6060                 return PCI_ERS_RESULT_DISCONNECT;
6061
6062         pci_cleanup_aer_uncorrect_error_status(pdev);
6063         be_clear_error(adapter, BE_CLEAR_ALL);
6064         return PCI_ERS_RESULT_RECOVERED;
6065 }
6066
6067 static void be_eeh_resume(struct pci_dev *pdev)
6068 {
6069         int status = 0;
6070         struct be_adapter *adapter = pci_get_drvdata(pdev);
6071
6072         dev_info(&adapter->pdev->dev, "EEH resume\n");
6073
6074         pci_save_state(pdev);
6075
6076         status = be_resume(adapter);
6077         if (status)
6078                 goto err;
6079
6080         be_roce_dev_add(adapter);
6081
6082         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6083         return;
6084 err:
6085         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6086 }
6087
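/* Entry point for the PCI core's sriov_configure hook, invoked when the
 * sriov_numvfs sysfs attribute is written; num_vfs == 0 disables SR-IOV.
 */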
6088 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6089 {
6090         struct be_adapter *adapter = pci_get_drvdata(pdev);
6091         struct be_resources vft_res = {0};
6092         int status;
6093
6094         if (!num_vfs)
6095                 be_vf_clear(adapter);
6096
6097         adapter->num_vfs = num_vfs;
6098
6099         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6100                 dev_warn(&pdev->dev,
6101                          "Cannot disable VFs while they are assigned\n");
6102                 return -EBUSY;
6103         }
6104
6105         /* When the HW is in an SR-IOV capable configuration, the PF-pool
6106          * resources are distributed equally across the max number of VFs.
6107          * The user may request that only a subset of the max VFs be enabled.
6108          * Based on num_vfs, redistribute the resources across num_vfs so
6109          * that each VF gets a larger share of the resources.
6110          * This facility is not available in BE3 FW.
6111          * On Lancer chips this redistribution is done by the FW.
6112          */
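        /* Illustrative example (hypothetical numbers): if the PF pool holds
         * 64 RX rings and max-vfs is 32, each VF initially maps to 2 rings;
         * enabling only 8 VFs lets be_calculate_vf_res() assign 8 rings each.
         */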
6113         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6114                 be_calculate_vf_res(adapter, adapter->num_vfs,
6115                                     &vft_res);
6116                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6117                                                  adapter->num_vfs, &vft_res);
6118                 if (status)
6119                         dev_err(&pdev->dev,
6120                                 "Failed to optimize SR-IOV resources\n");
6121         }
6122
6123         status = be_get_resources(adapter);
6124         if (status)
6125                 return be_cmd_status(status);
6126
6127         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6128         rtnl_lock();
6129         status = be_update_queues(adapter);
6130         rtnl_unlock();
6131         if (status)
6132                 return be_cmd_status(status);
6133
6134         if (adapter->num_vfs)
6135                 status = be_vf_setup(adapter);
6136
6137         if (!status)
6138                 return adapter->num_vfs;
6139
6140         return 0;
6141 }
6142
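/* EEH/AER recovery flow: ->error_detected quiesces the function,
 * ->slot_reset re-enables the device and waits for FW readiness, and
 * ->resume rebuilds the rings and restarts traffic.
 */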
6143 static const struct pci_error_handlers be_eeh_handlers = {
6144         .error_detected = be_eeh_err_detected,
6145         .slot_reset = be_eeh_reset,
6146         .resume = be_eeh_resume,
6147 };
6148
6149 static struct pci_driver be_driver = {
6150         .name = DRV_NAME,
6151         .id_table = be_dev_ids,
6152         .probe = be_probe,
6153         .remove = be_remove,
6154         .suspend = be_suspend,
6155         .resume = be_pci_resume,
6156         .shutdown = be_shutdown,
6157         .sriov_configure = be_pci_sriov_configure,
6158         .err_handler = &be_eeh_handlers
6159 };
6160
6161 static int __init be_init_module(void)
6162 {
6163         int status;
6164
6165         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6166             rx_frag_size != 2048) {
6167                 pr_warn(DRV_NAME
6168                         " : Module param rx_frag_size must be 2048/4096/8192."
6169                         " Using 2048\n");
6170                 rx_frag_size = 2048;
6171         }
6172
6173         if (num_vfs > 0) {
6174                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6175                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6176         }
6177
6178         be_wq = create_singlethread_workqueue("be_wq");
6179         if (!be_wq) {
6180                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6181                 return -ENOMEM;
6182         }
6183
6184         be_err_recovery_workq =
6185                 create_singlethread_workqueue("be_err_recover");
6186         if (!be_err_recovery_workq)
6187                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6188
6189         status = pci_register_driver(&be_driver);
6190         if (status) {
6191                 destroy_workqueue(be_wq);
6192                 be_destroy_err_recovery_workq();
6193         }
6194         return status;
6195 }
6196 module_init(be_init_module);
6197
6198 static void __exit be_exit_module(void)
6199 {
6200         pci_unregister_driver(&be_driver);
6201
6202         be_destroy_err_recovery_workq();
6203
6204         if (be_wq)
6205                 destroy_workqueue(be_wq);
6206 }
6207 module_exit(be_exit_module);