1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use the sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58         { 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67         "CEV",
68         "CTX",
69         "DBUF",
70         "ERX",
71         "Host",
72         "MPU",
73         "NDMA",
74         "PTC ",
75         "RDMA ",
76         "RXF ",
77         "RXIPS ",
78         "RXULP0 ",
79         "RXULP1 ",
80         "RXULP2 ",
81         "TIM ",
82         "TPOST ",
83         "TPRE ",
84         "TXIPS ",
85         "TXULP0 ",
86         "TXULP1 ",
87         "UC ",
88         "WDMA ",
89         "TXULP2 ",
90         "HOST1 ",
91         "P0_OB_LINK ",
92         "P1_OB_LINK ",
93         "HOST_GPIO ",
94         "MBOX ",
95         "ERX2 ",
96         "SPARE ",
97         "JTAG ",
98         "MPU_INTPEND "
99 };
100
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103         "LPCMEMHOST",
104         "MGMT_MAC",
105         "PCS0ONLINE",
106         "MPU_IRAM",
107         "PCS1ONLINE",
108         "PCTL0",
109         "PCTL1",
110         "PMEM",
111         "RR",
112         "TXPB",
113         "RXPP",
114         "XAUI",
115         "TXP",
116         "ARM",
117         "IPC",
118         "HOST2",
119         "HOST3",
120         "HOST4",
121         "HOST5",
122         "HOST6",
123         "HOST7",
124         "ECRC",
125         "Poison TLP",
126         "NETC",
127         "PERIPH",
128         "LLTXULP",
129         "D2P",
130         "RCON",
131         "LDMA",
132         "LLTXP",
133         "LLTXPB",
134         "Unknown"
135 };
136
137 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
138                                  BE_IF_FLAGS_BROADCAST | \
139                                  BE_IF_FLAGS_MULTICAST | \
140                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
141
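/* Helpers that allocate/free the DMA-coherent ring memory backing a
 * be_queue_info (used for the EQ/CQ/TX/RX rings).
 */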
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144         struct be_dma_mem *mem = &q->dma_mem;
145
146         if (mem->va) {
147                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148                                   mem->dma);
149                 mem->va = NULL;
150         }
151 }
152
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154                           u16 len, u16 entry_size)
155 {
156         struct be_dma_mem *mem = &q->dma_mem;
157
158         memset(q, 0, sizeof(*q));
159         q->len = len;
160         q->entry_size = entry_size;
161         mem->size = len * entry_size;
162         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163                                       GFP_KERNEL);
164         if (!mem->va)
165                 return -ENOMEM;
166         return 0;
167 }
168
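/* Enable/disable the host interrupt bit directly via the PCI config-space
 * MEMBAR control register; used as a fallback when the FW cmd issued by
 * be_intr_set() fails.
 */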
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171         u32 reg, enabled;
172
173         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174                               &reg);
175         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176
177         if (!enabled && enable)
178                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179         else if (enabled && !enable)
180                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else
182                 return;
183
184         pci_write_config_dword(adapter->pdev,
185                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190         int status = 0;
191
192         /* On lancer interrupts can't be controlled via this register */
193         if (lancer_chip(adapter))
194                 return;
195
196         if (be_check_error(adapter, BE_ERROR_EEH))
197                 return;
198
199         status = be_cmd_intr_set(adapter, enable);
200         if (status)
201                 be_reg_intr_set(adapter, enable);
202 }
203
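/* Doorbell helpers: be_rxq_notify() posts RX buffers, be_txq_notify() posts
 * TX WRBs, and be_eq_notify()/be_cq_notify() ack and re-arm event/completion
 * queues. The wmb() ensures descriptor writes are visible to the HW before
 * the doorbell write.
 */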
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206         u32 val = 0;
207
208         if (be_check_error(adapter, BE_ERROR_HW))
209                 return;
210
211         val |= qid & DB_RQ_RING_ID_MASK;
212         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213
214         wmb();
215         iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219                           u16 posted)
220 {
221         u32 val = 0;
222
223         if (be_check_error(adapter, BE_ERROR_HW))
224                 return;
225
226         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228
229         wmb();
230         iowrite32(val, adapter->db + txo->db_offset);
231 }
232
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234                          bool arm, bool clear_int, u16 num_popped,
235                          u32 eq_delay_mult_enc)
236 {
237         u32 val = 0;
238
239         val |= qid & DB_EQ_RING_ID_MASK;
240         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241
242         if (be_check_error(adapter, BE_ERROR_HW))
243                 return;
244
245         if (arm)
246                 val |= 1 << DB_EQ_REARM_SHIFT;
247         if (clear_int)
248                 val |= 1 << DB_EQ_CLR_SHIFT;
249         val |= 1 << DB_EQ_EVNT_SHIFT;
250         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252         iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257         u32 val = 0;
258
259         val |= qid & DB_CQ_RING_ID_MASK;
260         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
262
263         if (be_check_error(adapter, BE_ERROR_HW))
264                 return;
265
266         if (arm)
267                 val |= 1 << DB_CQ_REARM_SHIFT;
268         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269         iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271
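/* Program @mac as the function's primary MAC (pmac_id[0]), unless it is
 * already present in the uc-list, in which case the existing pmac_id is
 * reused.
 */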
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274         int i;
275
276         /* Check if mac has already been added as part of uc-list */
277         for (i = 0; i < adapter->uc_macs; i++) {
278                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
279                         /* mac already added, skip addition */
280                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
281                         return 0;
282                 }
283         }
284
285         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
286                                &adapter->pmac_id[0], 0);
287 }
288
289 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
290 {
291         int i;
292
293         /* Skip deletion if the programmed mac is
294          * being used in uc-list
295          */
296         for (i = 0; i < adapter->uc_macs; i++) {
297                 if (adapter->pmac_id[i + 1] == pmac_id)
298                         return;
299         }
300         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
301 }
302
303 static int be_mac_addr_set(struct net_device *netdev, void *p)
304 {
305         struct be_adapter *adapter = netdev_priv(netdev);
306         struct device *dev = &adapter->pdev->dev;
307         struct sockaddr *addr = p;
308         int status;
309         u8 mac[ETH_ALEN];
310         u32 old_pmac_id = adapter->pmac_id[0];
311
312         if (!is_valid_ether_addr(addr->sa_data))
313                 return -EADDRNOTAVAIL;
314
315         /* Proceed further only if the user-provided MAC is different
316          * from the active MAC
317          */
318         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
319                 return 0;
320
321         /* BE3 VFs without FILTMGMT privilege are not allowed to change their
322          * MAC address
323          */
324         if (BEx_chip(adapter) && be_virtfn(adapter) &&
325             !check_privilege(adapter, BE_PRIV_FILTMGMT))
326                 return -EPERM;
327
328         /* if device is not running, copy MAC to netdev->dev_addr */
329         if (!netif_running(netdev))
330                 goto done;
331
332         /* The PMAC_ADD cmd may fail if the VF doesn't have the FILTMGMT
333          * privilege or if the PF did not provision the new MAC address.
334          * On BE3, this cmd will always fail if the VF doesn't have the
335          * FILTMGMT privilege. That failure is OK only if the PF has already
336          * programmed the MAC for the VF.
337          */
338         mutex_lock(&adapter->rx_filter_lock);
339         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
340         if (!status) {
341
342                 /* Delete the old programmed MAC. This call may fail if the
343                  * old MAC was already deleted by the PF driver.
344                  */
345                 if (adapter->pmac_id[0] != old_pmac_id)
346                         be_dev_mac_del(adapter, old_pmac_id);
347         }
348
349         mutex_unlock(&adapter->rx_filter_lock);
350         /* Decide if the new MAC is successfully activated only after
351          * querying the FW
352          */
353         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
354                                        adapter->if_handle, true, 0);
355         if (status)
356                 goto err;
357
358         /* The MAC change did not happen, either due to lack of privilege
359          * or because the PF didn't pre-provision the new MAC.
360          */
361         if (!ether_addr_equal(addr->sa_data, mac)) {
362                 status = -EPERM;
363                 goto err;
364         }
365
366         /* Remember currently programmed MAC */
367         ether_addr_copy(adapter->dev_mac, addr->sa_data);
368 done:
369         ether_addr_copy(netdev->dev_addr, addr->sa_data);
370         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
371         return 0;
372 err:
373         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
374         return status;
375 }
376
377 /* BE2 supports only v0 cmd */
378 static void *hw_stats_from_cmd(struct be_adapter *adapter)
379 {
380         if (BE2_chip(adapter)) {
381                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
382
383                 return &cmd->hw_stats;
384         } else if (BE3_chip(adapter)) {
385                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
386
387                 return &cmd->hw_stats;
388         } else {
389                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
390
391                 return &cmd->hw_stats;
392         }
393 }
394
395 /* BE2 supports only v0 cmd */
396 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
397 {
398         if (BE2_chip(adapter)) {
399                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
400
401                 return &hw_stats->erx;
402         } else if (BE3_chip(adapter)) {
403                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
404
405                 return &hw_stats->erx;
406         } else {
407                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
408
409                 return &hw_stats->erx;
410         }
411 }
412
413 static void populate_be_v0_stats(struct be_adapter *adapter)
414 {
415         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
416         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
417         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
418         struct be_port_rxf_stats_v0 *port_stats =
419                                         &rxf_stats->port[adapter->port_num];
420         struct be_drv_stats *drvs = &adapter->drv_stats;
421
422         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
423         drvs->rx_pause_frames = port_stats->rx_pause_frames;
424         drvs->rx_crc_errors = port_stats->rx_crc_errors;
425         drvs->rx_control_frames = port_stats->rx_control_frames;
426         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
427         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
428         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
429         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
430         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
431         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
432         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
433         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
434         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
435         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
436         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
437         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
438         drvs->rx_dropped_header_too_small =
439                 port_stats->rx_dropped_header_too_small;
440         drvs->rx_address_filtered =
441                                         port_stats->rx_address_filtered +
442                                         port_stats->rx_vlan_filtered;
443         drvs->rx_alignment_symbol_errors =
444                 port_stats->rx_alignment_symbol_errors;
445
446         drvs->tx_pauseframes = port_stats->tx_pauseframes;
447         drvs->tx_controlframes = port_stats->tx_controlframes;
448
449         if (adapter->port_num)
450                 drvs->jabber_events = rxf_stats->port1_jabber_events;
451         else
452                 drvs->jabber_events = rxf_stats->port0_jabber_events;
453         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
454         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
455         drvs->forwarded_packets = rxf_stats->forwarded_packets;
456         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
457         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
458         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
459         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
460 }
461
462 static void populate_be_v1_stats(struct be_adapter *adapter)
463 {
464         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
465         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
466         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
467         struct be_port_rxf_stats_v1 *port_stats =
468                                         &rxf_stats->port[adapter->port_num];
469         struct be_drv_stats *drvs = &adapter->drv_stats;
470
471         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
472         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
473         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
474         drvs->rx_pause_frames = port_stats->rx_pause_frames;
475         drvs->rx_crc_errors = port_stats->rx_crc_errors;
476         drvs->rx_control_frames = port_stats->rx_control_frames;
477         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
478         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
479         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
480         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
481         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
482         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
483         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
484         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
485         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
486         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
487         drvs->rx_dropped_header_too_small =
488                 port_stats->rx_dropped_header_too_small;
489         drvs->rx_input_fifo_overflow_drop =
490                 port_stats->rx_input_fifo_overflow_drop;
491         drvs->rx_address_filtered = port_stats->rx_address_filtered;
492         drvs->rx_alignment_symbol_errors =
493                 port_stats->rx_alignment_symbol_errors;
494         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
495         drvs->tx_pauseframes = port_stats->tx_pauseframes;
496         drvs->tx_controlframes = port_stats->tx_controlframes;
497         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
498         drvs->jabber_events = port_stats->jabber_events;
499         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
500         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
501         drvs->forwarded_packets = rxf_stats->forwarded_packets;
502         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
503         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
504         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
505         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
506 }
507
508 static void populate_be_v2_stats(struct be_adapter *adapter)
509 {
510         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
511         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
512         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
513         struct be_port_rxf_stats_v2 *port_stats =
514                                         &rxf_stats->port[adapter->port_num];
515         struct be_drv_stats *drvs = &adapter->drv_stats;
516
517         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
518         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
519         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
520         drvs->rx_pause_frames = port_stats->rx_pause_frames;
521         drvs->rx_crc_errors = port_stats->rx_crc_errors;
522         drvs->rx_control_frames = port_stats->rx_control_frames;
523         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
524         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
525         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
526         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
527         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
528         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
529         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
530         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
531         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
532         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
533         drvs->rx_dropped_header_too_small =
534                 port_stats->rx_dropped_header_too_small;
535         drvs->rx_input_fifo_overflow_drop =
536                 port_stats->rx_input_fifo_overflow_drop;
537         drvs->rx_address_filtered = port_stats->rx_address_filtered;
538         drvs->rx_alignment_symbol_errors =
539                 port_stats->rx_alignment_symbol_errors;
540         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
541         drvs->tx_pauseframes = port_stats->tx_pauseframes;
542         drvs->tx_controlframes = port_stats->tx_controlframes;
543         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
544         drvs->jabber_events = port_stats->jabber_events;
545         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
546         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
547         drvs->forwarded_packets = rxf_stats->forwarded_packets;
548         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
549         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
550         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
551         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
552         if (be_roce_supported(adapter)) {
553                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
554                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
555                 drvs->rx_roce_frames = port_stats->roce_frames_received;
556                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
557                 drvs->roce_drops_payload_len =
558                         port_stats->roce_drops_payload_len;
559         }
560 }
561
562 static void populate_lancer_stats(struct be_adapter *adapter)
563 {
564         struct be_drv_stats *drvs = &adapter->drv_stats;
565         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
566
567         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
568         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
569         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
570         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
571         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
572         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
573         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
574         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
575         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
576         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
577         drvs->rx_dropped_tcp_length =
578                                 pport_stats->rx_dropped_invalid_tcp_length;
579         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
580         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
581         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
582         drvs->rx_dropped_header_too_small =
583                                 pport_stats->rx_dropped_header_too_small;
584         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
585         drvs->rx_address_filtered =
586                                         pport_stats->rx_address_filtered +
587                                         pport_stats->rx_vlan_filtered;
588         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
589         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
590         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
591         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
592         drvs->jabber_events = pport_stats->rx_jabbers;
593         drvs->forwarded_packets = pport_stats->num_forwards_lo;
594         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
595         drvs->rx_drops_too_many_frags =
596                                 pport_stats->rx_drops_too_many_frags_lo;
597 }
598
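/* Fold a 16-bit HW counter sample into a 32-bit accumulator: the low 16 bits
 * mirror the latest HW value and the high 16 bits count detected wrap-arounds.
 * E.g. with *acc == 0x0001fff0 and val == 0x0005, the counter is assumed to
 * have wrapped, so *acc becomes 0x00020005.
 */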
599 static void accumulate_16bit_val(u32 *acc, u16 val)
600 {
601 #define lo(x)                   (x & 0xFFFF)
602 #define hi(x)                   (x & 0xFFFF0000)
603         bool wrapped = val < lo(*acc);
604         u32 newacc = hi(*acc) + val;
605
606         if (wrapped)
607                 newacc += 65536;
608         ACCESS_ONCE(*acc) = newacc;
609 }
610
611 static void populate_erx_stats(struct be_adapter *adapter,
612                                struct be_rx_obj *rxo, u32 erx_stat)
613 {
614         if (!BEx_chip(adapter))
615                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
616         else
617                 /* the erx HW counter below can actually wrap around after
618                  * 65535; the driver accumulates it into a 32-bit value
619                  */
620                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
621                                      (u16)erx_stat);
622 }
623
624 void be_parse_stats(struct be_adapter *adapter)
625 {
626         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
627         struct be_rx_obj *rxo;
628         int i;
629         u32 erx_stat;
630
631         if (lancer_chip(adapter)) {
632                 populate_lancer_stats(adapter);
633         } else {
634                 if (BE2_chip(adapter))
635                         populate_be_v0_stats(adapter);
636                 else if (BE3_chip(adapter))
637                         /* for BE3 */
638                         populate_be_v1_stats(adapter);
639                 else
640                         populate_be_v2_stats(adapter);
641
642                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
643                 for_all_rx_queues(adapter, rxo, i) {
644                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
645                         populate_erx_stats(adapter, rxo, erx_stat);
646                 }
647         }
648 }
649
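/* Aggregate the per-queue SW counters (read under u64_stats sync) and the
 * FW-reported error counters into the rtnl_link_stats64 returned to the stack.
 */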
650 static void be_get_stats64(struct net_device *netdev,
651                            struct rtnl_link_stats64 *stats)
652 {
653         struct be_adapter *adapter = netdev_priv(netdev);
654         struct be_drv_stats *drvs = &adapter->drv_stats;
655         struct be_rx_obj *rxo;
656         struct be_tx_obj *txo;
657         u64 pkts, bytes;
658         unsigned int start;
659         int i;
660
661         for_all_rx_queues(adapter, rxo, i) {
662                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
663
664                 do {
665                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
666                         pkts = rx_stats(rxo)->rx_pkts;
667                         bytes = rx_stats(rxo)->rx_bytes;
668                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
669                 stats->rx_packets += pkts;
670                 stats->rx_bytes += bytes;
671                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
672                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
673                                         rx_stats(rxo)->rx_drops_no_frags;
674         }
675
676         for_all_tx_queues(adapter, txo, i) {
677                 const struct be_tx_stats *tx_stats = tx_stats(txo);
678
679                 do {
680                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
681                         pkts = tx_stats(txo)->tx_pkts;
682                         bytes = tx_stats(txo)->tx_bytes;
683                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
684                 stats->tx_packets += pkts;
685                 stats->tx_bytes += bytes;
686         }
687
688         /* bad pkts received */
689         stats->rx_errors = drvs->rx_crc_errors +
690                 drvs->rx_alignment_symbol_errors +
691                 drvs->rx_in_range_errors +
692                 drvs->rx_out_range_errors +
693                 drvs->rx_frame_too_long +
694                 drvs->rx_dropped_too_small +
695                 drvs->rx_dropped_too_short +
696                 drvs->rx_dropped_header_too_small +
697                 drvs->rx_dropped_tcp_length +
698                 drvs->rx_dropped_runt;
699
700         /* detailed rx errors */
701         stats->rx_length_errors = drvs->rx_in_range_errors +
702                 drvs->rx_out_range_errors +
703                 drvs->rx_frame_too_long;
704
705         stats->rx_crc_errors = drvs->rx_crc_errors;
706
707         /* frame alignment errors */
708         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
709
710         /* receiver fifo overrun */
711         /* drops_no_pbuf is not per i/f, it's per BE card */
712         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
713                                 drvs->rx_input_fifo_overflow_drop +
714                                 drvs->rx_drops_no_pbuf;
715 }
716
717 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
718 {
719         struct net_device *netdev = adapter->netdev;
720
721         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
722                 netif_carrier_off(netdev);
723                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
724         }
725
726         if (link_status)
727                 netif_carrier_on(netdev);
728         else
729                 netif_carrier_off(netdev);
730
731         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
732 }
733
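/* Length of the L2-L4 headers that get replicated in every TSO segment
 * (inner headers for encapsulated packets).
 */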
734 static int be_gso_hdr_len(struct sk_buff *skb)
735 {
736         if (skb->encapsulation)
737                 return skb_inner_transport_offset(skb) +
738                        inner_tcp_hdrlen(skb);
739         return skb_transport_offset(skb) + tcp_hdrlen(skb);
740 }
741
742 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
743 {
744         struct be_tx_stats *stats = tx_stats(txo);
745         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
746         /* Account for headers which get duplicated in TSO pkt */
747         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
748
749         u64_stats_update_begin(&stats->sync);
750         stats->tx_reqs++;
751         stats->tx_bytes += skb->len + dup_hdr_len;
752         stats->tx_pkts += tx_pkts;
753         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
754                 stats->tx_vxlan_offload_pkts += tx_pkts;
755         u64_stats_update_end(&stats->sync);
756 }
757
758 /* Returns number of WRBs needed for the skb */
759 static u32 skb_wrb_cnt(struct sk_buff *skb)
760 {
761         /* +1 for the header wrb */
762         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
763 }
764
765 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
766 {
767         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
768         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
769         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
770         wrb->rsvd0 = 0;
771 }
772
773 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
774  * to avoid the swap and shift/mask operations in wrb_fill().
775  */
776 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
777 {
778         wrb->frag_pa_hi = 0;
779         wrb->frag_pa_lo = 0;
780         wrb->frag_len = 0;
781         wrb->rsvd0 = 0;
782 }
783
784 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
785                                      struct sk_buff *skb)
786 {
787         u8 vlan_prio;
788         u16 vlan_tag;
789
790         vlan_tag = skb_vlan_tag_get(skb);
791         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
792         /* If vlan priority provided by OS is NOT in available bmap */
793         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
794                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
795                                 adapter->recommended_prio_bits;
796
797         return vlan_tag;
798 }
799
800 /* Used only for IP tunnel packets */
801 static u16 skb_inner_ip_proto(struct sk_buff *skb)
802 {
803         return (inner_ip_hdr(skb)->version == 4) ?
804                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
805 }
806
807 static u16 skb_ip_proto(struct sk_buff *skb)
808 {
809         return (ip_hdr(skb)->version == 4) ?
810                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
811 }
812
813 static inline bool be_is_txq_full(struct be_tx_obj *txo)
814 {
815         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
816 }
817
818 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
819 {
820         return atomic_read(&txo->q.used) < txo->q.len / 2;
821 }
822
823 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
824 {
825         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
826 }
827
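/* Derive the TX offload flags (LSO/LSO6, IP/TCP/UDP checksum, VLAN) from the
 * skb and record them in wrb_params for wrb_fill_hdr().
 */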
828 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
829                                        struct sk_buff *skb,
830                                        struct be_wrb_params *wrb_params)
831 {
832         u16 proto;
833
834         if (skb_is_gso(skb)) {
835                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
836                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
837                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
838                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
839         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
840                 if (skb->encapsulation) {
841                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
842                         proto = skb_inner_ip_proto(skb);
843                 } else {
844                         proto = skb_ip_proto(skb);
845                 }
846                 if (proto == IPPROTO_TCP)
847                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
848                 else if (proto == IPPROTO_UDP)
849                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
850         }
851
852         if (skb_vlan_tag_present(skb)) {
853                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
854                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
855         }
856
857         BE_WRB_F_SET(wrb_params->features, CRC, 1);
858 }
859
860 static void wrb_fill_hdr(struct be_adapter *adapter,
861                          struct be_eth_hdr_wrb *hdr,
862                          struct be_wrb_params *wrb_params,
863                          struct sk_buff *skb)
864 {
865         memset(hdr, 0, sizeof(*hdr));
866
867         SET_TX_WRB_HDR_BITS(crc, hdr,
868                             BE_WRB_F_GET(wrb_params->features, CRC));
869         SET_TX_WRB_HDR_BITS(ipcs, hdr,
870                             BE_WRB_F_GET(wrb_params->features, IPCS));
871         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
872                             BE_WRB_F_GET(wrb_params->features, TCPCS));
873         SET_TX_WRB_HDR_BITS(udpcs, hdr,
874                             BE_WRB_F_GET(wrb_params->features, UDPCS));
875
876         SET_TX_WRB_HDR_BITS(lso, hdr,
877                             BE_WRB_F_GET(wrb_params->features, LSO));
878         SET_TX_WRB_HDR_BITS(lso6, hdr,
879                             BE_WRB_F_GET(wrb_params->features, LSO6));
880         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
881
882         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
883          * hack is not needed, the evt bit is set while ringing DB.
884          */
885         SET_TX_WRB_HDR_BITS(event, hdr,
886                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
887         SET_TX_WRB_HDR_BITS(vlan, hdr,
888                             BE_WRB_F_GET(wrb_params->features, VLAN));
889         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
890
891         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
892         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
893         SET_TX_WRB_HDR_BITS(mgmt, hdr,
894                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
895 }
896
897 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
898                           bool unmap_single)
899 {
900         dma_addr_t dma;
901         u32 frag_len = le32_to_cpu(wrb->frag_len);
902
903
904         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
905                 (u64)le32_to_cpu(wrb->frag_pa_lo);
906         if (frag_len) {
907                 if (unmap_single)
908                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
909                 else
910                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
911         }
912 }
913
914 /* Grab a WRB header for xmit */
915 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
916 {
917         u32 head = txo->q.head;
918
919         queue_head_inc(&txo->q);
920         return head;
921 }
922
923 /* Set up the WRB header for xmit */
924 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
925                                 struct be_tx_obj *txo,
926                                 struct be_wrb_params *wrb_params,
927                                 struct sk_buff *skb, u16 head)
928 {
929         u32 num_frags = skb_wrb_cnt(skb);
930         struct be_queue_info *txq = &txo->q;
931         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
932
933         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
934         be_dws_cpu_to_le(hdr, sizeof(*hdr));
935
936         BUG_ON(txo->sent_skb_list[head]);
937         txo->sent_skb_list[head] = skb;
938         txo->last_req_hdr = head;
939         atomic_add(num_frags, &txq->used);
940         txo->last_req_wrb_cnt = num_frags;
941         txo->pend_wrb_cnt += num_frags;
942 }
943
944 /* Setup a WRB fragment (buffer descriptor) for xmit */
945 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
946                                  int len)
947 {
948         struct be_eth_wrb *wrb;
949         struct be_queue_info *txq = &txo->q;
950
951         wrb = queue_head_node(txq);
952         wrb_fill(wrb, busaddr, len);
953         queue_head_inc(txq);
954 }
955
956 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
957  * was invoked. The producer index is restored to the previous packet and the
958  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
959  */
960 static void be_xmit_restore(struct be_adapter *adapter,
961                             struct be_tx_obj *txo, u32 head, bool map_single,
962                             u32 copied)
963 {
964         struct device *dev;
965         struct be_eth_wrb *wrb;
966         struct be_queue_info *txq = &txo->q;
967
968         dev = &adapter->pdev->dev;
969         txq->head = head;
970
971         /* skip the first wrb (hdr); it's not mapped */
972         queue_head_inc(txq);
973         while (copied) {
974                 wrb = queue_head_node(txq);
975                 unmap_tx_frag(dev, wrb, map_single);
976                 map_single = false;
977                 copied -= le32_to_cpu(wrb->frag_len);
978                 queue_head_inc(txq);
979         }
980
981         txq->head = head;
982 }
983
984 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
985  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
986  * of WRBs used up by the packet.
987  */
988 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
989                            struct sk_buff *skb,
990                            struct be_wrb_params *wrb_params)
991 {
992         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
993         struct device *dev = &adapter->pdev->dev;
994         struct be_queue_info *txq = &txo->q;
995         bool map_single = false;
996         u32 head = txq->head;
997         dma_addr_t busaddr;
998         int len;
999
1000         head = be_tx_get_wrb_hdr(txo);
1001
1002         if (skb->len > skb->data_len) {
1003                 len = skb_headlen(skb);
1004
1005                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1006                 if (dma_mapping_error(dev, busaddr))
1007                         goto dma_err;
1008                 map_single = true;
1009                 be_tx_setup_wrb_frag(txo, busaddr, len);
1010                 copied += len;
1011         }
1012
1013         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1014                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1015                 len = skb_frag_size(frag);
1016
1017                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1018                 if (dma_mapping_error(dev, busaddr))
1019                         goto dma_err;
1020                 be_tx_setup_wrb_frag(txo, busaddr, len);
1021                 copied += len;
1022         }
1023
1024         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1025
1026         be_tx_stats_update(txo, skb);
1027         return wrb_cnt;
1028
1029 dma_err:
1030         adapter->drv_stats.dma_map_errors++;
1031         be_xmit_restore(adapter, txo, head, map_single, copied);
1032         return 0;
1033 }
1034
1035 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1036 {
1037         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1038 }
1039
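/* Insert the VLAN tag (and the outer QnQ VLAN, if configured) directly into
 * the packet data instead of relying on HW tagging; used by the xmit
 * workarounds below and by the OS2BMC path.
 */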
1040 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1041                                              struct sk_buff *skb,
1042                                              struct be_wrb_params
1043                                              *wrb_params)
1044 {
1045         u16 vlan_tag = 0;
1046
1047         skb = skb_share_check(skb, GFP_ATOMIC);
1048         if (unlikely(!skb))
1049                 return skb;
1050
1051         if (skb_vlan_tag_present(skb))
1052                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1053
1054         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1055                 if (!vlan_tag)
1056                         vlan_tag = adapter->pvid;
1057                 /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W to
1058                  * skip VLAN insertion
1059                  */
1060                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1061         }
1062
1063         if (vlan_tag) {
1064                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1065                                                 vlan_tag);
1066                 if (unlikely(!skb))
1067                         return skb;
1068                 skb->vlan_tci = 0;
1069         }
1070
1071         /* Insert the outer VLAN, if any */
1072         if (adapter->qnq_vid) {
1073                 vlan_tag = adapter->qnq_vid;
1074                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1075                                                 vlan_tag);
1076                 if (unlikely(!skb))
1077                         return skb;
1078                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1079         }
1080
1081         return skb;
1082 }
1083
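/* Detect IPv6 packets with an extension-header layout (2nd byte following the
 * IPv6 header is 0xff) that can trigger a TX stall on BE3; see
 * be_ipv6_tx_stall_chk().
 */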
1084 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1085 {
1086         struct ethhdr *eh = (struct ethhdr *)skb->data;
1087         u16 offset = ETH_HLEN;
1088
1089         if (eh->h_proto == htons(ETH_P_IPV6)) {
1090                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1091
1092                 offset += sizeof(struct ipv6hdr);
1093                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1094                     ip6h->nexthdr != NEXTHDR_UDP) {
1095                         struct ipv6_opt_hdr *ehdr =
1096                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1097
1098                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1099                         if (ehdr->hdrlen == 0xff)
1100                                 return true;
1101                 }
1102         }
1103         return false;
1104 }
1105
1106 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1107 {
1108         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1109 }
1110
1111 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1112 {
1113         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1114 }
1115
1116 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1117                                                   struct sk_buff *skb,
1118                                                   struct be_wrb_params
1119                                                   *wrb_params)
1120 {
1121         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1122         unsigned int eth_hdr_len;
1123         struct iphdr *ip;
1124
1125         /* For padded packets, BE HW modifies the tot_len field in the IP header
1126          * incorrectly when the VLAN tag is inserted by HW.
1127          * For padded packets, Lancer computes an incorrect checksum.
1128          */
1129         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1130                                                 VLAN_ETH_HLEN : ETH_HLEN;
1131         if (skb->len <= 60 &&
1132             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1133             is_ipv4_pkt(skb)) {
1134                 ip = (struct iphdr *)ip_hdr(skb);
1135                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1136         }
1137
1138         /* If vlan tag is already inlined in the packet, skip HW VLAN
1139          * tagging in pvid-tagging mode
1140          */
1141         if (be_pvid_tagging_enabled(adapter) &&
1142             veh->h_vlan_proto == htons(ETH_P_8021Q))
1143                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1144
1145         /* HW has a bug wherein it will calculate CSUM for VLAN
1146          * pkts even when checksum offload is disabled.
1147          * Manually insert the VLAN in the pkt in that case.
1148          */
1149         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1150             skb_vlan_tag_present(skb)) {
1151                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1152                 if (unlikely(!skb))
1153                         goto err;
1154         }
1155
1156         /* HW may lockup when VLAN HW tagging is requested on
1157          * certain ipv6 packets. Drop such pkts if the HW workaround to
1158          * skip HW tagging is not enabled by FW.
1159          */
1160         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1161                      (adapter->pvid || adapter->qnq_vid) &&
1162                      !qnq_async_evt_rcvd(adapter)))
1163                 goto tx_drop;
1164
1165         /* Insert the VLAN tag manually to prevent an ASIC lockup
1166          * that occurs when the ASIC inserts a VLAN tag into
1167          * certain ipv6 packets. Insert VLAN tags in the driver,
1168          * and set the event, completion and vlan bits accordingly
1169          * in the Tx WRB.
1170          */
1171         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1172             be_vlan_tag_tx_chk(adapter, skb)) {
1173                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1174                 if (unlikely(!skb))
1175                         goto err;
1176         }
1177
1178         return skb;
1179 tx_drop:
1180         dev_kfree_skb_any(skb);
1181 err:
1182         return NULL;
1183 }
1184
1185 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1186                                            struct sk_buff *skb,
1187                                            struct be_wrb_params *wrb_params)
1188 {
1189         int err;
1190
1191         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1192          * packets that are 32 bytes or less may cause a transmit stall
1193          * on that port. The workaround is to pad such packets
1194          * (len <= 32 bytes) to a minimum length of 36 bytes.
1195          */
1196         if (skb->len <= 32) {
1197                 if (skb_put_padto(skb, 36))
1198                         return NULL;
1199         }
1200
1201         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1202                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1203                 if (!skb)
1204                         return NULL;
1205         }
1206
1207         /* The stack can send us skbs with length greater than
1208          * what the HW can handle. Trim the extra bytes.
1209          */
1210         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1211         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1212         WARN_ON(err);
1213
1214         return skb;
1215 }
1216
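/* Mark the last queued WRB as eventable, pad the pending WRBs to an even
 * count with a dummy WRB on non-Lancer chips, and ring the TX doorbell for
 * all pending WRBs.
 */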
1217 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1218 {
1219         struct be_queue_info *txq = &txo->q;
1220         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1221
1222         /* Mark the last request eventable if it hasn't been marked already */
1223         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1224                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1225
1226         /* compose a dummy wrb if there are odd set of wrbs to notify */
1227         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1228                 wrb_fill_dummy(queue_head_node(txq));
1229                 queue_head_inc(txq);
1230                 atomic_inc(&txq->used);
1231                 txo->pend_wrb_cnt++;
1232                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1233                                            TX_HDR_WRB_NUM_SHIFT);
1234                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1235                                           TX_HDR_WRB_NUM_SHIFT);
1236         }
1237         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1238         txo->pend_wrb_cnt = 0;
1239 }
1240
1241 /* OS2BMC related */
1242
1243 #define DHCP_CLIENT_PORT        68
1244 #define DHCP_SERVER_PORT        67
1245 #define NET_BIOS_PORT1          137
1246 #define NET_BIOS_PORT2          138
1247 #define DHCPV6_RAS_PORT         547
1248
1249 #define is_mc_allowed_on_bmc(adapter, eh)       \
1250         (!is_multicast_filt_enabled(adapter) && \
1251          is_multicast_ether_addr(eh->h_dest) && \
1252          !is_broadcast_ether_addr(eh->h_dest))
1253
1254 #define is_bc_allowed_on_bmc(adapter, eh)       \
1255         (!is_broadcast_filt_enabled(adapter) && \
1256          is_broadcast_ether_addr(eh->h_dest))
1257
1258 #define is_arp_allowed_on_bmc(adapter, skb)     \
1259         (is_arp(skb) && is_arp_filt_enabled(adapter))
1260
1261 #define is_broadcast_packet(eh, adapter)        \
1262                 (is_multicast_ether_addr(eh->h_dest) && \
1263                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1264
1265 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1266
1267 #define is_arp_filt_enabled(adapter)    \
1268                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1269
1270 #define is_dhcp_client_filt_enabled(adapter)    \
1271                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1272
1273 #define is_dhcp_srvr_filt_enabled(adapter)      \
1274                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1275
1276 #define is_nbios_filt_enabled(adapter)  \
1277                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1278
1279 #define is_ipv6_na_filt_enabled(adapter)        \
1280                 (adapter->bmc_filt_mask &       \
1281                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1282
1283 #define is_ipv6_ra_filt_enabled(adapter)        \
1284                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1285
1286 #define is_ipv6_ras_filt_enabled(adapter)       \
1287                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1288
1289 #define is_broadcast_filt_enabled(adapter)      \
1290                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1291
1292 #define is_multicast_filt_enabled(adapter)      \
1293                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1294
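/* Decide whether a copy of this TX packet must also be forwarded to the BMC,
 * based on the BMC filter mask (broadcast/multicast, ARP, DHCP, NetBIOS and
 * IPv6 ND/RA traffic). If so, any VLAN tag is inserted inline first.
 */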
1295 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1296                                struct sk_buff **skb)
1297 {
1298         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1299         bool os2bmc = false;
1300
1301         if (!be_is_os2bmc_enabled(adapter))
1302                 goto done;
1303
1304         if (!is_multicast_ether_addr(eh->h_dest))
1305                 goto done;
1306
1307         if (is_mc_allowed_on_bmc(adapter, eh) ||
1308             is_bc_allowed_on_bmc(adapter, eh) ||
1309             is_arp_allowed_on_bmc(adapter, (*skb))) {
1310                 os2bmc = true;
1311                 goto done;
1312         }
1313
1314         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1315                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1316                 u8 nexthdr = hdr->nexthdr;
1317
1318                 if (nexthdr == IPPROTO_ICMPV6) {
1319                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1320
1321                         switch (icmp6->icmp6_type) {
1322                         case NDISC_ROUTER_ADVERTISEMENT:
1323                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1324                                 goto done;
1325                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1326                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1327                                 goto done;
1328                         default:
1329                                 break;
1330                         }
1331                 }
1332         }
1333
1334         if (is_udp_pkt((*skb))) {
1335                 struct udphdr *udp = udp_hdr((*skb));
1336
1337                 switch (ntohs(udp->dest)) {
1338                 case DHCP_CLIENT_PORT:
1339                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1340                         goto done;
1341                 case DHCP_SERVER_PORT:
1342                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1343                         goto done;
1344                 case NET_BIOS_PORT1:
1345                 case NET_BIOS_PORT2:
1346                         os2bmc = is_nbios_filt_enabled(adapter);
1347                         goto done;
1348                 case DHCPV6_RAS_PORT:
1349                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1350                         goto done;
1351                 default:
1352                         break;
1353                 }
1354         }
1355 done:
1356         /* For VLAN packets destined to the BMC, the ASIC expects
1357          * the vlan tag to be inline in the packet.
1358          */
1359         if (os2bmc)
1360                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1361
1362         return os2bmc;
1363 }
1364
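/* Transmit entry point: apply chip-specific workarounds, map the skb into
 * WRBs, optionally enqueue a second copy for the BMC (OS2BMC), and ring the
 * TX doorbell once xmit_more indicates no further packets are pending.
 */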
1365 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1366 {
1367         struct be_adapter *adapter = netdev_priv(netdev);
1368         u16 q_idx = skb_get_queue_mapping(skb);
1369         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1370         struct be_wrb_params wrb_params = { 0 };
1371         bool flush = !skb->xmit_more;
1372         u16 wrb_cnt;
1373
1374         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1375         if (unlikely(!skb))
1376                 goto drop;
1377
1378         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1379
1380         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1381         if (unlikely(!wrb_cnt)) {
1382                 dev_kfree_skb_any(skb);
1383                 goto drop;
1384         }
1385
1386         /* If OS2BMC is enabled and the packet is destined to the BMC,
1387          * enqueue the packet a second time with the mgmt bit set.
1388          */
1389         if (be_send_pkt_to_bmc(adapter, &skb)) {
1390                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1391                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1392                 if (unlikely(!wrb_cnt))
1393                         goto drop;
1394                 else
1395                         skb_get(skb);
1396         }
1397
1398         if (be_is_txq_full(txo)) {
1399                 netif_stop_subqueue(netdev, q_idx);
1400                 tx_stats(txo)->tx_stops++;
1401         }
1402
1403         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1404                 be_xmit_flush(adapter, txo);
1405
1406         return NETDEV_TX_OK;
1407 drop:
1408         tx_stats(txo)->tx_drv_drops++;
1409         /* Flush the already enqueued tx requests */
1410         if (flush && txo->pend_wrb_cnt)
1411                 be_xmit_flush(adapter, txo);
1412
1413         return NETDEV_TX_OK;
1414 }
1415
1416 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1417 {
1418         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1419                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1420 }
1421
1422 static int be_set_vlan_promisc(struct be_adapter *adapter)
1423 {
1424         struct device *dev = &adapter->pdev->dev;
1425         int status;
1426
1427         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1428                 return 0;
1429
1430         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1431         if (!status) {
1432                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1433                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1434         } else {
1435                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1436         }
1437         return status;
1438 }
1439
1440 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1441 {
1442         struct device *dev = &adapter->pdev->dev;
1443         int status;
1444
1445         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1446         if (!status) {
1447                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1448                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1449         }
1450         return status;
1451 }
1452
1453 /*
1454  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1455  * If the user configures more, place BE in vlan promiscuous mode.
1456  */
1457 static int be_vid_config(struct be_adapter *adapter)
1458 {
1459         struct device *dev = &adapter->pdev->dev;
1460         u16 vids[BE_NUM_VLANS_SUPPORTED];
1461         u16 num = 0, i = 0;
1462         int status = 0;
1463
1464         /* No need to change the VLAN state if the I/F is in promiscuous mode */
1465         if (adapter->netdev->flags & IFF_PROMISC)
1466                 return 0;
1467
1468         if (adapter->vlans_added > be_max_vlans(adapter))
1469                 return be_set_vlan_promisc(adapter);
1470
1471         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1472                 status = be_clear_vlan_promisc(adapter);
1473                 if (status)
1474                         return status;
1475         }
1476         /* Construct VLAN Table to give to HW */
1477         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1478                 vids[num++] = cpu_to_le16(i);
1479
1480         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1481         if (status) {
1482                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1483                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1484                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1485                     addl_status(status) ==
1486                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1487                         return be_set_vlan_promisc(adapter);
1488         }
1489         return status;
1490 }
1491
1492 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1493 {
1494         struct be_adapter *adapter = netdev_priv(netdev);
1495         int status = 0;
1496
1497         mutex_lock(&adapter->rx_filter_lock);
1498
1499         /* Packets with VID 0 are always received by Lancer by default */
1500         if (lancer_chip(adapter) && vid == 0)
1501                 goto done;
1502
1503         if (test_bit(vid, adapter->vids))
1504                 goto done;
1505
1506         set_bit(vid, adapter->vids);
1507         adapter->vlans_added++;
1508
1509         status = be_vid_config(adapter);
1510 done:
1511         mutex_unlock(&adapter->rx_filter_lock);
1512         return status;
1513 }
1514
1515 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1516 {
1517         struct be_adapter *adapter = netdev_priv(netdev);
1518         int status = 0;
1519
1520         mutex_lock(&adapter->rx_filter_lock);
1521
1522         /* Packets with VID 0 are always received by Lancer by default */
1523         if (lancer_chip(adapter) && vid == 0)
1524                 goto done;
1525
1526         if (!test_bit(vid, adapter->vids))
1527                 goto done;
1528
1529         clear_bit(vid, adapter->vids);
1530         adapter->vlans_added--;
1531
1532         status = be_vid_config(adapter);
1533 done:
1534         mutex_unlock(&adapter->rx_filter_lock);
1535         return status;
1536 }
1537
1538 static void be_set_all_promisc(struct be_adapter *adapter)
1539 {
1540         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1541         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1542 }
1543
1544 static void be_set_mc_promisc(struct be_adapter *adapter)
1545 {
1546         int status;
1547
1548         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1549                 return;
1550
1551         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1552         if (!status)
1553                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1554 }
1555
1556 static void be_set_uc_promisc(struct be_adapter *adapter)
1557 {
1558         int status;
1559
1560         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1561                 return;
1562
1563         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1564         if (!status)
1565                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1566 }
1567
1568 static void be_clear_uc_promisc(struct be_adapter *adapter)
1569 {
1570         int status;
1571
1572         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1573                 return;
1574
1575         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1576         if (!status)
1577                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1578 }
1579
1580 /* The two functions below are the sync/unsync callbacks for
1581  * __dev_mc_sync()/__dev_uc_sync(). A single callback handles both sync
1582  * and unsync; it doesn't add/remove addresses, but only flags that the
1583  * uc/mc list changed. The entire uc/mc list is programmed in be_set_rx_mode().
1584  */
1585 static int be_uc_list_update(struct net_device *netdev,
1586                              const unsigned char *addr)
1587 {
1588         struct be_adapter *adapter = netdev_priv(netdev);
1589
1590         adapter->update_uc_list = true;
1591         return 0;
1592 }
1593
1594 static int be_mc_list_update(struct net_device *netdev,
1595                              const unsigned char *addr)
1596 {
1597         struct be_adapter *adapter = netdev_priv(netdev);
1598
1599         adapter->update_mc_list = true;
1600         return 0;
1601 }
1602
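/* Rebuilds the HW multicast filter. The mc-list is snapshotted into
 * adapter->mc_list under netif_addr_lock_bh(); the actual RX_FILTER command is
 * deferred until after the lock is released. If IFF_ALLMULTI is set or the
 * list exceeds be_max_mc(), multicast-promiscuous mode is used instead of
 * programming the list.
 */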
1603 static void be_set_mc_list(struct be_adapter *adapter)
1604 {
1605         struct net_device *netdev = adapter->netdev;
1606         struct netdev_hw_addr *ha;
1607         bool mc_promisc = false;
1608         int status;
1609
1610         netif_addr_lock_bh(netdev);
1611         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1612
1613         if (netdev->flags & IFF_PROMISC) {
1614                 adapter->update_mc_list = false;
1615         } else if (netdev->flags & IFF_ALLMULTI ||
1616                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1617                 /* Enable multicast promisc if the number of addresses
1618                  * configured exceeds what we support
1619                  */
1620                 mc_promisc = true;
1621                 adapter->update_mc_list = false;
1622         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1623                 /* Update mc-list unconditionally if the iface was previously
1624                  * in mc-promisc mode and now is out of that mode.
1625                  */
1626                 adapter->update_mc_list = true;
1627         }
1628
1629         if (adapter->update_mc_list) {
1630                 int i = 0;
1631
1632                 /* cache the mc-list in adapter */
1633                 netdev_for_each_mc_addr(ha, netdev) {
1634                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1635                         i++;
1636                 }
1637                 adapter->mc_count = netdev_mc_count(netdev);
1638         }
1639         netif_addr_unlock_bh(netdev);
1640
1641         if (mc_promisc) {
1642                 be_set_mc_promisc(adapter);
1643         } else if (adapter->update_mc_list) {
1644                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1645                 if (!status)
1646                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1647                 else
1648                         be_set_mc_promisc(adapter);
1649
1650                 adapter->update_mc_list = false;
1651         }
1652 }
1653
1654 static void be_clear_mc_list(struct be_adapter *adapter)
1655 {
1656         struct net_device *netdev = adapter->netdev;
1657
1658         __dev_mc_unsync(netdev, NULL);
1659         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1660         adapter->mc_count = 0;
1661 }
1662
1663 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1664 {
1665         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1666                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1667                 return 0;
1668         }
1669
1670         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1671                                adapter->if_handle,
1672                                &adapter->pmac_id[uc_idx + 1], 0);
1673 }
1674
1675 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1676 {
1677         if (pmac_id == adapter->pmac_id[0])
1678                 return;
1679
1680         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1681 }
1682
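/* Rebuilds the HW unicast MAC filter. Note the index offset: uc_list[i] is
 * backed by pmac_id[i + 1], because pmac_id[0] is reserved for the primary MAC
 * (be_uc_mac_add() simply aliases pmac_id[0] when an entry equals dev_mac).
 * When the list changes, all previously added entries are deleted and the
 * current list is re-added; more than be_max_uc() - 1 entries switches the
 * interface to unicast-promiscuous mode instead.
 */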
1683 static void be_set_uc_list(struct be_adapter *adapter)
1684 {
1685         struct net_device *netdev = adapter->netdev;
1686         struct netdev_hw_addr *ha;
1687         bool uc_promisc = false;
1688         int curr_uc_macs = 0, i;
1689
1690         netif_addr_lock_bh(netdev);
1691         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1692
1693         if (netdev->flags & IFF_PROMISC) {
1694                 adapter->update_uc_list = false;
1695         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1696                 uc_promisc = true;
1697                 adapter->update_uc_list = false;
1698         } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1699                 /* Update uc-list unconditionally if the iface was previously
1700                  * in uc-promisc mode and now is out of that mode.
1701                  */
1702                 adapter->update_uc_list = true;
1703         }
1704
1705         if (adapter->update_uc_list) {
1706                 /* cache the uc-list in adapter array */
1707                 i = 0;
1708                 netdev_for_each_uc_addr(ha, netdev) {
1709                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1710                         i++;
1711                 }
1712                 curr_uc_macs = netdev_uc_count(netdev);
1713         }
1714         netif_addr_unlock_bh(netdev);
1715
1716         if (uc_promisc) {
1717                 be_set_uc_promisc(adapter);
1718         } else if (adapter->update_uc_list) {
1719                 be_clear_uc_promisc(adapter);
1720
1721                 for (i = 0; i < adapter->uc_macs; i++)
1722                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1723
1724                 for (i = 0; i < curr_uc_macs; i++)
1725                         be_uc_mac_add(adapter, i);
1726                 adapter->uc_macs = curr_uc_macs;
1727                 adapter->update_uc_list = false;
1728         }
1729 }
1730
1731 static void be_clear_uc_list(struct be_adapter *adapter)
1732 {
1733         struct net_device *netdev = adapter->netdev;
1734         int i;
1735
1736         __dev_uc_unsync(netdev, NULL);
1737         for (i = 0; i < adapter->uc_macs; i++)
1738                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1739
1740         adapter->uc_macs = 0;
1741 }
1742
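/* Programs the complete RX filter state under rx_filter_lock: promiscuous mode
 * is handled first (the VLAN list is re-programmed when the interface leaves
 * promiscuous mode), followed by the unicast and multicast lists.
 */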
1743 static void __be_set_rx_mode(struct be_adapter *adapter)
1744 {
1745         struct net_device *netdev = adapter->netdev;
1746
1747         mutex_lock(&adapter->rx_filter_lock);
1748
1749         if (netdev->flags & IFF_PROMISC) {
1750                 if (!be_in_all_promisc(adapter))
1751                         be_set_all_promisc(adapter);
1752         } else if (be_in_all_promisc(adapter)) {
1753                 /* We need to re-program the vlan-list or clear
1754                  * vlan-promisc mode (if needed) when the interface
1755                  * comes out of promisc mode.
1756                  */
1757                 be_vid_config(adapter);
1758         }
1759
1760         be_set_uc_list(adapter);
1761         be_set_mc_list(adapter);
1762
1763         mutex_unlock(&adapter->rx_filter_lock);
1764 }
1765
1766 static void be_work_set_rx_mode(struct work_struct *work)
1767 {
1768         struct be_cmd_work *cmd_work =
1769                                 container_of(work, struct be_cmd_work, work);
1770
1771         __be_set_rx_mode(cmd_work->adapter);
1772         kfree(cmd_work);
1773 }
1774
1775 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1776 {
1777         struct be_adapter *adapter = netdev_priv(netdev);
1778         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1779         int status;
1780
1781         if (!sriov_enabled(adapter))
1782                 return -EPERM;
1783
1784         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1785                 return -EINVAL;
1786
1787         /* Proceed further only if the user-provided MAC is different
1788          * from the active MAC
1789          */
1790         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1791                 return 0;
1792
1793         if (BEx_chip(adapter)) {
1794                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1795                                 vf + 1);
1796
1797                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1798                                          &vf_cfg->pmac_id, vf + 1);
1799         } else {
1800                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1801                                         vf + 1);
1802         }
1803
1804         if (status) {
1805                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1806                         mac, vf, status);
1807                 return be_cmd_status(status);
1808         }
1809
1810         ether_addr_copy(vf_cfg->mac_addr, mac);
1811
1812         return 0;
1813 }
1814
1815 static int be_get_vf_config(struct net_device *netdev, int vf,
1816                             struct ifla_vf_info *vi)
1817 {
1818         struct be_adapter *adapter = netdev_priv(netdev);
1819         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1820
1821         if (!sriov_enabled(adapter))
1822                 return -EPERM;
1823
1824         if (vf >= adapter->num_vfs)
1825                 return -EINVAL;
1826
1827         vi->vf = vf;
1828         vi->max_tx_rate = vf_cfg->tx_rate;
1829         vi->min_tx_rate = 0;
1830         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1831         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1832         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1833         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1834         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1835
1836         return 0;
1837 }
1838
1839 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1840 {
1841         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1842         u16 vids[BE_NUM_VLANS_SUPPORTED];
1843         int vf_if_id = vf_cfg->if_handle;
1844         int status;
1845
1846         /* Enable Transparent VLAN Tagging */
1847         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1848         if (status)
1849                 return status;
1850
1851         /* With TVT enabled, clear any pre-programmed VLAN filters on the VF */
1852         vids[0] = 0;
1853         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1854         if (!status)
1855                 dev_info(&adapter->pdev->dev,
1856                          "Cleared guest VLANs on VF%d", vf);
1857
1858         /* After TVT is enabled, disallow VFs to program VLAN filters */
1859         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1860                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1861                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1862                 if (!status)
1863                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1864         }
1865         return 0;
1866 }
1867
1868 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1869 {
1870         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1871         struct device *dev = &adapter->pdev->dev;
1872         int status;
1873
1874         /* Reset Transparent VLAN Tagging. */
1875         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1876                                        vf_cfg->if_handle, 0, 0);
1877         if (status)
1878                 return status;
1879
1880         /* Allow VFs to program VLAN filtering */
1881         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1882                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1883                                                   BE_PRIV_FILTMGMT, vf + 1);
1884                 if (!status) {
1885                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1886                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1887                 }
1888         }
1889
1890         dev_info(dev,
1891                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1892         return 0;
1893 }
1894
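/* ndo_set_vf_vlan: a non-zero vlan/qos enables transparent VLAN tagging (TVT)
 * for the VF, with the tag encoded as (vlan | qos << VLAN_PRIO_SHIFT); for
 * example vlan=100, qos=3 gives 100 | (3 << 13) = 24676. A zero vlan and qos
 * clears TVT and restores the VF's FILTMGMT privilege.
 */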
1895 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1896                           __be16 vlan_proto)
1897 {
1898         struct be_adapter *adapter = netdev_priv(netdev);
1899         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1900         int status;
1901
1902         if (!sriov_enabled(adapter))
1903                 return -EPERM;
1904
1905         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1906                 return -EINVAL;
1907
1908         if (vlan_proto != htons(ETH_P_8021Q))
1909                 return -EPROTONOSUPPORT;
1910
1911         if (vlan || qos) {
1912                 vlan |= qos << VLAN_PRIO_SHIFT;
1913                 status = be_set_vf_tvt(adapter, vf, vlan);
1914         } else {
1915                 status = be_clear_vf_tvt(adapter, vf);
1916         }
1917
1918         if (status) {
1919                 dev_err(&adapter->pdev->dev,
1920                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1921                         status);
1922                 return be_cmd_status(status);
1923         }
1924
1925         vf_cfg->vlan_tag = vlan;
1926         return 0;
1927 }
1928
1929 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1930                              int min_tx_rate, int max_tx_rate)
1931 {
1932         struct be_adapter *adapter = netdev_priv(netdev);
1933         struct device *dev = &adapter->pdev->dev;
1934         int percent_rate, status = 0;
1935         u16 link_speed = 0;
1936         u8 link_status;
1937
1938         if (!sriov_enabled(adapter))
1939                 return -EPERM;
1940
1941         if (vf >= adapter->num_vfs)
1942                 return -EINVAL;
1943
1944         if (min_tx_rate)
1945                 return -EINVAL;
1946
1947         if (!max_tx_rate)
1948                 goto config_qos;
1949
1950         status = be_cmd_link_status_query(adapter, &link_speed,
1951                                           &link_status, 0);
1952         if (status)
1953                 goto err;
1954
1955         if (!link_status) {
1956                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1957                 status = -ENETDOWN;
1958                 goto err;
1959         }
1960
1961         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1962                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1963                         link_speed);
1964                 status = -EINVAL;
1965                 goto err;
1966         }
1967
1968         /* On Skyhawk the QOS setting must be done only as a % value */
1969         percent_rate = link_speed / 100;
1970         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1971                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1972                         percent_rate);
1973                 status = -EINVAL;
1974                 goto err;
1975         }
1976
1977 config_qos:
1978         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1979         if (status)
1980                 goto err;
1981
1982         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1983         return 0;
1984
1985 err:
1986         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1987                 max_tx_rate, vf);
1988         return be_cmd_status(status);
1989 }
1990
1991 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1992                                 int link_state)
1993 {
1994         struct be_adapter *adapter = netdev_priv(netdev);
1995         int status;
1996
1997         if (!sriov_enabled(adapter))
1998                 return -EPERM;
1999
2000         if (vf >= adapter->num_vfs)
2001                 return -EINVAL;
2002
2003         status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
2004         if (status) {
2005                 dev_err(&adapter->pdev->dev,
2006                         "Link state change on VF %d failed: %#x\n", vf, status);
2007                 return be_cmd_status(status);
2008         }
2009
2010         adapter->vf_cfg[vf].plink_tracking = link_state;
2011
2012         return 0;
2013 }
2014
2015 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2016 {
2017         struct be_adapter *adapter = netdev_priv(netdev);
2018         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2019         u8 spoofchk;
2020         int status;
2021
2022         if (!sriov_enabled(adapter))
2023                 return -EPERM;
2024
2025         if (vf >= adapter->num_vfs)
2026                 return -EINVAL;
2027
2028         if (BEx_chip(adapter))
2029                 return -EOPNOTSUPP;
2030
2031         if (enable == vf_cfg->spoofchk)
2032                 return 0;
2033
2034         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2035
2036         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2037                                        0, spoofchk);
2038         if (status) {
2039                 dev_err(&adapter->pdev->dev,
2040                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2041                 return be_cmd_status(status);
2042         }
2043
2044         vf_cfg->spoofchk = enable;
2045         return 0;
2046 }
2047
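/* Adaptive interrupt coalescing (AIC): be_get_new_eqd() samples the per-EQ
 * RX/TX packet counters, computes a packets-per-second rate over the elapsed
 * window and maps it to an EQ delay: eqd = (pps / 15000) << 2, clamped to
 * [min_eqd, max_eqd], with values below 8 forced to 0 (no coalescing).
 * Worked example: 450,000 pkts/s -> (450000 / 15000) << 2 = 120, while
 * 15,000 pkts/s -> 4, which is below 8 and therefore becomes 0. The eqd value
 * is the raw delay handed to the FW; see be_eqd_update() for the conversion
 * to the delay multiplier.
 */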
2048 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2049                           ulong now)
2050 {
2051         aic->rx_pkts_prev = rx_pkts;
2052         aic->tx_reqs_prev = tx_pkts;
2053         aic->jiffies = now;
2054 }
2055
2056 static int be_get_new_eqd(struct be_eq_obj *eqo)
2057 {
2058         struct be_adapter *adapter = eqo->adapter;
2059         int eqd, start;
2060         struct be_aic_obj *aic;
2061         struct be_rx_obj *rxo;
2062         struct be_tx_obj *txo;
2063         u64 rx_pkts = 0, tx_pkts = 0;
2064         ulong now;
2065         u32 pps, delta;
2066         int i;
2067
2068         aic = &adapter->aic_obj[eqo->idx];
2069         if (!aic->enable) {
2070                 if (aic->jiffies)
2071                         aic->jiffies = 0;
2072                 eqd = aic->et_eqd;
2073                 return eqd;
2074         }
2075
2076         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2077                 do {
2078                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2079                         rx_pkts += rxo->stats.rx_pkts;
2080                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2081         }
2082
2083         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2084                 do {
2085                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2086                         tx_pkts += txo->stats.tx_reqs;
2087                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2088         }
2089
2090         /* Skip if the counters wrapped around or this is the first calculation */
2091         now = jiffies;
2092         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2093             rx_pkts < aic->rx_pkts_prev ||
2094             tx_pkts < aic->tx_reqs_prev) {
2095                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2096                 return aic->prev_eqd;
2097         }
2098
2099         delta = jiffies_to_msecs(now - aic->jiffies);
2100         if (delta == 0)
2101                 return aic->prev_eqd;
2102
2103         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2104                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2105         eqd = (pps / 15000) << 2;
2106
2107         if (eqd < 8)
2108                 eqd = 0;
2109         eqd = min_t(u32, eqd, aic->max_eqd);
2110         eqd = max_t(u32, eqd, aic->min_eqd);
2111
2112         be_aic_update(aic, rx_pkts, tx_pkts, now);
2113
2114         return eqd;
2115 }
2116
2117 /* For Skyhawk-R only */
2118 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2119 {
2120         struct be_adapter *adapter = eqo->adapter;
2121         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2122         ulong now = jiffies;
2123         int eqd;
2124         u32 mult_enc;
2125
2126         if (!aic->enable)
2127                 return 0;
2128
2129         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2130                 eqd = aic->prev_eqd;
2131         else
2132                 eqd = be_get_new_eqd(eqo);
2133
2134         if (eqd > 100)
2135                 mult_enc = R2I_DLY_ENC_1;
2136         else if (eqd > 60)
2137                 mult_enc = R2I_DLY_ENC_2;
2138         else if (eqd > 20)
2139                 mult_enc = R2I_DLY_ENC_3;
2140         else
2141                 mult_enc = R2I_DLY_ENC_0;
2142
2143         aic->prev_eqd = eqd;
2144
2145         return mult_enc;
2146 }
2147
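/* Pushes new EQ delay values to the FW. The computed eqd is converted to the
 * delay multiplier expected by MODIFY_EQ_DELAY as (eqd * 65) / 100 (e.g.
 * eqd 120 -> multiplier 78), and a command is issued only for EQs whose value
 * actually changed (or unconditionally when force_update is set).
 */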
2148 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2149 {
2150         struct be_set_eqd set_eqd[MAX_EVT_QS];
2151         struct be_aic_obj *aic;
2152         struct be_eq_obj *eqo;
2153         int i, num = 0, eqd;
2154
2155         for_all_evt_queues(adapter, eqo, i) {
2156                 aic = &adapter->aic_obj[eqo->idx];
2157                 eqd = be_get_new_eqd(eqo);
2158                 if (force_update || eqd != aic->prev_eqd) {
2159                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2160                         set_eqd[num].eq_id = eqo->q.id;
2161                         aic->prev_eqd = eqd;
2162                         num++;
2163                 }
2164         }
2165
2166         if (num)
2167                 be_cmd_modify_eqd(adapter, set_eqd, num);
2168 }
2169
2170 static void be_rx_stats_update(struct be_rx_obj *rxo,
2171                                struct be_rx_compl_info *rxcp)
2172 {
2173         struct be_rx_stats *stats = rx_stats(rxo);
2174
2175         u64_stats_update_begin(&stats->sync);
2176         stats->rx_compl++;
2177         stats->rx_bytes += rxcp->pkt_size;
2178         stats->rx_pkts++;
2179         if (rxcp->tunneled)
2180                 stats->rx_vxlan_offload_pkts++;
2181         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2182                 stats->rx_mcast_pkts++;
2183         if (rxcp->err)
2184                 stats->rx_compl_err++;
2185         u64_stats_update_end(&stats->sync);
2186 }
2187
2188 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2189 {
2190         /* The L4 checksum is not reliable for non-TCP/UDP packets.
2191          * Also ignore ipcksm for IPv6 packets.
2192          */
2193         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2194                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2195 }
2196
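/* Pops the RX page_info at the queue tail. A fragment marked last_frag owns
 * the DMA mapping for the whole compound page and is unmapped here; any other
 * fragment is only synced for CPU access (rx_frag_size bytes), since the
 * page-level unmap happens when its last fragment is consumed. Each call
 * consumes one RXQ entry (tail is advanced and the used count decremented).
 */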
2197 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2198 {
2199         struct be_adapter *adapter = rxo->adapter;
2200         struct be_rx_page_info *rx_page_info;
2201         struct be_queue_info *rxq = &rxo->q;
2202         u32 frag_idx = rxq->tail;
2203
2204         rx_page_info = &rxo->page_info_tbl[frag_idx];
2205         BUG_ON(!rx_page_info->page);
2206
2207         if (rx_page_info->last_frag) {
2208                 dma_unmap_page(&adapter->pdev->dev,
2209                                dma_unmap_addr(rx_page_info, bus),
2210                                adapter->big_page_size, DMA_FROM_DEVICE);
2211                 rx_page_info->last_frag = false;
2212         } else {
2213                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2214                                         dma_unmap_addr(rx_page_info, bus),
2215                                         rx_frag_size, DMA_FROM_DEVICE);
2216         }
2217
2218         queue_tail_inc(rxq);
2219         atomic_dec(&rxq->used);
2220         return rx_page_info;
2221 }
2222
2223 /* Throw away the data in the Rx completion */
2224 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2225                                 struct be_rx_compl_info *rxcp)
2226 {
2227         struct be_rx_page_info *page_info;
2228         u16 i, num_rcvd = rxcp->num_rcvd;
2229
2230         for (i = 0; i < num_rcvd; i++) {
2231                 page_info = get_rx_page_info(rxo);
2232                 put_page(page_info->page);
2233                 memset(page_info, 0, sizeof(*page_info));
2234         }
2235 }
2236
2237 /*
2238  * skb_fill_rx_data forms a complete skb for an ether frame
2239  * indicated by rxcp.
2240  */
2241 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2242                              struct be_rx_compl_info *rxcp)
2243 {
2244         struct be_rx_page_info *page_info;
2245         u16 i, j;
2246         u16 hdr_len, curr_frag_len, remaining;
2247         u8 *start;
2248
2249         page_info = get_rx_page_info(rxo);
2250         start = page_address(page_info->page) + page_info->page_offset;
2251         prefetch(start);
2252
2253         /* Copy data in the first descriptor of this completion */
2254         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2255
2256         skb->len = curr_frag_len;
2257         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2258                 memcpy(skb->data, start, curr_frag_len);
2259                 /* Complete packet has now been moved to data */
2260                 put_page(page_info->page);
2261                 skb->data_len = 0;
2262                 skb->tail += curr_frag_len;
2263         } else {
2264                 hdr_len = ETH_HLEN;
2265                 memcpy(skb->data, start, hdr_len);
2266                 skb_shinfo(skb)->nr_frags = 1;
2267                 skb_frag_set_page(skb, 0, page_info->page);
2268                 skb_shinfo(skb)->frags[0].page_offset =
2269                                         page_info->page_offset + hdr_len;
2270                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2271                                   curr_frag_len - hdr_len);
2272                 skb->data_len = curr_frag_len - hdr_len;
2273                 skb->truesize += rx_frag_size;
2274                 skb->tail += hdr_len;
2275         }
2276         page_info->page = NULL;
2277
2278         if (rxcp->pkt_size <= rx_frag_size) {
2279                 BUG_ON(rxcp->num_rcvd != 1);
2280                 return;
2281         }
2282
2283         /* More frags present for this completion */
2284         remaining = rxcp->pkt_size - curr_frag_len;
2285         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2286                 page_info = get_rx_page_info(rxo);
2287                 curr_frag_len = min(remaining, rx_frag_size);
2288
2289                 /* Coalesce all frags from the same physical page in one slot */
2290                 if (page_info->page_offset == 0) {
2291                         /* Fresh page */
2292                         j++;
2293                         skb_frag_set_page(skb, j, page_info->page);
2294                         skb_shinfo(skb)->frags[j].page_offset =
2295                                                         page_info->page_offset;
2296                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2297                         skb_shinfo(skb)->nr_frags++;
2298                 } else {
2299                         put_page(page_info->page);
2300                 }
2301
2302                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2303                 skb->len += curr_frag_len;
2304                 skb->data_len += curr_frag_len;
2305                 skb->truesize += rx_frag_size;
2306                 remaining -= curr_frag_len;
2307                 page_info->page = NULL;
2308         }
2309         BUG_ON(j > MAX_SKB_FRAGS);
2310 }
2311
2312 /* Process the RX completion indicated by rxcp when GRO is disabled */
2313 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2314                                 struct be_rx_compl_info *rxcp)
2315 {
2316         struct be_adapter *adapter = rxo->adapter;
2317         struct net_device *netdev = adapter->netdev;
2318         struct sk_buff *skb;
2319
2320         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2321         if (unlikely(!skb)) {
2322                 rx_stats(rxo)->rx_drops_no_skbs++;
2323                 be_rx_compl_discard(rxo, rxcp);
2324                 return;
2325         }
2326
2327         skb_fill_rx_data(rxo, skb, rxcp);
2328
2329         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2330                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2331         else
2332                 skb_checksum_none_assert(skb);
2333
2334         skb->protocol = eth_type_trans(skb, netdev);
2335         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2336         if (netdev->features & NETIF_F_RXHASH)
2337                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2338
2339         skb->csum_level = rxcp->tunneled;
2340         skb_mark_napi_id(skb, napi);
2341
2342         if (rxcp->vlanf)
2343                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2344
2345         netif_receive_skb(skb);
2346 }
2347
2348 /* Process the RX completion indicated by rxcp when GRO is enabled */
2349 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2350                                     struct napi_struct *napi,
2351                                     struct be_rx_compl_info *rxcp)
2352 {
2353         struct be_adapter *adapter = rxo->adapter;
2354         struct be_rx_page_info *page_info;
2355         struct sk_buff *skb = NULL;
2356         u16 remaining, curr_frag_len;
2357         u16 i, j;
2358
2359         skb = napi_get_frags(napi);
2360         if (!skb) {
2361                 be_rx_compl_discard(rxo, rxcp);
2362                 return;
2363         }
2364
2365         remaining = rxcp->pkt_size;
2366         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2367                 page_info = get_rx_page_info(rxo);
2368
2369                 curr_frag_len = min(remaining, rx_frag_size);
2370
2371                 /* Coalesce all frags from the same physical page in one slot */
2372                 if (i == 0 || page_info->page_offset == 0) {
2373                         /* First frag or Fresh page */
2374                         j++;
2375                         skb_frag_set_page(skb, j, page_info->page);
2376                         skb_shinfo(skb)->frags[j].page_offset =
2377                                                         page_info->page_offset;
2378                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2379                 } else {
2380                         put_page(page_info->page);
2381                 }
2382                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2383                 skb->truesize += rx_frag_size;
2384                 remaining -= curr_frag_len;
2385                 memset(page_info, 0, sizeof(*page_info));
2386         }
2387         BUG_ON(j > MAX_SKB_FRAGS);
2388
2389         skb_shinfo(skb)->nr_frags = j + 1;
2390         skb->len = rxcp->pkt_size;
2391         skb->data_len = rxcp->pkt_size;
2392         skb->ip_summed = CHECKSUM_UNNECESSARY;
2393         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2394         if (adapter->netdev->features & NETIF_F_RXHASH)
2395                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2396
2397         skb->csum_level = rxcp->tunneled;
2398
2399         if (rxcp->vlanf)
2400                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2401
2402         napi_gro_frags(napi);
2403 }
2404
2405 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2406                                  struct be_rx_compl_info *rxcp)
2407 {
2408         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2409         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2410         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2411         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2412         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2413         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2414         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2415         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2416         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2417         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2418         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2419         if (rxcp->vlanf) {
2420                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2421                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2422         }
2423         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2424         rxcp->tunneled =
2425                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2426 }
2427
2428 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2429                                  struct be_rx_compl_info *rxcp)
2430 {
2431         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2432         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2433         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2434         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2435         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2436         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2437         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2438         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2439         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2440         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2441         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2442         if (rxcp->vlanf) {
2443                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2444                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2445         }
2446         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2447         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2448 }
2449
2450 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2451 {
2452         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2453         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2454         struct be_adapter *adapter = rxo->adapter;
2455
2456         /* For checking the valid bit, it is OK to use either definition, as the
2457          * valid bit is at the same position in both v0 and v1 Rx compls */
2458         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2459                 return NULL;
2460
2461         rmb();
2462         be_dws_le_to_cpu(compl, sizeof(*compl));
2463
2464         if (adapter->be3_native)
2465                 be_parse_rx_compl_v1(compl, rxcp);
2466         else
2467                 be_parse_rx_compl_v0(compl, rxcp);
2468
2469         if (rxcp->ip_frag)
2470                 rxcp->l4_csum = 0;
2471
2472         if (rxcp->vlanf) {
2473                 /* In QNQ modes, if qnq bit is not set, then the packet was
2474                  * tagged only with the transparent outer vlan-tag and must
2475                  * not be treated as a vlan packet by host
2476                  */
2477                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2478                         rxcp->vlanf = 0;
2479
2480                 if (!lancer_chip(adapter))
2481                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2482
2483                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2484                     !test_bit(rxcp->vlan_tag, adapter->vids))
2485                         rxcp->vlanf = 0;
2486         }
2487
2488         /* As the compl has been parsed, reset it; we won't touch it again */
2489         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2490
2491         queue_tail_inc(&rxo->cq);
2492         return rxcp;
2493 }
2494
2495 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2496 {
2497         u32 order = get_order(size);
2498
2499         if (order > 0)
2500                 gfp |= __GFP_COMP;
2501         return  alloc_pages(gfp, order);
2502 }
2503
2504 /*
2505  * Allocate a page, split it into fragments of size rx_frag_size and post as
2506  * receive buffers to BE
2507  */
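/* For example, if big_page_size were 16K with the default rx_frag_size of
 * 2048, each compound page would be carved into 8 RX fragments; get_page()
 * takes an extra reference for every fragment after the first, and only the
 * fragment that exhausts the page (or the last one posted) is marked
 * last_frag so that the page is DMA-unmapped exactly once.
 */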
2508 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2509 {
2510         struct be_adapter *adapter = rxo->adapter;
2511         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2512         struct be_queue_info *rxq = &rxo->q;
2513         struct page *pagep = NULL;
2514         struct device *dev = &adapter->pdev->dev;
2515         struct be_eth_rx_d *rxd;
2516         u64 page_dmaaddr = 0, frag_dmaaddr;
2517         u32 posted, page_offset = 0, notify = 0;
2518
2519         page_info = &rxo->page_info_tbl[rxq->head];
2520         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2521                 if (!pagep) {
2522                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2523                         if (unlikely(!pagep)) {
2524                                 rx_stats(rxo)->rx_post_fail++;
2525                                 break;
2526                         }
2527                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2528                                                     adapter->big_page_size,
2529                                                     DMA_FROM_DEVICE);
2530                         if (dma_mapping_error(dev, page_dmaaddr)) {
2531                                 put_page(pagep);
2532                                 pagep = NULL;
2533                                 adapter->drv_stats.dma_map_errors++;
2534                                 break;
2535                         }
2536                         page_offset = 0;
2537                 } else {
2538                         get_page(pagep);
2539                         page_offset += rx_frag_size;
2540                 }
2541                 page_info->page_offset = page_offset;
2542                 page_info->page = pagep;
2543
2544                 rxd = queue_head_node(rxq);
2545                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2546                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2547                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2548
2549                 /* Any space left in the current big page for another frag? */
2550                 if ((page_offset + rx_frag_size + rx_frag_size) >
2551                                         adapter->big_page_size) {
2552                         pagep = NULL;
2553                         page_info->last_frag = true;
2554                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2555                 } else {
2556                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2557                 }
2558
2559                 prev_page_info = page_info;
2560                 queue_head_inc(rxq);
2561                 page_info = &rxo->page_info_tbl[rxq->head];
2562         }
2563
2564         /* Mark the last frag of a page when we break out of the above loop
2565          * with no more slots available in the RXQ
2566          */
2567         if (pagep) {
2568                 prev_page_info->last_frag = true;
2569                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2570         }
2571
2572         if (posted) {
2573                 atomic_add(posted, &rxq->used);
2574                 if (rxo->rx_post_starved)
2575                         rxo->rx_post_starved = false;
2576                 do {
2577                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2578                         be_rxq_notify(adapter, rxq->id, notify);
2579                         posted -= notify;
2580                 } while (posted);
2581         } else if (atomic_read(&rxq->used) == 0) {
2582                 /* Let be_worker replenish when memory is available */
2583                 rxo->rx_post_starved = true;
2584         }
2585 }
2586
2587 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2588 {
2589         struct be_queue_info *tx_cq = &txo->cq;
2590         struct be_tx_compl_info *txcp = &txo->txcp;
2591         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2592
2593         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2594                 return NULL;
2595
2596         /* Ensure load ordering of valid bit dword and other dwords below */
2597         rmb();
2598         be_dws_le_to_cpu(compl, sizeof(*compl));
2599
2600         txcp->status = GET_TX_COMPL_BITS(status, compl);
2601         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2602
2603         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2604         queue_tail_inc(tx_cq);
2605         return txcp;
2606 }
2607
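/* Reclaims the WRBs of completed TX requests up to last_index. For each
 * request, the header WRB (the slot holding the skb pointer in sent_skb_list)
 * is skipped, then every data WRB is DMA-unmapped (the first one also unmaps
 * the skb's linear header, if any). The completed skbs are freed and the
 * total number of WRBs walked is returned so the caller can subtract it from
 * txq->used.
 */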
2608 static u16 be_tx_compl_process(struct be_adapter *adapter,
2609                                struct be_tx_obj *txo, u16 last_index)
2610 {
2611         struct sk_buff **sent_skbs = txo->sent_skb_list;
2612         struct be_queue_info *txq = &txo->q;
2613         struct sk_buff *skb = NULL;
2614         bool unmap_skb_hdr = false;
2615         struct be_eth_wrb *wrb;
2616         u16 num_wrbs = 0;
2617         u32 frag_index;
2618
2619         do {
2620                 if (sent_skbs[txq->tail]) {
2621                         /* Free skb from prev req */
2622                         if (skb)
2623                                 dev_consume_skb_any(skb);
2624                         skb = sent_skbs[txq->tail];
2625                         sent_skbs[txq->tail] = NULL;
2626                         queue_tail_inc(txq);  /* skip hdr wrb */
2627                         num_wrbs++;
2628                         unmap_skb_hdr = true;
2629                 }
2630                 wrb = queue_tail_node(txq);
2631                 frag_index = txq->tail;
2632                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2633                               (unmap_skb_hdr && skb_headlen(skb)));
2634                 unmap_skb_hdr = false;
2635                 queue_tail_inc(txq);
2636                 num_wrbs++;
2637         } while (frag_index != last_index);
2638         dev_consume_skb_any(skb);
2639
2640         return num_wrbs;
2641 }
2642
2643 /* Return the number of events in the event queue */
2644 static inline int events_get(struct be_eq_obj *eqo)
2645 {
2646         struct be_eq_entry *eqe;
2647         int num = 0;
2648
2649         do {
2650                 eqe = queue_tail_node(&eqo->q);
2651                 if (eqe->evt == 0)
2652                         break;
2653
2654                 rmb();
2655                 eqe->evt = 0;
2656                 num++;
2657                 queue_tail_inc(&eqo->q);
2658         } while (true);
2659
2660         return num;
2661 }
2662
2663 /* Leaves the EQ in a disarmed state */
2664 static void be_eq_clean(struct be_eq_obj *eqo)
2665 {
2666         int num = events_get(eqo);
2667
2668         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2669 }
2670
2671 /* Free posted rx buffers that were not used */
2672 static void be_rxq_clean(struct be_rx_obj *rxo)
2673 {
2674         struct be_queue_info *rxq = &rxo->q;
2675         struct be_rx_page_info *page_info;
2676
2677         while (atomic_read(&rxq->used) > 0) {
2678                 page_info = get_rx_page_info(rxo);
2679                 put_page(page_info->page);
2680                 memset(page_info, 0, sizeof(*page_info));
2681         }
2682         BUG_ON(atomic_read(&rxq->used));
2683         rxq->tail = 0;
2684         rxq->head = 0;
2685 }
2686
2687 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2688 {
2689         struct be_queue_info *rx_cq = &rxo->cq;
2690         struct be_rx_compl_info *rxcp;
2691         struct be_adapter *adapter = rxo->adapter;
2692         int flush_wait = 0;
2693
2694         /* Consume pending rx completions.
2695          * Wait for the flush completion (identified by zero num_rcvd)
2696          * to arrive. Notify CQ even when there are no more CQ entries
2697          * for HW to flush partially coalesced CQ entries.
2698          * In Lancer, there is no need to wait for flush compl.
2699          */
2700         for (;;) {
2701                 rxcp = be_rx_compl_get(rxo);
2702                 if (!rxcp) {
2703                         if (lancer_chip(adapter))
2704                                 break;
2705
2706                         if (flush_wait++ > 50 ||
2707                             be_check_error(adapter,
2708                                            BE_ERROR_HW)) {
2709                                 dev_warn(&adapter->pdev->dev,
2710                                          "did not receive flush compl\n");
2711                                 break;
2712                         }
2713                         be_cq_notify(adapter, rx_cq->id, true, 0);
2714                         mdelay(1);
2715                 } else {
2716                         be_rx_compl_discard(rxo, rxcp);
2717                         be_cq_notify(adapter, rx_cq->id, false, 1);
2718                         if (rxcp->num_rcvd == 0)
2719                                 break;
2720                 }
2721         }
2722
2723         /* After cleanup, leave the CQ in unarmed state */
2724         be_cq_notify(adapter, rx_cq->id, false, 0);
2725 }
2726
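/* Drains TX state before queue teardown. Phase 1 keeps consuming TX
 * completions on all queues, acknowledging consumed CQ entries, until no
 * completions are pending, the HW has been silent for roughly 10 ms, or a HW
 * error is detected. Phase 2 then reclaims any WRBs that were enqueued but
 * never notified to the HW and rewinds the TXQ head/tail to the last notified
 * index.
 */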
2727 static void be_tx_compl_clean(struct be_adapter *adapter)
2728 {
2729         struct device *dev = &adapter->pdev->dev;
2730         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2731         struct be_tx_compl_info *txcp;
2732         struct be_queue_info *txq;
2733         u32 end_idx, notified_idx;
2734         struct be_tx_obj *txo;
2735         int i, pending_txqs;
2736
2737         /* Stop polling for compls when HW has been silent for 10ms */
2738         do {
2739                 pending_txqs = adapter->num_tx_qs;
2740
2741                 for_all_tx_queues(adapter, txo, i) {
2742                         cmpl = 0;
2743                         num_wrbs = 0;
2744                         txq = &txo->q;
2745                         while ((txcp = be_tx_compl_get(txo))) {
2746                                 num_wrbs +=
2747                                         be_tx_compl_process(adapter, txo,
2748                                                             txcp->end_index);
2749                                 cmpl++;
2750                         }
2751                         if (cmpl) {
2752                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2753                                 atomic_sub(num_wrbs, &txq->used);
2754                                 timeo = 0;
2755                         }
2756                         if (!be_is_tx_compl_pending(txo))
2757                                 pending_txqs--;
2758                 }
2759
2760                 if (pending_txqs == 0 || ++timeo > 10 ||
2761                     be_check_error(adapter, BE_ERROR_HW))
2762                         break;
2763
2764                 mdelay(1);
2765         } while (true);
2766
2767         /* Free enqueued TX that was never notified to HW */
2768         for_all_tx_queues(adapter, txo, i) {
2769                 txq = &txo->q;
2770
2771                 if (atomic_read(&txq->used)) {
2772                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2773                                  i, atomic_read(&txq->used));
2774                         notified_idx = txq->tail;
2775                         end_idx = txq->tail;
2776                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2777                                   txq->len);
2778                         /* Use the tx-compl process logic to handle requests
2779                          * that were not sent to the HW.
2780                          */
2781                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2782                         atomic_sub(num_wrbs, &txq->used);
2783                         BUG_ON(atomic_read(&txq->used));
2784                         txo->pend_wrb_cnt = 0;
2785                         /* Since hw was never notified of these requests,
2786                          * reset TXQ indices
2787                          */
2788                         txq->head = notified_idx;
2789                         txq->tail = notified_idx;
2790                 }
2791         }
2792 }
2793
2794 static void be_evt_queues_destroy(struct be_adapter *adapter)
2795 {
2796         struct be_eq_obj *eqo;
2797         int i;
2798
2799         for_all_evt_queues(adapter, eqo, i) {
2800                 if (eqo->q.created) {
2801                         be_eq_clean(eqo);
2802                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2803                         netif_napi_del(&eqo->napi);
2804                         free_cpumask_var(eqo->affinity_mask);
2805                 }
2806                 be_queue_free(adapter, &eqo->q);
2807         }
2808 }
2809
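/* Creates one EQ per interrupt vector: num_evt_qs is bounded by the number of
 * available IRQs and the larger of the configured RX and TX IRQ counts (the
 * same EQ can service both an RX and a TX queue). Each EQ gets a NAPI context
 * and a CPU affinity hint spread across the device's NUMA node.
 */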
2810 static int be_evt_queues_create(struct be_adapter *adapter)
2811 {
2812         struct be_queue_info *eq;
2813         struct be_eq_obj *eqo;
2814         struct be_aic_obj *aic;
2815         int i, rc;
2816
2817         /* need enough EQs to service both RX and TX queues */
2818         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2819                                     max(adapter->cfg_num_rx_irqs,
2820                                         adapter->cfg_num_tx_irqs));
2821
2822         for_all_evt_queues(adapter, eqo, i) {
2823                 int numa_node = dev_to_node(&adapter->pdev->dev);
2824
2825                 aic = &adapter->aic_obj[i];
2826                 eqo->adapter = adapter;
2827                 eqo->idx = i;
2828                 aic->max_eqd = BE_MAX_EQD;
2829                 aic->enable = true;
2830
2831                 eq = &eqo->q;
2832                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2833                                     sizeof(struct be_eq_entry));
2834                 if (rc)
2835                         return rc;
2836
2837                 rc = be_cmd_eq_create(adapter, eqo);
2838                 if (rc)
2839                         return rc;
2840
2841                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2842                         return -ENOMEM;
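                     /* Pick a CPU local to the device's NUMA node for this EQ's affinity mask */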
2843                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2844                                 eqo->affinity_mask);
2845                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2846                                BE_NAPI_WEIGHT);
2847         }
2848         return 0;
2849 }
2850
2851 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2852 {
2853         struct be_queue_info *q;
2854
2855         q = &adapter->mcc_obj.q;
2856         if (q->created)
2857                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2858         be_queue_free(adapter, q);
2859
2860         q = &adapter->mcc_obj.cq;
2861         if (q->created)
2862                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2863         be_queue_free(adapter, q);
2864 }
2865
2866 /* Must be called only after TX qs are created as MCC shares TX EQ */
2867 static int be_mcc_queues_create(struct be_adapter *adapter)
2868 {
2869         struct be_queue_info *q, *cq;
2870
2871         cq = &adapter->mcc_obj.cq;
2872         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2873                            sizeof(struct be_mcc_compl)))
2874                 goto err;
2875
2876         /* Use the default EQ for MCC completions */
2877         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2878                 goto mcc_cq_free;
2879
2880         q = &adapter->mcc_obj.q;
2881         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2882                 goto mcc_cq_destroy;
2883
2884         if (be_cmd_mccq_create(adapter, q, cq))
2885                 goto mcc_q_free;
2886
2887         return 0;
2888
2889 mcc_q_free:
2890         be_queue_free(adapter, q);
2891 mcc_cq_destroy:
2892         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2893 mcc_cq_free:
2894         be_queue_free(adapter, cq);
2895 err:
2896         return -1;
2897 }
2898
2899 static void be_tx_queues_destroy(struct be_adapter *adapter)
2900 {
2901         struct be_queue_info *q;
2902         struct be_tx_obj *txo;
2903         u8 i;
2904
2905         for_all_tx_queues(adapter, txo, i) {
2906                 q = &txo->q;
2907                 if (q->created)
2908                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2909                 be_queue_free(adapter, q);
2910
2911                 q = &txo->cq;
2912                 if (q->created)
2913                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2914                 be_queue_free(adapter, q);
2915         }
2916 }
2917
2918 static int be_tx_qs_create(struct be_adapter *adapter)
2919 {
2920         struct be_queue_info *cq;
2921         struct be_tx_obj *txo;
2922         struct be_eq_obj *eqo;
2923         int status, i;
2924
2925         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2926
2927         for_all_tx_queues(adapter, txo, i) {
2928                 cq = &txo->cq;
2929                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2930                                         sizeof(struct be_eth_tx_compl));
2931                 if (status)
2932                         return status;
2933
2934                 u64_stats_init(&txo->stats.sync);
2935                 u64_stats_init(&txo->stats.sync_compl);
2936
2937                 /* If num_evt_qs is less than num_tx_qs, then more than
2938                  * one txq shares an eq
2939                  */
2940                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2941                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2942                 if (status)
2943                         return status;
2944
2945                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2946                                         sizeof(struct be_eth_wrb));
2947                 if (status)
2948                         return status;
2949
2950                 status = be_cmd_txq_create(adapter, txo);
2951                 if (status)
2952                         return status;
2953
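                     /* Use the EQ's CPU mask as the XPS map for this TX queue */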
2954                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2955                                     eqo->idx);
2956         }
2957
2958         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2959                  adapter->num_tx_qs);
2960         return 0;
2961 }
2962
2963 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2964 {
2965         struct be_queue_info *q;
2966         struct be_rx_obj *rxo;
2967         int i;
2968
2969         for_all_rx_queues(adapter, rxo, i) {
2970                 q = &rxo->cq;
2971                 if (q->created)
2972                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2973                 be_queue_free(adapter, q);
2974         }
2975 }
2976
2977 static int be_rx_cqs_create(struct be_adapter *adapter)
2978 {
2979         struct be_queue_info *eq, *cq;
2980         struct be_rx_obj *rxo;
2981         int rc, i;
2982
2983         adapter->num_rss_qs =
2984                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2985
2986         /* We'll use RSS only if at least 2 RSS rings are supported. */
2987         if (adapter->num_rss_qs < 2)
2988                 adapter->num_rss_qs = 0;
2989
2990         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2991
2992         /* When the interface is not capable of RSS rings (and there is no
2993          * need to create a default RXQ) we'll still need one RXQ
2994          */
2995         if (adapter->num_rx_qs == 0)
2996                 adapter->num_rx_qs = 1;
2997
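             /* RX fragments are carved out of compound pages of this size */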
2998         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2999         for_all_rx_queues(adapter, rxo, i) {
3000                 rxo->adapter = adapter;
3001                 cq = &rxo->cq;
3002                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3003                                     sizeof(struct be_eth_rx_compl));
3004                 if (rc)
3005                         return rc;
3006
3007                 u64_stats_init(&rxo->stats.sync);
3008                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3009                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3010                 if (rc)
3011                         return rc;
3012         }
3013
3014         dev_info(&adapter->pdev->dev,
3015                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3016         return 0;
3017 }
3018
3019 static irqreturn_t be_intx(int irq, void *dev)
3020 {
3021         struct be_eq_obj *eqo = dev;
3022         struct be_adapter *adapter = eqo->adapter;
3023         int num_evts = 0;
3024
3025         /* IRQ is not expected when NAPI is scheduled as the EQ
3026          * will not be armed.
3027          * But, this can happen on Lancer INTx where it takes
3028          * a while to de-assert INTx, or in BE2 where occasionally
3029          * an interrupt may be raised even when the EQ is unarmed.
3030          * If NAPI is already scheduled, then counting & notifying
3031          * events will orphan them.
3032          */
3033         if (napi_schedule_prep(&eqo->napi)) {
3034                 num_evts = events_get(eqo);
3035                 __napi_schedule(&eqo->napi);
3036                 if (num_evts)
3037                         eqo->spurious_intr = 0;
3038         }
3039         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3040
3041         /* Return IRQ_HANDLED only for the first spurious intr
3042          * after a valid intr to stop the kernel from branding
3043          * this irq as a bad one!
3044          */
3045         if (num_evts || eqo->spurious_intr++ == 0)
3046                 return IRQ_HANDLED;
3047         else
3048                 return IRQ_NONE;
3049 }
3050
3051 static irqreturn_t be_msix(int irq, void *dev)
3052 {
3053         struct be_eq_obj *eqo = dev;
3054
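             /* Clear the EQ interrupt without re-arming it; be_poll() re-arms when done */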
3055         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3056         napi_schedule(&eqo->napi);
3057         return IRQ_HANDLED;
3058 }
3059
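     /* GRO is attempted only for error-free TCP completions with a valid L4 checksum */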
3060 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3061 {
3062         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3063 }
3064
3065 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3066                          int budget)
3067 {
3068         struct be_adapter *adapter = rxo->adapter;
3069         struct be_queue_info *rx_cq = &rxo->cq;
3070         struct be_rx_compl_info *rxcp;
3071         u32 work_done;
3072         u32 frags_consumed = 0;
3073
3074         for (work_done = 0; work_done < budget; work_done++) {
3075                 rxcp = be_rx_compl_get(rxo);
3076                 if (!rxcp)
3077                         break;
3078
3079                 /* Is it a flush compl that has no data */
3080                 if (unlikely(rxcp->num_rcvd == 0))
3081                         goto loop_continue;
3082
3083                 /* Discard compl with partial DMA Lancer B0 */
3084                 if (unlikely(!rxcp->pkt_size)) {
3085                         be_rx_compl_discard(rxo, rxcp);
3086                         goto loop_continue;
3087                 }
3088
3089                 /* On BE, drop pkts that arrive due to imperfect filtering in
3090                  * promiscuous mode on some SKUs
3091                  */
3092                 if (unlikely(rxcp->port != adapter->port_num &&
3093                              !lancer_chip(adapter))) {
3094                         be_rx_compl_discard(rxo, rxcp);
3095                         goto loop_continue;
3096                 }
3097
3098                 if (do_gro(rxcp))
3099                         be_rx_compl_process_gro(rxo, napi, rxcp);
3100                 else
3101                         be_rx_compl_process(rxo, napi, rxcp);
3102
3103 loop_continue:
3104                 frags_consumed += rxcp->num_rcvd;
3105                 be_rx_stats_update(rxo, rxcp);
3106         }
3107
3108         if (work_done) {
3109                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3110
3111                 /* When an rx-obj gets into post_starved state, just
3112                  * let be_worker do the posting.
3113                  */
3114                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3115                     !rxo->rx_post_starved)
3116                         be_post_rx_frags(rxo, GFP_ATOMIC,
3117                                          max_t(u32, MAX_RX_POST,
3118                                                frags_consumed));
3119         }
3120
3121         return work_done;
3122 }
3123
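     /* Account TX completion errors reported by BE2/BE3/Skyhawk chips in per-queue stats */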
3124 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3125 {
3126         switch (status) {
3127         case BE_TX_COMP_HDR_PARSE_ERR:
3128                 tx_stats(txo)->tx_hdr_parse_err++;
3129                 break;
3130         case BE_TX_COMP_NDMA_ERR:
3131                 tx_stats(txo)->tx_dma_err++;
3132                 break;
3133         case BE_TX_COMP_ACL_ERR:
3134                 tx_stats(txo)->tx_spoof_check_err++;
3135                 break;
3136         }
3137 }
3138
3139 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3140 {
3141         switch (status) {
3142         case LANCER_TX_COMP_LSO_ERR:
3143                 tx_stats(txo)->tx_tso_err++;
3144                 break;
3145         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3146         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3147                 tx_stats(txo)->tx_spoof_check_err++;
3148                 break;
3149         case LANCER_TX_COMP_QINQ_ERR:
3150                 tx_stats(txo)->tx_qinq_err++;
3151                 break;
3152         case LANCER_TX_COMP_PARITY_ERR:
3153                 tx_stats(txo)->tx_internal_parity_err++;
3154                 break;
3155         case LANCER_TX_COMP_DMA_ERR:
3156                 tx_stats(txo)->tx_dma_err++;
3157                 break;
3158         }
3159 }
3160
3161 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3162                           int idx)
3163 {
3164         int num_wrbs = 0, work_done = 0;
3165         struct be_tx_compl_info *txcp;
3166
3167         while ((txcp = be_tx_compl_get(txo))) {
3168                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3169                 work_done++;
3170
3171                 if (txcp->status) {
3172                         if (lancer_chip(adapter))
3173                                 lancer_update_tx_err(txo, txcp->status);
3174                         else
3175                                 be_update_tx_err(txo, txcp->status);
3176                 }
3177         }
3178
3179         if (work_done) {
3180                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3181                 atomic_sub(num_wrbs, &txo->q.used);
3182
3183                 /* As Tx wrbs have been freed up, wake up the netdev queue
3184                  * if it was stopped due to lack of tx wrbs. */
3185                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3186                     be_can_txq_wake(txo)) {
3187                         netif_wake_subqueue(adapter->netdev, idx);
3188                 }
3189
3190                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3191                 tx_stats(txo)->tx_compl += work_done;
3192                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3193         }
3194 }
3195
3196 int be_poll(struct napi_struct *napi, int budget)
3197 {
3198         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3199         struct be_adapter *adapter = eqo->adapter;
3200         int max_work = 0, work, i, num_evts;
3201         struct be_rx_obj *rxo;
3202         struct be_tx_obj *txo;
3203         u32 mult_enc = 0;
3204
3205         num_evts = events_get(eqo);
3206
3207         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3208                 be_process_tx(adapter, txo, i);
3209
3210         /* This loop iterates twice for EQ0, on which completions of the
3211          * last RXQ (the default one) are also processed.
3212          * For other EQs the loop iterates only once.
3213          */
3214         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3215                 work = be_process_rx(rxo, napi, budget);
3216                 max_work = max(work, max_work);
3217         }
3218
3219         if (is_mcc_eqo(eqo))
3220                 be_process_mcc(adapter);
3221
3222         if (max_work < budget) {
3223                 napi_complete_done(napi, max_work);
3224
3225                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3226                  * delay via a delay multiplier encoding value
3227                  */
3228                 if (skyhawk_chip(adapter))
3229                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3230
3231                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3232                              mult_enc);
3233         } else {
3234                 /* As we'll continue in polling mode, count and clear events */
3235                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3236         }
3237         return max_work;
3238 }
3239
3240 void be_detect_error(struct be_adapter *adapter)
3241 {
3242         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3243         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3244         u32 i;
3245         struct device *dev = &adapter->pdev->dev;
3246
3247         if (be_check_error(adapter, BE_ERROR_HW))
3248                 return;
3249
3250         if (lancer_chip(adapter)) {
3251                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3252                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3253                         be_set_error(adapter, BE_ERROR_UE);
3254                         sliport_err1 = ioread32(adapter->db +
3255                                                 SLIPORT_ERROR1_OFFSET);
3256                         sliport_err2 = ioread32(adapter->db +
3257                                                 SLIPORT_ERROR2_OFFSET);
3258                         /* Do not log error messages if it's a FW reset */
3259                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3260                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3261                                 dev_info(dev, "Firmware update in progress\n");
3262                         } else {
3263                                 dev_err(dev, "Error detected in the card\n");
3264                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3265                                         sliport_status);
3266                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3267                                         sliport_err1);
3268                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3269                                         sliport_err2);
3270                         }
3271                 }
3272         } else {
3273                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3274                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3275                 ue_lo_mask = ioread32(adapter->pcicfg +
3276                                       PCICFG_UE_STATUS_LOW_MASK);
3277                 ue_hi_mask = ioread32(adapter->pcicfg +
3278                                       PCICFG_UE_STATUS_HI_MASK);
3279
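                     /* The mask registers indicate UE bits that should be ignored */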
3280                 ue_lo = (ue_lo & ~ue_lo_mask);
3281                 ue_hi = (ue_hi & ~ue_hi_mask);
3282
3283                 /* On certain platforms BE hardware can indicate spurious UEs.
3284                  * In case of a real UE the HW stops working on its own anyway,
3285                  * so the hw_error flag is not set here on UE detection.
3286                  */
3287
3288                 if (ue_lo || ue_hi) {
3289                         dev_err(dev, "Error detected in the adapter");
3290                         if (skyhawk_chip(adapter))
3291                                 be_set_error(adapter, BE_ERROR_UE);
3292
3293                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3294                                 if (ue_lo & 1)
3295                                         dev_err(dev, "UE: %s bit set\n",
3296                                                 ue_status_low_desc[i]);
3297                         }
3298                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3299                                 if (ue_hi & 1)
3300                                         dev_err(dev, "UE: %s bit set\n",
3301                                                 ue_status_hi_desc[i]);
3302                         }
3303                 }
3304         }
3305 }
3306
3307 static void be_msix_disable(struct be_adapter *adapter)
3308 {
3309         if (msix_enabled(adapter)) {
3310                 pci_disable_msix(adapter->pdev);
3311                 adapter->num_msix_vec = 0;
3312                 adapter->num_msix_roce_vec = 0;
3313         }
3314 }
3315
3316 static int be_msix_enable(struct be_adapter *adapter)
3317 {
3318         unsigned int i, max_roce_eqs;
3319         struct device *dev = &adapter->pdev->dev;
3320         int num_vec;
3321
3322         /* If RoCE is supported, program the max number of vectors that
3323          * could be used for NIC and RoCE; otherwise, just program the
3324          * number we'll use initially.
3325          */
3326         if (be_roce_supported(adapter)) {
3327                 max_roce_eqs =
3328                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3329                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3330                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3331         } else {
3332                 num_vec = max(adapter->cfg_num_rx_irqs,
3333                               adapter->cfg_num_tx_irqs);
3334         }
3335
3336         for (i = 0; i < num_vec; i++)
3337                 adapter->msix_entries[i].entry = i;
3338
3339         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3340                                         MIN_MSIX_VECTORS, num_vec);
3341         if (num_vec < 0)
3342                 goto fail;
3343
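             /* When RoCE is supported, split the granted vectors between NIC and RoCE */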
3344         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3345                 adapter->num_msix_roce_vec = num_vec / 2;
3346                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3347                          adapter->num_msix_roce_vec);
3348         }
3349
3350         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3351
3352         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3353                  adapter->num_msix_vec);
3354         return 0;
3355
3356 fail:
3357         dev_warn(dev, "MSIx enable failed\n");
3358
3359         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3360         if (be_virtfn(adapter))
3361                 return num_vec;
3362         return 0;
3363 }
3364
3365 static inline int be_msix_vec_get(struct be_adapter *adapter,
3366                                   struct be_eq_obj *eqo)
3367 {
3368         return adapter->msix_entries[eqo->msix_idx].vector;
3369 }
3370
3371 static int be_msix_register(struct be_adapter *adapter)
3372 {
3373         struct net_device *netdev = adapter->netdev;
3374         struct be_eq_obj *eqo;
3375         int status, i, vec;
3376
3377         for_all_evt_queues(adapter, eqo, i) {
3378                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3379                 vec = be_msix_vec_get(adapter, eqo);
3380                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3381                 if (status)
3382                         goto err_msix;
3383
3384                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3385         }
3386
3387         return 0;
3388 err_msix:
3389         for (i--; i >= 0; i--) {
3390                 eqo = &adapter->eq_obj[i];
3391                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3392         }
3393         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3394                  status);
3395         be_msix_disable(adapter);
3396         return status;
3397 }
3398
3399 static int be_irq_register(struct be_adapter *adapter)
3400 {
3401         struct net_device *netdev = adapter->netdev;
3402         int status;
3403
3404         if (msix_enabled(adapter)) {
3405                 status = be_msix_register(adapter);
3406                 if (status == 0)
3407                         goto done;
3408                 /* INTx is not supported for VF */
3409                 if (be_virtfn(adapter))
3410                         return status;
3411         }
3412
3413         /* INTx: only the first EQ is used */
3414         netdev->irq = adapter->pdev->irq;
3415         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3416                              &adapter->eq_obj[0]);
3417         if (status) {
3418                 dev_err(&adapter->pdev->dev,
3419                         "INTx request IRQ failed - err %d\n", status);
3420                 return status;
3421         }
3422 done:
3423         adapter->isr_registered = true;
3424         return 0;
3425 }
3426
3427 static void be_irq_unregister(struct be_adapter *adapter)
3428 {
3429         struct net_device *netdev = adapter->netdev;
3430         struct be_eq_obj *eqo;
3431         int i, vec;
3432
3433         if (!adapter->isr_registered)
3434                 return;
3435
3436         /* INTx */
3437         if (!msix_enabled(adapter)) {
3438                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3439                 goto done;
3440         }
3441
3442         /* MSIx */
3443         for_all_evt_queues(adapter, eqo, i) {
3444                 vec = be_msix_vec_get(adapter, eqo);
3445                 irq_set_affinity_hint(vec, NULL);
3446                 free_irq(vec, eqo);
3447         }
3448
3449 done:
3450         adapter->isr_registered = false;
3451 }
3452
3453 static void be_rx_qs_destroy(struct be_adapter *adapter)
3454 {
3455         struct rss_info *rss = &adapter->rss_info;
3456         struct be_queue_info *q;
3457         struct be_rx_obj *rxo;
3458         int i;
3459
3460         for_all_rx_queues(adapter, rxo, i) {
3461                 q = &rxo->q;
3462                 if (q->created) {
3463                         /* If RXQs are destroyed while in an "out of buffer"
3464                          * state, there is a possibility of an HW stall on
3465                          * Lancer. So, post 64 buffers to each queue to relieve
3466                          * the "out of buffer" condition.
3467                          * Make sure there's space in the RXQ before posting.
3468                          */
3469                         if (lancer_chip(adapter)) {
3470                                 be_rx_cq_clean(rxo);
3471                                 if (atomic_read(&q->used) == 0)
3472                                         be_post_rx_frags(rxo, GFP_KERNEL,
3473                                                          MAX_RX_POST);
3474                         }
3475
3476                         be_cmd_rxq_destroy(adapter, q);
3477                         be_rx_cq_clean(rxo);
3478                         be_rxq_clean(rxo);
3479                 }
3480                 be_queue_free(adapter, q);
3481         }
3482
3483         if (rss->rss_flags) {
3484                 rss->rss_flags = RSS_ENABLE_NONE;
3485                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3486                                   128, rss->rss_hkey);
3487         }
3488 }
3489
3490 static void be_disable_if_filters(struct be_adapter *adapter)
3491 {
3492         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3493         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3494             check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3495                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3496                 eth_zero_addr(adapter->dev_mac);
3497         }
3498
3499         be_clear_uc_list(adapter);
3500         be_clear_mc_list(adapter);
3501
3502         /* The IFACE flags are enabled in the open path and cleared
3503          * in the close path. When a VF gets detached from the host and
3504          * assigned to a VM the following happens:
3505          *      - VF's IFACE flags get cleared in the detach path
3506          *      - IFACE create is issued by the VF in the attach path
3507          * Due to a bug in the BE3/Skyhawk-R FW
3508          * (Lancer FW doesn't have the bug), the IFACE capability flags
3509          * specified along with the IFACE create cmd issued by a VF are not
3510          * honoured by FW.  As a consequence, if a *new* driver
3511          * (that enables/disables IFACE flags in open/close)
3512          * is loaded in the host and an *old* driver is used by a VM/VF,
3513          * the IFACE gets created *without* the needed flags.
3514          * To avoid this, disable RX-filter flags only for Lancer.
3515          */
3516         if (lancer_chip(adapter)) {
3517                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3518                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3519         }
3520 }
3521
3522 static int be_close(struct net_device *netdev)
3523 {
3524         struct be_adapter *adapter = netdev_priv(netdev);
3525         struct be_eq_obj *eqo;
3526         int i;
3527
3528         /* This protection is needed as be_close() may be called even when the
3529          * adapter is in cleared state (after eeh perm failure)
3530          */
3531         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3532                 return 0;
3533
3534         /* Before attempting cleanup ensure all the pending cmds in the
3535          * config_wq have finished execution
3536          */
3537         flush_workqueue(be_wq);
3538
3539         be_disable_if_filters(adapter);
3540
3541         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3542                 for_all_evt_queues(adapter, eqo, i) {
3543                         napi_disable(&eqo->napi);
3544                 }
3545                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3546         }
3547
3548         be_async_mcc_disable(adapter);
3549
3550         /* Wait for all pending tx completions to arrive so that
3551          * all tx skbs are freed.
3552          */
3553         netif_tx_disable(netdev);
3554         be_tx_compl_clean(adapter);
3555
3556         be_rx_qs_destroy(adapter);
3557
3558         for_all_evt_queues(adapter, eqo, i) {
3559                 if (msix_enabled(adapter))
3560                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3561                 else
3562                         synchronize_irq(netdev->irq);
3563                 be_eq_clean(eqo);
3564         }
3565
3566         be_irq_unregister(adapter);
3567
3568         return 0;
3569 }
3570
3571 static int be_rx_qs_create(struct be_adapter *adapter)
3572 {
3573         struct rss_info *rss = &adapter->rss_info;
3574         u8 rss_key[RSS_HASH_KEY_LEN];
3575         struct be_rx_obj *rxo;
3576         int rc, i, j;
3577
3578         for_all_rx_queues(adapter, rxo, i) {
3579                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3580                                     sizeof(struct be_eth_rx_d));
3581                 if (rc)
3582                         return rc;
3583         }
3584
3585         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3586                 rxo = default_rxo(adapter);
3587                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3588                                        rx_frag_size, adapter->if_handle,
3589                                        false, &rxo->rss_id);
3590                 if (rc)
3591                         return rc;
3592         }
3593
3594         for_all_rss_queues(adapter, rxo, i) {
3595                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3596                                        rx_frag_size, adapter->if_handle,
3597                                        true, &rxo->rss_id);
3598                 if (rc)
3599                         return rc;
3600         }
3601
3602         if (be_multi_rxq(adapter)) {
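                     /* Stripe the RSS queue ids across the whole indirection table */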
3603                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3604                         for_all_rss_queues(adapter, rxo, i) {
3605                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3606                                         break;
3607                                 rss->rsstable[j + i] = rxo->rss_id;
3608                                 rss->rss_queue[j + i] = i;
3609                         }
3610                 }
3611                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3612                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3613
3614                 if (!BEx_chip(adapter))
3615                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3616                                 RSS_ENABLE_UDP_IPV6;
3617
3618                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3619                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3620                                        RSS_INDIR_TABLE_LEN, rss_key);
3621                 if (rc) {
3622                         rss->rss_flags = RSS_ENABLE_NONE;
3623                         return rc;
3624                 }
3625
3626                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3627         } else {
3628                 /* Disable RSS if only the default RX Q is created */
3629                 rss->rss_flags = RSS_ENABLE_NONE;
3630         }
3631
3632
3633         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3634          * which is a queue empty condition
3635          */
3636         for_all_rx_queues(adapter, rxo, i)
3637                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3638
3639         return 0;
3640 }
3641
3642 static int be_enable_if_filters(struct be_adapter *adapter)
3643 {
3644         int status;
3645
3646         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3647         if (status)
3648                 return status;
3649
3650         /* Normally this condition is true, as ->dev_mac is zeroed.
3651          * But on BE3 VFs the initial MAC is pre-programmed by the PF and
3652          * a subsequent be_dev_mac_add() can fail (after a fresh boot)
3653          */
3654         if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3655                 int old_pmac_id = -1;
3656
3657                 /* Remember old programmed MAC if any - can happen on BE3 VF */
3658                 if (!is_zero_ether_addr(adapter->dev_mac))
3659                         old_pmac_id = adapter->pmac_id[0];
3660
3661                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3662                 if (status)
3663                         return status;
3664
3665                 /* Delete the old programmed MAC as we successfully programmed
3666                  * a new MAC
3667                  */
3668                 if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3669                         be_dev_mac_del(adapter, old_pmac_id);
3670
3671                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3672         }
3673
3674         if (adapter->vlans_added)
3675                 be_vid_config(adapter);
3676
3677         __be_set_rx_mode(adapter);
3678
3679         return 0;
3680 }
3681
3682 static int be_open(struct net_device *netdev)
3683 {
3684         struct be_adapter *adapter = netdev_priv(netdev);
3685         struct be_eq_obj *eqo;
3686         struct be_rx_obj *rxo;
3687         struct be_tx_obj *txo;
3688         u8 link_status;
3689         int status, i;
3690
3691         status = be_rx_qs_create(adapter);
3692         if (status)
3693                 goto err;
3694
3695         status = be_enable_if_filters(adapter);
3696         if (status)
3697                 goto err;
3698
3699         status = be_irq_register(adapter);
3700         if (status)
3701                 goto err;
3702
3703         for_all_rx_queues(adapter, rxo, i)
3704                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3705
3706         for_all_tx_queues(adapter, txo, i)
3707                 be_cq_notify(adapter, txo->cq.id, true, 0);
3708
3709         be_async_mcc_enable(adapter);
3710
3711         for_all_evt_queues(adapter, eqo, i) {
3712                 napi_enable(&eqo->napi);
3713                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3714         }
3715         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3716
3717         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3718         if (!status)
3719                 be_link_status_update(adapter, link_status);
3720
3721         netif_tx_start_all_queues(netdev);
3722         if (skyhawk_chip(adapter))
3723                 udp_tunnel_get_rx_info(netdev);
3724
3725         return 0;
3726 err:
3727         be_close(adapter->netdev);
3728         return -EIO;
3729 }
3730
3731 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3732 {
3733         u32 addr;
3734
3735         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3736
3737         mac[5] = (u8)(addr & 0xFF);
3738         mac[4] = (u8)((addr >> 8) & 0xFF);
3739         mac[3] = (u8)((addr >> 16) & 0xFF);
3740         /* Use the OUI from the current MAC address */
3741         memcpy(mac, adapter->netdev->dev_addr, 3);
3742 }
3743
3744 /*
3745  * Generate a seed MAC address from the PF MAC Address using jhash.
3746  * MAC addresses for VFs are assigned incrementally starting from the seed.
3747  * These addresses are programmed in the ASIC by the PF and the VF driver
3748  * queries for the MAC address during its probe.
3749  */
3750 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3751 {
3752         u32 vf;
3753         int status = 0;
3754         u8 mac[ETH_ALEN];
3755         struct be_vf_cfg *vf_cfg;
3756
3757         be_vf_eth_addr_generate(adapter, mac);
3758
3759         for_all_vfs(adapter, vf_cfg, vf) {
3760                 if (BEx_chip(adapter))
3761                         status = be_cmd_pmac_add(adapter, mac,
3762                                                  vf_cfg->if_handle,
3763                                                  &vf_cfg->pmac_id, vf + 1);
3764                 else
3765                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3766                                                 vf + 1);
3767
3768                 if (status)
3769                         dev_err(&adapter->pdev->dev,
3770                                 "Mac address assignment failed for VF %d\n",
3771                                 vf);
3772                 else
3773                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3774
3775                 mac[5] += 1;
3776         }
3777         return status;
3778 }
3779
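     /* Read back the MAC currently programmed in HW for each VF */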
3780 static int be_vfs_mac_query(struct be_adapter *adapter)
3781 {
3782         int status, vf;
3783         u8 mac[ETH_ALEN];
3784         struct be_vf_cfg *vf_cfg;
3785
3786         for_all_vfs(adapter, vf_cfg, vf) {
3787                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3788                                                mac, vf_cfg->if_handle,
3789                                                false, vf+1);
3790                 if (status)
3791                         return status;
3792                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3793         }
3794         return 0;
3795 }
3796
3797 static void be_vf_clear(struct be_adapter *adapter)
3798 {
3799         struct be_vf_cfg *vf_cfg;
3800         u32 vf;
3801
3802         if (pci_vfs_assigned(adapter->pdev)) {
3803                 dev_warn(&adapter->pdev->dev,
3804                          "VFs are assigned to VMs: not disabling VFs\n");
3805                 goto done;
3806         }
3807
3808         pci_disable_sriov(adapter->pdev);
3809
3810         for_all_vfs(adapter, vf_cfg, vf) {
3811                 if (BEx_chip(adapter))
3812                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3813                                         vf_cfg->pmac_id, vf + 1);
3814                 else
3815                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3816                                        vf + 1);
3817
3818                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3819         }
3820
3821         if (BE3_chip(adapter))
3822                 be_cmd_set_hsw_config(adapter, 0, 0,
3823                                       adapter->if_handle,
3824                                       PORT_FWD_TYPE_PASSTHRU, 0);
3825 done:
3826         kfree(adapter->vf_cfg);
3827         adapter->num_vfs = 0;
3828         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3829 }
3830
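     /* Tear down the MCC, RX-CQ, TX and event queues */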
3831 static void be_clear_queues(struct be_adapter *adapter)
3832 {
3833         be_mcc_queues_destroy(adapter);
3834         be_rx_cqs_destroy(adapter);
3835         be_tx_queues_destroy(adapter);
3836         be_evt_queues_destroy(adapter);
3837 }
3838
3839 static void be_cancel_worker(struct be_adapter *adapter)
3840 {
3841         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3842                 cancel_delayed_work_sync(&adapter->work);
3843                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3844         }
3845 }
3846
3847 static void be_cancel_err_detection(struct be_adapter *adapter)
3848 {
3849         struct be_error_recovery *err_rec = &adapter->error_recovery;
3850
3851         if (!be_err_recovery_workq)
3852                 return;
3853
3854         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3855                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3856                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3857         }
3858 }
3859
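     /* Program the first VxLAN port on the adapter's port list into HW and
      * advertise UDP tunnel offload features on the netdev
      */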
3860 static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3861 {
3862         struct net_device *netdev = adapter->netdev;
3863         struct device *dev = &adapter->pdev->dev;
3864         struct be_vxlan_port *vxlan_port;
3865         __be16 port;
3866         int status;
3867
3868         vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3869                                       struct be_vxlan_port, list);
3870         port = vxlan_port->port;
3871
3872         status = be_cmd_manage_iface(adapter, adapter->if_handle,
3873                                      OP_CONVERT_NORMAL_TO_TUNNEL);
3874         if (status) {
3875                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3876                 return status;
3877         }
3878         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3879
3880         status = be_cmd_set_vxlan_port(adapter, port);
3881         if (status) {
3882                 dev_warn(dev, "Failed to add VxLAN port\n");
3883                 return status;
3884         }
3885         adapter->vxlan_port = port;
3886
3887         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3888                                    NETIF_F_TSO | NETIF_F_TSO6 |
3889                                    NETIF_F_GSO_UDP_TUNNEL;
3890         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
3891         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
3892
3893         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
3894                  be16_to_cpu(port));
3895         return 0;
3896 }
3897
3898 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3899 {
3900         struct net_device *netdev = adapter->netdev;
3901
3902         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3903                 be_cmd_manage_iface(adapter, adapter->if_handle,
3904                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3905
3906         if (adapter->vxlan_port)
3907                 be_cmd_set_vxlan_port(adapter, 0);
3908
3909         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3910         adapter->vxlan_port = 0;
3911
3912         netdev->hw_enc_features = 0;
3913         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3914         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3915 }
3916
3917 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3918                                 struct be_resources *vft_res)
3919 {
3920         struct be_resources res = adapter->pool_res;
3921         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3922         struct be_resources res_mod = {0};
3923         u16 num_vf_qs = 1;
3924
3925         /* Distribute the queue resources among the PF and its VFs */
3926         if (num_vfs) {
3927                 /* Divide the rx queues evenly among the VFs and the PF, capped
3928                  * at VF-EQ-count. Any remainder queues belong to the PF.
3929                  */
3930                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3931                                 res.max_rss_qs / (num_vfs + 1));
3932
3933                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3934                  * RSS Tables per port. Provide RSS on VFs only if the number
3935                  * of VFs requested is less than its PF Pool's RSS Tables limit.
3936                  */
3937                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
3938                         num_vf_qs = 1;
3939         }
3940
3941         /* Resource fields set to all '1's by the GET_PROFILE_CONFIG cmd
3942          * are the ones modifiable using the SET_PROFILE_CONFIG cmd.
3943          */
3944         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
3945                                   RESOURCE_MODIFIABLE, 0);
3946
3947         /* If RSS IFACE capability flags are modifiable for a VF, set the
3948          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
3949          * more than 1 RSSQ is available for a VF.
3950          * Otherwise, provision only 1 queue pair for VF.
3951          */
3952         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
3953                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3954                 if (num_vf_qs > 1) {
3955                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
3956                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
3957                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
3958                 } else {
3959                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
3960                                              BE_IF_FLAGS_DEFQ_RSS);
3961                 }
3962         } else {
3963                 num_vf_qs = 1;
3964         }
3965
3966         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
3967                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3968                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3969         }
3970
3971         vft_res->vf_if_cap_flags = vf_if_cap_flags;
3972         vft_res->max_rx_qs = num_vf_qs;
3973         vft_res->max_rss_qs = num_vf_qs;
3974         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
3975         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
3976
3977         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
3978          * among the PF and its VFs, if the fields are changeable
3979          */
3980         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
3981                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
3982
3983         if (res_mod.max_vlans == FIELD_MODIFIABLE)
3984                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
3985
3986         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
3987                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
3988
3989         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
3990                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
3991 }
3992
3993 static void be_if_destroy(struct be_adapter *adapter)
3994 {
3995         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
3996
3997         kfree(adapter->pmac_id);
3998         adapter->pmac_id = NULL;
3999
4000         kfree(adapter->mc_list);
4001         adapter->mc_list = NULL;
4002
4003         kfree(adapter->uc_list);
4004         adapter->uc_list = NULL;
4005 }
4006
4007 static int be_clear(struct be_adapter *adapter)
4008 {
4009         struct pci_dev *pdev = adapter->pdev;
4010         struct  be_resources vft_res = {0};
4011
4012         be_cancel_worker(adapter);
4013
4014         flush_workqueue(be_wq);
4015
4016         if (sriov_enabled(adapter))
4017                 be_vf_clear(adapter);
4018
4019         /* Re-configure FW to distribute resources evenly across max-supported
4020          * number of VFs, only when VFs are not already enabled.
4021          */
4022         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4023             !pci_vfs_assigned(pdev)) {
4024                 be_calculate_vf_res(adapter,
4025                                     pci_sriov_get_totalvfs(pdev),
4026                                     &vft_res);
4027                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4028                                         pci_sriov_get_totalvfs(pdev),
4029                                         &vft_res);
4030         }
4031
4032         be_disable_vxlan_offloads(adapter);
4033
4034         be_if_destroy(adapter);
4035
4036         be_clear_queues(adapter);
4037
4038         be_msix_disable(adapter);
4039         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4040         return 0;
4041 }
4042
4043 static int be_vfs_if_create(struct be_adapter *adapter)
4044 {
4045         struct be_resources res = {0};
4046         u32 cap_flags, en_flags, vf;
4047         struct be_vf_cfg *vf_cfg;
4048         int status;
4049
4050         /* If a FW profile exists, then cap_flags are updated */
4051         cap_flags = BE_VF_IF_EN_FLAGS;
4052
4053         for_all_vfs(adapter, vf_cfg, vf) {
4054                 if (!BE3_chip(adapter)) {
4055                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4056                                                            ACTIVE_PROFILE_TYPE,
4057                                                            RESOURCE_LIMITS,
4058                                                            vf + 1);
4059                         if (!status) {
4060                                 cap_flags = res.if_cap_flags;
4061                                 /* Prevent VFs from enabling VLAN promiscuous
4062                                  * mode
4063                                  */
4064                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4065                         }
4066                 }
4067
4068                 /* PF should enable IF flags during proxy if_create call */
4069                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4070                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4071                                           &vf_cfg->if_handle, vf + 1);
4072                 if (status)
4073                         return status;
4074         }
4075
4076         return 0;
4077 }
4078
4079 static int be_vf_setup_init(struct be_adapter *adapter)
4080 {
4081         struct be_vf_cfg *vf_cfg;
4082         int vf;
4083
4084         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4085                                   GFP_KERNEL);
4086         if (!adapter->vf_cfg)
4087                 return -ENOMEM;
4088
4089         for_all_vfs(adapter, vf_cfg, vf) {
4090                 vf_cfg->if_handle = -1;
4091                 vf_cfg->pmac_id = -1;
4092         }
4093         return 0;
4094 }
4095
4096 static int be_vf_setup(struct be_adapter *adapter)
4097 {
4098         struct device *dev = &adapter->pdev->dev;
4099         struct be_vf_cfg *vf_cfg;
4100         int status, old_vfs, vf;
4101         bool spoofchk;
4102
4103         old_vfs = pci_num_vf(adapter->pdev);
4104
4105         status = be_vf_setup_init(adapter);
4106         if (status)
4107                 goto err;
4108
4109         if (old_vfs) {
4110                 for_all_vfs(adapter, vf_cfg, vf) {
4111                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4112                         if (status)
4113                                 goto err;
4114                 }
4115
4116                 status = be_vfs_mac_query(adapter);
4117                 if (status)
4118                         goto err;
4119         } else {
4120                 status = be_vfs_if_create(adapter);
4121                 if (status)
4122                         goto err;
4123
4124                 status = be_vf_eth_addr_config(adapter);
4125                 if (status)
4126                         goto err;
4127         }
4128
4129         for_all_vfs(adapter, vf_cfg, vf) {
4130                 /* Allow VFs to program MAC/VLAN filters */
4131                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4132                                                   vf + 1);
4133                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4134                         status = be_cmd_set_fn_privileges(adapter,
4135                                                           vf_cfg->privileges |
4136                                                           BE_PRIV_FILTMGMT,
4137                                                           vf + 1);
4138                         if (!status) {
4139                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4140                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4141                                          vf);
4142                         }
4143                 }
4144
4145                 /* Allow full available bandwidth */
4146                 if (!old_vfs)
4147                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4148
4149                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4150                                                vf_cfg->if_handle, NULL,
4151                                                &spoofchk);
4152                 if (!status)
4153                         vf_cfg->spoofchk = spoofchk;
4154
4155                 if (!old_vfs) {
4156                         be_cmd_enable_vf(adapter, vf + 1);
4157                         be_cmd_set_logical_link_config(adapter,
4158                                                        IFLA_VF_LINK_STATE_AUTO,
4159                                                        vf+1);
4160                 }
4161         }
4162
4163         if (!old_vfs) {
4164                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4165                 if (status) {
4166                         dev_err(dev, "SRIOV enable failed\n");
4167                         adapter->num_vfs = 0;
4168                         goto err;
4169                 }
4170         }
4171
4172         if (BE3_chip(adapter)) {
4173                 /* On BE3, enable VEB only when SRIOV is enabled */
4174                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4175                                                adapter->if_handle,
4176                                                PORT_FWD_TYPE_VEB, 0);
4177                 if (status)
4178                         goto err;
4179         }
4180
4181         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4182         return 0;
4183 err:
4184         dev_err(dev, "VF setup failed\n");
4185         be_vf_clear(adapter);
4186         return status;
4187 }
4188
4189 /* Converting function_mode bits on BE3 to SH mc_type enums */
4190
4191 static u8 be_convert_mc_type(u32 function_mode)
4192 {
4193         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4194                 return vNIC1;
4195         else if (function_mode & QNQ_MODE)
4196                 return FLEX10;
4197         else if (function_mode & VNIC_MODE)
4198                 return vNIC2;
4199         else if (function_mode & UMC_ENABLED)
4200                 return UMC;
4201         else
4202                 return MC_NONE;
4203 }
4204
4205 /* On BE2/BE3 the FW does not report the supported resource limits */
4206 static void BEx_get_resources(struct be_adapter *adapter,
4207                               struct be_resources *res)
4208 {
4209         bool use_sriov = adapter->num_vfs ? 1 : 0;
4210
4211         if (be_physfn(adapter))
4212                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4213         else
4214                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4215
4216         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4217
4218         if (be_is_mc(adapter)) {
4219                 /* Assuming that there are 4 channels per port
4220                  * when multi-channel is enabled
4221                  */
4222                 if (be_is_qnq_mode(adapter))
4223                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4224                 else
4225                         /* In a non-qnq multichannel mode, the pvid
4226                          * takes up one vlan entry
4227                          */
4228                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4229         } else {
4230                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4231         }
4232
4233         res->max_mcast_mac = BE_MAX_MC;
4234
4235         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4236          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4237          *    *only* if it is RSS-capable.
4238          */
4239         if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4240             be_virtfn(adapter) ||
4241             (be_is_mc(adapter) &&
4242              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4243                 res->max_tx_qs = 1;
4244         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4245                 struct be_resources super_nic_res = {0};
4246
4247                 /* On a SuperNIC profile, the driver needs to use the
4248                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4249                  */
4250                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4251                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4252                                           0);
4253                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4254                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4255         } else {
4256                 res->max_tx_qs = BE3_MAX_TX_QS;
4257         }
4258
4259         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4260             !use_sriov && be_physfn(adapter))
4261                 res->max_rss_qs = (adapter->be3_native) ?
4262                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4263         res->max_rx_qs = res->max_rss_qs + 1;
4264
4265         if (be_physfn(adapter))
4266                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4267                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4268         else
4269                 res->max_evt_qs = 1;
4270
4271         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4272         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4273         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4274                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4275 }
4276
4277 static void be_setup_init(struct be_adapter *adapter)
4278 {
4279         adapter->vlan_prio_bmap = 0xff;
4280         adapter->phy.link_speed = -1;
4281         adapter->if_handle = -1;
4282         adapter->be3_native = false;
4283         adapter->if_flags = 0;
4284         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4285         if (be_physfn(adapter))
4286                 adapter->cmd_privileges = MAX_PRIVILEGES;
4287         else
4288                 adapter->cmd_privileges = MIN_PRIVILEGES;
4289 }
4290
4291 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4292  * However, this HW limitation is not exposed to the host via any SLI cmd.
4293  * As a result, in the case of SRIOV and in particular multi-partition configs
4294  * the driver needs to calculate a proportional share of RSS Tables per
4295  * PF-pool for distribution between the VFs. This self-imposed limit will
4296  * determine the number of VFs for which RSS can be enabled.
4297  */
4298 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4299 {
4300         struct be_port_resources port_res = {0};
4301         u8 rss_tables_on_port;
4302         u16 max_vfs = be_max_vfs(adapter);
4303
4304         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4305                                   RESOURCE_LIMITS, 0);
4306
4307         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4308
4309         /* Each PF Pool's RSS Tables limit =
4310          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4311          */
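        /* Worked example (hypothetical numbers): with 14 RSS Tables left on
         * the port, a PF whose max_vfs is 32 on a port whose max_vfs is 64
         * gets 32 * 14 / 64 = 7 RSS Tables for its pool.
         */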
4312         adapter->pool_res.max_rss_tables =
4313                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4314 }
4315
4316 static int be_get_sriov_config(struct be_adapter *adapter)
4317 {
4318         struct be_resources res = {0};
4319         int max_vfs, old_vfs;
4320
4321         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4322                                   RESOURCE_LIMITS, 0);
4323
4324         /* Some old versions of BE3 FW don't report max_vfs value */
4325         if (BE3_chip(adapter) && !res.max_vfs) {
4326                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4327                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4328         }
4329
4330         adapter->pool_res = res;
4331
4332         /* If the VFs were not disabled during the previous unload of the
4333          * driver, we cannot rely on the PF-pool limits for the TotalVFs value.
4334          * Instead, use the TotalVFs value stored in the pci-dev struct.
4335          */
4336         old_vfs = pci_num_vf(adapter->pdev);
4337         if (old_vfs) {
4338                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4339                          old_vfs);
4340
4341                 adapter->pool_res.max_vfs =
4342                         pci_sriov_get_totalvfs(adapter->pdev);
4343                 adapter->num_vfs = old_vfs;
4344         }
4345
4346         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4347                 be_calculate_pf_pool_rss_tables(adapter);
4348                 dev_info(&adapter->pdev->dev,
4349                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4350                          be_max_pf_pool_rss_tables(adapter));
4351         }
4352         return 0;
4353 }
4354
4355 static void be_alloc_sriov_res(struct be_adapter *adapter)
4356 {
4357         int old_vfs = pci_num_vf(adapter->pdev);
4358         struct  be_resources vft_res = {0};
4359         int status;
4360
4361         be_get_sriov_config(adapter);
4362
4363         if (!old_vfs)
4364                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4365
4366         /* When the HW is in an SRIOV-capable configuration, the PF-pool
4367          * resources are given to the PF during driver load, provided there
4368          * are no old VFs. This facility is not available in BE3 FW;
4369          * on Lancer chips the FW does this itself.
4370          */
4371         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4372                 be_calculate_vf_res(adapter, 0, &vft_res);
4373                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4374                                                  &vft_res);
4375                 if (status)
4376                         dev_err(&adapter->pdev->dev,
4377                                 "Failed to optimize SRIOV resources\n");
4378         }
4379 }
4380
4381 static int be_get_resources(struct be_adapter *adapter)
4382 {
4383         struct device *dev = &adapter->pdev->dev;
4384         struct be_resources res = {0};
4385         int status;
4386
4387         /* For Lancer, SH, etc. read per-function resource limits from FW.
4388          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4389          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4390          */
4391         if (BEx_chip(adapter)) {
4392                 BEx_get_resources(adapter, &res);
4393         } else {
4394                 status = be_cmd_get_func_config(adapter, &res);
4395                 if (status)
4396                         return status;
4397
4398                 /* If a default RXQ must be created, we'll use up one RSSQ */
4399                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4400                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4401                         res.max_rss_qs -= 1;
4402         }
4403
4404         /* If RoCE is supported stash away half the EQs for RoCE */
4405         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4406                                 res.max_evt_qs / 2 : res.max_evt_qs;
4407         adapter->res = res;
4408
4409         /* If FW supports RSS default queue, then skip creating non-RSS
4410          * queue for non-IP traffic.
4411          */
4412         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4413                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4414
4415         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4416                  be_max_txqs(adapter), be_max_rxqs(adapter),
4417                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4418                  be_max_vfs(adapter));
4419         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4420                  be_max_uc(adapter), be_max_mc(adapter),
4421                  be_max_vlans(adapter));
4422
4423         /* Ensure RX and TX queues are created in pairs at init time */
4424         adapter->cfg_num_rx_irqs =
4425                                 min_t(u16, netif_get_num_default_rss_queues(),
4426                                       be_max_qp_irqs(adapter));
4427         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4428         return 0;
4429 }
4430
4431 static int be_get_config(struct be_adapter *adapter)
4432 {
4433         int status, level;
4434         u16 profile_id;
4435
4436         status = be_cmd_get_cntl_attributes(adapter);
4437         if (status)
4438                 return status;
4439
4440         status = be_cmd_query_fw_cfg(adapter);
4441         if (status)
4442                 return status;
4443
4444         if (!lancer_chip(adapter) && be_physfn(adapter))
4445                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4446
4447         if (BEx_chip(adapter)) {
4448                 level = be_cmd_get_fw_log_level(adapter);
4449                 adapter->msg_enable =
4450                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4451         }
4452
4453         be_cmd_get_acpi_wol_cap(adapter);
4454         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4455         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4456
4457         be_cmd_query_port_name(adapter);
4458
4459         if (be_physfn(adapter)) {
4460                 status = be_cmd_get_active_profile(adapter, &profile_id);
4461                 if (!status)
4462                         dev_info(&adapter->pdev->dev,
4463                                  "Using profile 0x%x\n", profile_id);
4464         }
4465
4466         return 0;
4467 }
4468
4469 static int be_mac_setup(struct be_adapter *adapter)
4470 {
4471         u8 mac[ETH_ALEN];
4472         int status;
4473
4474         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4475                 status = be_cmd_get_perm_mac(adapter, mac);
4476                 if (status)
4477                         return status;
4478
4479                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4480                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4481
4482                 /* Initial MAC for BE3 VFs is already programmed by PF */
4483                 if (BEx_chip(adapter) && be_virtfn(adapter))
4484                         memcpy(adapter->dev_mac, mac, ETH_ALEN);
4485         }
4486
4487         return 0;
4488 }
4489
4490 static void be_schedule_worker(struct be_adapter *adapter)
4491 {
4492         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4493         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4494 }
4495
4496 static void be_destroy_err_recovery_workq(void)
4497 {
4498         if (!be_err_recovery_workq)
4499                 return;
4500
4501         flush_workqueue(be_err_recovery_workq);
4502         destroy_workqueue(be_err_recovery_workq);
4503         be_err_recovery_workq = NULL;
4504 }
4505
4506 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4507 {
4508         struct be_error_recovery *err_rec = &adapter->error_recovery;
4509
4510         if (!be_err_recovery_workq)
4511                 return;
4512
4513         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4514                            msecs_to_jiffies(delay));
4515         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4516 }
4517
4518 static int be_setup_queues(struct be_adapter *adapter)
4519 {
4520         struct net_device *netdev = adapter->netdev;
4521         int status;
4522
4523         status = be_evt_queues_create(adapter);
4524         if (status)
4525                 goto err;
4526
4527         status = be_tx_qs_create(adapter);
4528         if (status)
4529                 goto err;
4530
4531         status = be_rx_cqs_create(adapter);
4532         if (status)
4533                 goto err;
4534
4535         status = be_mcc_queues_create(adapter);
4536         if (status)
4537                 goto err;
4538
4539         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4540         if (status)
4541                 goto err;
4542
4543         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4544         if (status)
4545                 goto err;
4546
4547         return 0;
4548 err:
4549         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4550         return status;
4551 }
4552
4553 static int be_if_create(struct be_adapter *adapter)
4554 {
4555         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4556         u32 cap_flags = be_if_cap_flags(adapter);
4557         int status;
4558
4559         /* alloc required memory for other filtering fields */
4560         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4561                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4562         if (!adapter->pmac_id)
4563                 return -ENOMEM;
4564
4565         adapter->mc_list = kcalloc(be_max_mc(adapter),
4566                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4567         if (!adapter->mc_list)
4568                 return -ENOMEM;
4569
4570         adapter->uc_list = kcalloc(be_max_uc(adapter),
4571                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4572         if (!adapter->uc_list)
4573                 return -ENOMEM;
4574
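        /* With a single RX interrupt there is nothing for RSS to spread
         * flows across, so drop the RSS capability bits before creating
         * the interface.
         */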
4575         if (adapter->cfg_num_rx_irqs == 1)
4576                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4577
4578         en_flags &= cap_flags;
4579         /* will enable all the needed filter flags in be_open() */
4580         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4581                                   &adapter->if_handle, 0);
4582
4583         if (status)
4584                 return status;
4585
4586         return 0;
4587 }
4588
4589 int be_update_queues(struct be_adapter *adapter)
4590 {
4591         struct net_device *netdev = adapter->netdev;
4592         int status;
4593
4594         if (netif_running(netdev))
4595                 be_close(netdev);
4596
4597         be_cancel_worker(adapter);
4598
4599         /* If any vectors have been shared with RoCE we cannot re-program
4600          * the MSIx table.
4601          */
4602         if (!adapter->num_msix_roce_vec)
4603                 be_msix_disable(adapter);
4604
4605         be_clear_queues(adapter);
4606         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4607         if (status)
4608                 return status;
4609
4610         if (!msix_enabled(adapter)) {
4611                 status = be_msix_enable(adapter);
4612                 if (status)
4613                         return status;
4614         }
4615
4616         status = be_if_create(adapter);
4617         if (status)
4618                 return status;
4619
4620         status = be_setup_queues(adapter);
4621         if (status)
4622                 return status;
4623
4624         be_schedule_worker(adapter);
4625
4626         if (netif_running(netdev))
4627                 status = be_open(netdev);
4628
4629         return status;
4630 }
4631
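/* Extract the major number from a FW version string; for example, a version
 * string such as "4.6.281.26" (illustrative) yields 4, and an unparseable
 * string yields 0.
 */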
4632 static inline int fw_major_num(const char *fw_ver)
4633 {
4634         int fw_major = 0, i;
4635
4636         i = sscanf(fw_ver, "%d.", &fw_major);
4637         if (i != 1)
4638                 return 0;
4639
4640         return fw_major;
4641 }
4642
4643 /* If we are in error recovery, FLR the PF.
4644  * Otherwise, if any VFs are already enabled, don't FLR the PF.
4645  */
4646 static bool be_reset_required(struct be_adapter *adapter)
4647 {
4648         if (be_error_recovering(adapter))
4649                 return true;
4650         else
4651                 return pci_num_vf(adapter->pdev) == 0;
4652 }
4653
4654 /* Wait for the FW to be ready and perform the required initialization */
4655 static int be_func_init(struct be_adapter *adapter)
4656 {
4657         int status;
4658
4659         status = be_fw_wait_ready(adapter);
4660         if (status)
4661                 return status;
4662
4663         /* FW is now ready; clear errors to allow cmds/doorbell */
4664         be_clear_error(adapter, BE_CLEAR_ALL);
4665
4666         if (be_reset_required(adapter)) {
4667                 status = be_cmd_reset_function(adapter);
4668                 if (status)
4669                         return status;
4670
4671                 /* Wait for interrupts to quiesce after an FLR */
4672                 msleep(100);
4673         }
4674
4675         /* Tell FW we're ready to fire cmds */
4676         status = be_cmd_fw_init(adapter);
4677         if (status)
4678                 return status;
4679
4680         /* Allow interrupts for other ULPs running on NIC function */
4681         be_intr_set(adapter, true);
4682
4683         return 0;
4684 }
4685
4686 static int be_setup(struct be_adapter *adapter)
4687 {
4688         struct device *dev = &adapter->pdev->dev;
4689         int status;
4690
4691         status = be_func_init(adapter);
4692         if (status)
4693                 return status;
4694
4695         be_setup_init(adapter);
4696
4697         if (!lancer_chip(adapter))
4698                 be_cmd_req_native_mode(adapter);
4699
4700         /* invoke this cmd first to get pf_num and vf_num which are needed
4701          * for issuing profile related cmds
4702          */
4703         if (!BEx_chip(adapter)) {
4704                 status = be_cmd_get_func_config(adapter, NULL);
4705                 if (status)
4706                         return status;
4707         }
4708
4709         status = be_get_config(adapter);
4710         if (status)
4711                 goto err;
4712
4713         if (!BE2_chip(adapter) && be_physfn(adapter))
4714                 be_alloc_sriov_res(adapter);
4715
4716         status = be_get_resources(adapter);
4717         if (status)
4718                 goto err;
4719
4720         status = be_msix_enable(adapter);
4721         if (status)
4722                 goto err;
4723
4724         /* will enable all the needed filter flags in be_open() */
4725         status = be_if_create(adapter);
4726         if (status)
4727                 goto err;
4728
4729         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4730         rtnl_lock();
4731         status = be_setup_queues(adapter);
4732         rtnl_unlock();
4733         if (status)
4734                 goto err;
4735
4736         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4737
4738         status = be_mac_setup(adapter);
4739         if (status)
4740                 goto err;
4741
4742         be_cmd_get_fw_ver(adapter);
4743         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4744
4745         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4746                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4747                         adapter->fw_ver);
4748                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4749         }
4750
4751         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4752                                          adapter->rx_fc);
4753         if (status)
4754                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4755                                         &adapter->rx_fc);
4756
4757         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4758                  adapter->tx_fc, adapter->rx_fc);
4759
4760         if (be_physfn(adapter))
4761                 be_cmd_set_logical_link_config(adapter,
4762                                                IFLA_VF_LINK_STATE_AUTO, 0);
4763
4764         /* The BE3 EVB echoes broadcast/multicast packets back to the PF's
4765          * vport, confusing any Linux bridge or OVS it might be connected to.
4766          * Set the EVB to PASSTHRU mode, which effectively disables the EVB
4767          * when SRIOV is not enabled.
4768          */
4769         if (BE3_chip(adapter))
4770                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4771                                       PORT_FWD_TYPE_PASSTHRU, 0);
4772
4773         if (adapter->num_vfs)
4774                 be_vf_setup(adapter);
4775
4776         status = be_cmd_get_phy_info(adapter);
4777         if (!status && be_pause_supported(adapter))
4778                 adapter->phy.fc_autoneg = 1;
4779
4780         if (be_physfn(adapter) && !lancer_chip(adapter))
4781                 be_cmd_set_features(adapter);
4782
4783         be_schedule_worker(adapter);
4784         adapter->flags |= BE_FLAGS_SETUP_DONE;
4785         return 0;
4786 err:
4787         be_clear(adapter);
4788         return status;
4789 }
4790
4791 #ifdef CONFIG_NET_POLL_CONTROLLER
4792 static void be_netpoll(struct net_device *netdev)
4793 {
4794         struct be_adapter *adapter = netdev_priv(netdev);
4795         struct be_eq_obj *eqo;
4796         int i;
4797
4798         for_all_evt_queues(adapter, eqo, i) {
4799                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4800                 napi_schedule(&eqo->napi);
4801         }
4802 }
4803 #endif
4804
4805 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4806 {
4807         const struct firmware *fw;
4808         int status;
4809
4810         if (!netif_running(adapter->netdev)) {
4811                 dev_err(&adapter->pdev->dev,
4812                         "Firmware load not allowed (interface is down)\n");
4813                 return -ENETDOWN;
4814         }
4815
4816         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4817         if (status)
4818                 goto fw_exit;
4819
4820         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4821
4822         if (lancer_chip(adapter))
4823                 status = lancer_fw_download(adapter, fw);
4824         else
4825                 status = be_fw_download(adapter, fw);
4826
4827         if (!status)
4828                 be_cmd_get_fw_ver(adapter);
4829
4830 fw_exit:
4831         release_firmware(fw);
4832         return status;
4833 }
4834
4835 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4836                                  u16 flags)
4837 {
4838         struct be_adapter *adapter = netdev_priv(dev);
4839         struct nlattr *attr, *br_spec;
4840         int rem;
4841         int status = 0;
4842         u16 mode = 0;
4843
4844         if (!sriov_enabled(adapter))
4845                 return -EOPNOTSUPP;
4846
4847         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4848         if (!br_spec)
4849                 return -EINVAL;
4850
4851         nla_for_each_nested(attr, br_spec, rem) {
4852                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4853                         continue;
4854
4855                 if (nla_len(attr) < sizeof(mode))
4856                         return -EINVAL;
4857
4858                 mode = nla_get_u16(attr);
4859                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4860                         return -EOPNOTSUPP;
4861
4862                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4863                         return -EINVAL;
4864
4865                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4866                                                adapter->if_handle,
4867                                                mode == BRIDGE_MODE_VEPA ?
4868                                                PORT_FWD_TYPE_VEPA :
4869                                                PORT_FWD_TYPE_VEB, 0);
4870                 if (status)
4871                         goto err;
4872
4873                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4874                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4875
4876                 return status;
4877         }
4878 err:
4879         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4880                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4881
4882         return status;
4883 }
4884
4885 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4886                                  struct net_device *dev, u32 filter_mask,
4887                                  int nlflags)
4888 {
4889         struct be_adapter *adapter = netdev_priv(dev);
4890         int status = 0;
4891         u8 hsw_mode;
4892
4893         /* BE and Lancer chips support VEB mode only */
4894         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4895                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4896                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4897                         return 0;
4898                 hsw_mode = PORT_FWD_TYPE_VEB;
4899         } else {
4900                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4901                                                adapter->if_handle, &hsw_mode,
4902                                                NULL);
4903                 if (status)
4904                         return 0;
4905
4906                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4907                         return 0;
4908         }
4909
4910         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4911                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4912                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4913                                        0, 0, nlflags, filter_mask, NULL);
4914 }
4915
4916 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4917                                          void (*func)(struct work_struct *))
4918 {
4919         struct be_cmd_work *work;
4920
4921         work = kzalloc(sizeof(*work), GFP_ATOMIC);
4922         if (!work) {
4923                 dev_err(&adapter->pdev->dev,
4924                         "be_work memory allocation failed\n");
4925                 return NULL;
4926         }
4927
4928         INIT_WORK(&work->work, func);
4929         work->adapter = adapter;
4930         return work;
4931 }
4932
4933 /* VxLAN offload Notes:
4934  *
4935  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4936  * distinguish various types of transports (VxLAN, GRE, NVGRE, etc.). So, offload
4937  * is expected to work across all types of IP tunnels once exported. Skyhawk
4938  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4939  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
4940  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
4941  * those other tunnels are unexported on the fly through ndo_features_check().
4942  *
4943  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
4944  * adds more than one port, disable offloads and re-enable them again when
4945  * there's only one port left. We maintain a list of ports for this purpose.
4946  */
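/* For instance (illustrative port numbers): with 4789 already offloaded,
 * adding 8472 disables offloads for both ports; once 8472 is deleted and a
 * single port remains, offloads are re-enabled for 4789.
 */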
4947 static void be_work_add_vxlan_port(struct work_struct *work)
4948 {
4949         struct be_cmd_work *cmd_work =
4950                                 container_of(work, struct be_cmd_work, work);
4951         struct be_adapter *adapter = cmd_work->adapter;
4952         struct device *dev = &adapter->pdev->dev;
4953         __be16 port = cmd_work->info.vxlan_port;
4954         struct be_vxlan_port *vxlan_port;
4955         int status;
4956
4957         /* Bump up the alias count if it is an existing port */
4958         list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
4959                 if (vxlan_port->port == port) {
4960                         vxlan_port->port_aliases++;
4961                         goto done;
4962                 }
4963         }
4964
4965         /* Add a new port to our list. We don't need a lock here since port
4966          * add/delete are done only in the context of a single-threaded work
4967          * queue (be_wq).
4968          */
4969         vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
4970         if (!vxlan_port)
4971                 goto done;
4972
4973         vxlan_port->port = port;
4974         INIT_LIST_HEAD(&vxlan_port->list);
4975         list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
4976         adapter->vxlan_port_count++;
4977
4978         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
4979                 dev_info(dev,
4980                          "Only one UDP port supported for VxLAN offloads\n");
4981                 dev_info(dev, "Disabling VxLAN offloads\n");
4982                 goto err;
4983         }
4984
4985         if (adapter->vxlan_port_count > 1)
4986                 goto done;
4987
4988         status = be_enable_vxlan_offloads(adapter);
4989         if (!status)
4990                 goto done;
4991
4992 err:
4993         be_disable_vxlan_offloads(adapter);
4994 done:
4995         kfree(cmd_work);
4996         return;
4997 }
4998
4999 static void be_work_del_vxlan_port(struct work_struct *work)
5000 {
5001         struct be_cmd_work *cmd_work =
5002                                 container_of(work, struct be_cmd_work, work);
5003         struct be_adapter *adapter = cmd_work->adapter;
5004         __be16 port = cmd_work->info.vxlan_port;
5005         struct be_vxlan_port *vxlan_port;
5006
5007         /* Nothing to be done if a port alias is being deleted */
5008         list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5009                 if (vxlan_port->port == port) {
5010                         if (vxlan_port->port_aliases) {
5011                                 vxlan_port->port_aliases--;
5012                                 goto done;
5013                         }
5014                         break;
5015                 }
5016         }
5017
5018         /* No port aliases left; delete the port from the list */
5019         list_del(&vxlan_port->list);
5020         adapter->vxlan_port_count--;
5021
5022         /* Disable VxLAN offload if this is the offloaded port */
5023         if (adapter->vxlan_port == vxlan_port->port) {
5024                 WARN_ON(adapter->vxlan_port_count);
5025                 be_disable_vxlan_offloads(adapter);
5026                 dev_info(&adapter->pdev->dev,
5027                          "Disabled VxLAN offloads for UDP port %d\n",
5028                          be16_to_cpu(port));
5029                 goto out;
5030         }
5031
5032         /* If only 1 port is left, re-enable VxLAN offload */
5033         if (adapter->vxlan_port_count == 1)
5034                 be_enable_vxlan_offloads(adapter);
5035
5036 out:
5037         kfree(vxlan_port);
5038 done:
5039         kfree(cmd_work);
5040 }
5041
5042 static void be_cfg_vxlan_port(struct net_device *netdev,
5043                               struct udp_tunnel_info *ti,
5044                               void (*func)(struct work_struct *))
5045 {
5046         struct be_adapter *adapter = netdev_priv(netdev);
5047         struct be_cmd_work *cmd_work;
5048
5049         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5050                 return;
5051
5052         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5053                 return;
5054
5055         cmd_work = be_alloc_work(adapter, func);
5056         if (cmd_work) {
5057                 cmd_work->info.vxlan_port = ti->port;
5058                 queue_work(be_wq, &cmd_work->work);
5059         }
5060 }
5061
5062 static void be_del_vxlan_port(struct net_device *netdev,
5063                               struct udp_tunnel_info *ti)
5064 {
5065         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5066 }
5067
5068 static void be_add_vxlan_port(struct net_device *netdev,
5069                               struct udp_tunnel_info *ti)
5070 {
5071         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5072 }
5073
5074 static netdev_features_t be_features_check(struct sk_buff *skb,
5075                                            struct net_device *dev,
5076                                            netdev_features_t features)
5077 {
5078         struct be_adapter *adapter = netdev_priv(dev);
5079         u8 l4_hdr = 0;
5080
5081         /* The code below restricts offload features for some tunneled and
5082          * Q-in-Q packets.
5083          * Offload features for normal (non tunnel) packets are unchanged.
5084          */
5085         features = vlan_features_check(skb, features);
5086         if (!skb->encapsulation ||
5087             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5088                 return features;
5089
5090         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5091          * should disable tunnel offload features if it's not a VxLAN packet,
5092          * as tunnel offloads have been enabled only for VxLAN. This is done to
5093          * allow other tunneled traffic like GRE to work fine while VxLAN
5094          * offloads are configured in Skyhawk-R.
5095          */
5096         switch (vlan_get_protocol(skb)) {
5097         case htons(ETH_P_IP):
5098                 l4_hdr = ip_hdr(skb)->protocol;
5099                 break;
5100         case htons(ETH_P_IPV6):
5101                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5102                 break;
5103         default:
5104                 return features;
5105         }
5106
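        /* Strip checksum/GSO offloads unless this really is the offloaded
         * VxLAN flow: UDP, an inner Ethernet frame (TEB), a standard
         * UDP + VxLAN header length, and a dport matching the offloaded port.
         */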
5107         if (l4_hdr != IPPROTO_UDP ||
5108             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5109             skb->inner_protocol != htons(ETH_P_TEB) ||
5110             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5111                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5112             !adapter->vxlan_port ||
5113             udp_hdr(skb)->dest != adapter->vxlan_port)
5114                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5115
5116         return features;
5117 }
5118
5119 static int be_get_phys_port_id(struct net_device *dev,
5120                                struct netdev_phys_item_id *ppid)
5121 {
5122         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5123         struct be_adapter *adapter = netdev_priv(dev);
5124         u8 *id;
5125
5126         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5127                 return -ENOSPC;
5128
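        /* The id is the 1-based HBA port number followed by the controller
         * serial-number words copied in reverse order.
         */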
5129         ppid->id[0] = adapter->hba_port_num + 1;
5130         id = &ppid->id[1];
5131         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5132              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5133                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5134
5135         ppid->id_len = id_len;
5136
5137         return 0;
5138 }
5139
5140 static void be_set_rx_mode(struct net_device *dev)
5141 {
5142         struct be_adapter *adapter = netdev_priv(dev);
5143         struct be_cmd_work *work;
5144
5145         work = be_alloc_work(adapter, be_work_set_rx_mode);
5146         if (work)
5147                 queue_work(be_wq, &work->work);
5148 }
5149
5150 static const struct net_device_ops be_netdev_ops = {
5151         .ndo_open               = be_open,
5152         .ndo_stop               = be_close,
5153         .ndo_start_xmit         = be_xmit,
5154         .ndo_set_rx_mode        = be_set_rx_mode,
5155         .ndo_set_mac_address    = be_mac_addr_set,
5156         .ndo_get_stats64        = be_get_stats64,
5157         .ndo_validate_addr      = eth_validate_addr,
5158         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5159         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5160         .ndo_set_vf_mac         = be_set_vf_mac,
5161         .ndo_set_vf_vlan        = be_set_vf_vlan,
5162         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5163         .ndo_get_vf_config      = be_get_vf_config,
5164         .ndo_set_vf_link_state  = be_set_vf_link_state,
5165         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5166 #ifdef CONFIG_NET_POLL_CONTROLLER
5167         .ndo_poll_controller    = be_netpoll,
5168 #endif
5169         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5170         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5171         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5172         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5173         .ndo_features_check     = be_features_check,
5174         .ndo_get_phys_port_id   = be_get_phys_port_id,
5175 };
5176
5177 static void be_netdev_init(struct net_device *netdev)
5178 {
5179         struct be_adapter *adapter = netdev_priv(netdev);
5180
5181         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5182                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5183                 NETIF_F_HW_VLAN_CTAG_TX;
5184         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5185                 netdev->hw_features |= NETIF_F_RXHASH;
5186
5187         netdev->features |= netdev->hw_features |
5188                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5189
5190         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5191                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5192
5193         netdev->priv_flags |= IFF_UNICAST_FLT;
5194
5195         netdev->flags |= IFF_MULTICAST;
5196
5197         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5198
5199         netdev->netdev_ops = &be_netdev_ops;
5200
5201         netdev->ethtool_ops = &be_ethtool_ops;
5202
5203         /* MTU range: 256 - 9000 */
5204         netdev->min_mtu = BE_MIN_MTU;
5205         netdev->max_mtu = BE_MAX_MTU;
5206 }
5207
5208 static void be_cleanup(struct be_adapter *adapter)
5209 {
5210         struct net_device *netdev = adapter->netdev;
5211
5212         rtnl_lock();
5213         netif_device_detach(netdev);
5214         if (netif_running(netdev))
5215                 be_close(netdev);
5216         rtnl_unlock();
5217
5218         be_clear(adapter);
5219 }
5220
5221 static int be_resume(struct be_adapter *adapter)
5222 {
5223         struct net_device *netdev = adapter->netdev;
5224         int status;
5225
5226         status = be_setup(adapter);
5227         if (status)
5228                 return status;
5229
5230         rtnl_lock();
5231         if (netif_running(netdev))
5232                 status = be_open(netdev);
5233         rtnl_unlock();
5234
5235         if (status)
5236                 return status;
5237
5238         netif_device_attach(netdev);
5239
5240         return 0;
5241 }
5242
5243 static void be_soft_reset(struct be_adapter *adapter)
5244 {
5245         u32 val;
5246
5247         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5248         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5249         val |= SLIPORT_SOFTRESET_SR_MASK;
5250         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5251 }
5252
5253 static bool be_err_is_recoverable(struct be_adapter *adapter)
5254 {
5255         struct be_error_recovery *err_rec = &adapter->error_recovery;
5256         unsigned long initial_idle_time =
5257                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5258         unsigned long recovery_interval =
5259                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5260         u16 ue_err_code;
5261         u32 val;
5262
5263         val = be_POST_stage_get(adapter);
5264         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5265                 return false;
5266         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5267         if (ue_err_code == 0)
5268                 return false;
5269
5270         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5271                 ue_err_code);
5272
5273         if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5274                 dev_err(&adapter->pdev->dev,
5275                         "Cannot recover within %lu sec from driver load\n",
5276                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5277                 return false;
5278         }
5279
5280         if (err_rec->last_recovery_time && time_before_eq(
5281                 jiffies - err_rec->last_recovery_time, recovery_interval)) {
5282                 dev_err(&adapter->pdev->dev,
5283                         "Cannot recover within %lu sec from last recovery\n",
5284                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5285                 return false;
5286         }
5287
5288         if (ue_err_code == err_rec->last_err_code) {
5289                 dev_err(&adapter->pdev->dev,
5290                         "Cannot recover from a consecutive TPE error\n");
5291                 return false;
5292         }
5293
5294         err_rec->last_recovery_time = jiffies;
5295         err_rec->last_err_code = ue_err_code;
5296         return true;
5297 }
5298
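/* TPE recovery is a small state machine driven by the error-detection work:
 * NONE -> DETECT -> (RESET, on PF0 only) -> PRE_POLL -> REINIT. Intermediate
 * steps return -EAGAIN along with a resched_delay; 0 means recovery can
 * proceed and a negative error code means recovery is not possible.
 */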
5299 static int be_tpe_recover(struct be_adapter *adapter)
5300 {
5301         struct be_error_recovery *err_rec = &adapter->error_recovery;
5302         int status = -EAGAIN;
5303         u32 val;
5304
5305         switch (err_rec->recovery_state) {
5306         case ERR_RECOVERY_ST_NONE:
5307                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5308                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5309                 break;
5310
5311         case ERR_RECOVERY_ST_DETECT:
5312                 val = be_POST_stage_get(adapter);
5313                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5314                     POST_STAGE_RECOVERABLE_ERR) {
5315                         dev_err(&adapter->pdev->dev,
5316                                 "Unrecoverable HW error detected: 0x%x\n", val);
5317                         status = -EINVAL;
5318                         err_rec->resched_delay = 0;
5319                         break;
5320                 }
5321
5322                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5323
5324                 /* Only PF0 initiates a Chip Soft Reset. But PF0 must wait for the
5325                  * UE-to-SR interval (ue_to_reset_time) before it checks the final
5326                  * error status in SLIPORT_SEMAPHORE to determine whether the
5327                  * recovery criteria are met. If they are, PF0 initiates a Soft Reset.
5328                  */
5329                 if (adapter->pf_num == 0) {
5330                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5331                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5332                                         ERR_RECOVERY_UE_DETECT_DURATION;
5333                         break;
5334                 }
5335
5336                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5337                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5338                                         ERR_RECOVERY_UE_DETECT_DURATION;
5339                 break;
5340
5341         case ERR_RECOVERY_ST_RESET:
5342                 if (!be_err_is_recoverable(adapter)) {
5343                         dev_err(&adapter->pdev->dev,
5344                                 "Failed to meet recovery criteria\n");
5345                         status = -EIO;
5346                         err_rec->resched_delay = 0;
5347                         break;
5348                 }
5349                 be_soft_reset(adapter);
5350                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5351                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5352                                         err_rec->ue_to_reset_time;
5353                 break;
5354
5355         case ERR_RECOVERY_ST_PRE_POLL:
5356                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5357                 err_rec->resched_delay = 0;
5358                 status = 0;                     /* done */
5359                 break;
5360
5361         default:
5362                 status = -EINVAL;
5363                 err_rec->resched_delay = 0;
5364                 break;
5365         }
5366
5367         return status;
5368 }
5369
5370 static int be_err_recover(struct be_adapter *adapter)
5371 {
5372         int status;
5373
5374         if (!lancer_chip(adapter)) {
5375                 if (!adapter->error_recovery.recovery_supported ||
5376                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5377                         return -EIO;
5378                 status = be_tpe_recover(adapter);
5379                 if (status)
5380                         goto err;
5381         }
5382
5383         /* Wait for adapter to reach quiescent state before
5384          * destroying queues
5385          */
5386         status = be_fw_wait_ready(adapter);
5387         if (status)
5388                 goto err;
5389
5390         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5391
5392         be_cleanup(adapter);
5393
5394         status = be_resume(adapter);
5395         if (status)
5396                 goto err;
5397
5398         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5399
5400 err:
5401         return status;
5402 }
5403
5404 static void be_err_detection_task(struct work_struct *work)
5405 {
5406         struct be_error_recovery *err_rec =
5407                         container_of(work, struct be_error_recovery,
5408                                      err_detection_work.work);
5409         struct be_adapter *adapter =
5410                         container_of(err_rec, struct be_adapter,
5411                                      error_recovery);
5412         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5413         struct device *dev = &adapter->pdev->dev;
5414         int recovery_status;
5415
5416         be_detect_error(adapter);
5417         if (!be_check_error(adapter, BE_ERROR_HW))
5418                 goto reschedule_task;
5419
5420         recovery_status = be_err_recover(adapter);
5421         if (!recovery_status) {
5422                 err_rec->recovery_retries = 0;
5423                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5424                 dev_info(dev, "Adapter recovery successful\n");
5425                 goto reschedule_task;
5426         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5427                 /* BEx/SH recovery state machine */
5428                 if (adapter->pf_num == 0 &&
5429                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5430                         dev_err(&adapter->pdev->dev,
5431                                 "Adapter recovery in progress\n");
5432                 resched_delay = err_rec->resched_delay;
5433                 goto reschedule_task;
5434         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5435                 /* For VFs, check every second whether the PF has
5436                  * allocated resources.
5437                  */
5438                 dev_err(dev, "Re-trying adapter recovery\n");
5439                 goto reschedule_task;
5440         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5441                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5442                 /* In case of another error during recovery, it takes 30 sec
5443                  * for the adapter to come out of error. Retry error recovery after
5444                  * this time interval.
5445                  */
5446                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5447                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5448                 goto reschedule_task;
5449         } else {
5450                 dev_err(dev, "Adapter recovery failed\n");
5451                 dev_err(dev, "Please reboot server to recover\n");
5452         }
5453
5454         return;
5455
5456 reschedule_task:
5457         be_schedule_err_detection(adapter, resched_delay);
5458 }
5459
5460 static void be_log_sfp_info(struct be_adapter *adapter)
5461 {
5462         int status;
5463
5464         status = be_cmd_query_sfp_info(adapter);
5465         if (!status) {
5466                 dev_err(&adapter->pdev->dev,
5467                         "Port %c: %s Vendor: %s part no: %s\n",
5468                         adapter->port_name,
5469                         be_misconfig_evt_port_state[adapter->phy_state],
5470                         adapter->phy.vendor_name,
5471                         adapter->phy.vendor_pn);
5472         }
5473         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5474 }
5475
5476 static void be_worker(struct work_struct *work)
5477 {
5478         struct be_adapter *adapter =
5479                 container_of(work, struct be_adapter, work.work);
5480         struct be_rx_obj *rxo;
5481         int i;
5482
5483         if (be_physfn(adapter) &&
5484             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5485                 be_cmd_get_die_temperature(adapter);
5486
5487         /* when interrupts are not yet enabled, just reap any pending
5488          * mcc completions
5489          */
5490         if (!netif_running(adapter->netdev)) {
5491                 local_bh_disable();
5492                 be_process_mcc(adapter);
5493                 local_bh_enable();
5494                 goto reschedule;
5495         }
5496
5497         if (!adapter->stats_cmd_sent) {
5498                 if (lancer_chip(adapter))
5499                         lancer_cmd_get_pport_stats(adapter,
5500                                                    &adapter->stats_cmd);
5501                 else
5502                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5503         }
5504
5505         for_all_rx_queues(adapter, rxo, i) {
5506                 /* Replenish RX-queues starved due to memory
5507                  * allocation failures.
5508                  */
5509                 if (rxo->rx_post_starved)
5510                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5511         }
5512
5513         /* EQ-delay update for Skyhawk is done while notifying EQ */
5514         if (!skyhawk_chip(adapter))
5515                 be_eqd_update(adapter, false);
5516
5517         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5518                 be_log_sfp_info(adapter);
5519
5520 reschedule:
5521         adapter->work_counter++;
5522         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5523 }
5524
5525 static void be_unmap_pci_bars(struct be_adapter *adapter)
5526 {
5527         if (adapter->csr)
5528                 pci_iounmap(adapter->pdev, adapter->csr);
5529         if (adapter->db)
5530                 pci_iounmap(adapter->pdev, adapter->db);
5531         if (adapter->pcicfg && adapter->pcicfg_mapped)
5532                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5533 }
5534
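/* The doorbell registers live in BAR 0 on Lancer and on VFs, and in BAR 4
 * on BEx/Skyhawk PFs.
 */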
5535 static int db_bar(struct be_adapter *adapter)
5536 {
5537         if (lancer_chip(adapter) || be_virtfn(adapter))
5538                 return 0;
5539         else
5540                 return 4;
5541 }
5542
5543 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5544 {
5545         if (skyhawk_chip(adapter)) {
5546                 adapter->roce_db.size = 4096;
5547                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5548                                                               db_bar(adapter));
5549                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5550                                                                db_bar(adapter));
5551         }
5552         return 0;
5553 }
5554
5555 static int be_map_pci_bars(struct be_adapter *adapter)
5556 {
5557         struct pci_dev *pdev = adapter->pdev;
5558         u8 __iomem *addr;
5559         u32 sli_intf;
5560
5561         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5562         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5563                                 SLI_INTF_FAMILY_SHIFT;
5564         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5565
5566         if (BEx_chip(adapter) && be_physfn(adapter)) {
5567                 adapter->csr = pci_iomap(pdev, 2, 0);
5568                 if (!adapter->csr)
5569                         return -ENOMEM;
5570         }
5571
5572         addr = pci_iomap(pdev, db_bar(adapter), 0);
5573         if (!addr)
5574                 goto pci_map_err;
5575         adapter->db = addr;
5576
5577         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5578                 if (be_physfn(adapter)) {
5579                         /* PCICFG is the 2nd BAR in BE2 */
5580                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5581                         if (!addr)
5582                                 goto pci_map_err;
5583                         adapter->pcicfg = addr;
5584                         adapter->pcicfg_mapped = true;
5585                 } else {
5586                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5587                         adapter->pcicfg_mapped = false;
5588                 }
5589         }
5590
5591         be_roce_map_pci_bars(adapter);
5592         return 0;
5593
5594 pci_map_err:
5595         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5596         be_unmap_pci_bars(adapter);
5597         return -ENOMEM;
5598 }
5599
5600 static void be_drv_cleanup(struct be_adapter *adapter)
5601 {
5602         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5603         struct device *dev = &adapter->pdev->dev;
5604
5605         if (mem->va)
5606                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5607
5608         mem = &adapter->rx_filter;
5609         if (mem->va)
5610                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5611
5612         mem = &adapter->stats_cmd;
5613         if (mem->va)
5614                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5615 }
5616
5617 /* Allocate and initialize various fields in be_adapter struct */
5618 static int be_drv_init(struct be_adapter *adapter)
5619 {
5620         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5621         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5622         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5623         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5624         struct device *dev = &adapter->pdev->dev;
5625         int status = 0;
5626
5627         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5628         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5629                                                  &mbox_mem_alloc->dma,
5630                                                  GFP_KERNEL);
5631         if (!mbox_mem_alloc->va)
5632                 return -ENOMEM;
5633
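        /* The mailbox must be 16-byte aligned; carve an aligned va/dma view
         * out of the over-allocated (size + 16) buffer.
         */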
5634         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5635         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5636         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5637
5638         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5639         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5640                                             &rx_filter->dma, GFP_KERNEL);
5641         if (!rx_filter->va) {
5642                 status = -ENOMEM;
5643                 goto free_mbox;
5644         }
5645
5646         if (lancer_chip(adapter))
5647                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5648         else if (BE2_chip(adapter))
5649                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5650         else if (BE3_chip(adapter))
5651                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5652         else
5653                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5654         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5655                                             &stats_cmd->dma, GFP_KERNEL);
5656         if (!stats_cmd->va) {
5657                 status = -ENOMEM;
5658                 goto free_rx_filter;
5659         }
5660
5661         mutex_init(&adapter->mbox_lock);
5662         mutex_init(&adapter->mcc_lock);
5663         mutex_init(&adapter->rx_filter_lock);
5664         spin_lock_init(&adapter->mcc_cq_lock);
5665         init_completion(&adapter->et_cmd_compl);
5666
5667         pci_save_state(adapter->pdev);
5668
5669         INIT_DELAYED_WORK(&adapter->work, be_worker);
5670
5671         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5672         adapter->error_recovery.resched_delay = 0;
5673         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5674                           be_err_detection_task);
5675
5676         adapter->rx_fc = true;
5677         adapter->tx_fc = true;
5678
5679         /* Must be a power of 2 or else MODULO will BUG_ON */
5680         adapter->be_get_temp_freq = 64;
5681
5682         INIT_LIST_HEAD(&adapter->vxlan_port_list);
5683         return 0;
5684
5685 free_rx_filter:
5686         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5687 free_mbox:
5688         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5689                           mbox_mem_alloc->dma);
5690         return status;
5691 }
5692
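/* Reverse of be_probe(): detach the RoCE driver, disable interrupts,
 * unregister the netdev, tear down adapter state and release the PCI
 * resources.
 */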
5693 static void be_remove(struct pci_dev *pdev)
5694 {
5695         struct be_adapter *adapter = pci_get_drvdata(pdev);
5696
5697         if (!adapter)
5698                 return;
5699
5700         be_roce_dev_remove(adapter);
5701         be_intr_set(adapter, false);
5702
5703         be_cancel_err_detection(adapter);
5704
5705         unregister_netdev(adapter->netdev);
5706
5707         be_clear(adapter);
5708
5709         if (!pci_vfs_assigned(adapter->pdev))
5710                 be_cmd_reset_function(adapter);
5711
5712         /* Tell FW that we are done issuing commands */
5713         be_cmd_fw_clean(adapter);
5714
5715         be_unmap_pci_bars(adapter);
5716         be_drv_cleanup(adapter);
5717
5718         pci_disable_pcie_error_reporting(pdev);
5719
5720         pci_release_regions(pdev);
5721         pci_disable_device(pdev);
5722
5723         free_netdev(adapter->netdev);
5724 }
5725
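/* hwmon callback: report the last on-die temperature reading in
 * millidegrees Celsius, or -EIO if no valid reading is available yet.
 */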
5726 static ssize_t be_hwmon_show_temp(struct device *dev,
5727                                   struct device_attribute *dev_attr,
5728                                   char *buf)
5729 {
5730         struct be_adapter *adapter = dev_get_drvdata(dev);
5731
5732         /* Unit: millidegree Celsius */
5733         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5734                 return -EIO;
5735         else
5736                 return sprintf(buf, "%u\n",
5737                                adapter->hwmon_info.be_on_die_temp * 1000);
5738 }
5739
5740 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5741                           be_hwmon_show_temp, NULL, 1);
5742
5743 static struct attribute *be_hwmon_attrs[] = {
5744         &sensor_dev_attr_temp1_input.dev_attr.attr,
5745         NULL
5746 };
5747
5748 ATTRIBUTE_GROUPS(be_hwmon);
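/* The reading is exposed through the standard hwmon sysfs interface, e.g.
 * (the hwmon index below is only illustrative):
 *
 *   cat /sys/class/hwmon/hwmonN/temp1_input
 */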
5749
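/* Printable name of the multi-channel mode this function is configured in */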
5750 static char *mc_name(struct be_adapter *adapter)
5751 {
5752         char *str = ""; /* default */
5753
5754         switch (adapter->mc_type) {
5755         case UMC:
5756                 str = "UMC";
5757                 break;
5758         case FLEX10:
5759                 str = "FLEX10";
5760                 break;
5761         case vNIC1:
5762                 str = "vNIC-1";
5763                 break;
5764         case nPAR:
5765                 str = "nPAR";
5766                 break;
5767         case UFP:
5768                 str = "UFP";
5769                 break;
5770         case vNIC2:
5771                 str = "vNIC-2";
5772                 break;
5773         default:
5774                 str = "";
5775         }
5776
5777         return str;
5778 }
5779
5780 static inline char *func_name(struct be_adapter *adapter)
5781 {
5782         return be_physfn(adapter) ? "PF" : "VF";
5783 }
5784
5785 static inline char *nic_name(struct pci_dev *pdev)
5786 {
5787         switch (pdev->device) {
5788         case OC_DEVICE_ID1:
5789                 return OC_NAME;
5790         case OC_DEVICE_ID2:
5791                 return OC_NAME_BE;
5792         case OC_DEVICE_ID3:
5793         case OC_DEVICE_ID4:
5794                 return OC_NAME_LANCER;
5795         case BE_DEVICE_ID2:
5796                 return BE3_NAME;
5797         case OC_DEVICE_ID5:
5798         case OC_DEVICE_ID6:
5799                 return OC_NAME_SH;
5800         default:
5801                 return BE_NAME;
5802         }
5803 }
5804
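/* PCI probe: enable the device, map the BARs, allocate driver state, bring
 * the adapter up via be_setup(), register the netdev and kick off error
 * detection. Errors unwind in reverse order through the labels below.
 */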
5805 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5806 {
5807         struct be_adapter *adapter;
5808         struct net_device *netdev;
5809         int status = 0;
5810
5811         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5812
5813         status = pci_enable_device(pdev);
5814         if (status)
5815                 goto do_none;
5816
5817         status = pci_request_regions(pdev, DRV_NAME);
5818         if (status)
5819                 goto disable_dev;
5820         pci_set_master(pdev);
5821
5822         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5823         if (!netdev) {
5824                 status = -ENOMEM;
5825                 goto rel_reg;
5826         }
5827         adapter = netdev_priv(netdev);
5828         adapter->pdev = pdev;
5829         pci_set_drvdata(pdev, adapter);
5830         adapter->netdev = netdev;
5831         SET_NETDEV_DEV(netdev, &pdev->dev);
5832
5833         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5834         if (!status) {
5835                 netdev->features |= NETIF_F_HIGHDMA;
5836         } else {
5837                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5838                 if (status) {
5839                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5840                         goto free_netdev;
5841                 }
5842         }
5843
5844         status = pci_enable_pcie_error_reporting(pdev);
5845         if (!status)
5846                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5847
5848         status = be_map_pci_bars(adapter);
5849         if (status)
5850                 goto free_netdev;
5851
5852         status = be_drv_init(adapter);
5853         if (status)
5854                 goto unmap_bars;
5855
5856         status = be_setup(adapter);
5857         if (status)
5858                 goto drv_cleanup;
5859
5860         be_netdev_init(netdev);
5861         status = register_netdev(netdev);
5862         if (status != 0)
5863                 goto unsetup;
5864
5865         be_roce_dev_add(adapter);
5866
5867         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5868         adapter->error_recovery.probe_time = jiffies;
5869
5870         /* On-die temperature is not supported on VFs. */
5871         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5872                 adapter->hwmon_info.hwmon_dev =
5873                         devm_hwmon_device_register_with_groups(&pdev->dev,
5874                                                                DRV_NAME,
5875                                                                adapter,
5876                                                                be_hwmon_groups);
5877                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5878         }
5879
5880         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5881                  func_name(adapter), mc_name(adapter), adapter->port_name);
5882
5883         return 0;
5884
5885 unsetup:
5886         be_clear(adapter);
5887 drv_cleanup:
5888         be_drv_cleanup(adapter);
5889 unmap_bars:
5890         be_unmap_pci_bars(adapter);
5891 free_netdev:
5892         free_netdev(netdev);
5893 rel_reg:
5894         pci_release_regions(pdev);
5895 disable_dev:
5896         pci_disable_device(pdev);
5897 do_none:
5898         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5899         return status;
5900 }
5901
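/* Legacy PM suspend: quiesce the adapter, then save PCI state and power the
 * device down. be_pci_resume() below undoes this.
 */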
5902 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5903 {
5904         struct be_adapter *adapter = pci_get_drvdata(pdev);
5905
5906         be_intr_set(adapter, false);
5907         be_cancel_err_detection(adapter);
5908
5909         be_cleanup(adapter);
5910
5911         pci_save_state(pdev);
5912         pci_disable_device(pdev);
5913         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5914         return 0;
5915 }
5916
5917 static int be_pci_resume(struct pci_dev *pdev)
5918 {
5919         struct be_adapter *adapter = pci_get_drvdata(pdev);
5920         int status = 0;
5921
5922         status = pci_enable_device(pdev);
5923         if (status)
5924                 return status;
5925
5926         pci_restore_state(pdev);
5927
5928         status = be_resume(adapter);
5929         if (status)
5930                 return status;
5931
5932         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5933
5934         return 0;
5935 }
5936
5937 /*
5938  * An FLR will stop BE from DMAing any data.
5939  */
5940 static void be_shutdown(struct pci_dev *pdev)
5941 {
5942         struct be_adapter *adapter = pci_get_drvdata(pdev);
5943
5944         if (!adapter)
5945                 return;
5946
5947         be_roce_dev_shutdown(adapter);
5948         cancel_delayed_work_sync(&adapter->work);
5949         be_cancel_err_detection(adapter);
5950
5951         netif_device_detach(adapter->netdev);
5952
5953         be_cmd_reset_function(adapter);
5954
5955         pci_disable_device(pdev);
5956 }
5957
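/* PCI/EEH error handlers: quiesce the function when an error is detected,
 * re-initialize it after the slot reset and resume traffic (see
 * be_eeh_handlers below).
 */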
5958 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5959                                             pci_channel_state_t state)
5960 {
5961         struct be_adapter *adapter = pci_get_drvdata(pdev);
5962
5963         dev_err(&adapter->pdev->dev, "EEH error detected\n");
5964
5965         be_roce_dev_remove(adapter);
5966
5967         if (!be_check_error(adapter, BE_ERROR_EEH)) {
5968                 be_set_error(adapter, BE_ERROR_EEH);
5969
5970                 be_cancel_err_detection(adapter);
5971
5972                 be_cleanup(adapter);
5973         }
5974
5975         if (state == pci_channel_io_perm_failure)
5976                 return PCI_ERS_RESULT_DISCONNECT;
5977
5978         pci_disable_device(pdev);
5979
5980         /* The error could cause the FW to trigger a flash debug dump.
5981          * Resetting the card while flash dump is in progress
5982          * can cause it not to recover; wait for it to finish.
5983          * Wait only for the first function, as it is needed only once
5984          * per adapter.
5985          */
5986         if (pdev->devfn == 0)
5987                 ssleep(30);
5988
5989         return PCI_ERS_RESULT_NEED_RESET;
5990 }
5991
5992 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
5993 {
5994         struct be_adapter *adapter = pci_get_drvdata(pdev);
5995         int status;
5996
5997         dev_info(&adapter->pdev->dev, "EEH reset\n");
5998
5999         status = pci_enable_device(pdev);
6000         if (status)
6001                 return PCI_ERS_RESULT_DISCONNECT;
6002
6003         pci_set_master(pdev);
6004         pci_restore_state(pdev);
6005
6006         /* Check if card is ok and fw is ready */
6007         dev_info(&adapter->pdev->dev,
6008                  "Waiting for FW to be ready after EEH reset\n");
6009         status = be_fw_wait_ready(adapter);
6010         if (status)
6011                 return PCI_ERS_RESULT_DISCONNECT;
6012
6013         pci_cleanup_aer_uncorrect_error_status(pdev);
6014         be_clear_error(adapter, BE_CLEAR_ALL);
6015         return PCI_ERS_RESULT_RECOVERED;
6016 }
6017
6018 static void be_eeh_resume(struct pci_dev *pdev)
6019 {
6020         int status = 0;
6021         struct be_adapter *adapter = pci_get_drvdata(pdev);
6022
6023         dev_info(&adapter->pdev->dev, "EEH resume\n");
6024
6025         pci_save_state(pdev);
6026
6027         status = be_resume(adapter);
6028         if (status)
6029                 goto err;
6030
6031         be_roce_dev_add(adapter);
6032
6033         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6034         return;
6035 err:
6036         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6037 }
6038
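/* Backs the standard sriov_numvfs sysfs attribute; num_vfs == 0 disables the
 * VFs. A minimal usage sketch (the PCI address below is only an example):
 *
 *   echo 4 > /sys/bus/pci/devices/0000:04:00.0/sriov_numvfs
 *   echo 0 > /sys/bus/pci/devices/0000:04:00.0/sriov_numvfs
 */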
6039 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6040 {
6041         struct be_adapter *adapter = pci_get_drvdata(pdev);
6042         struct be_resources vft_res = {0};
6043         int status;
6044
6045         if (!num_vfs)
6046                 be_vf_clear(adapter);
6047
6048         adapter->num_vfs = num_vfs;
6049
6050         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6051                 dev_warn(&pdev->dev,
6052                          "Cannot disable VFs while they are assigned\n");
6053                 return -EBUSY;
6054         }
6055
6056         /* When the HW is in an SR-IOV capable configuration, the PF-pool
6057          * resources are equally distributed across the maximum number of
6058          * VFs. The user may request that only a subset of the max VFs be
6059          * enabled. Based on num_vfs, redistribute the resources across
6060          * num_vfs so that each VF gets a larger share of resources.
6061          * This facility is not available in BE3 FW.
6062          * Also, this is done by the FW in the Lancer chip.
6063          */
6064         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6065                 be_calculate_vf_res(adapter, adapter->num_vfs,
6066                                     &vft_res);
6067                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6068                                                  adapter->num_vfs, &vft_res);
6069                 if (status)
6070                         dev_err(&pdev->dev,
6071                                 "Failed to optimize SR-IOV resources\n");
6072         }
6073
6074         status = be_get_resources(adapter);
6075         if (status)
6076                 return be_cmd_status(status);
6077
6078         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6079         rtnl_lock();
6080         status = be_update_queues(adapter);
6081         rtnl_unlock();
6082         if (status)
6083                 return be_cmd_status(status);
6084
6085         if (adapter->num_vfs)
6086                 status = be_vf_setup(adapter);
6087
6088         if (!status)
6089                 return adapter->num_vfs;
6090
6091         return 0;
6092 }
6093
6094 static const struct pci_error_handlers be_eeh_handlers = {
6095         .error_detected = be_eeh_err_detected,
6096         .slot_reset = be_eeh_reset,
6097         .resume = be_eeh_resume,
6098 };
6099
6100 static struct pci_driver be_driver = {
6101         .name = DRV_NAME,
6102         .id_table = be_dev_ids,
6103         .probe = be_probe,
6104         .remove = be_remove,
6105         .suspend = be_suspend,
6106         .resume = be_pci_resume,
6107         .shutdown = be_shutdown,
6108         .sriov_configure = be_pci_sriov_configure,
6109         .err_handler = &be_eeh_handlers
6110 };
6111
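/* Module init: validate module parameters, create the shared workqueues and
 * register the PCI driver.
 */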
6112 static int __init be_init_module(void)
6113 {
6114         int status;
6115
6116         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6117             rx_frag_size != 2048) {
6118                 printk(KERN_WARNING DRV_NAME
6119                         " : Module param rx_frag_size must be 2048/4096/8192."
6120                         " Using 2048\n");
6121                 rx_frag_size = 2048;
6122         }
6123
6124         if (num_vfs > 0) {
6125                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6126                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6127         }
6128
6129         be_wq = create_singlethread_workqueue("be_wq");
6130         if (!be_wq) {
6131                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6132                 return -ENOMEM;
6133         }
6134
6135         be_err_recovery_workq =
6136                 create_singlethread_workqueue("be_err_recover");
6137         if (!be_err_recovery_workq)
6138                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6139
6140         status = pci_register_driver(&be_driver);
6141         if (status) {
6142                 destroy_workqueue(be_wq);
6143                 be_destroy_err_recovery_workq();
6144         }
6145         return status;
6146 }
6147 module_init(be_init_module);
6148
6149 static void __exit be_exit_module(void)
6150 {
6151         pci_unregister_driver(&be_driver);
6152
6153         be_destroy_err_recovery_workq();
6154
6155         if (be_wq)
6156                 destroy_workqueue(be_wq);
6157 }
6158 module_exit(be_exit_module);