1 /*******************************************************************************
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2009 Intel Corporation.
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
26 *******************************************************************************/
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
49 #include <linux/dca.h>
53 #define DRV_VERSION "2.1.0-k2"
54 char igb_driver_name[] = "igb";
55 char igb_driver_version[] = DRV_VERSION;
56 static const char igb_driver_string[] =
57 "Intel(R) Gigabit Ethernet Network Driver";
58 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
60 static const struct e1000_info *igb_info_tbl[] = {
61 [board_82575] = &e1000_82575_info,
64 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
65 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
66 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
67 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
68 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
69 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
70 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
71 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
72 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
73 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
74 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
75 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
76 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
77 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
87 /* required last entry */
91 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
93 void igb_reset(struct igb_adapter *);
94 static int igb_setup_all_tx_resources(struct igb_adapter *);
95 static int igb_setup_all_rx_resources(struct igb_adapter *);
96 static void igb_free_all_tx_resources(struct igb_adapter *);
97 static void igb_free_all_rx_resources(struct igb_adapter *);
98 static void igb_setup_mrqc(struct igb_adapter *);
99 void igb_update_stats(struct igb_adapter *);
100 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
101 static void __devexit igb_remove(struct pci_dev *pdev);
102 static int igb_sw_init(struct igb_adapter *);
103 static int igb_open(struct net_device *);
104 static int igb_close(struct net_device *);
105 static void igb_configure_tx(struct igb_adapter *);
106 static void igb_configure_rx(struct igb_adapter *);
107 static void igb_clean_all_tx_rings(struct igb_adapter *);
108 static void igb_clean_all_rx_rings(struct igb_adapter *);
109 static void igb_clean_tx_ring(struct igb_ring *);
110 static void igb_clean_rx_ring(struct igb_ring *);
111 static void igb_set_rx_mode(struct net_device *);
112 static void igb_update_phy_info(unsigned long);
113 static void igb_watchdog(unsigned long);
114 static void igb_watchdog_task(struct work_struct *);
115 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
116 static struct net_device_stats *igb_get_stats(struct net_device *);
117 static int igb_change_mtu(struct net_device *, int);
118 static int igb_set_mac(struct net_device *, void *);
119 static void igb_set_uta(struct igb_adapter *adapter);
120 static irqreturn_t igb_intr(int irq, void *);
121 static irqreturn_t igb_intr_msi(int irq, void *);
122 static irqreturn_t igb_msix_other(int irq, void *);
123 static irqreturn_t igb_msix_ring(int irq, void *);
124 #ifdef CONFIG_IGB_DCA
125 static void igb_update_dca(struct igb_q_vector *);
126 static void igb_setup_dca(struct igb_adapter *);
127 #endif /* CONFIG_IGB_DCA */
128 static bool igb_clean_tx_irq(struct igb_q_vector *);
129 static int igb_poll(struct napi_struct *, int);
130 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
131 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
132 static void igb_tx_timeout(struct net_device *);
133 static void igb_reset_task(struct work_struct *);
134 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
135 static void igb_vlan_rx_add_vid(struct net_device *, u16);
136 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
137 static void igb_restore_vlan(struct igb_adapter *);
138 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
139 static void igb_ping_all_vfs(struct igb_adapter *);
140 static void igb_msg_task(struct igb_adapter *);
141 static void igb_vmm_control(struct igb_adapter *);
142 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
143 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
144 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
145 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
146 int vf, u16 vlan, u8 qos);
147 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
148 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
149 struct ifla_vf_info *ivi);
152 static int igb_suspend(struct pci_dev *, pm_message_t);
153 static int igb_resume(struct pci_dev *);
155 static void igb_shutdown(struct pci_dev *);
156 #ifdef CONFIG_IGB_DCA
157 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
158 static struct notifier_block dca_notifier = {
159 .notifier_call = igb_notify_dca,
164 #ifdef CONFIG_NET_POLL_CONTROLLER
165 /* for netdump / net console */
166 static void igb_netpoll(struct net_device *);
168 #ifdef CONFIG_PCI_IOV
169 static unsigned int max_vfs = 0;
170 module_param(max_vfs, uint, 0);
171 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
172 "per physical function");
173 #endif /* CONFIG_PCI_IOV */
175 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
176 pci_channel_state_t);
177 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
178 static void igb_io_resume(struct pci_dev *);
180 static struct pci_error_handlers igb_err_handler = {
181 .error_detected = igb_io_error_detected,
182 .slot_reset = igb_io_slot_reset,
183 .resume = igb_io_resume,
187 static struct pci_driver igb_driver = {
188 .name = igb_driver_name,
189 .id_table = igb_pci_tbl,
191 .remove = __devexit_p(igb_remove),
193 /* Power Managment Hooks */
194 .suspend = igb_suspend,
195 .resume = igb_resume,
197 .shutdown = igb_shutdown,
198 .err_handler = &igb_err_handler
201 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
202 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
203 MODULE_LICENSE("GPL");
204 MODULE_VERSION(DRV_VERSION);
206 struct igb_reg_info {
211 static const struct igb_reg_info igb_reg_info_tbl[] = {
213 /* General Registers */
214 {E1000_CTRL, "CTRL"},
215 {E1000_STATUS, "STATUS"},
216 {E1000_CTRL_EXT, "CTRL_EXT"},
218 /* Interrupt Registers */
222 {E1000_RCTL, "RCTL"},
223 {E1000_RDLEN(0), "RDLEN"},
224 {E1000_RDH(0), "RDH"},
225 {E1000_RDT(0), "RDT"},
226 {E1000_RXDCTL(0), "RXDCTL"},
227 {E1000_RDBAL(0), "RDBAL"},
228 {E1000_RDBAH(0), "RDBAH"},
231 {E1000_TCTL, "TCTL"},
232 {E1000_TDBAL(0), "TDBAL"},
233 {E1000_TDBAH(0), "TDBAH"},
234 {E1000_TDLEN(0), "TDLEN"},
235 {E1000_TDH(0), "TDH"},
236 {E1000_TDT(0), "TDT"},
237 {E1000_TXDCTL(0), "TXDCTL"},
238 {E1000_TDFH, "TDFH"},
239 {E1000_TDFT, "TDFT"},
240 {E1000_TDFHS, "TDFHS"},
241 {E1000_TDFPC, "TDFPC"},
243 /* List Terminator */
248 * igb_regdump - register printout routine
250 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
256 switch (reginfo->ofs) {
258 for (n = 0; n < 4; n++)
259 regs[n] = rd32(E1000_RDLEN(n));
262 for (n = 0; n < 4; n++)
263 regs[n] = rd32(E1000_RDH(n));
266 for (n = 0; n < 4; n++)
267 regs[n] = rd32(E1000_RDT(n));
269 case E1000_RXDCTL(0):
270 for (n = 0; n < 4; n++)
271 regs[n] = rd32(E1000_RXDCTL(n));
274 for (n = 0; n < 4; n++)
275 regs[n] = rd32(E1000_RDBAL(n));
278 for (n = 0; n < 4; n++)
279 regs[n] = rd32(E1000_RDBAH(n));
282 for (n = 0; n < 4; n++)
283 regs[n] = rd32(E1000_RDBAL(n));
286 for (n = 0; n < 4; n++)
287 regs[n] = rd32(E1000_TDBAH(n));
290 for (n = 0; n < 4; n++)
291 regs[n] = rd32(E1000_TDLEN(n));
294 for (n = 0; n < 4; n++)
295 regs[n] = rd32(E1000_TDH(n));
298 for (n = 0; n < 4; n++)
299 regs[n] = rd32(E1000_TDT(n));
301 case E1000_TXDCTL(0):
302 for (n = 0; n < 4; n++)
303 regs[n] = rd32(E1000_TXDCTL(n));
306 printk(KERN_INFO "%-15s %08x\n",
307 reginfo->name, rd32(reginfo->ofs));
311 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
312 printk(KERN_INFO "%-15s ", rname);
313 for (n = 0; n < 4; n++)
314 printk(KERN_CONT "%08x ", regs[n]);
315 printk(KERN_CONT "\n");
319 * igb_dump - Print registers, tx-rings and rx-rings
321 static void igb_dump(struct igb_adapter *adapter)
323 struct net_device *netdev = adapter->netdev;
324 struct e1000_hw *hw = &adapter->hw;
325 struct igb_reg_info *reginfo;
327 struct igb_ring *tx_ring;
328 union e1000_adv_tx_desc *tx_desc;
329 struct my_u0 { u64 a; u64 b; } *u0;
330 struct igb_buffer *buffer_info;
331 struct igb_ring *rx_ring;
332 union e1000_adv_rx_desc *rx_desc;
336 if (!netif_msg_hw(adapter))
339 /* Print netdevice Info */
341 dev_info(&adapter->pdev->dev, "Net device Info\n");
342 printk(KERN_INFO "Device Name state "
343 "trans_start last_rx\n");
344 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
351 /* Print Registers */
352 dev_info(&adapter->pdev->dev, "Register Dump\n");
353 printk(KERN_INFO " Register Name Value\n");
354 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
355 reginfo->name; reginfo++) {
356 igb_regdump(hw, reginfo);
359 /* Print TX Ring Summary */
360 if (!netdev || !netif_running(netdev))
363 dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
364 printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma ]"
365 " leng ntw timestamp\n");
366 for (n = 0; n < adapter->num_tx_queues; n++) {
367 tx_ring = adapter->tx_ring[n];
368 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
369 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
370 n, tx_ring->next_to_use, tx_ring->next_to_clean,
371 (u64)buffer_info->dma,
373 buffer_info->next_to_watch,
374 (u64)buffer_info->time_stamp);
378 if (!netif_msg_tx_done(adapter))
379 goto rx_ring_summary;
381 dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
383 /* Transmit Descriptor Formats
385 * Advanced Transmit Descriptor
386 * +--------------------------------------------------------------+
387 * 0 | Buffer Address [63:0] |
388 * +--------------------------------------------------------------+
389 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
390 * +--------------------------------------------------------------+
391 * 63 46 45 40 39 38 36 35 32 31 24 15 0
394 for (n = 0; n < adapter->num_tx_queues; n++) {
395 tx_ring = adapter->tx_ring[n];
396 printk(KERN_INFO "------------------------------------\n");
397 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
398 printk(KERN_INFO "------------------------------------\n");
399 printk(KERN_INFO "T [desc] [address 63:0 ] "
400 "[PlPOCIStDDM Ln] [bi->dma ] "
401 "leng ntw timestamp bi->skb\n");
403 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
404 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
405 buffer_info = &tx_ring->buffer_info[i];
406 u0 = (struct my_u0 *)tx_desc;
407 printk(KERN_INFO "T [0x%03X] %016llX %016llX %016llX"
408 " %04X %3X %016llX %p", i,
411 (u64)buffer_info->dma,
413 buffer_info->next_to_watch,
414 (u64)buffer_info->time_stamp,
416 if (i == tx_ring->next_to_use &&
417 i == tx_ring->next_to_clean)
418 printk(KERN_CONT " NTC/U\n");
419 else if (i == tx_ring->next_to_use)
420 printk(KERN_CONT " NTU\n");
421 else if (i == tx_ring->next_to_clean)
422 printk(KERN_CONT " NTC\n");
424 printk(KERN_CONT "\n");
426 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
427 print_hex_dump(KERN_INFO, "",
429 16, 1, phys_to_virt(buffer_info->dma),
430 buffer_info->length, true);
434 /* Print RX Rings Summary */
436 dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
437 printk(KERN_INFO "Queue [NTU] [NTC]\n");
438 for (n = 0; n < adapter->num_rx_queues; n++) {
439 rx_ring = adapter->rx_ring[n];
440 printk(KERN_INFO " %5d %5X %5X\n", n,
441 rx_ring->next_to_use, rx_ring->next_to_clean);
445 if (!netif_msg_rx_status(adapter))
448 dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
450 /* Advanced Receive Descriptor (Read) Format
452 * +-----------------------------------------------------+
453 * 0 | Packet Buffer Address [63:1] |A0/NSE|
454 * +----------------------------------------------+------+
455 * 8 | Header Buffer Address [63:1] | DD |
456 * +-----------------------------------------------------+
459 * Advanced Receive Descriptor (Write-Back) Format
461 * 63 48 47 32 31 30 21 20 17 16 4 3 0
462 * +------------------------------------------------------+
463 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
464 * | Checksum Ident | | | | Type | Type |
465 * +------------------------------------------------------+
466 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
467 * +------------------------------------------------------+
468 * 63 48 47 32 31 20 19 0
471 for (n = 0; n < adapter->num_rx_queues; n++) {
472 rx_ring = adapter->rx_ring[n];
473 printk(KERN_INFO "------------------------------------\n");
474 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
475 printk(KERN_INFO "------------------------------------\n");
476 printk(KERN_INFO "R [desc] [ PktBuf A0] "
477 "[ HeadBuf DD] [bi->dma ] [bi->skb] "
478 "<-- Adv Rx Read format\n");
479 printk(KERN_INFO "RWB[desc] [PcsmIpSHl PtRs] "
480 "[vl er S cks ln] ---------------- [bi->skb] "
481 "<-- Adv Rx Write-Back format\n");
483 for (i = 0; i < rx_ring->count; i++) {
484 buffer_info = &rx_ring->buffer_info[i];
485 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
486 u0 = (struct my_u0 *)rx_desc;
487 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
488 if (staterr & E1000_RXD_STAT_DD) {
489 /* Descriptor Done */
490 printk(KERN_INFO "RWB[0x%03X] %016llX "
491 "%016llX ---------------- %p", i,
496 printk(KERN_INFO "R [0x%03X] %016llX "
497 "%016llX %016llX %p", i,
500 (u64)buffer_info->dma,
503 if (netif_msg_pktdata(adapter)) {
504 print_hex_dump(KERN_INFO, "",
507 phys_to_virt(buffer_info->dma),
508 rx_ring->rx_buffer_len, true);
509 if (rx_ring->rx_buffer_len
511 print_hex_dump(KERN_INFO, "",
515 buffer_info->page_dma +
516 buffer_info->page_offset),
521 if (i == rx_ring->next_to_use)
522 printk(KERN_CONT " NTU\n");
523 else if (i == rx_ring->next_to_clean)
524 printk(KERN_CONT " NTC\n");
526 printk(KERN_CONT "\n");
537 * igb_read_clock - read raw cycle counter (to be used by time counter)
539 static cycle_t igb_read_clock(const struct cyclecounter *tc)
541 struct igb_adapter *adapter =
542 container_of(tc, struct igb_adapter, cycles);
543 struct e1000_hw *hw = &adapter->hw;
548 * The timestamp latches on lowest register read. For the 82580
549 * the lowest register is SYSTIMR instead of SYSTIML. However we never
550 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
552 if (hw->mac.type == e1000_82580) {
553 stamp = rd32(E1000_SYSTIMR) >> 8;
554 shift = IGB_82580_TSYNC_SHIFT;
557 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
558 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
563 * igb_get_hw_dev - return device
564 * used by hardware layer to print debugging information
566 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
568 struct igb_adapter *adapter = hw->back;
569 return adapter->netdev;
573 * igb_init_module - Driver Registration Routine
575 * igb_init_module is the first routine called when the driver is
576 * loaded. All it does is register with the PCI subsystem.
578 static int __init igb_init_module(void)
581 printk(KERN_INFO "%s - version %s\n",
582 igb_driver_string, igb_driver_version);
584 printk(KERN_INFO "%s\n", igb_copyright);
586 #ifdef CONFIG_IGB_DCA
587 dca_register_notify(&dca_notifier);
589 ret = pci_register_driver(&igb_driver);
593 module_init(igb_init_module);
596 * igb_exit_module - Driver Exit Cleanup Routine
598 * igb_exit_module is called just before the driver is removed
601 static void __exit igb_exit_module(void)
603 #ifdef CONFIG_IGB_DCA
604 dca_unregister_notify(&dca_notifier);
606 pci_unregister_driver(&igb_driver);
609 module_exit(igb_exit_module);
611 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
613 * igb_cache_ring_register - Descriptor ring to register mapping
614 * @adapter: board private structure to initialize
616 * Once we know the feature-set enabled for the device, we'll cache
617 * the register offset the descriptor ring is assigned to.
619 static void igb_cache_ring_register(struct igb_adapter *adapter)
622 u32 rbase_offset = adapter->vfs_allocated_count;
624 switch (adapter->hw.mac.type) {
626 /* The queues are allocated for virtualization such that VF 0
627 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
628 * In order to avoid collision we start at the first free queue
629 * and continue consuming queues in the same sequence
631 if (adapter->vfs_allocated_count) {
632 for (; i < adapter->rss_queues; i++)
633 adapter->rx_ring[i]->reg_idx = rbase_offset +
640 for (; i < adapter->num_rx_queues; i++)
641 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
642 for (; j < adapter->num_tx_queues; j++)
643 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
648 static void igb_free_queues(struct igb_adapter *adapter)
652 for (i = 0; i < adapter->num_tx_queues; i++) {
653 kfree(adapter->tx_ring[i]);
654 adapter->tx_ring[i] = NULL;
656 for (i = 0; i < adapter->num_rx_queues; i++) {
657 kfree(adapter->rx_ring[i]);
658 adapter->rx_ring[i] = NULL;
660 adapter->num_rx_queues = 0;
661 adapter->num_tx_queues = 0;
665 * igb_alloc_queues - Allocate memory for all rings
666 * @adapter: board private structure to initialize
668 * We allocate one ring per queue at run-time since we don't know the
669 * number of queues at compile-time.
671 static int igb_alloc_queues(struct igb_adapter *adapter)
673 struct igb_ring *ring;
676 for (i = 0; i < adapter->num_tx_queues; i++) {
677 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
680 ring->count = adapter->tx_ring_count;
681 ring->queue_index = i;
682 ring->dev = &adapter->pdev->dev;
683 ring->netdev = adapter->netdev;
684 /* For 82575, context index must be unique per ring. */
685 if (adapter->hw.mac.type == e1000_82575)
686 ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
687 adapter->tx_ring[i] = ring;
690 for (i = 0; i < adapter->num_rx_queues; i++) {
691 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
694 ring->count = adapter->rx_ring_count;
695 ring->queue_index = i;
696 ring->dev = &adapter->pdev->dev;
697 ring->netdev = adapter->netdev;
698 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
699 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
700 /* set flag indicating ring supports SCTP checksum offload */
701 if (adapter->hw.mac.type >= e1000_82576)
702 ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
703 adapter->rx_ring[i] = ring;
706 igb_cache_ring_register(adapter);
711 igb_free_queues(adapter);
716 #define IGB_N0_QUEUE -1
717 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
720 struct igb_adapter *adapter = q_vector->adapter;
721 struct e1000_hw *hw = &adapter->hw;
723 int rx_queue = IGB_N0_QUEUE;
724 int tx_queue = IGB_N0_QUEUE;
726 if (q_vector->rx_ring)
727 rx_queue = q_vector->rx_ring->reg_idx;
728 if (q_vector->tx_ring)
729 tx_queue = q_vector->tx_ring->reg_idx;
731 switch (hw->mac.type) {
733 /* The 82575 assigns vectors using a bitmask, which matches the
734 bitmask for the EICR/EIMS/EIMC registers. To assign one
735 or more queues to a vector, we write the appropriate bits
736 into the MSIXBM register for that vector. */
737 if (rx_queue > IGB_N0_QUEUE)
738 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
739 if (tx_queue > IGB_N0_QUEUE)
740 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
741 if (!adapter->msix_entries && msix_vector == 0)
742 msixbm |= E1000_EIMS_OTHER;
743 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
744 q_vector->eims_value = msixbm;
747 /* 82576 uses a table-based method for assigning vectors.
748 Each queue has a single entry in the table to which we write
749 a vector number along with a "valid" bit. Sadly, the layout
750 of the table is somewhat counterintuitive. */
751 if (rx_queue > IGB_N0_QUEUE) {
752 index = (rx_queue & 0x7);
753 ivar = array_rd32(E1000_IVAR0, index);
755 /* vector goes into low byte of register */
756 ivar = ivar & 0xFFFFFF00;
757 ivar |= msix_vector | E1000_IVAR_VALID;
759 /* vector goes into third byte of register */
760 ivar = ivar & 0xFF00FFFF;
761 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
763 array_wr32(E1000_IVAR0, index, ivar);
765 if (tx_queue > IGB_N0_QUEUE) {
766 index = (tx_queue & 0x7);
767 ivar = array_rd32(E1000_IVAR0, index);
769 /* vector goes into second byte of register */
770 ivar = ivar & 0xFFFF00FF;
771 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
773 /* vector goes into high byte of register */
774 ivar = ivar & 0x00FFFFFF;
775 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
777 array_wr32(E1000_IVAR0, index, ivar);
779 q_vector->eims_value = 1 << msix_vector;
783 /* 82580 uses the same table-based approach as 82576 but has fewer
784 entries as a result we carry over for queues greater than 4. */
785 if (rx_queue > IGB_N0_QUEUE) {
786 index = (rx_queue >> 1);
787 ivar = array_rd32(E1000_IVAR0, index);
788 if (rx_queue & 0x1) {
789 /* vector goes into third byte of register */
790 ivar = ivar & 0xFF00FFFF;
791 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
793 /* vector goes into low byte of register */
794 ivar = ivar & 0xFFFFFF00;
795 ivar |= msix_vector | E1000_IVAR_VALID;
797 array_wr32(E1000_IVAR0, index, ivar);
799 if (tx_queue > IGB_N0_QUEUE) {
800 index = (tx_queue >> 1);
801 ivar = array_rd32(E1000_IVAR0, index);
802 if (tx_queue & 0x1) {
803 /* vector goes into high byte of register */
804 ivar = ivar & 0x00FFFFFF;
805 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
807 /* vector goes into second byte of register */
808 ivar = ivar & 0xFFFF00FF;
809 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
811 array_wr32(E1000_IVAR0, index, ivar);
813 q_vector->eims_value = 1 << msix_vector;
820 /* add q_vector eims value to global eims_enable_mask */
821 adapter->eims_enable_mask |= q_vector->eims_value;
823 /* configure q_vector to set itr on first interrupt */
824 q_vector->set_itr = 1;
828 * igb_configure_msix - Configure MSI-X hardware
830 * igb_configure_msix sets up the hardware to properly
831 * generate MSI-X interrupts.
833 static void igb_configure_msix(struct igb_adapter *adapter)
837 struct e1000_hw *hw = &adapter->hw;
839 adapter->eims_enable_mask = 0;
841 /* set vector for other causes, i.e. link changes */
842 switch (hw->mac.type) {
844 tmp = rd32(E1000_CTRL_EXT);
845 /* enable MSI-X PBA support*/
846 tmp |= E1000_CTRL_EXT_PBA_CLR;
848 /* Auto-Mask interrupts upon ICR read. */
849 tmp |= E1000_CTRL_EXT_EIAME;
850 tmp |= E1000_CTRL_EXT_IRCA;
852 wr32(E1000_CTRL_EXT, tmp);
854 /* enable msix_other interrupt */
855 array_wr32(E1000_MSIXBM(0), vector++,
857 adapter->eims_other = E1000_EIMS_OTHER;
864 /* Turn on MSI-X capability first, or our settings
865 * won't stick. And it will take days to debug. */
866 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
867 E1000_GPIE_PBA | E1000_GPIE_EIAME |
870 /* enable msix_other interrupt */
871 adapter->eims_other = 1 << vector;
872 tmp = (vector++ | E1000_IVAR_VALID) << 8;
874 wr32(E1000_IVAR_MISC, tmp);
877 /* do nothing, since nothing else supports MSI-X */
879 } /* switch (hw->mac.type) */
881 adapter->eims_enable_mask |= adapter->eims_other;
883 for (i = 0; i < adapter->num_q_vectors; i++)
884 igb_assign_vector(adapter->q_vector[i], vector++);
890 * igb_request_msix - Initialize MSI-X interrupts
892 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
895 static int igb_request_msix(struct igb_adapter *adapter)
897 struct net_device *netdev = adapter->netdev;
898 struct e1000_hw *hw = &adapter->hw;
899 int i, err = 0, vector = 0;
901 err = request_irq(adapter->msix_entries[vector].vector,
902 igb_msix_other, 0, netdev->name, adapter);
907 for (i = 0; i < adapter->num_q_vectors; i++) {
908 struct igb_q_vector *q_vector = adapter->q_vector[i];
910 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
912 if (q_vector->rx_ring && q_vector->tx_ring)
913 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
914 q_vector->rx_ring->queue_index);
915 else if (q_vector->tx_ring)
916 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
917 q_vector->tx_ring->queue_index);
918 else if (q_vector->rx_ring)
919 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
920 q_vector->rx_ring->queue_index);
922 sprintf(q_vector->name, "%s-unused", netdev->name);
924 err = request_irq(adapter->msix_entries[vector].vector,
925 igb_msix_ring, 0, q_vector->name,
932 igb_configure_msix(adapter);
938 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
940 if (adapter->msix_entries) {
941 pci_disable_msix(adapter->pdev);
942 kfree(adapter->msix_entries);
943 adapter->msix_entries = NULL;
944 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
945 pci_disable_msi(adapter->pdev);
950 * igb_free_q_vectors - Free memory allocated for interrupt vectors
951 * @adapter: board private structure to initialize
953 * This function frees the memory allocated to the q_vectors. In addition if
954 * NAPI is enabled it will delete any references to the NAPI struct prior
955 * to freeing the q_vector.
957 static void igb_free_q_vectors(struct igb_adapter *adapter)
961 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
962 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
963 adapter->q_vector[v_idx] = NULL;
966 netif_napi_del(&q_vector->napi);
969 adapter->num_q_vectors = 0;
973 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
975 * This function resets the device so that it has 0 rx queues, tx queues, and
976 * MSI-X interrupts allocated.
978 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
980 igb_free_queues(adapter);
981 igb_free_q_vectors(adapter);
982 igb_reset_interrupt_capability(adapter);
986 * igb_set_interrupt_capability - set MSI or MSI-X if supported
988 * Attempt to configure interrupts using the best available
989 * capabilities of the hardware and kernel.
991 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
996 /* Number of supported queues. */
997 adapter->num_rx_queues = adapter->rss_queues;
998 if (adapter->vfs_allocated_count)
999 adapter->num_tx_queues = 1;
1001 adapter->num_tx_queues = adapter->rss_queues;
1003 /* start with one vector for every rx queue */
1004 numvecs = adapter->num_rx_queues;
1006 /* if tx handler is separate add 1 for every tx queue */
1007 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1008 numvecs += adapter->num_tx_queues;
1010 /* store the number of vectors reserved for queues */
1011 adapter->num_q_vectors = numvecs;
1013 /* add 1 vector for link status interrupts */
1015 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1017 if (!adapter->msix_entries)
1020 for (i = 0; i < numvecs; i++)
1021 adapter->msix_entries[i].entry = i;
1023 err = pci_enable_msix(adapter->pdev,
1024 adapter->msix_entries,
1029 igb_reset_interrupt_capability(adapter);
1031 /* If we can't do MSI-X, try MSI */
1033 #ifdef CONFIG_PCI_IOV
1034 /* disable SR-IOV for non MSI-X configurations */
1035 if (adapter->vf_data) {
1036 struct e1000_hw *hw = &adapter->hw;
1037 /* disable iov and allow time for transactions to clear */
1038 pci_disable_sriov(adapter->pdev);
1041 kfree(adapter->vf_data);
1042 adapter->vf_data = NULL;
1043 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1045 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1048 adapter->vfs_allocated_count = 0;
1049 adapter->rss_queues = 1;
1050 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1051 adapter->num_rx_queues = 1;
1052 adapter->num_tx_queues = 1;
1053 adapter->num_q_vectors = 1;
1054 if (!pci_enable_msi(adapter->pdev))
1055 adapter->flags |= IGB_FLAG_HAS_MSI;
1057 /* Notify the stack of the (possibly) reduced queue counts. */
1058 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1059 return netif_set_real_num_rx_queues(adapter->netdev,
1060 adapter->num_rx_queues);
1064 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1065 * @adapter: board private structure to initialize
1067 * We allocate one q_vector per queue interrupt. If allocation fails we
1070 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1072 struct igb_q_vector *q_vector;
1073 struct e1000_hw *hw = &adapter->hw;
1076 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1077 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1080 q_vector->adapter = adapter;
1081 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1082 q_vector->itr_val = IGB_START_ITR;
1083 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1084 adapter->q_vector[v_idx] = q_vector;
1089 igb_free_q_vectors(adapter);
1093 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1094 int ring_idx, int v_idx)
1096 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1098 q_vector->rx_ring = adapter->rx_ring[ring_idx];
1099 q_vector->rx_ring->q_vector = q_vector;
1100 q_vector->itr_val = adapter->rx_itr_setting;
1101 if (q_vector->itr_val && q_vector->itr_val <= 3)
1102 q_vector->itr_val = IGB_START_ITR;
1105 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1106 int ring_idx, int v_idx)
1108 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1110 q_vector->tx_ring = adapter->tx_ring[ring_idx];
1111 q_vector->tx_ring->q_vector = q_vector;
1112 q_vector->itr_val = adapter->tx_itr_setting;
1113 if (q_vector->itr_val && q_vector->itr_val <= 3)
1114 q_vector->itr_val = IGB_START_ITR;
1118 * igb_map_ring_to_vector - maps allocated queues to vectors
1120 * This function maps the recently allocated queues to vectors.
1122 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1127 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1128 (adapter->num_q_vectors < adapter->num_tx_queues))
1131 if (adapter->num_q_vectors >=
1132 (adapter->num_rx_queues + adapter->num_tx_queues)) {
1133 for (i = 0; i < adapter->num_rx_queues; i++)
1134 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1135 for (i = 0; i < adapter->num_tx_queues; i++)
1136 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1138 for (i = 0; i < adapter->num_rx_queues; i++) {
1139 if (i < adapter->num_tx_queues)
1140 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1141 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1143 for (; i < adapter->num_tx_queues; i++)
1144 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1150 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1152 * This function initializes the interrupts and allocates all of the queues.
1154 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1156 struct pci_dev *pdev = adapter->pdev;
1159 err = igb_set_interrupt_capability(adapter);
1163 err = igb_alloc_q_vectors(adapter);
1165 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1166 goto err_alloc_q_vectors;
1169 err = igb_alloc_queues(adapter);
1171 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1172 goto err_alloc_queues;
1175 err = igb_map_ring_to_vector(adapter);
1177 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1178 goto err_map_queues;
1184 igb_free_queues(adapter);
1186 igb_free_q_vectors(adapter);
1187 err_alloc_q_vectors:
1188 igb_reset_interrupt_capability(adapter);
1193 * igb_request_irq - initialize interrupts
1195 * Attempts to configure interrupts using the best available
1196 * capabilities of the hardware and kernel.
1198 static int igb_request_irq(struct igb_adapter *adapter)
1200 struct net_device *netdev = adapter->netdev;
1201 struct pci_dev *pdev = adapter->pdev;
1204 if (adapter->msix_entries) {
1205 err = igb_request_msix(adapter);
1208 /* fall back to MSI */
1209 igb_clear_interrupt_scheme(adapter);
1210 if (!pci_enable_msi(adapter->pdev))
1211 adapter->flags |= IGB_FLAG_HAS_MSI;
1212 igb_free_all_tx_resources(adapter);
1213 igb_free_all_rx_resources(adapter);
1214 adapter->num_tx_queues = 1;
1215 adapter->num_rx_queues = 1;
1216 adapter->num_q_vectors = 1;
1217 err = igb_alloc_q_vectors(adapter);
1220 "Unable to allocate memory for vectors\n");
1223 err = igb_alloc_queues(adapter);
1226 "Unable to allocate memory for queues\n");
1227 igb_free_q_vectors(adapter);
1230 igb_setup_all_tx_resources(adapter);
1231 igb_setup_all_rx_resources(adapter);
1233 igb_assign_vector(adapter->q_vector[0], 0);
1236 if (adapter->flags & IGB_FLAG_HAS_MSI) {
1237 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1238 netdev->name, adapter);
1242 /* fall back to legacy interrupts */
1243 igb_reset_interrupt_capability(adapter);
1244 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1247 err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1248 netdev->name, adapter);
1251 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1258 static void igb_free_irq(struct igb_adapter *adapter)
1260 if (adapter->msix_entries) {
1263 free_irq(adapter->msix_entries[vector++].vector, adapter);
1265 for (i = 0; i < adapter->num_q_vectors; i++) {
1266 struct igb_q_vector *q_vector = adapter->q_vector[i];
1267 free_irq(adapter->msix_entries[vector++].vector,
1271 free_irq(adapter->pdev->irq, adapter);
1276 * igb_irq_disable - Mask off interrupt generation on the NIC
1277 * @adapter: board private structure
1279 static void igb_irq_disable(struct igb_adapter *adapter)
1281 struct e1000_hw *hw = &adapter->hw;
1284 * we need to be careful when disabling interrupts. The VFs are also
1285 * mapped into these registers and so clearing the bits can cause
1286 * issues on the VF drivers so we only need to clear what we set
1288 if (adapter->msix_entries) {
1289 u32 regval = rd32(E1000_EIAM);
1290 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1291 wr32(E1000_EIMC, adapter->eims_enable_mask);
1292 regval = rd32(E1000_EIAC);
1293 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1297 wr32(E1000_IMC, ~0);
1299 if (adapter->msix_entries) {
1301 for (i = 0; i < adapter->num_q_vectors; i++)
1302 synchronize_irq(adapter->msix_entries[i].vector);
1304 synchronize_irq(adapter->pdev->irq);
1309 * igb_irq_enable - Enable default interrupt generation settings
1310 * @adapter: board private structure
1312 static void igb_irq_enable(struct igb_adapter *adapter)
1314 struct e1000_hw *hw = &adapter->hw;
1316 if (adapter->msix_entries) {
1317 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1318 u32 regval = rd32(E1000_EIAC);
1319 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1320 regval = rd32(E1000_EIAM);
1321 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1322 wr32(E1000_EIMS, adapter->eims_enable_mask);
1323 if (adapter->vfs_allocated_count) {
1324 wr32(E1000_MBVFIMR, 0xFF);
1325 ims |= E1000_IMS_VMMB;
1327 if (adapter->hw.mac.type == e1000_82580)
1328 ims |= E1000_IMS_DRSTA;
1330 wr32(E1000_IMS, ims);
1332 wr32(E1000_IMS, IMS_ENABLE_MASK |
1334 wr32(E1000_IAM, IMS_ENABLE_MASK |
1339 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1341 struct e1000_hw *hw = &adapter->hw;
1342 u16 vid = adapter->hw.mng_cookie.vlan_id;
1343 u16 old_vid = adapter->mng_vlan_id;
1345 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1346 /* add VID to filter table */
1347 igb_vfta_set(hw, vid, true);
1348 adapter->mng_vlan_id = vid;
1350 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1353 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1355 !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1356 /* remove VID from filter table */
1357 igb_vfta_set(hw, old_vid, false);
1362 * igb_release_hw_control - release control of the h/w to f/w
1363 * @adapter: address of board private structure
1365 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1366 * For ASF and Pass Through versions of f/w this means that the
1367 * driver is no longer loaded.
1370 static void igb_release_hw_control(struct igb_adapter *adapter)
1372 struct e1000_hw *hw = &adapter->hw;
1375 /* Let firmware take over control of h/w */
1376 ctrl_ext = rd32(E1000_CTRL_EXT);
1377 wr32(E1000_CTRL_EXT,
1378 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1382 * igb_get_hw_control - get control of the h/w from f/w
1383 * @adapter: address of board private structure
1385 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1386 * For ASF and Pass Through versions of f/w this means that
1387 * the driver is loaded.
1390 static void igb_get_hw_control(struct igb_adapter *adapter)
1392 struct e1000_hw *hw = &adapter->hw;
1395 /* Let firmware know the driver has taken over */
1396 ctrl_ext = rd32(E1000_CTRL_EXT);
1397 wr32(E1000_CTRL_EXT,
1398 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1402 * igb_configure - configure the hardware for RX and TX
1403 * @adapter: private board structure
1405 static void igb_configure(struct igb_adapter *adapter)
1407 struct net_device *netdev = adapter->netdev;
1410 igb_get_hw_control(adapter);
1411 igb_set_rx_mode(netdev);
1413 igb_restore_vlan(adapter);
1415 igb_setup_tctl(adapter);
1416 igb_setup_mrqc(adapter);
1417 igb_setup_rctl(adapter);
1419 igb_configure_tx(adapter);
1420 igb_configure_rx(adapter);
1422 igb_rx_fifo_flush_82575(&adapter->hw);
1424 /* call igb_desc_unused which always leaves
1425 * at least 1 descriptor unused to make sure
1426 * next_to_use != next_to_clean */
1427 for (i = 0; i < adapter->num_rx_queues; i++) {
1428 struct igb_ring *ring = adapter->rx_ring[i];
1429 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1434 * igb_power_up_link - Power up the phy/serdes link
1435 * @adapter: address of board private structure
1437 void igb_power_up_link(struct igb_adapter *adapter)
1439 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1440 igb_power_up_phy_copper(&adapter->hw);
1442 igb_power_up_serdes_link_82575(&adapter->hw);
1446 * igb_power_down_link - Power down the phy/serdes link
1447 * @adapter: address of board private structure
1449 static void igb_power_down_link(struct igb_adapter *adapter)
1451 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1452 igb_power_down_phy_copper_82575(&adapter->hw);
1454 igb_shutdown_serdes_link_82575(&adapter->hw);
1458 * igb_up - Open the interface and prepare it to handle traffic
1459 * @adapter: board private structure
1461 int igb_up(struct igb_adapter *adapter)
1463 struct e1000_hw *hw = &adapter->hw;
1466 /* hardware has been reset, we need to reload some things */
1467 igb_configure(adapter);
1469 clear_bit(__IGB_DOWN, &adapter->state);
1471 for (i = 0; i < adapter->num_q_vectors; i++) {
1472 struct igb_q_vector *q_vector = adapter->q_vector[i];
1473 napi_enable(&q_vector->napi);
1475 if (adapter->msix_entries)
1476 igb_configure_msix(adapter);
1478 igb_assign_vector(adapter->q_vector[0], 0);
1480 /* Clear any pending interrupts. */
1482 igb_irq_enable(adapter);
1484 /* notify VFs that reset has been completed */
1485 if (adapter->vfs_allocated_count) {
1486 u32 reg_data = rd32(E1000_CTRL_EXT);
1487 reg_data |= E1000_CTRL_EXT_PFRSTD;
1488 wr32(E1000_CTRL_EXT, reg_data);
1491 netif_tx_start_all_queues(adapter->netdev);
1493 /* start the watchdog. */
1494 hw->mac.get_link_status = 1;
1495 schedule_work(&adapter->watchdog_task);
1500 void igb_down(struct igb_adapter *adapter)
1502 struct net_device *netdev = adapter->netdev;
1503 struct e1000_hw *hw = &adapter->hw;
1507 /* signal that we're down so the interrupt handler does not
1508 * reschedule our watchdog timer */
1509 set_bit(__IGB_DOWN, &adapter->state);
1511 /* disable receives in the hardware */
1512 rctl = rd32(E1000_RCTL);
1513 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1514 /* flush and sleep below */
1516 netif_tx_stop_all_queues(netdev);
1518 /* disable transmits in the hardware */
1519 tctl = rd32(E1000_TCTL);
1520 tctl &= ~E1000_TCTL_EN;
1521 wr32(E1000_TCTL, tctl);
1522 /* flush both disables and wait for them to finish */
1526 for (i = 0; i < adapter->num_q_vectors; i++) {
1527 struct igb_q_vector *q_vector = adapter->q_vector[i];
1528 napi_disable(&q_vector->napi);
1531 igb_irq_disable(adapter);
1533 del_timer_sync(&adapter->watchdog_timer);
1534 del_timer_sync(&adapter->phy_info_timer);
1536 netif_carrier_off(netdev);
1538 /* record the stats before reset*/
1539 igb_update_stats(adapter);
1541 adapter->link_speed = 0;
1542 adapter->link_duplex = 0;
1544 if (!pci_channel_offline(adapter->pdev))
1546 igb_clean_all_tx_rings(adapter);
1547 igb_clean_all_rx_rings(adapter);
1548 #ifdef CONFIG_IGB_DCA
1550 /* since we reset the hardware DCA settings were cleared */
1551 igb_setup_dca(adapter);
1555 void igb_reinit_locked(struct igb_adapter *adapter)
1557 WARN_ON(in_interrupt());
1558 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1562 clear_bit(__IGB_RESETTING, &adapter->state);
1565 void igb_reset(struct igb_adapter *adapter)
1567 struct pci_dev *pdev = adapter->pdev;
1568 struct e1000_hw *hw = &adapter->hw;
1569 struct e1000_mac_info *mac = &hw->mac;
1570 struct e1000_fc_info *fc = &hw->fc;
1571 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1574 /* Repartition Pba for greater than 9k mtu
1575 * To take effect CTRL.RST is required.
1577 switch (mac->type) {
1580 pba = rd32(E1000_RXPBS);
1581 pba = igb_rxpbs_adjust_82580(pba);
1584 pba = rd32(E1000_RXPBS);
1585 pba &= E1000_RXPBS_SIZE_MASK_82576;
1589 pba = E1000_PBA_34K;
1593 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1594 (mac->type < e1000_82576)) {
1595 /* adjust PBA for jumbo frames */
1596 wr32(E1000_PBA, pba);
1598 /* To maintain wire speed transmits, the Tx FIFO should be
1599 * large enough to accommodate two full transmit packets,
1600 * rounded up to the next 1KB and expressed in KB. Likewise,
1601 * the Rx FIFO should be large enough to accommodate at least
1602 * one full receive packet and is similarly rounded up and
1603 * expressed in KB. */
1604 pba = rd32(E1000_PBA);
1605 /* upper 16 bits has Tx packet buffer allocation size in KB */
1606 tx_space = pba >> 16;
1607 /* lower 16 bits has Rx packet buffer allocation size in KB */
1609 /* the tx fifo also stores 16 bytes of information about the tx
1610 * but don't include ethernet FCS because hardware appends it */
1611 min_tx_space = (adapter->max_frame_size +
1612 sizeof(union e1000_adv_tx_desc) -
1614 min_tx_space = ALIGN(min_tx_space, 1024);
1615 min_tx_space >>= 10;
1616 /* software strips receive CRC, so leave room for it */
1617 min_rx_space = adapter->max_frame_size;
1618 min_rx_space = ALIGN(min_rx_space, 1024);
1619 min_rx_space >>= 10;
1621 /* If current Tx allocation is less than the min Tx FIFO size,
1622 * and the min Tx FIFO size is less than the current Rx FIFO
1623 * allocation, take space away from current Rx allocation */
1624 if (tx_space < min_tx_space &&
1625 ((min_tx_space - tx_space) < pba)) {
1626 pba = pba - (min_tx_space - tx_space);
1628 /* if short on rx space, rx wins and must trump tx
1630 if (pba < min_rx_space)
1633 wr32(E1000_PBA, pba);
1636 /* flow control settings */
1637 /* The high water mark must be low enough to fit one full frame
1638 * (or the size used for early receive) above it in the Rx FIFO.
1639 * Set it to the lower of:
1640 * - 90% of the Rx FIFO size, or
1641 * - the full Rx FIFO size minus one full frame */
1642 hwm = min(((pba << 10) * 9 / 10),
1643 ((pba << 10) - 2 * adapter->max_frame_size));
1645 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1646 fc->low_water = fc->high_water - 16;
1647 fc->pause_time = 0xFFFF;
1649 fc->current_mode = fc->requested_mode;
1651 /* disable receive for all VFs and wait one second */
1652 if (adapter->vfs_allocated_count) {
1654 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1655 adapter->vf_data[i].flags = 0;
1657 /* ping all the active vfs to let them know we are going down */
1658 igb_ping_all_vfs(adapter);
1660 /* disable transmits and receives */
1661 wr32(E1000_VFRE, 0);
1662 wr32(E1000_VFTE, 0);
1665 /* Allow time for pending master requests to run */
1666 hw->mac.ops.reset_hw(hw);
1669 if (hw->mac.ops.init_hw(hw))
1670 dev_err(&pdev->dev, "Hardware Error\n");
1672 if (hw->mac.type == e1000_82580) {
1673 u32 reg = rd32(E1000_PCIEMISC);
1674 wr32(E1000_PCIEMISC,
1675 reg & ~E1000_PCIEMISC_LX_DECISION);
1677 if (!netif_running(adapter->netdev))
1678 igb_power_down_link(adapter);
1680 igb_update_mng_vlan(adapter);
1682 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1683 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1685 igb_get_phy_info(hw);
1688 static const struct net_device_ops igb_netdev_ops = {
1689 .ndo_open = igb_open,
1690 .ndo_stop = igb_close,
1691 .ndo_start_xmit = igb_xmit_frame_adv,
1692 .ndo_get_stats = igb_get_stats,
1693 .ndo_set_rx_mode = igb_set_rx_mode,
1694 .ndo_set_multicast_list = igb_set_rx_mode,
1695 .ndo_set_mac_address = igb_set_mac,
1696 .ndo_change_mtu = igb_change_mtu,
1697 .ndo_do_ioctl = igb_ioctl,
1698 .ndo_tx_timeout = igb_tx_timeout,
1699 .ndo_validate_addr = eth_validate_addr,
1700 .ndo_vlan_rx_register = igb_vlan_rx_register,
1701 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1702 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1703 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1704 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1705 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1706 .ndo_get_vf_config = igb_ndo_get_vf_config,
1707 #ifdef CONFIG_NET_POLL_CONTROLLER
1708 .ndo_poll_controller = igb_netpoll,
1713 * igb_probe - Device Initialization Routine
1714 * @pdev: PCI device information struct
1715 * @ent: entry in igb_pci_tbl
1717 * Returns 0 on success, negative on failure
1719 * igb_probe initializes an adapter identified by a pci_dev structure.
1720 * The OS initialization, configuring of the adapter private structure,
1721 * and a hardware reset occur.
1723 static int __devinit igb_probe(struct pci_dev *pdev,
1724 const struct pci_device_id *ent)
1726 struct net_device *netdev;
1727 struct igb_adapter *adapter;
1728 struct e1000_hw *hw;
1729 u16 eeprom_data = 0;
1730 static int global_quad_port_a; /* global quad port a indication */
1731 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1732 unsigned long mmio_start, mmio_len;
1733 int err, pci_using_dac;
1734 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1737 /* Catch broken hardware that put the wrong VF device ID in
1738 * the PCIe SR-IOV capability.
1740 if (pdev->is_virtfn) {
1741 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1742 pci_name(pdev), pdev->vendor, pdev->device);
1746 err = pci_enable_device_mem(pdev);
1751 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1753 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1757 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1759 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1761 dev_err(&pdev->dev, "No usable DMA "
1762 "configuration, aborting\n");
1768 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1774 pci_enable_pcie_error_reporting(pdev);
1776 pci_set_master(pdev);
1777 pci_save_state(pdev);
1780 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1781 IGB_ABS_MAX_TX_QUEUES);
1783 goto err_alloc_etherdev;
1785 SET_NETDEV_DEV(netdev, &pdev->dev);
1787 pci_set_drvdata(pdev, netdev);
1788 adapter = netdev_priv(netdev);
1789 adapter->netdev = netdev;
1790 adapter->pdev = pdev;
1793 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1795 mmio_start = pci_resource_start(pdev, 0);
1796 mmio_len = pci_resource_len(pdev, 0);
1799 hw->hw_addr = ioremap(mmio_start, mmio_len);
1803 netdev->netdev_ops = &igb_netdev_ops;
1804 igb_set_ethtool_ops(netdev);
1805 netdev->watchdog_timeo = 5 * HZ;
1807 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1809 netdev->mem_start = mmio_start;
1810 netdev->mem_end = mmio_start + mmio_len;
1812 /* PCI config space info */
1813 hw->vendor_id = pdev->vendor;
1814 hw->device_id = pdev->device;
1815 hw->revision_id = pdev->revision;
1816 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1817 hw->subsystem_device_id = pdev->subsystem_device;
1819 /* Copy the default MAC, PHY and NVM function pointers */
1820 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1821 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1822 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1823 /* Initialize skew-specific constants */
1824 err = ei->get_invariants(hw);
1828 /* setup the private structure */
1829 err = igb_sw_init(adapter);
1833 igb_get_bus_info_pcie(hw);
1835 hw->phy.autoneg_wait_to_complete = false;
1837 /* Copper options */
1838 if (hw->phy.media_type == e1000_media_type_copper) {
1839 hw->phy.mdix = AUTO_ALL_MODES;
1840 hw->phy.disable_polarity_correction = false;
1841 hw->phy.ms_type = e1000_ms_hw_default;
1844 if (igb_check_reset_block(hw))
1845 dev_info(&pdev->dev,
1846 "PHY reset is blocked due to SOL/IDER session.\n");
1848 netdev->features = NETIF_F_SG |
1850 NETIF_F_HW_VLAN_TX |
1851 NETIF_F_HW_VLAN_RX |
1852 NETIF_F_HW_VLAN_FILTER;
1854 netdev->features |= NETIF_F_IPV6_CSUM;
1855 netdev->features |= NETIF_F_TSO;
1856 netdev->features |= NETIF_F_TSO6;
1857 netdev->features |= NETIF_F_GRO;
1859 netdev->vlan_features |= NETIF_F_TSO;
1860 netdev->vlan_features |= NETIF_F_TSO6;
1861 netdev->vlan_features |= NETIF_F_IP_CSUM;
1862 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1863 netdev->vlan_features |= NETIF_F_SG;
1865 if (pci_using_dac) {
1866 netdev->features |= NETIF_F_HIGHDMA;
1867 netdev->vlan_features |= NETIF_F_HIGHDMA;
1870 if (hw->mac.type >= e1000_82576)
1871 netdev->features |= NETIF_F_SCTP_CSUM;
1873 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1875 /* before reading the NVM, reset the controller to put the device in a
1876 * known good starting state */
1877 hw->mac.ops.reset_hw(hw);
1879 /* make sure the NVM is good */
1880 if (igb_validate_nvm_checksum(hw) < 0) {
1881 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1886 /* copy the MAC address out of the NVM */
1887 if (hw->mac.ops.read_mac_addr(hw))
1888 dev_err(&pdev->dev, "NVM Read Error\n");
1890 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1891 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1893 if (!is_valid_ether_addr(netdev->perm_addr)) {
1894 dev_err(&pdev->dev, "Invalid MAC Address\n");
1899 setup_timer(&adapter->watchdog_timer, igb_watchdog,
1900 (unsigned long) adapter);
1901 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1902 (unsigned long) adapter);
1904 INIT_WORK(&adapter->reset_task, igb_reset_task);
1905 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1907 /* Initialize link properties that are user-changeable */
1908 adapter->fc_autoneg = true;
1909 hw->mac.autoneg = true;
1910 hw->phy.autoneg_advertised = 0x2f;
1912 hw->fc.requested_mode = e1000_fc_default;
1913 hw->fc.current_mode = e1000_fc_default;
1915 igb_validate_mdi_setting(hw);
1917 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
1918 * enable the ACPI Magic Packet filter
1921 if (hw->bus.func == 0)
1922 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1923 else if (hw->mac.type == e1000_82580)
1924 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1925 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1927 else if (hw->bus.func == 1)
1928 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1930 if (eeprom_data & eeprom_apme_mask)
1931 adapter->eeprom_wol |= E1000_WUFC_MAG;
1933 /* now that we have the eeprom settings, apply the special cases where
1934 * the eeprom may be wrong or the board simply won't support wake on
1935 * lan on a particular port */
1936 switch (pdev->device) {
1937 case E1000_DEV_ID_82575GB_QUAD_COPPER:
1938 adapter->eeprom_wol = 0;
1940 case E1000_DEV_ID_82575EB_FIBER_SERDES:
1941 case E1000_DEV_ID_82576_FIBER:
1942 case E1000_DEV_ID_82576_SERDES:
1943 /* Wake events only supported on port A for dual fiber
1944 * regardless of eeprom setting */
1945 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1946 adapter->eeprom_wol = 0;
1948 case E1000_DEV_ID_82576_QUAD_COPPER:
1949 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
1950 /* if quad port adapter, disable WoL on all but port A */
1951 if (global_quad_port_a != 0)
1952 adapter->eeprom_wol = 0;
1954 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1955 /* Reset for multiple quad port adapters */
1956 if (++global_quad_port_a == 4)
1957 global_quad_port_a = 0;
1961 /* initialize the wol settings based on the eeprom settings */
1962 adapter->wol = adapter->eeprom_wol;
1963 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1965 /* reset the hardware with the new settings */
1968 /* let the f/w know that the h/w is now under the control of the
1970 igb_get_hw_control(adapter);
1972 strcpy(netdev->name, "eth%d");
1973 err = register_netdev(netdev);
1977 /* carrier off reporting is important to ethtool even BEFORE open */
1978 netif_carrier_off(netdev);
1980 #ifdef CONFIG_IGB_DCA
1981 if (dca_add_requester(&pdev->dev) == 0) {
1982 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1983 dev_info(&pdev->dev, "DCA enabled\n");
1984 igb_setup_dca(adapter);
1988 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1989 /* print bus type/speed/width info */
1990 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1992 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1993 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
1995 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1996 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1997 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2001 igb_read_part_num(hw, &part_num);
2002 dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
2003 (part_num >> 8), (part_num & 0xff));
2005 dev_info(&pdev->dev,
2006 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2007 adapter->msix_entries ? "MSI-X" :
2008 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2009 adapter->num_rx_queues, adapter->num_tx_queues);
2014 igb_release_hw_control(adapter);
2016 if (!igb_check_reset_block(hw))
2019 if (hw->flash_address)
2020 iounmap(hw->flash_address);
2022 igb_clear_interrupt_scheme(adapter);
2023 iounmap(hw->hw_addr);
2025 free_netdev(netdev);
2027 pci_release_selected_regions(pdev,
2028 pci_select_bars(pdev, IORESOURCE_MEM));
2031 pci_disable_device(pdev);
2036 * igb_remove - Device Removal Routine
2037 * @pdev: PCI device information struct
2039 * igb_remove is called by the PCI subsystem to alert the driver
2040 * that it should release a PCI device. The could be caused by a
2041 * Hot-Plug event, or because the driver is going to be removed from
2044 static void __devexit igb_remove(struct pci_dev *pdev)
2046 struct net_device *netdev = pci_get_drvdata(pdev);
2047 struct igb_adapter *adapter = netdev_priv(netdev);
2048 struct e1000_hw *hw = &adapter->hw;
2050 /* flush_scheduled work may reschedule our watchdog task, so
2051 * explicitly disable watchdog tasks from being rescheduled */
2052 set_bit(__IGB_DOWN, &adapter->state);
2053 del_timer_sync(&adapter->watchdog_timer);
2054 del_timer_sync(&adapter->phy_info_timer);
2056 flush_scheduled_work();
2058 #ifdef CONFIG_IGB_DCA
2059 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2060 dev_info(&pdev->dev, "DCA disabled\n");
2061 dca_remove_requester(&pdev->dev);
2062 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2063 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2067 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2068 * would have already happened in close and is redundant. */
2069 igb_release_hw_control(adapter);
2071 unregister_netdev(netdev);
2073 igb_clear_interrupt_scheme(adapter);
2075 #ifdef CONFIG_PCI_IOV
2076 /* reclaim resources allocated to VFs */
2077 if (adapter->vf_data) {
2078 /* disable iov and allow time for transactions to clear */
2079 pci_disable_sriov(pdev);
2082 kfree(adapter->vf_data);
2083 adapter->vf_data = NULL;
2084 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2086 dev_info(&pdev->dev, "IOV Disabled\n");
2090 iounmap(hw->hw_addr);
2091 if (hw->flash_address)
2092 iounmap(hw->flash_address);
2093 pci_release_selected_regions(pdev,
2094 pci_select_bars(pdev, IORESOURCE_MEM));
2096 free_netdev(netdev);
2098 pci_disable_pcie_error_reporting(pdev);
2100 pci_disable_device(pdev);
2104 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2105 * @adapter: board private structure to initialize
2107 * This function initializes the vf specific data storage and then attempts to
2108 * allocate the VFs. The reason for ordering it this way is because it is much
2109 * mor expensive time wise to disable SR-IOV than it is to allocate and free
2110 * the memory for the VFs.
2112 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2114 #ifdef CONFIG_PCI_IOV
2115 struct pci_dev *pdev = adapter->pdev;
2117 if (adapter->vfs_allocated_count) {
2118 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2119 sizeof(struct vf_data_storage),
2121 /* if allocation failed then we do not support SR-IOV */
2122 if (!adapter->vf_data) {
2123 adapter->vfs_allocated_count = 0;
2124 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2129 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2130 kfree(adapter->vf_data);
2131 adapter->vf_data = NULL;
2132 #endif /* CONFIG_PCI_IOV */
2133 adapter->vfs_allocated_count = 0;
2134 #ifdef CONFIG_PCI_IOV
2136 unsigned char mac_addr[ETH_ALEN];
2138 dev_info(&pdev->dev, "%d vfs allocated\n",
2139 adapter->vfs_allocated_count);
2140 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2141 random_ether_addr(mac_addr);
2142 igb_set_vf_mac(adapter, i, mac_addr);
2145 #endif /* CONFIG_PCI_IOV */
2150 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2151 * @adapter: board private structure to initialize
2153 * igb_init_hw_timer initializes the function pointer and values for the hw
2154 * timer found in hardware.
2156 static void igb_init_hw_timer(struct igb_adapter *adapter)
2158 struct e1000_hw *hw = &adapter->hw;
2160 switch (hw->mac.type) {
2163 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2164 adapter->cycles.read = igb_read_clock;
2165 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2166 adapter->cycles.mult = 1;
2168 * The 82580 timesync updates the system timer every 8ns by 8ns
2169 * and the value cannot be shifted. Instead we need to shift
2170 * the registers to generate a 64bit timer value. As a result
2171 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2172 * 24 in order to generate a larger value for synchronization.
2174 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2175 /* disable system timer temporarily by setting bit 31 */
2176 wr32(E1000_TSAUXC, 0x80000000);
2179 /* Set registers so that rollover occurs soon to test this. */
2180 wr32(E1000_SYSTIMR, 0x00000000);
2181 wr32(E1000_SYSTIML, 0x80000000);
2182 wr32(E1000_SYSTIMH, 0x000000FF);
2185 /* enable system timer by clearing bit 31 */
2186 wr32(E1000_TSAUXC, 0x0);
2189 timecounter_init(&adapter->clock,
2191 ktime_to_ns(ktime_get_real()));
2193 * Synchronize our NIC clock against system wall clock. NIC
2194 * time stamp reading requires ~3us per sample, each sample
2195 * was pretty stable even under load => only require 10
2196 * samples for each offset comparison.
2198 memset(&adapter->compare, 0, sizeof(adapter->compare));
2199 adapter->compare.source = &adapter->clock;
2200 adapter->compare.target = ktime_get_real;
2201 adapter->compare.num_samples = 10;
2202 timecompare_update(&adapter->compare, 0);
2206 * Initialize hardware timer: we keep it running just in case
2207 * that some program needs it later on.
2209 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2210 adapter->cycles.read = igb_read_clock;
2211 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2212 adapter->cycles.mult = 1;
2214 * Scale the NIC clock cycle by a large factor so that
2215 * relatively small clock corrections can be added or
2216 * substracted at each clock tick. The drawbacks of a large
2217 * factor are a) that the clock register overflows more quickly
2218 * (not such a big deal) and b) that the increment per tick has
2219 * to fit into 24 bits. As a result we need to use a shift of
2220 * 19 so we can fit a value of 16 into the TIMINCA register.
2222 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2224 (1 << E1000_TIMINCA_16NS_SHIFT) |
2225 (16 << IGB_82576_TSYNC_SHIFT));
2227 /* Set registers so that rollover occurs soon to test this. */
2228 wr32(E1000_SYSTIML, 0x00000000);
2229 wr32(E1000_SYSTIMH, 0xFF800000);
2232 timecounter_init(&adapter->clock,
2234 ktime_to_ns(ktime_get_real()));
2236 * Synchronize our NIC clock against system wall clock. NIC
2237 * time stamp reading requires ~3us per sample, each sample
2238 * was pretty stable even under load => only require 10
2239 * samples for each offset comparison.
2241 memset(&adapter->compare, 0, sizeof(adapter->compare));
2242 adapter->compare.source = &adapter->clock;
2243 adapter->compare.target = ktime_get_real;
2244 adapter->compare.num_samples = 10;
2245 timecompare_update(&adapter->compare, 0);
2248 /* 82575 does not support timesync */
2256 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2257 * @adapter: board private structure to initialize
2259 * igb_sw_init initializes the Adapter private data structure.
2260 * Fields are initialized based on PCI device information and
2261 * OS network device settings (MTU size).
2263 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2265 struct e1000_hw *hw = &adapter->hw;
2266 struct net_device *netdev = adapter->netdev;
2267 struct pci_dev *pdev = adapter->pdev;
2269 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2271 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2272 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2273 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2274 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2276 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2277 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2279 #ifdef CONFIG_PCI_IOV
2280 if (hw->mac.type == e1000_82576)
2281 adapter->vfs_allocated_count = (max_vfs > 7) ? 7 : max_vfs;
2283 #endif /* CONFIG_PCI_IOV */
2284 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2287 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2288 * then we should combine the queues into a queue pair in order to
2289 * conserve interrupts due to limited supply
2291 if ((adapter->rss_queues > 4) ||
2292 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2293 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2295 /* This call may decrease the number of queues */
2296 if (igb_init_interrupt_scheme(adapter)) {
2297 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2301 igb_init_hw_timer(adapter);
2302 igb_probe_vfs(adapter);
2304 /* Explicitly disable IRQ since the NIC can be in any state. */
2305 igb_irq_disable(adapter);
2307 set_bit(__IGB_DOWN, &adapter->state);
2312 * igb_open - Called when a network interface is made active
2313 * @netdev: network interface device structure
2315 * Returns 0 on success, negative value on failure
2317 * The open entry point is called when a network interface is made
2318 * active by the system (IFF_UP). At this point all resources needed
2319 * for transmit and receive operations are allocated, the interrupt
2320 * handler is registered with the OS, the watchdog timer is started,
2321 * and the stack is notified that the interface is ready.
2323 static int igb_open(struct net_device *netdev)
2325 struct igb_adapter *adapter = netdev_priv(netdev);
2326 struct e1000_hw *hw = &adapter->hw;
2330 /* disallow open during test */
2331 if (test_bit(__IGB_TESTING, &adapter->state))
2334 netif_carrier_off(netdev);
2336 /* allocate transmit descriptors */
2337 err = igb_setup_all_tx_resources(adapter);
2341 /* allocate receive descriptors */
2342 err = igb_setup_all_rx_resources(adapter);
2346 igb_power_up_link(adapter);
2348 /* before we allocate an interrupt, we must be ready to handle it.
2349 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2350 * as soon as we call pci_request_irq, so we have to setup our
2351 * clean_rx handler before we do so. */
2352 igb_configure(adapter);
2354 err = igb_request_irq(adapter);
2358 /* From here on the code is the same as igb_up() */
2359 clear_bit(__IGB_DOWN, &adapter->state);
2361 for (i = 0; i < adapter->num_q_vectors; i++) {
2362 struct igb_q_vector *q_vector = adapter->q_vector[i];
2363 napi_enable(&q_vector->napi);
2366 /* Clear any pending interrupts. */
2369 igb_irq_enable(adapter);
2371 /* notify VFs that reset has been completed */
2372 if (adapter->vfs_allocated_count) {
2373 u32 reg_data = rd32(E1000_CTRL_EXT);
2374 reg_data |= E1000_CTRL_EXT_PFRSTD;
2375 wr32(E1000_CTRL_EXT, reg_data);
2378 netif_tx_start_all_queues(netdev);
2380 /* start the watchdog. */
2381 hw->mac.get_link_status = 1;
2382 schedule_work(&adapter->watchdog_task);
2387 igb_release_hw_control(adapter);
2388 igb_power_down_link(adapter);
2389 igb_free_all_rx_resources(adapter);
2391 igb_free_all_tx_resources(adapter);
2399 * igb_close - Disables a network interface
2400 * @netdev: network interface device structure
2402 * Returns 0, this is not allowed to fail
2404 * The close entry point is called when an interface is de-activated
2405 * by the OS. The hardware is still under the driver's control, but
2406 * needs to be disabled. A global MAC reset is issued to stop the
2407 * hardware, and all transmit and receive resources are freed.
2409 static int igb_close(struct net_device *netdev)
2411 struct igb_adapter *adapter = netdev_priv(netdev);
2413 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2416 igb_free_irq(adapter);
2418 igb_free_all_tx_resources(adapter);
2419 igb_free_all_rx_resources(adapter);
2425 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2426 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2428 * Return 0 on success, negative on failure
2430 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2432 struct device *dev = tx_ring->dev;
2435 size = sizeof(struct igb_buffer) * tx_ring->count;
2436 tx_ring->buffer_info = vmalloc(size);
2437 if (!tx_ring->buffer_info)
2439 memset(tx_ring->buffer_info, 0, size);
2441 /* round up to nearest 4K */
2442 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2443 tx_ring->size = ALIGN(tx_ring->size, 4096);
2445 tx_ring->desc = dma_alloc_coherent(dev,
2453 tx_ring->next_to_use = 0;
2454 tx_ring->next_to_clean = 0;
2458 vfree(tx_ring->buffer_info);
2460 "Unable to allocate memory for the transmit descriptor ring\n");
2465 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2466 * (Descriptors) for all queues
2467 * @adapter: board private structure
2469 * Return 0 on success, negative on failure
2471 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2473 struct pci_dev *pdev = adapter->pdev;
2476 for (i = 0; i < adapter->num_tx_queues; i++) {
2477 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2480 "Allocation for Tx Queue %u failed\n", i);
2481 for (i--; i >= 0; i--)
2482 igb_free_tx_resources(adapter->tx_ring[i]);
2487 for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2488 int r_idx = i % adapter->num_tx_queues;
2489 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2495 * igb_setup_tctl - configure the transmit control registers
2496 * @adapter: Board private structure
2498 void igb_setup_tctl(struct igb_adapter *adapter)
2500 struct e1000_hw *hw = &adapter->hw;
2503 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2504 wr32(E1000_TXDCTL(0), 0);
2506 /* Program the Transmit Control Register */
2507 tctl = rd32(E1000_TCTL);
2508 tctl &= ~E1000_TCTL_CT;
2509 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2510 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2512 igb_config_collision_dist(hw);
2514 /* Enable transmits */
2515 tctl |= E1000_TCTL_EN;
2517 wr32(E1000_TCTL, tctl);
2521 * igb_configure_tx_ring - Configure transmit ring after Reset
2522 * @adapter: board private structure
2523 * @ring: tx ring to configure
2525 * Configure a transmit ring after a reset.
2527 void igb_configure_tx_ring(struct igb_adapter *adapter,
2528 struct igb_ring *ring)
2530 struct e1000_hw *hw = &adapter->hw;
2532 u64 tdba = ring->dma;
2533 int reg_idx = ring->reg_idx;
2535 /* disable the queue */
2536 txdctl = rd32(E1000_TXDCTL(reg_idx));
2537 wr32(E1000_TXDCTL(reg_idx),
2538 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2542 wr32(E1000_TDLEN(reg_idx),
2543 ring->count * sizeof(union e1000_adv_tx_desc));
2544 wr32(E1000_TDBAL(reg_idx),
2545 tdba & 0x00000000ffffffffULL);
2546 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2548 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2549 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2550 writel(0, ring->head);
2551 writel(0, ring->tail);
2553 txdctl |= IGB_TX_PTHRESH;
2554 txdctl |= IGB_TX_HTHRESH << 8;
2555 txdctl |= IGB_TX_WTHRESH << 16;
2557 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2558 wr32(E1000_TXDCTL(reg_idx), txdctl);
2562 * igb_configure_tx - Configure transmit Unit after Reset
2563 * @adapter: board private structure
2565 * Configure the Tx unit of the MAC after a reset.
2567 static void igb_configure_tx(struct igb_adapter *adapter)
2571 for (i = 0; i < adapter->num_tx_queues; i++)
2572 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2576 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2577 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2579 * Returns 0 on success, negative on failure
2581 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2583 struct device *dev = rx_ring->dev;
2586 size = sizeof(struct igb_buffer) * rx_ring->count;
2587 rx_ring->buffer_info = vmalloc(size);
2588 if (!rx_ring->buffer_info)
2590 memset(rx_ring->buffer_info, 0, size);
2592 desc_len = sizeof(union e1000_adv_rx_desc);
2594 /* Round up to nearest 4K */
2595 rx_ring->size = rx_ring->count * desc_len;
2596 rx_ring->size = ALIGN(rx_ring->size, 4096);
2598 rx_ring->desc = dma_alloc_coherent(dev,
2606 rx_ring->next_to_clean = 0;
2607 rx_ring->next_to_use = 0;
2612 vfree(rx_ring->buffer_info);
2613 rx_ring->buffer_info = NULL;
2614 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2620 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2621 * (Descriptors) for all queues
2622 * @adapter: board private structure
2624 * Return 0 on success, negative on failure
2626 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2628 struct pci_dev *pdev = adapter->pdev;
2631 for (i = 0; i < adapter->num_rx_queues; i++) {
2632 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2635 "Allocation for Rx Queue %u failed\n", i);
2636 for (i--; i >= 0; i--)
2637 igb_free_rx_resources(adapter->rx_ring[i]);
2646 * igb_setup_mrqc - configure the multiple receive queue control registers
2647 * @adapter: Board private structure
2649 static void igb_setup_mrqc(struct igb_adapter *adapter)
2651 struct e1000_hw *hw = &adapter->hw;
2653 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2658 static const u8 rsshash[40] = {
2659 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2660 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2661 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2662 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2664 /* Fill out hash function seeds */
2665 for (j = 0; j < 10; j++) {
2666 u32 rsskey = rsshash[(j * 4)];
2667 rsskey |= rsshash[(j * 4) + 1] << 8;
2668 rsskey |= rsshash[(j * 4) + 2] << 16;
2669 rsskey |= rsshash[(j * 4) + 3] << 24;
2670 array_wr32(E1000_RSSRK(0), j, rsskey);
2673 num_rx_queues = adapter->rss_queues;
2675 if (adapter->vfs_allocated_count) {
2676 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2677 switch (hw->mac.type) {
2694 if (hw->mac.type == e1000_82575)
2698 for (j = 0; j < (32 * 4); j++) {
2699 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2701 reta.bytes[j & 3] |= num_rx_queues << shift2;
2703 wr32(E1000_RETA(j >> 2), reta.dword);
2707 * Disable raw packet checksumming so that RSS hash is placed in
2708 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2709 * offloads as they are enabled by default
2711 rxcsum = rd32(E1000_RXCSUM);
2712 rxcsum |= E1000_RXCSUM_PCSD;
2714 if (adapter->hw.mac.type >= e1000_82576)
2715 /* Enable Receive Checksum Offload for SCTP */
2716 rxcsum |= E1000_RXCSUM_CRCOFL;
2718 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2719 wr32(E1000_RXCSUM, rxcsum);
2721 /* If VMDq is enabled then we set the appropriate mode for that, else
2722 * we default to RSS so that an RSS hash is calculated per packet even
2723 * if we are only using one queue */
2724 if (adapter->vfs_allocated_count) {
2725 if (hw->mac.type > e1000_82575) {
2726 /* Set the default pool for the PF's first queue */
2727 u32 vtctl = rd32(E1000_VT_CTL);
2728 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2729 E1000_VT_CTL_DISABLE_DEF_POOL);
2730 vtctl |= adapter->vfs_allocated_count <<
2731 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2732 wr32(E1000_VT_CTL, vtctl);
2734 if (adapter->rss_queues > 1)
2735 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2737 mrqc = E1000_MRQC_ENABLE_VMDQ;
2739 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2741 igb_vmm_control(adapter);
2744 * Generate RSS hash based on TCP port numbers and/or
2745 * IPv4/v6 src and dst addresses since UDP cannot be
2746 * hashed reliably due to IP fragmentation
2748 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2749 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2750 E1000_MRQC_RSS_FIELD_IPV6 |
2751 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2752 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2754 wr32(E1000_MRQC, mrqc);
2758 * igb_setup_rctl - configure the receive control registers
2759 * @adapter: Board private structure
2761 void igb_setup_rctl(struct igb_adapter *adapter)
2763 struct e1000_hw *hw = &adapter->hw;
2766 rctl = rd32(E1000_RCTL);
2768 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2769 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2771 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2772 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2775 * enable stripping of CRC. It's unlikely this will break BMC
2776 * redirection as it did with e1000. Newer features require
2777 * that the HW strips the CRC.
2779 rctl |= E1000_RCTL_SECRC;
2781 /* disable store bad packets and clear size bits. */
2782 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2784 /* enable LPE to prevent packets larger than max_frame_size */
2785 rctl |= E1000_RCTL_LPE;
2787 /* disable queue 0 to prevent tail write w/o re-config */
2788 wr32(E1000_RXDCTL(0), 0);
2790 /* Attention!!! For SR-IOV PF driver operations you must enable
2791 * queue drop for all VF and PF queues to prevent head of line blocking
2792 * if an un-trusted VF does not provide descriptors to hardware.
2794 if (adapter->vfs_allocated_count) {
2795 /* set all queue drop enable bits */
2796 wr32(E1000_QDE, ALL_QUEUES);
2799 wr32(E1000_RCTL, rctl);
2802 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2805 struct e1000_hw *hw = &adapter->hw;
2808 /* if it isn't the PF check to see if VFs are enabled and
2809 * increase the size to support vlan tags */
2810 if (vfn < adapter->vfs_allocated_count &&
2811 adapter->vf_data[vfn].vlans_enabled)
2812 size += VLAN_TAG_SIZE;
2814 vmolr = rd32(E1000_VMOLR(vfn));
2815 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2816 vmolr |= size | E1000_VMOLR_LPE;
2817 wr32(E1000_VMOLR(vfn), vmolr);
2823 * igb_rlpml_set - set maximum receive packet size
2824 * @adapter: board private structure
2826 * Configure maximum receivable packet size.
2828 static void igb_rlpml_set(struct igb_adapter *adapter)
2830 u32 max_frame_size = adapter->max_frame_size;
2831 struct e1000_hw *hw = &adapter->hw;
2832 u16 pf_id = adapter->vfs_allocated_count;
2835 max_frame_size += VLAN_TAG_SIZE;
2837 /* if vfs are enabled we set RLPML to the largest possible request
2838 * size and set the VMOLR RLPML to the size we need */
2840 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2841 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2844 wr32(E1000_RLPML, max_frame_size);
2847 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2850 struct e1000_hw *hw = &adapter->hw;
2854 * This register exists only on 82576 and newer so if we are older then
2855 * we should exit and do nothing
2857 if (hw->mac.type < e1000_82576)
2860 vmolr = rd32(E1000_VMOLR(vfn));
2861 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
2863 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
2865 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2867 /* clear all bits that might not be set */
2868 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2870 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2871 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2873 * for VMDq only allow the VFs and pool 0 to accept broadcast and
2876 if (vfn <= adapter->vfs_allocated_count)
2877 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
2879 wr32(E1000_VMOLR(vfn), vmolr);
2883 * igb_configure_rx_ring - Configure a receive ring after Reset
2884 * @adapter: board private structure
2885 * @ring: receive ring to be configured
2887 * Configure the Rx unit of the MAC after a reset.
2889 void igb_configure_rx_ring(struct igb_adapter *adapter,
2890 struct igb_ring *ring)
2892 struct e1000_hw *hw = &adapter->hw;
2893 u64 rdba = ring->dma;
2894 int reg_idx = ring->reg_idx;
2897 /* disable the queue */
2898 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2899 wr32(E1000_RXDCTL(reg_idx),
2900 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2902 /* Set DMA base address registers */
2903 wr32(E1000_RDBAL(reg_idx),
2904 rdba & 0x00000000ffffffffULL);
2905 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2906 wr32(E1000_RDLEN(reg_idx),
2907 ring->count * sizeof(union e1000_adv_rx_desc));
2909 /* initialize head and tail */
2910 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2911 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2912 writel(0, ring->head);
2913 writel(0, ring->tail);
2915 /* set descriptor configuration */
2916 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2917 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2918 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2919 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2920 srrctl |= IGB_RXBUFFER_16384 >>
2921 E1000_SRRCTL_BSIZEPKT_SHIFT;
2923 srrctl |= (PAGE_SIZE / 2) >>
2924 E1000_SRRCTL_BSIZEPKT_SHIFT;
2926 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2928 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2929 E1000_SRRCTL_BSIZEPKT_SHIFT;
2930 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2932 if (hw->mac.type == e1000_82580)
2933 srrctl |= E1000_SRRCTL_TIMESTAMP;
2934 /* Only set Drop Enable if we are supporting multiple queues */
2935 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2936 srrctl |= E1000_SRRCTL_DROP_EN;
2938 wr32(E1000_SRRCTL(reg_idx), srrctl);
2940 /* set filtering for VMDQ pools */
2941 igb_set_vmolr(adapter, reg_idx & 0x7, true);
2943 /* enable receive descriptor fetching */
2944 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2945 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2946 rxdctl &= 0xFFF00000;
2947 rxdctl |= IGB_RX_PTHRESH;
2948 rxdctl |= IGB_RX_HTHRESH << 8;
2949 rxdctl |= IGB_RX_WTHRESH << 16;
2950 wr32(E1000_RXDCTL(reg_idx), rxdctl);
2954 * igb_configure_rx - Configure receive Unit after Reset
2955 * @adapter: board private structure
2957 * Configure the Rx unit of the MAC after a reset.
2959 static void igb_configure_rx(struct igb_adapter *adapter)
2963 /* set UTA to appropriate mode */
2964 igb_set_uta(adapter);
2966 /* set the correct pool for the PF default MAC address in entry 0 */
2967 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2968 adapter->vfs_allocated_count);
2970 /* Setup the HW Rx Head and Tail Descriptor Pointers and
2971 * the Base and Length of the Rx Descriptor Ring */
2972 for (i = 0; i < adapter->num_rx_queues; i++)
2973 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2977 * igb_free_tx_resources - Free Tx Resources per Queue
2978 * @tx_ring: Tx descriptor ring for a specific queue
2980 * Free all transmit software resources
2982 void igb_free_tx_resources(struct igb_ring *tx_ring)
2984 igb_clean_tx_ring(tx_ring);
2986 vfree(tx_ring->buffer_info);
2987 tx_ring->buffer_info = NULL;
2989 /* if not set, then don't free */
2993 dma_free_coherent(tx_ring->dev, tx_ring->size,
2994 tx_ring->desc, tx_ring->dma);
2996 tx_ring->desc = NULL;
3000 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3001 * @adapter: board private structure
3003 * Free all transmit software resources
3005 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3009 for (i = 0; i < adapter->num_tx_queues; i++)
3010 igb_free_tx_resources(adapter->tx_ring[i]);
3013 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3014 struct igb_buffer *buffer_info)
3016 if (buffer_info->dma) {
3017 if (buffer_info->mapped_as_page)
3018 dma_unmap_page(tx_ring->dev,
3020 buffer_info->length,
3023 dma_unmap_single(tx_ring->dev,
3025 buffer_info->length,
3027 buffer_info->dma = 0;
3029 if (buffer_info->skb) {
3030 dev_kfree_skb_any(buffer_info->skb);
3031 buffer_info->skb = NULL;
3033 buffer_info->time_stamp = 0;
3034 buffer_info->length = 0;
3035 buffer_info->next_to_watch = 0;
3036 buffer_info->mapped_as_page = false;
3040 * igb_clean_tx_ring - Free Tx Buffers
3041 * @tx_ring: ring to be cleaned
3043 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3045 struct igb_buffer *buffer_info;
3049 if (!tx_ring->buffer_info)
3051 /* Free all the Tx ring sk_buffs */
3053 for (i = 0; i < tx_ring->count; i++) {
3054 buffer_info = &tx_ring->buffer_info[i];
3055 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3058 size = sizeof(struct igb_buffer) * tx_ring->count;
3059 memset(tx_ring->buffer_info, 0, size);
3061 /* Zero out the descriptor ring */
3062 memset(tx_ring->desc, 0, tx_ring->size);
3064 tx_ring->next_to_use = 0;
3065 tx_ring->next_to_clean = 0;
3069 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3070 * @adapter: board private structure
3072 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3076 for (i = 0; i < adapter->num_tx_queues; i++)
3077 igb_clean_tx_ring(adapter->tx_ring[i]);
3081 * igb_free_rx_resources - Free Rx Resources
3082 * @rx_ring: ring to clean the resources from
3084 * Free all receive software resources
3086 void igb_free_rx_resources(struct igb_ring *rx_ring)
3088 igb_clean_rx_ring(rx_ring);
3090 vfree(rx_ring->buffer_info);
3091 rx_ring->buffer_info = NULL;
3093 /* if not set, then don't free */
3097 dma_free_coherent(rx_ring->dev, rx_ring->size,
3098 rx_ring->desc, rx_ring->dma);
3100 rx_ring->desc = NULL;
3104 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3105 * @adapter: board private structure
3107 * Free all receive software resources
3109 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3113 for (i = 0; i < adapter->num_rx_queues; i++)
3114 igb_free_rx_resources(adapter->rx_ring[i]);
3118 * igb_clean_rx_ring - Free Rx Buffers per Queue
3119 * @rx_ring: ring to free buffers from
3121 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3123 struct igb_buffer *buffer_info;
3127 if (!rx_ring->buffer_info)
3130 /* Free all the Rx ring sk_buffs */
3131 for (i = 0; i < rx_ring->count; i++) {
3132 buffer_info = &rx_ring->buffer_info[i];
3133 if (buffer_info->dma) {
3134 dma_unmap_single(rx_ring->dev,
3136 rx_ring->rx_buffer_len,
3138 buffer_info->dma = 0;
3141 if (buffer_info->skb) {
3142 dev_kfree_skb(buffer_info->skb);
3143 buffer_info->skb = NULL;
3145 if (buffer_info->page_dma) {
3146 dma_unmap_page(rx_ring->dev,
3147 buffer_info->page_dma,
3150 buffer_info->page_dma = 0;
3152 if (buffer_info->page) {
3153 put_page(buffer_info->page);
3154 buffer_info->page = NULL;
3155 buffer_info->page_offset = 0;
3159 size = sizeof(struct igb_buffer) * rx_ring->count;
3160 memset(rx_ring->buffer_info, 0, size);
3162 /* Zero out the descriptor ring */
3163 memset(rx_ring->desc, 0, rx_ring->size);
3165 rx_ring->next_to_clean = 0;
3166 rx_ring->next_to_use = 0;
3170 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3171 * @adapter: board private structure
3173 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3177 for (i = 0; i < adapter->num_rx_queues; i++)
3178 igb_clean_rx_ring(adapter->rx_ring[i]);
3182 * igb_set_mac - Change the Ethernet Address of the NIC
3183 * @netdev: network interface device structure
3184 * @p: pointer to an address structure
3186 * Returns 0 on success, negative on failure
3188 static int igb_set_mac(struct net_device *netdev, void *p)
3190 struct igb_adapter *adapter = netdev_priv(netdev);
3191 struct e1000_hw *hw = &adapter->hw;
3192 struct sockaddr *addr = p;
3194 if (!is_valid_ether_addr(addr->sa_data))
3195 return -EADDRNOTAVAIL;
3197 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3198 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3200 /* set the correct pool for the new PF MAC address in entry 0 */
3201 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3202 adapter->vfs_allocated_count);
3208 * igb_write_mc_addr_list - write multicast addresses to MTA
3209 * @netdev: network interface device structure
3211 * Writes multicast address list to the MTA hash table.
3212 * Returns: -ENOMEM on failure
3213 * 0 on no addresses written
3214 * X on writing X addresses to MTA
3216 static int igb_write_mc_addr_list(struct net_device *netdev)
3218 struct igb_adapter *adapter = netdev_priv(netdev);
3219 struct e1000_hw *hw = &adapter->hw;
3220 struct netdev_hw_addr *ha;
3224 if (netdev_mc_empty(netdev)) {
3225 /* nothing to program, so clear mc list */
3226 igb_update_mc_addr_list(hw, NULL, 0);
3227 igb_restore_vf_multicasts(adapter);
3231 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3235 /* The shared function expects a packed array of only addresses. */
3237 netdev_for_each_mc_addr(ha, netdev)
3238 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3240 igb_update_mc_addr_list(hw, mta_list, i);
3243 return netdev_mc_count(netdev);
3247 * igb_write_uc_addr_list - write unicast addresses to RAR table
3248 * @netdev: network interface device structure
3250 * Writes unicast address list to the RAR table.
3251 * Returns: -ENOMEM on failure/insufficient address space
3252 * 0 on no addresses written
3253 * X on writing X addresses to the RAR table
3255 static int igb_write_uc_addr_list(struct net_device *netdev)
3257 struct igb_adapter *adapter = netdev_priv(netdev);
3258 struct e1000_hw *hw = &adapter->hw;
3259 unsigned int vfn = adapter->vfs_allocated_count;
3260 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3263 /* return ENOMEM indicating insufficient memory for addresses */
3264 if (netdev_uc_count(netdev) > rar_entries)
3267 if (!netdev_uc_empty(netdev) && rar_entries) {
3268 struct netdev_hw_addr *ha;
3270 netdev_for_each_uc_addr(ha, netdev) {
3273 igb_rar_set_qsel(adapter, ha->addr,
3279 /* write the addresses in reverse order to avoid write combining */
3280 for (; rar_entries > 0 ; rar_entries--) {
3281 wr32(E1000_RAH(rar_entries), 0);
3282 wr32(E1000_RAL(rar_entries), 0);
3290 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3291 * @netdev: network interface device structure
3293 * The set_rx_mode entry point is called whenever the unicast or multicast
3294 * address lists or the network interface flags are updated. This routine is
3295 * responsible for configuring the hardware for proper unicast, multicast,
3296 * promiscuous mode, and all-multi behavior.
3298 static void igb_set_rx_mode(struct net_device *netdev)
3300 struct igb_adapter *adapter = netdev_priv(netdev);
3301 struct e1000_hw *hw = &adapter->hw;
3302 unsigned int vfn = adapter->vfs_allocated_count;
3303 u32 rctl, vmolr = 0;
3306 /* Check for Promiscuous and All Multicast modes */
3307 rctl = rd32(E1000_RCTL);
3309 /* clear the effected bits */
3310 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3312 if (netdev->flags & IFF_PROMISC) {
3313 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3314 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3316 if (netdev->flags & IFF_ALLMULTI) {
3317 rctl |= E1000_RCTL_MPE;
3318 vmolr |= E1000_VMOLR_MPME;
3321 * Write addresses to the MTA, if the attempt fails
3322 * then we should just turn on promiscous mode so
3323 * that we can at least receive multicast traffic
3325 count = igb_write_mc_addr_list(netdev);
3327 rctl |= E1000_RCTL_MPE;
3328 vmolr |= E1000_VMOLR_MPME;
3330 vmolr |= E1000_VMOLR_ROMPE;
3334 * Write addresses to available RAR registers, if there is not
3335 * sufficient space to store all the addresses then enable
3336 * unicast promiscous mode
3338 count = igb_write_uc_addr_list(netdev);
3340 rctl |= E1000_RCTL_UPE;
3341 vmolr |= E1000_VMOLR_ROPE;
3343 rctl |= E1000_RCTL_VFE;
3345 wr32(E1000_RCTL, rctl);
3348 * In order to support SR-IOV and eventually VMDq it is necessary to set
3349 * the VMOLR to enable the appropriate modes. Without this workaround
3350 * we will have issues with VLAN tag stripping not being done for frames
3351 * that are only arriving because we are the default pool
3353 if (hw->mac.type < e1000_82576)
3356 vmolr |= rd32(E1000_VMOLR(vfn)) &
3357 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3358 wr32(E1000_VMOLR(vfn), vmolr);
3359 igb_restore_vf_multicasts(adapter);
3362 /* Need to wait a few seconds after link up to get diagnostic information from
3364 static void igb_update_phy_info(unsigned long data)
3366 struct igb_adapter *adapter = (struct igb_adapter *) data;
3367 igb_get_phy_info(&adapter->hw);
3371 * igb_has_link - check shared code for link and determine up/down
3372 * @adapter: pointer to driver private info
3374 bool igb_has_link(struct igb_adapter *adapter)
3376 struct e1000_hw *hw = &adapter->hw;
3377 bool link_active = false;
3380 /* get_link_status is set on LSC (link status) interrupt or
3381 * rx sequence error interrupt. get_link_status will stay
3382 * false until the e1000_check_for_link establishes link
3383 * for copper adapters ONLY
3385 switch (hw->phy.media_type) {
3386 case e1000_media_type_copper:
3387 if (hw->mac.get_link_status) {
3388 ret_val = hw->mac.ops.check_for_link(hw);
3389 link_active = !hw->mac.get_link_status;
3394 case e1000_media_type_internal_serdes:
3395 ret_val = hw->mac.ops.check_for_link(hw);
3396 link_active = hw->mac.serdes_has_link;
3399 case e1000_media_type_unknown:
3407 * igb_watchdog - Timer Call-back
3408 * @data: pointer to adapter cast into an unsigned long
3410 static void igb_watchdog(unsigned long data)
3412 struct igb_adapter *adapter = (struct igb_adapter *)data;
3413 /* Do the rest outside of interrupt context */
3414 schedule_work(&adapter->watchdog_task);
3417 static void igb_watchdog_task(struct work_struct *work)
3419 struct igb_adapter *adapter = container_of(work,
3422 struct e1000_hw *hw = &adapter->hw;
3423 struct net_device *netdev = adapter->netdev;
3427 link = igb_has_link(adapter);
3429 if (!netif_carrier_ok(netdev)) {
3431 hw->mac.ops.get_speed_and_duplex(hw,
3432 &adapter->link_speed,
3433 &adapter->link_duplex);
3435 ctrl = rd32(E1000_CTRL);
3436 /* Links status message must follow this format */
3437 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3438 "Flow Control: %s\n",
3440 adapter->link_speed,
3441 adapter->link_duplex == FULL_DUPLEX ?
3442 "Full Duplex" : "Half Duplex",
3443 ((ctrl & E1000_CTRL_TFCE) &&
3444 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3445 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3446 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3448 /* adjust timeout factor according to speed/duplex */
3449 adapter->tx_timeout_factor = 1;
3450 switch (adapter->link_speed) {
3452 adapter->tx_timeout_factor = 14;
3455 /* maybe add some timeout factor ? */
3459 netif_carrier_on(netdev);
3461 igb_ping_all_vfs(adapter);
3463 /* link state has changed, schedule phy info update */
3464 if (!test_bit(__IGB_DOWN, &adapter->state))
3465 mod_timer(&adapter->phy_info_timer,
3466 round_jiffies(jiffies + 2 * HZ));
3469 if (netif_carrier_ok(netdev)) {
3470 adapter->link_speed = 0;
3471 adapter->link_duplex = 0;
3472 /* Links status message must follow this format */
3473 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3475 netif_carrier_off(netdev);
3477 igb_ping_all_vfs(adapter);
3479 /* link state has changed, schedule phy info update */
3480 if (!test_bit(__IGB_DOWN, &adapter->state))
3481 mod_timer(&adapter->phy_info_timer,
3482 round_jiffies(jiffies + 2 * HZ));
3486 igb_update_stats(adapter);
3488 for (i = 0; i < adapter->num_tx_queues; i++) {
3489 struct igb_ring *tx_ring = adapter->tx_ring[i];
3490 if (!netif_carrier_ok(netdev)) {
3491 /* We've lost link, so the controller stops DMA,
3492 * but we've got queued Tx work that's never going
3493 * to get done, so reset controller to flush Tx.
3494 * (Do the reset outside of interrupt context). */
3495 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3496 adapter->tx_timeout_count++;
3497 schedule_work(&adapter->reset_task);
3498 /* return immediately since reset is imminent */
3503 /* Force detection of hung controller every watchdog period */
3504 tx_ring->detect_tx_hung = true;
3507 /* Cause software interrupt to ensure rx ring is cleaned */
3508 if (adapter->msix_entries) {
3510 for (i = 0; i < adapter->num_q_vectors; i++) {
3511 struct igb_q_vector *q_vector = adapter->q_vector[i];
3512 eics |= q_vector->eims_value;
3514 wr32(E1000_EICS, eics);
3516 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3519 /* Reset the timer */
3520 if (!test_bit(__IGB_DOWN, &adapter->state))
3521 mod_timer(&adapter->watchdog_timer,
3522 round_jiffies(jiffies + 2 * HZ));
3525 enum latency_range {
3529 latency_invalid = 255
3533 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3535 * Stores a new ITR value based on strictly on packet size. This
3536 * algorithm is less sophisticated than that used in igb_update_itr,
3537 * due to the difficulty of synchronizing statistics across multiple
3538 * receive rings. The divisors and thresholds used by this fuction
3539 * were determined based on theoretical maximum wire speed and testing
3540 * data, in order to minimize response time while increasing bulk
3542 * This functionality is controlled by the InterruptThrottleRate module
3543 * parameter (see igb_param.c)
3544 * NOTE: This function is called only when operating in a multiqueue
3545 * receive environment.
3546 * @q_vector: pointer to q_vector
3548 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3550 int new_val = q_vector->itr_val;
3551 int avg_wire_size = 0;
3552 struct igb_adapter *adapter = q_vector->adapter;
3554 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3555 * ints/sec - ITR timer value of 120 ticks.
3557 if (adapter->link_speed != SPEED_1000) {
3562 if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3563 struct igb_ring *ring = q_vector->rx_ring;
3564 avg_wire_size = ring->total_bytes / ring->total_packets;
3567 if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3568 struct igb_ring *ring = q_vector->tx_ring;
3569 avg_wire_size = max_t(u32, avg_wire_size,
3570 (ring->total_bytes /
3571 ring->total_packets));
3574 /* if avg_wire_size isn't set no work was done */
3578 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3579 avg_wire_size += 24;
3581 /* Don't starve jumbo frames */
3582 avg_wire_size = min(avg_wire_size, 3000);
3584 /* Give a little boost to mid-size frames */
3585 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3586 new_val = avg_wire_size / 3;
3588 new_val = avg_wire_size / 2;
3590 /* when in itr mode 3 do not exceed 20K ints/sec */
3591 if (adapter->rx_itr_setting == 3 && new_val < 196)
3595 if (new_val != q_vector->itr_val) {
3596 q_vector->itr_val = new_val;
3597 q_vector->set_itr = 1;
3600 if (q_vector->rx_ring) {
3601 q_vector->rx_ring->total_bytes = 0;
3602 q_vector->rx_ring->total_packets = 0;
3604 if (q_vector->tx_ring) {
3605 q_vector->tx_ring->total_bytes = 0;
3606 q_vector->tx_ring->total_packets = 0;
3611 * igb_update_itr - update the dynamic ITR value based on statistics
3612 * Stores a new ITR value based on packets and byte
3613 * counts during the last interrupt. The advantage of per interrupt
3614 * computation is faster updates and more accurate ITR for the current
3615 * traffic pattern. Constants in this function were computed
3616 * based on theoretical maximum wire speed and thresholds were set based
3617 * on testing data as well as attempting to minimize response time
3618 * while increasing bulk throughput.
3619 * this functionality is controlled by the InterruptThrottleRate module
3620 * parameter (see igb_param.c)
3621 * NOTE: These calculations are only valid when operating in a single-
3622 * queue environment.
3623 * @adapter: pointer to adapter
3624 * @itr_setting: current q_vector->itr_val
3625 * @packets: the number of packets during this measurement interval
3626 * @bytes: the number of bytes during this measurement interval
3628 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3629 int packets, int bytes)
3631 unsigned int retval = itr_setting;
3634 goto update_itr_done;
3636 switch (itr_setting) {
3637 case lowest_latency:
3638 /* handle TSO and jumbo frames */
3639 if (bytes/packets > 8000)
3640 retval = bulk_latency;
3641 else if ((packets < 5) && (bytes > 512))
3642 retval = low_latency;
3644 case low_latency: /* 50 usec aka 20000 ints/s */
3645 if (bytes > 10000) {
3646 /* this if handles the TSO accounting */
3647 if (bytes/packets > 8000) {
3648 retval = bulk_latency;
3649 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3650 retval = bulk_latency;
3651 } else if ((packets > 35)) {
3652 retval = lowest_latency;
3654 } else if (bytes/packets > 2000) {
3655 retval = bulk_latency;
3656 } else if (packets <= 2 && bytes < 512) {
3657 retval = lowest_latency;
3660 case bulk_latency: /* 250 usec aka 4000 ints/s */
3661 if (bytes > 25000) {
3663 retval = low_latency;
3664 } else if (bytes < 1500) {
3665 retval = low_latency;
3674 static void igb_set_itr(struct igb_adapter *adapter)
3676 struct igb_q_vector *q_vector = adapter->q_vector[0];
3678 u32 new_itr = q_vector->itr_val;
3680 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3681 if (adapter->link_speed != SPEED_1000) {
3687 adapter->rx_itr = igb_update_itr(adapter,
3689 q_vector->rx_ring->total_packets,
3690 q_vector->rx_ring->total_bytes);
3692 adapter->tx_itr = igb_update_itr(adapter,
3694 q_vector->tx_ring->total_packets,
3695 q_vector->tx_ring->total_bytes);
3696 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3698 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3699 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3700 current_itr = low_latency;
3702 switch (current_itr) {
3703 /* counts and packets in update_itr are dependent on these numbers */
3704 case lowest_latency:
3705 new_itr = 56; /* aka 70,000 ints/sec */
3708 new_itr = 196; /* aka 20,000 ints/sec */
3711 new_itr = 980; /* aka 4,000 ints/sec */
3718 q_vector->rx_ring->total_bytes = 0;
3719 q_vector->rx_ring->total_packets = 0;
3720 q_vector->tx_ring->total_bytes = 0;
3721 q_vector->tx_ring->total_packets = 0;
3723 if (new_itr != q_vector->itr_val) {
3724 /* this attempts to bias the interrupt rate towards Bulk
3725 * by adding intermediate steps when interrupt rate is
3727 new_itr = new_itr > q_vector->itr_val ?
3728 max((new_itr * q_vector->itr_val) /
3729 (new_itr + (q_vector->itr_val >> 2)),
3732 /* Don't write the value here; it resets the adapter's
3733 * internal timer, and causes us to delay far longer than
3734 * we should between interrupts. Instead, we write the ITR
3735 * value at the beginning of the next interrupt so the timing
3736 * ends up being correct.
3738 q_vector->itr_val = new_itr;
3739 q_vector->set_itr = 1;
3743 #define IGB_TX_FLAGS_CSUM 0x00000001
3744 #define IGB_TX_FLAGS_VLAN 0x00000002
3745 #define IGB_TX_FLAGS_TSO 0x00000004
3746 #define IGB_TX_FLAGS_IPV4 0x00000008
3747 #define IGB_TX_FLAGS_TSTAMP 0x00000010
3748 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
3749 #define IGB_TX_FLAGS_VLAN_SHIFT 16
3751 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3752 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3754 struct e1000_adv_tx_context_desc *context_desc;
3757 struct igb_buffer *buffer_info;
3758 u32 info = 0, tu_cmd = 0;
3762 if (skb_header_cloned(skb)) {
3763 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3768 l4len = tcp_hdrlen(skb);
3771 if (skb->protocol == htons(ETH_P_IP)) {
3772 struct iphdr *iph = ip_hdr(skb);
3775 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3779 } else if (skb_is_gso_v6(skb)) {
3780 ipv6_hdr(skb)->payload_len = 0;
3781 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3782 &ipv6_hdr(skb)->daddr,
3786 i = tx_ring->next_to_use;
3788 buffer_info = &tx_ring->buffer_info[i];
3789 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3790 /* VLAN MACLEN IPLEN */
3791 if (tx_flags & IGB_TX_FLAGS_VLAN)
3792 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3793 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3794 *hdr_len += skb_network_offset(skb);
3795 info |= skb_network_header_len(skb);
3796 *hdr_len += skb_network_header_len(skb);
3797 context_desc->vlan_macip_lens = cpu_to_le32(info);
3799 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3800 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3802 if (skb->protocol == htons(ETH_P_IP))
3803 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3804 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3806 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3809 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3810 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3812 /* For 82575, context index must be unique per ring. */
3813 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3814 mss_l4len_idx |= tx_ring->reg_idx << 4;
3816 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3817 context_desc->seqnum_seed = 0;
3819 buffer_info->time_stamp = jiffies;
3820 buffer_info->next_to_watch = i;
3821 buffer_info->dma = 0;
3823 if (i == tx_ring->count)
3826 tx_ring->next_to_use = i;
3831 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3832 struct sk_buff *skb, u32 tx_flags)
3834 struct e1000_adv_tx_context_desc *context_desc;
3835 struct device *dev = tx_ring->dev;
3836 struct igb_buffer *buffer_info;
3837 u32 info = 0, tu_cmd = 0;
3840 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3841 (tx_flags & IGB_TX_FLAGS_VLAN)) {
3842 i = tx_ring->next_to_use;
3843 buffer_info = &tx_ring->buffer_info[i];
3844 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3846 if (tx_flags & IGB_TX_FLAGS_VLAN)
3847 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3849 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3850 if (skb->ip_summed == CHECKSUM_PARTIAL)
3851 info |= skb_network_header_len(skb);
3853 context_desc->vlan_macip_lens = cpu_to_le32(info);
3855 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3857 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3860 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3861 const struct vlan_ethhdr *vhdr =
3862 (const struct vlan_ethhdr*)skb->data;
3864 protocol = vhdr->h_vlan_encapsulated_proto;
3866 protocol = skb->protocol;
3870 case cpu_to_be16(ETH_P_IP):
3871 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3872 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3873 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3874 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3875 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3877 case cpu_to_be16(ETH_P_IPV6):
3878 /* XXX what about other V6 headers?? */
3879 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3880 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3881 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3882 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3885 if (unlikely(net_ratelimit()))
3887 "partial checksum but proto=%x!\n",
3893 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3894 context_desc->seqnum_seed = 0;
3895 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3896 context_desc->mss_l4len_idx =
3897 cpu_to_le32(tx_ring->reg_idx << 4);
3899 buffer_info->time_stamp = jiffies;
3900 buffer_info->next_to_watch = i;
3901 buffer_info->dma = 0;
3904 if (i == tx_ring->count)
3906 tx_ring->next_to_use = i;
3913 #define IGB_MAX_TXD_PWR 16
3914 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
3916 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3919 struct igb_buffer *buffer_info;
3920 struct device *dev = tx_ring->dev;
3921 unsigned int hlen = skb_headlen(skb);
3922 unsigned int count = 0, i;
3924 u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3926 i = tx_ring->next_to_use;
3928 buffer_info = &tx_ring->buffer_info[i];
3929 BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
3930 buffer_info->length = hlen;
3931 /* set time_stamp *before* dma to help avoid a possible race */
3932 buffer_info->time_stamp = jiffies;
3933 buffer_info->next_to_watch = i;
3934 buffer_info->dma = dma_map_single(dev, skb->data, hlen,
3936 if (dma_mapping_error(dev, buffer_info->dma))
3939 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3940 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
3941 unsigned int len = frag->size;
3945 if (i == tx_ring->count)
3948 buffer_info = &tx_ring->buffer_info[i];
3949 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3950 buffer_info->length = len;
3951 buffer_info->time_stamp = jiffies;
3952 buffer_info->next_to_watch = i;
3953 buffer_info->mapped_as_page = true;
3954 buffer_info->dma = dma_map_page(dev,
3959 if (dma_mapping_error(dev, buffer_info->dma))
3964 tx_ring->buffer_info[i].skb = skb;
3965 tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
3966 /* multiply data chunks by size of headers */
3967 tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
3968 tx_ring->buffer_info[i].gso_segs = gso_segs;
3969 tx_ring->buffer_info[first].next_to_watch = i;
3974 dev_err(dev, "TX DMA map failed\n");
3976 /* clear timestamp and dma mappings for failed buffer_info mapping */
3977 buffer_info->dma = 0;
3978 buffer_info->time_stamp = 0;
3979 buffer_info->length = 0;
3980 buffer_info->next_to_watch = 0;
3981 buffer_info->mapped_as_page = false;
3983 /* clear timestamp and dma mappings for remaining portion of packet */
3988 buffer_info = &tx_ring->buffer_info[i];
3989 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3995 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3996 u32 tx_flags, int count, u32 paylen,
3999 union e1000_adv_tx_desc *tx_desc;
4000 struct igb_buffer *buffer_info;
4001 u32 olinfo_status = 0, cmd_type_len;
4002 unsigned int i = tx_ring->next_to_use;
4004 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4005 E1000_ADVTXD_DCMD_DEXT);
4007 if (tx_flags & IGB_TX_FLAGS_VLAN)
4008 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4010 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4011 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4013 if (tx_flags & IGB_TX_FLAGS_TSO) {
4014 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4016 /* insert tcp checksum */
4017 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4019 /* insert ip checksum */
4020 if (tx_flags & IGB_TX_FLAGS_IPV4)
4021 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4023 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4024 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4027 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4028 (tx_flags & (IGB_TX_FLAGS_CSUM |
4030 IGB_TX_FLAGS_VLAN)))
4031 olinfo_status |= tx_ring->reg_idx << 4;
4033 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4036 buffer_info = &tx_ring->buffer_info[i];
4037 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4038 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4039 tx_desc->read.cmd_type_len =
4040 cpu_to_le32(cmd_type_len | buffer_info->length);
4041 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4044 if (i == tx_ring->count)
4046 } while (count > 0);
4048 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4049 /* Force memory writes to complete before letting h/w
4050 * know there are new descriptors to fetch. (Only
4051 * applicable for weak-ordered memory model archs,
4052 * such as IA-64). */
4055 tx_ring->next_to_use = i;
4056 writel(i, tx_ring->tail);
4057 /* we need this if more than one processor can write to our tail
4058 * at a time, it syncronizes IO on IA64/Altix systems */
4062 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4064 struct net_device *netdev = tx_ring->netdev;
4066 netif_stop_subqueue(netdev, tx_ring->queue_index);
4068 /* Herbert's original patch had:
4069 * smp_mb__after_netif_stop_queue();
4070 * but since that doesn't exist yet, just open code it. */
4073 /* We need to check again in a case another CPU has just
4074 * made room available. */
4075 if (igb_desc_unused(tx_ring) < size)
4079 netif_wake_subqueue(netdev, tx_ring->queue_index);
4080 tx_ring->tx_stats.restart_queue++;
4084 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4086 if (igb_desc_unused(tx_ring) >= size)
4088 return __igb_maybe_stop_tx(tx_ring, size);
4091 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4092 struct igb_ring *tx_ring)
4094 struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
4100 /* need: 1 descriptor per page,
4101 * + 2 desc gap to keep tail from touching head,
4102 * + 1 desc for skb->data,
4103 * + 1 desc for context descriptor,
4104 * otherwise try next time */
4105 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4106 /* this is a hard error */
4107 return NETDEV_TX_BUSY;
4110 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4111 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4112 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4115 if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
4116 tx_flags |= IGB_TX_FLAGS_VLAN;
4117 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4120 if (skb->protocol == htons(ETH_P_IP))
4121 tx_flags |= IGB_TX_FLAGS_IPV4;
4123 first = tx_ring->next_to_use;
4124 if (skb_is_gso(skb)) {
4125 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4128 dev_kfree_skb_any(skb);
4129 return NETDEV_TX_OK;
4134 tx_flags |= IGB_TX_FLAGS_TSO;
4135 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4136 (skb->ip_summed == CHECKSUM_PARTIAL))
4137 tx_flags |= IGB_TX_FLAGS_CSUM;
4140 * count reflects descriptors mapped, if 0 or less then mapping error
4141 * has occured and we need to rewind the descriptor queue
4143 count = igb_tx_map_adv(tx_ring, skb, first);
4145 dev_kfree_skb_any(skb);
4146 tx_ring->buffer_info[first].time_stamp = 0;
4147 tx_ring->next_to_use = first;
4148 return NETDEV_TX_OK;
4151 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4153 /* Make sure there is space in the ring for the next send. */
4154 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4156 return NETDEV_TX_OK;
4159 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4160 struct net_device *netdev)
4162 struct igb_adapter *adapter = netdev_priv(netdev);
4163 struct igb_ring *tx_ring;
4166 if (test_bit(__IGB_DOWN, &adapter->state)) {
4167 dev_kfree_skb_any(skb);
4168 return NETDEV_TX_OK;
4171 if (skb->len <= 0) {
4172 dev_kfree_skb_any(skb);
4173 return NETDEV_TX_OK;
4176 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4177 tx_ring = adapter->multi_tx_table[r_idx];
4179 /* This goes back to the question of how to logically map a tx queue
4180 * to a flow. Right now, performance is impacted slightly negatively
4181 * if using multiple tx queues. If the stack breaks away from a
4182 * single qdisc implementation, we can look at this again. */
4183 return igb_xmit_frame_ring_adv(skb, tx_ring);
4187 * igb_tx_timeout - Respond to a Tx Hang
4188 * @netdev: network interface device structure
4190 static void igb_tx_timeout(struct net_device *netdev)
4192 struct igb_adapter *adapter = netdev_priv(netdev);
4193 struct e1000_hw *hw = &adapter->hw;
4195 /* Do the reset outside of interrupt context */
4196 adapter->tx_timeout_count++;
4198 if (hw->mac.type == e1000_82580)
4199 hw->dev_spec._82575.global_device_reset = true;
4201 schedule_work(&adapter->reset_task);
4203 (adapter->eims_enable_mask & ~adapter->eims_other));
4206 static void igb_reset_task(struct work_struct *work)
4208 struct igb_adapter *adapter;
4209 adapter = container_of(work, struct igb_adapter, reset_task);
4212 netdev_err(adapter->netdev, "Reset adapter\n");
4213 igb_reinit_locked(adapter);
4217 * igb_get_stats - Get System Network Statistics
4218 * @netdev: network interface device structure
4220 * Returns the address of the device statistics structure.
4221 * The statistics are actually updated from the timer callback.
4223 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
4225 /* only return the current stats */
4226 return &netdev->stats;
4230 * igb_change_mtu - Change the Maximum Transfer Unit
4231 * @netdev: network interface device structure
4232 * @new_mtu: new value for maximum frame size
4234 * Returns 0 on success, negative on failure
4236 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4238 struct igb_adapter *adapter = netdev_priv(netdev);
4239 struct pci_dev *pdev = adapter->pdev;
4240 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4241 u32 rx_buffer_len, i;
4243 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4244 dev_err(&pdev->dev, "Invalid MTU setting\n");
4248 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4249 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4253 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4256 /* igb_down has a dependency on max_frame_size */
4257 adapter->max_frame_size = max_frame;
4259 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4260 * means we reserve 2 more, this pushes us to allocate from the next
4262 * i.e. RXBUFFER_2048 --> size-4096 slab
4265 if (adapter->hw.mac.type == e1000_82580)
4266 max_frame += IGB_TS_HDR_LEN;
4268 if (max_frame <= IGB_RXBUFFER_1024)
4269 rx_buffer_len = IGB_RXBUFFER_1024;
4270 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4271 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4273 rx_buffer_len = IGB_RXBUFFER_128;
4275 if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4276 (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4277 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4279 if ((adapter->hw.mac.type == e1000_82580) &&
4280 (rx_buffer_len == IGB_RXBUFFER_128))
4281 rx_buffer_len += IGB_RXBUFFER_64;
4283 if (netif_running(netdev))
4286 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4287 netdev->mtu, new_mtu);
4288 netdev->mtu = new_mtu;
4290 for (i = 0; i < adapter->num_rx_queues; i++)
4291 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4293 if (netif_running(netdev))
4298 clear_bit(__IGB_RESETTING, &adapter->state);
4304 * igb_update_stats - Update the board statistics counters
4305 * @adapter: board private structure
4308 void igb_update_stats(struct igb_adapter *adapter)
4310 struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
4311 struct e1000_hw *hw = &adapter->hw;
4312 struct pci_dev *pdev = adapter->pdev;
4318 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4321 * Prevent stats update while adapter is being reset, or if the pci
4322 * connection is down.
4324 if (adapter->link_speed == 0)
4326 if (pci_channel_offline(pdev))
4331 for (i = 0; i < adapter->num_rx_queues; i++) {
4332 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4333 struct igb_ring *ring = adapter->rx_ring[i];
4334 ring->rx_stats.drops += rqdpc_tmp;
4335 net_stats->rx_fifo_errors += rqdpc_tmp;
4336 bytes += ring->rx_stats.bytes;
4337 packets += ring->rx_stats.packets;
4340 net_stats->rx_bytes = bytes;
4341 net_stats->rx_packets = packets;
4345 for (i = 0; i < adapter->num_tx_queues; i++) {
4346 struct igb_ring *ring = adapter->tx_ring[i];
4347 bytes += ring->tx_stats.bytes;
4348 packets += ring->tx_stats.packets;
4350 net_stats->tx_bytes = bytes;
4351 net_stats->tx_packets = packets;
4353 /* read stats registers */
4354 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4355 adapter->stats.gprc += rd32(E1000_GPRC);
4356 adapter->stats.gorc += rd32(E1000_GORCL);
4357 rd32(E1000_GORCH); /* clear GORCL */
4358 adapter->stats.bprc += rd32(E1000_BPRC);
4359 adapter->stats.mprc += rd32(E1000_MPRC);
4360 adapter->stats.roc += rd32(E1000_ROC);
4362 adapter->stats.prc64 += rd32(E1000_PRC64);
4363 adapter->stats.prc127 += rd32(E1000_PRC127);
4364 adapter->stats.prc255 += rd32(E1000_PRC255);
4365 adapter->stats.prc511 += rd32(E1000_PRC511);
4366 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4367 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4368 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4369 adapter->stats.sec += rd32(E1000_SEC);
4371 mpc = rd32(E1000_MPC);
4372 adapter->stats.mpc += mpc;
4373 net_stats->rx_fifo_errors += mpc;
4374 adapter->stats.scc += rd32(E1000_SCC);
4375 adapter->stats.ecol += rd32(E1000_ECOL);
4376 adapter->stats.mcc += rd32(E1000_MCC);
4377 adapter->stats.latecol += rd32(E1000_LATECOL);
4378 adapter->stats.dc += rd32(E1000_DC);
4379 adapter->stats.rlec += rd32(E1000_RLEC);
4380 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4381 adapter->stats.xontxc += rd32(E1000_XONTXC);
4382 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4383 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4384 adapter->stats.fcruc += rd32(E1000_FCRUC);
4385 adapter->stats.gptc += rd32(E1000_GPTC);
4386 adapter->stats.gotc += rd32(E1000_GOTCL);
4387 rd32(E1000_GOTCH); /* clear GOTCL */
4388 adapter->stats.rnbc += rd32(E1000_RNBC);
4389 adapter->stats.ruc += rd32(E1000_RUC);
4390 adapter->stats.rfc += rd32(E1000_RFC);
4391 adapter->stats.rjc += rd32(E1000_RJC);
4392 adapter->stats.tor += rd32(E1000_TORH);
4393 adapter->stats.tot += rd32(E1000_TOTH);
4394 adapter->stats.tpr += rd32(E1000_TPR);
4396 adapter->stats.ptc64 += rd32(E1000_PTC64);
4397 adapter->stats.ptc127 += rd32(E1000_PTC127);
4398 adapter->stats.ptc255 += rd32(E1000_PTC255);
4399 adapter->stats.ptc511 += rd32(E1000_PTC511);
4400 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4401 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4403 adapter->stats.mptc += rd32(E1000_MPTC);
4404 adapter->stats.bptc += rd32(E1000_BPTC);
4406 adapter->stats.tpt += rd32(E1000_TPT);
4407 adapter->stats.colc += rd32(E1000_COLC);
4409 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4410 /* read internal phy specific stats */
4411 reg = rd32(E1000_CTRL_EXT);
4412 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4413 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4414 adapter->stats.tncrs += rd32(E1000_TNCRS);
4417 adapter->stats.tsctc += rd32(E1000_TSCTC);
4418 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4420 adapter->stats.iac += rd32(E1000_IAC);
4421 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4422 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4423 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4424 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4425 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4426 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4427 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4428 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4430 /* Fill out the OS statistics structure */
4431 net_stats->multicast = adapter->stats.mprc;
4432 net_stats->collisions = adapter->stats.colc;
4436 /* RLEC on some newer hardware can be incorrect so build
4437 * our own version based on RUC and ROC */
4438 net_stats->rx_errors = adapter->stats.rxerrc +
4439 adapter->stats.crcerrs + adapter->stats.algnerrc +
4440 adapter->stats.ruc + adapter->stats.roc +
4441 adapter->stats.cexterr;
4442 net_stats->rx_length_errors = adapter->stats.ruc +
4444 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4445 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4446 net_stats->rx_missed_errors = adapter->stats.mpc;
4449 net_stats->tx_errors = adapter->stats.ecol +
4450 adapter->stats.latecol;
4451 net_stats->tx_aborted_errors = adapter->stats.ecol;
4452 net_stats->tx_window_errors = adapter->stats.latecol;
4453 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4455 /* Tx Dropped needs to be maintained elsewhere */
4458 if (hw->phy.media_type == e1000_media_type_copper) {
4459 if ((adapter->link_speed == SPEED_1000) &&
4460 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4461 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4462 adapter->phy_stats.idle_errors += phy_tmp;
4466 /* Management Stats */
4467 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4468 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4469 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4472 static irqreturn_t igb_msix_other(int irq, void *data)
4474 struct igb_adapter *adapter = data;
4475 struct e1000_hw *hw = &adapter->hw;
4476 u32 icr = rd32(E1000_ICR);
4477 /* reading ICR causes bit 31 of EICR to be cleared */
4479 if (icr & E1000_ICR_DRSTA)
4480 schedule_work(&adapter->reset_task);
4482 if (icr & E1000_ICR_DOUTSYNC) {
4483 /* HW is reporting DMA is out of sync */
4484 adapter->stats.doosync++;
4487 /* Check for a mailbox event */
4488 if (icr & E1000_ICR_VMMB)
4489 igb_msg_task(adapter);
4491 if (icr & E1000_ICR_LSC) {
4492 hw->mac.get_link_status = 1;
4493 /* guard against interrupt when we're going down */
4494 if (!test_bit(__IGB_DOWN, &adapter->state))
4495 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4498 if (adapter->vfs_allocated_count)
4499 wr32(E1000_IMS, E1000_IMS_LSC |
4501 E1000_IMS_DOUTSYNC);
4503 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4504 wr32(E1000_EIMS, adapter->eims_other);
4509 static void igb_write_itr(struct igb_q_vector *q_vector)
4511 struct igb_adapter *adapter = q_vector->adapter;
4512 u32 itr_val = q_vector->itr_val & 0x7FFC;
4514 if (!q_vector->set_itr)
4520 if (adapter->hw.mac.type == e1000_82575)
4521 itr_val |= itr_val << 16;
4523 itr_val |= 0x8000000;
4525 writel(itr_val, q_vector->itr_register);
4526 q_vector->set_itr = 0;
4529 static irqreturn_t igb_msix_ring(int irq, void *data)
4531 struct igb_q_vector *q_vector = data;
4533 /* Write the ITR value calculated from the previous interrupt. */
4534 igb_write_itr(q_vector);
4536 napi_schedule(&q_vector->napi);
4541 #ifdef CONFIG_IGB_DCA
4542 static void igb_update_dca(struct igb_q_vector *q_vector)
4544 struct igb_adapter *adapter = q_vector->adapter;
4545 struct e1000_hw *hw = &adapter->hw;
4546 int cpu = get_cpu();
4548 if (q_vector->cpu == cpu)
4551 if (q_vector->tx_ring) {
4552 int q = q_vector->tx_ring->reg_idx;
4553 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4554 if (hw->mac.type == e1000_82575) {
4555 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4556 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4558 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4559 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4560 E1000_DCA_TXCTRL_CPUID_SHIFT;
4562 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4563 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4565 if (q_vector->rx_ring) {
4566 int q = q_vector->rx_ring->reg_idx;
4567 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4568 if (hw->mac.type == e1000_82575) {
4569 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4570 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4572 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4573 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4574 E1000_DCA_RXCTRL_CPUID_SHIFT;
4576 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4577 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4578 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4579 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4581 q_vector->cpu = cpu;
4586 static void igb_setup_dca(struct igb_adapter *adapter)
4588 struct e1000_hw *hw = &adapter->hw;
4591 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4594 /* Always use CB2 mode, difference is masked in the CB driver. */
4595 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4597 for (i = 0; i < adapter->num_q_vectors; i++) {
4598 adapter->q_vector[i]->cpu = -1;
4599 igb_update_dca(adapter->q_vector[i]);
4603 static int __igb_notify_dca(struct device *dev, void *data)
4605 struct net_device *netdev = dev_get_drvdata(dev);
4606 struct igb_adapter *adapter = netdev_priv(netdev);
4607 struct pci_dev *pdev = adapter->pdev;
4608 struct e1000_hw *hw = &adapter->hw;
4609 unsigned long event = *(unsigned long *)data;
4612 case DCA_PROVIDER_ADD:
4613 /* if already enabled, don't do it again */
4614 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4616 if (dca_add_requester(dev) == 0) {
4617 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4618 dev_info(&pdev->dev, "DCA enabled\n");
4619 igb_setup_dca(adapter);
4622 /* Fall Through since DCA is disabled. */
4623 case DCA_PROVIDER_REMOVE:
4624 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4625 /* without this a class_device is left
4626 * hanging around in the sysfs model */
4627 dca_remove_requester(dev);
4628 dev_info(&pdev->dev, "DCA disabled\n");
4629 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4630 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4638 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4643 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4646 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4648 #endif /* CONFIG_IGB_DCA */
4650 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4652 struct e1000_hw *hw = &adapter->hw;
4656 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4657 ping = E1000_PF_CONTROL_MSG;
4658 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4659 ping |= E1000_VT_MSGTYPE_CTS;
4660 igb_write_mbx(hw, &ping, 1, i);
4664 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4666 struct e1000_hw *hw = &adapter->hw;
4667 u32 vmolr = rd32(E1000_VMOLR(vf));
4668 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4670 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4671 IGB_VF_FLAG_MULTI_PROMISC);
4672 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4674 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4675 vmolr |= E1000_VMOLR_MPME;
4676 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4677 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4680 * if we have hashes and we are clearing a multicast promisc
4681 * flag we need to write the hashes to the MTA as this step
4682 * was previously skipped
4684 if (vf_data->num_vf_mc_hashes > 30) {
4685 vmolr |= E1000_VMOLR_MPME;
4686 } else if (vf_data->num_vf_mc_hashes) {
4688 vmolr |= E1000_VMOLR_ROMPE;
4689 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4690 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4694 wr32(E1000_VMOLR(vf), vmolr);
4696 /* there are flags left unprocessed, likely not supported */
4697 if (*msgbuf & E1000_VT_MSGINFO_MASK)
4704 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4705 u32 *msgbuf, u32 vf)
4707 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4708 u16 *hash_list = (u16 *)&msgbuf[1];
4709 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4712 /* salt away the number of multicast addresses assigned
4713 * to this VF for later use to restore when the PF multi cast
4716 vf_data->num_vf_mc_hashes = n;
4718 /* only up to 30 hash values supported */
4722 /* store the hashes for later use */
4723 for (i = 0; i < n; i++)
4724 vf_data->vf_mc_hashes[i] = hash_list[i];
4726 /* Flush and reset the mta with the new values */
4727 igb_set_rx_mode(adapter->netdev);
4732 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4734 struct e1000_hw *hw = &adapter->hw;
4735 struct vf_data_storage *vf_data;
4738 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4739 u32 vmolr = rd32(E1000_VMOLR(i));
4740 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4742 vf_data = &adapter->vf_data[i];
4744 if ((vf_data->num_vf_mc_hashes > 30) ||
4745 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4746 vmolr |= E1000_VMOLR_MPME;
4747 } else if (vf_data->num_vf_mc_hashes) {
4748 vmolr |= E1000_VMOLR_ROMPE;
4749 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4750 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4752 wr32(E1000_VMOLR(i), vmolr);
4756 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4758 struct e1000_hw *hw = &adapter->hw;
4759 u32 pool_mask, reg, vid;
4762 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4764 /* Find the vlan filter for this id */
4765 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4766 reg = rd32(E1000_VLVF(i));
4768 /* remove the vf from the pool */
4771 /* if pool is empty then remove entry from vfta */
4772 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4773 (reg & E1000_VLVF_VLANID_ENABLE)) {
4775 vid = reg & E1000_VLVF_VLANID_MASK;
4776 igb_vfta_set(hw, vid, false);
4779 wr32(E1000_VLVF(i), reg);
4782 adapter->vf_data[vf].vlans_enabled = 0;
4785 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4787 struct e1000_hw *hw = &adapter->hw;
4790 /* The vlvf table only exists on 82576 hardware and newer */
4791 if (hw->mac.type < e1000_82576)
4794 /* we only need to do this if VMDq is enabled */
4795 if (!adapter->vfs_allocated_count)
4798 /* Find the vlan filter for this id */
4799 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4800 reg = rd32(E1000_VLVF(i));
4801 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4802 vid == (reg & E1000_VLVF_VLANID_MASK))
4807 if (i == E1000_VLVF_ARRAY_SIZE) {
4808 /* Did not find a matching VLAN ID entry that was
4809 * enabled. Search for a free filter entry, i.e.
4810 * one without the enable bit set
4812 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4813 reg = rd32(E1000_VLVF(i));
4814 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4818 if (i < E1000_VLVF_ARRAY_SIZE) {
4819 /* Found an enabled/available entry */
4820 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4822 /* if !enabled we need to set this up in vfta */
4823 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4824 /* add VID to filter table */
4825 igb_vfta_set(hw, vid, true);
4826 reg |= E1000_VLVF_VLANID_ENABLE;
4828 reg &= ~E1000_VLVF_VLANID_MASK;
4830 wr32(E1000_VLVF(i), reg);
4832 /* do not modify RLPML for PF devices */
4833 if (vf >= adapter->vfs_allocated_count)
4836 if (!adapter->vf_data[vf].vlans_enabled) {
4838 reg = rd32(E1000_VMOLR(vf));
4839 size = reg & E1000_VMOLR_RLPML_MASK;
4841 reg &= ~E1000_VMOLR_RLPML_MASK;
4843 wr32(E1000_VMOLR(vf), reg);
4846 adapter->vf_data[vf].vlans_enabled++;
4850 if (i < E1000_VLVF_ARRAY_SIZE) {
4851 /* remove vf from the pool */
4852 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4853 /* if pool is empty then remove entry from vfta */
4854 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4856 igb_vfta_set(hw, vid, false);
4858 wr32(E1000_VLVF(i), reg);
4860 /* do not modify RLPML for PF devices */
4861 if (vf >= adapter->vfs_allocated_count)
4864 adapter->vf_data[vf].vlans_enabled--;
4865 if (!adapter->vf_data[vf].vlans_enabled) {
4867 reg = rd32(E1000_VMOLR(vf));
4868 size = reg & E1000_VMOLR_RLPML_MASK;
4870 reg &= ~E1000_VMOLR_RLPML_MASK;
4872 wr32(E1000_VMOLR(vf), reg);
4879 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4881 struct e1000_hw *hw = &adapter->hw;
4884 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4886 wr32(E1000_VMVIR(vf), 0);
4889 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4890 int vf, u16 vlan, u8 qos)
4893 struct igb_adapter *adapter = netdev_priv(netdev);
4895 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4898 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4901 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4902 igb_set_vmolr(adapter, vf, !vlan);
4903 adapter->vf_data[vf].pf_vlan = vlan;
4904 adapter->vf_data[vf].pf_qos = qos;
4905 dev_info(&adapter->pdev->dev,
4906 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4907 if (test_bit(__IGB_DOWN, &adapter->state)) {
4908 dev_warn(&adapter->pdev->dev,
4909 "The VF VLAN has been set,"
4910 " but the PF device is not up.\n");
4911 dev_warn(&adapter->pdev->dev,
4912 "Bring the PF device up before"
4913 " attempting to use the VF device.\n");
4916 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4918 igb_set_vmvir(adapter, vlan, vf);
4919 igb_set_vmolr(adapter, vf, true);
4920 adapter->vf_data[vf].pf_vlan = 0;
4921 adapter->vf_data[vf].pf_qos = 0;
4927 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4929 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4930 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4932 return igb_vlvf_set(adapter, vid, add, vf);
4935 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4938 adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4939 adapter->vf_data[vf].last_nack = jiffies;
4941 /* reset offloads to defaults */
4942 igb_set_vmolr(adapter, vf, true);
4944 /* reset vlans for device */
4945 igb_clear_vf_vfta(adapter, vf);
4946 if (adapter->vf_data[vf].pf_vlan)
4947 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4948 adapter->vf_data[vf].pf_vlan,
4949 adapter->vf_data[vf].pf_qos);
4951 igb_clear_vf_vfta(adapter, vf);
4953 /* reset multicast table array for vf */
4954 adapter->vf_data[vf].num_vf_mc_hashes = 0;
4956 /* Flush and reset the mta with the new values */
4957 igb_set_rx_mode(adapter->netdev);
4960 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4962 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4964 /* generate a new mac address as we were hotplug removed/added */
4965 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4966 random_ether_addr(vf_mac);
4968 /* process remaining reset events */
4969 igb_vf_reset(adapter, vf);
4972 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4974 struct e1000_hw *hw = &adapter->hw;
4975 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4976 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4978 u8 *addr = (u8 *)(&msgbuf[1]);
4980 /* process all the same items cleared in a function level reset */
4981 igb_vf_reset(adapter, vf);
4983 /* set vf mac address */
4984 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4986 /* enable transmit and receive for vf */
4987 reg = rd32(E1000_VFTE);
4988 wr32(E1000_VFTE, reg | (1 << vf));
4989 reg = rd32(E1000_VFRE);
4990 wr32(E1000_VFRE, reg | (1 << vf));
4992 adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4994 /* reply to reset with ack and vf mac address */
4995 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4996 memcpy(addr, vf_mac, 6);
4997 igb_write_mbx(hw, msgbuf, 3, vf);
5000 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5003 * The VF MAC Address is stored in a packed array of bytes
5004 * starting at the second 32 bit word of the msg array
5006 unsigned char *addr = (char *)&msg[1];
5009 if (is_valid_ether_addr(addr))
5010 err = igb_set_vf_mac(adapter, vf, addr);
5015 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5017 struct e1000_hw *hw = &adapter->hw;
5018 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5019 u32 msg = E1000_VT_MSGTYPE_NACK;
5021 /* if device isn't clear to send it shouldn't be reading either */
5022 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5023 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5024 igb_write_mbx(hw, &msg, 1, vf);
5025 vf_data->last_nack = jiffies;
5029 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5031 struct pci_dev *pdev = adapter->pdev;
5032 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5033 struct e1000_hw *hw = &adapter->hw;
5034 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5037 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5040 /* if receive failed revoke VF CTS stats and restart init */
5041 dev_err(&pdev->dev, "Error receiving message from VF\n");
5042 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5043 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5048 /* this is a message we already processed, do nothing */
5049 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5053 * until the vf completes a reset it should not be
5054 * allowed to start any configuration.
5057 if (msgbuf[0] == E1000_VF_RESET) {
5058 igb_vf_reset_msg(adapter, vf);
5062 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5063 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5069 switch ((msgbuf[0] & 0xFFFF)) {
5070 case E1000_VF_SET_MAC_ADDR:
5071 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5073 case E1000_VF_SET_PROMISC:
5074 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5076 case E1000_VF_SET_MULTICAST:
5077 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5079 case E1000_VF_SET_LPE:
5080 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5082 case E1000_VF_SET_VLAN:
5083 if (adapter->vf_data[vf].pf_vlan)
5086 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5089 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5094 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5096 /* notify the VF of the results of what it sent us */
5098 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5100 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5102 igb_write_mbx(hw, msgbuf, 1, vf);
5105 static void igb_msg_task(struct igb_adapter *adapter)
5107 struct e1000_hw *hw = &adapter->hw;
5110 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5111 /* process any reset requests */
5112 if (!igb_check_for_rst(hw, vf))
5113 igb_vf_reset_event(adapter, vf);
5115 /* process any messages pending */
5116 if (!igb_check_for_msg(hw, vf))
5117 igb_rcv_msg_from_vf(adapter, vf);
5119 /* process any acks */
5120 if (!igb_check_for_ack(hw, vf))
5121 igb_rcv_ack_from_vf(adapter, vf);
5126 * igb_set_uta - Set unicast filter table address
5127 * @adapter: board private structure
5129 * The unicast table address is a register array of 32-bit registers.
5130 * The table is meant to be used in a way similar to how the MTA is used
5131 * however due to certain limitations in the hardware it is necessary to
5132 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscous
5133 * enable bit to allow vlan tag stripping when promiscous mode is enabled
5135 static void igb_set_uta(struct igb_adapter *adapter)
5137 struct e1000_hw *hw = &adapter->hw;
5140 /* The UTA table only exists on 82576 hardware and newer */
5141 if (hw->mac.type < e1000_82576)
5144 /* we only need to do this if VMDq is enabled */
5145 if (!adapter->vfs_allocated_count)
5148 for (i = 0; i < hw->mac.uta_reg_count; i++)
5149 array_wr32(E1000_UTA, i, ~0);
5153 * igb_intr_msi - Interrupt Handler
5154 * @irq: interrupt number
5155 * @data: pointer to a network interface device structure
5157 static irqreturn_t igb_intr_msi(int irq, void *data)
5159 struct igb_adapter *adapter = data;
5160 struct igb_q_vector *q_vector = adapter->q_vector[0];
5161 struct e1000_hw *hw = &adapter->hw;
5162 /* read ICR disables interrupts using IAM */
5163 u32 icr = rd32(E1000_ICR);
5165 igb_write_itr(q_vector);
5167 if (icr & E1000_ICR_DRSTA)
5168 schedule_work(&adapter->reset_task);
5170 if (icr & E1000_ICR_DOUTSYNC) {
5171 /* HW is reporting DMA is out of sync */
5172 adapter->stats.doosync++;
5175 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5176 hw->mac.get_link_status = 1;
5177 if (!test_bit(__IGB_DOWN, &adapter->state))
5178 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5181 napi_schedule(&q_vector->napi);
5187 * igb_intr - Legacy Interrupt Handler
5188 * @irq: interrupt number
5189 * @data: pointer to a network interface device structure
5191 static irqreturn_t igb_intr(int irq, void *data)
5193 struct igb_adapter *adapter = data;
5194 struct igb_q_vector *q_vector = adapter->q_vector[0];
5195 struct e1000_hw *hw = &adapter->hw;
5196 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5197 * need for the IMC write */
5198 u32 icr = rd32(E1000_ICR);
5200 return IRQ_NONE; /* Not our interrupt */
5202 igb_write_itr(q_vector);
5204 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5205 * not set, then the adapter didn't send an interrupt */
5206 if (!(icr & E1000_ICR_INT_ASSERTED))
5209 if (icr & E1000_ICR_DRSTA)
5210 schedule_work(&adapter->reset_task);
5212 if (icr & E1000_ICR_DOUTSYNC) {
5213 /* HW is reporting DMA is out of sync */
5214 adapter->stats.doosync++;
5217 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5218 hw->mac.get_link_status = 1;
5219 /* guard against interrupt when we're going down */
5220 if (!test_bit(__IGB_DOWN, &adapter->state))
5221 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5224 napi_schedule(&q_vector->napi);
5229 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5231 struct igb_adapter *adapter = q_vector->adapter;
5232 struct e1000_hw *hw = &adapter->hw;
5234 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5235 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5236 if (!adapter->msix_entries)
5237 igb_set_itr(adapter);
5239 igb_update_ring_itr(q_vector);
5242 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5243 if (adapter->msix_entries)
5244 wr32(E1000_EIMS, q_vector->eims_value);
5246 igb_irq_enable(adapter);
5251 * igb_poll - NAPI Rx polling callback
5252 * @napi: napi polling structure
5253 * @budget: count of how many packets we should handle
5255 static int igb_poll(struct napi_struct *napi, int budget)
5257 struct igb_q_vector *q_vector = container_of(napi,
5258 struct igb_q_vector,
5260 int tx_clean_complete = 1, work_done = 0;
5262 #ifdef CONFIG_IGB_DCA
5263 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5264 igb_update_dca(q_vector);
5266 if (q_vector->tx_ring)
5267 tx_clean_complete = igb_clean_tx_irq(q_vector);
5269 if (q_vector->rx_ring)
5270 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5272 if (!tx_clean_complete)
5275 /* If not enough Rx work done, exit the polling mode */
5276 if (work_done < budget) {
5277 napi_complete(napi);
5278 igb_ring_irq_enable(q_vector);
5285 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5286 * @adapter: board private structure
5287 * @shhwtstamps: timestamp structure to update
5288 * @regval: unsigned 64bit system time value.
5290 * We need to convert the system time value stored in the RX/TXSTMP registers
5291 * into a hwtstamp which can be used by the upper level timestamping functions
5293 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5294 struct skb_shared_hwtstamps *shhwtstamps,
5300 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5301 * 24 to match clock shift we setup earlier.
5303 if (adapter->hw.mac.type == e1000_82580)
5304 regval <<= IGB_82580_TSYNC_SHIFT;
5306 ns = timecounter_cyc2time(&adapter->clock, regval);
5307 timecompare_update(&adapter->compare, ns);
5308 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5309 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5310 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5314 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5315 * @q_vector: pointer to q_vector containing needed info
5316 * @buffer: pointer to igb_buffer structure
5318 * If we were asked to do hardware stamping and such a time stamp is
5319 * available, then it must have been for this skb here because we only
5320 * allow only one such packet into the queue.
5322 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5324 struct igb_adapter *adapter = q_vector->adapter;
5325 struct e1000_hw *hw = &adapter->hw;
5326 struct skb_shared_hwtstamps shhwtstamps;
5329 /* if skb does not support hw timestamp or TX stamp not valid exit */
5330 if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5331 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5334 regval = rd32(E1000_TXSTMPL);
5335 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5337 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5338 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5342 * igb_clean_tx_irq - Reclaim resources after transmit completes
5343 * @q_vector: pointer to q_vector containing needed info
5344 * returns true if ring is completely cleaned
5346 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5348 struct igb_adapter *adapter = q_vector->adapter;
5349 struct igb_ring *tx_ring = q_vector->tx_ring;
5350 struct net_device *netdev = tx_ring->netdev;
5351 struct e1000_hw *hw = &adapter->hw;
5352 struct igb_buffer *buffer_info;
5353 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5354 unsigned int total_bytes = 0, total_packets = 0;
5355 unsigned int i, eop, count = 0;
5356 bool cleaned = false;
5358 i = tx_ring->next_to_clean;
5359 eop = tx_ring->buffer_info[i].next_to_watch;
5360 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5362 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5363 (count < tx_ring->count)) {
5364 rmb(); /* read buffer_info after eop_desc status */
5365 for (cleaned = false; !cleaned; count++) {
5366 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5367 buffer_info = &tx_ring->buffer_info[i];
5368 cleaned = (i == eop);
5370 if (buffer_info->skb) {
5371 total_bytes += buffer_info->bytecount;
5372 /* gso_segs is currently only valid for tcp */
5373 total_packets += buffer_info->gso_segs;
5374 igb_tx_hwtstamp(q_vector, buffer_info);
5377 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5378 tx_desc->wb.status = 0;
5381 if (i == tx_ring->count)
5384 eop = tx_ring->buffer_info[i].next_to_watch;
5385 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5388 tx_ring->next_to_clean = i;
5390 if (unlikely(count &&
5391 netif_carrier_ok(netdev) &&
5392 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5393 /* Make sure that anybody stopping the queue after this
5394 * sees the new next_to_clean.
5397 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5398 !(test_bit(__IGB_DOWN, &adapter->state))) {
5399 netif_wake_subqueue(netdev, tx_ring->queue_index);
5400 tx_ring->tx_stats.restart_queue++;
5404 if (tx_ring->detect_tx_hung) {
5405 /* Detect a transmit hang in hardware, this serializes the
5406 * check with the clearing of time_stamp and movement of i */
5407 tx_ring->detect_tx_hung = false;
5408 if (tx_ring->buffer_info[i].time_stamp &&
5409 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5410 (adapter->tx_timeout_factor * HZ)) &&
5411 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5413 /* detected Tx unit hang */
5414 dev_err(tx_ring->dev,
5415 "Detected Tx Unit Hang\n"
5419 " next_to_use <%x>\n"
5420 " next_to_clean <%x>\n"
5421 "buffer_info[next_to_clean]\n"
5422 " time_stamp <%lx>\n"
5423 " next_to_watch <%x>\n"
5425 " desc.status <%x>\n",
5426 tx_ring->queue_index,
5427 readl(tx_ring->head),
5428 readl(tx_ring->tail),
5429 tx_ring->next_to_use,
5430 tx_ring->next_to_clean,
5431 tx_ring->buffer_info[eop].time_stamp,
5434 eop_desc->wb.status);
5435 netif_stop_subqueue(netdev, tx_ring->queue_index);
5438 tx_ring->total_bytes += total_bytes;
5439 tx_ring->total_packets += total_packets;
5440 tx_ring->tx_stats.bytes += total_bytes;
5441 tx_ring->tx_stats.packets += total_packets;
5442 return count < tx_ring->count;
5446 * igb_receive_skb - helper function to handle rx indications
5447 * @q_vector: structure containing interrupt and ring information
5448 * @skb: packet to send up
5449 * @vlan_tag: vlan tag for packet
5451 static void igb_receive_skb(struct igb_q_vector *q_vector,
5452 struct sk_buff *skb,
5455 struct igb_adapter *adapter = q_vector->adapter;
5457 if (vlan_tag && adapter->vlgrp)
5458 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5461 napi_gro_receive(&q_vector->napi, skb);
5464 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5465 u32 status_err, struct sk_buff *skb)
5467 skb_checksum_none_assert(skb);
5469 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5470 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5471 (status_err & E1000_RXD_STAT_IXSM))
5474 /* TCP/UDP checksum error bit is set */
5476 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5478 * work around errata with sctp packets where the TCPE aka
5479 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5480 * packets, (aka let the stack check the crc32c)
5482 if ((skb->len == 60) &&
5483 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5484 ring->rx_stats.csum_err++;
5486 /* let the stack verify checksum errors */
5489 /* It must be a TCP or UDP packet with a valid checksum */
5490 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5491 skb->ip_summed = CHECKSUM_UNNECESSARY;
5493 dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5496 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5497 struct sk_buff *skb)
5499 struct igb_adapter *adapter = q_vector->adapter;
5500 struct e1000_hw *hw = &adapter->hw;
5504 * If this bit is set, then the RX registers contain the time stamp. No
5505 * other packet will be time stamped until we read these registers, so
5506 * read the registers to make them available again. Because only one
5507 * packet can be time stamped at a time, we know that the register
5508 * values must belong to this one here and therefore we don't need to
5509 * compare any of the additional attributes stored for it.
5511 * If nothing went wrong, then it should have a shared tx_flags that we
5512 * can turn into a skb_shared_hwtstamps.
5514 if (staterr & E1000_RXDADV_STAT_TSIP) {
5515 u32 *stamp = (u32 *)skb->data;
5516 regval = le32_to_cpu(*(stamp + 2));
5517 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5518 skb_pull(skb, IGB_TS_HDR_LEN);
5520 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5523 regval = rd32(E1000_RXSTMPL);
5524 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5527 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5529 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5530 union e1000_adv_rx_desc *rx_desc)
5532 /* HW will not DMA in data larger than the given buffer, even if it
5533 * parses the (NFS, of course) header to be larger. In that case, it
5534 * fills the header buffer and spills the rest into the page.
5536 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5537 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5538 if (hlen > rx_ring->rx_buffer_len)
5539 hlen = rx_ring->rx_buffer_len;
5543 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5544 int *work_done, int budget)
5546 struct igb_ring *rx_ring = q_vector->rx_ring;
5547 struct net_device *netdev = rx_ring->netdev;
5548 struct device *dev = rx_ring->dev;
5549 union e1000_adv_rx_desc *rx_desc , *next_rxd;
5550 struct igb_buffer *buffer_info , *next_buffer;
5551 struct sk_buff *skb;
5552 bool cleaned = false;
5553 int cleaned_count = 0;
5554 int current_node = numa_node_id();
5555 unsigned int total_bytes = 0, total_packets = 0;
5561 i = rx_ring->next_to_clean;
5562 buffer_info = &rx_ring->buffer_info[i];
5563 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5564 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5566 while (staterr & E1000_RXD_STAT_DD) {
5567 if (*work_done >= budget)
5570 rmb(); /* read descriptor and rx_buffer_info after status DD */
5572 skb = buffer_info->skb;
5573 prefetch(skb->data - NET_IP_ALIGN);
5574 buffer_info->skb = NULL;
5577 if (i == rx_ring->count)
5580 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5582 next_buffer = &rx_ring->buffer_info[i];
5584 length = le16_to_cpu(rx_desc->wb.upper.length);
5588 if (buffer_info->dma) {
5589 dma_unmap_single(dev, buffer_info->dma,
5590 rx_ring->rx_buffer_len,
5592 buffer_info->dma = 0;
5593 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5594 skb_put(skb, length);
5597 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5601 dma_unmap_page(dev, buffer_info->page_dma,
5602 PAGE_SIZE / 2, DMA_FROM_DEVICE);
5603 buffer_info->page_dma = 0;
5605 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5607 buffer_info->page_offset,
5610 if ((page_count(buffer_info->page) != 1) ||
5611 (page_to_nid(buffer_info->page) != current_node))
5612 buffer_info->page = NULL;
5614 get_page(buffer_info->page);
5617 skb->data_len += length;
5618 skb->truesize += length;
5621 if (!(staterr & E1000_RXD_STAT_EOP)) {
5622 buffer_info->skb = next_buffer->skb;
5623 buffer_info->dma = next_buffer->dma;
5624 next_buffer->skb = skb;
5625 next_buffer->dma = 0;
5629 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5630 dev_kfree_skb_irq(skb);
5634 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5635 igb_rx_hwtstamp(q_vector, staterr, skb);
5636 total_bytes += skb->len;
5639 igb_rx_checksum_adv(rx_ring, staterr, skb);
5641 skb->protocol = eth_type_trans(skb, netdev);
5642 skb_record_rx_queue(skb, rx_ring->queue_index);
5644 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5645 le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5647 igb_receive_skb(q_vector, skb, vlan_tag);
5650 rx_desc->wb.upper.status_error = 0;
5652 /* return some buffers to hardware, one at a time is too slow */
5653 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5654 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5658 /* use prefetched values */
5660 buffer_info = next_buffer;
5661 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5664 rx_ring->next_to_clean = i;
5665 cleaned_count = igb_desc_unused(rx_ring);
5668 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5670 rx_ring->total_packets += total_packets;
5671 rx_ring->total_bytes += total_bytes;
5672 rx_ring->rx_stats.packets += total_packets;
5673 rx_ring->rx_stats.bytes += total_bytes;
5678 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5679 * @adapter: address of board private structure
5681 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5683 struct net_device *netdev = rx_ring->netdev;
5684 union e1000_adv_rx_desc *rx_desc;
5685 struct igb_buffer *buffer_info;
5686 struct sk_buff *skb;
5690 i = rx_ring->next_to_use;
5691 buffer_info = &rx_ring->buffer_info[i];
5693 bufsz = rx_ring->rx_buffer_len;
5695 while (cleaned_count--) {
5696 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5698 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5699 if (!buffer_info->page) {
5700 buffer_info->page = netdev_alloc_page(netdev);
5701 if (!buffer_info->page) {
5702 rx_ring->rx_stats.alloc_failed++;
5705 buffer_info->page_offset = 0;
5707 buffer_info->page_offset ^= PAGE_SIZE / 2;
5709 buffer_info->page_dma =
5710 dma_map_page(rx_ring->dev, buffer_info->page,
5711 buffer_info->page_offset,
5714 if (dma_mapping_error(rx_ring->dev,
5715 buffer_info->page_dma)) {
5716 buffer_info->page_dma = 0;
5717 rx_ring->rx_stats.alloc_failed++;
5722 skb = buffer_info->skb;
5724 skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5726 rx_ring->rx_stats.alloc_failed++;
5730 buffer_info->skb = skb;
5732 if (!buffer_info->dma) {
5733 buffer_info->dma = dma_map_single(rx_ring->dev,
5737 if (dma_mapping_error(rx_ring->dev,
5738 buffer_info->dma)) {
5739 buffer_info->dma = 0;
5740 rx_ring->rx_stats.alloc_failed++;
5744 /* Refresh the desc even if buffer_addrs didn't change because
5745 * each write-back erases this info. */
5746 if (bufsz < IGB_RXBUFFER_1024) {
5747 rx_desc->read.pkt_addr =
5748 cpu_to_le64(buffer_info->page_dma);
5749 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5751 rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5752 rx_desc->read.hdr_addr = 0;
5756 if (i == rx_ring->count)
5758 buffer_info = &rx_ring->buffer_info[i];
5762 if (rx_ring->next_to_use != i) {
5763 rx_ring->next_to_use = i;
5765 i = (rx_ring->count - 1);
5769 /* Force memory writes to complete before letting h/w
5770 * know there are new descriptors to fetch. (Only
5771 * applicable for weak-ordered memory model archs,
5772 * such as IA-64). */
5774 writel(i, rx_ring->tail);
5784 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5786 struct igb_adapter *adapter = netdev_priv(netdev);
5787 struct mii_ioctl_data *data = if_mii(ifr);
5789 if (adapter->hw.phy.media_type != e1000_media_type_copper)
5794 data->phy_id = adapter->hw.phy.addr;
5797 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5809 * igb_hwtstamp_ioctl - control hardware time stamping
5814 * Outgoing time stamping can be enabled and disabled. Play nice and
5815 * disable it when requested, although it shouldn't case any overhead
5816 * when no packet needs it. At most one packet in the queue may be
5817 * marked for time stamping, otherwise it would be impossible to tell
5818 * for sure to which packet the hardware time stamp belongs.
5820 * Incoming time stamping has to be configured via the hardware
5821 * filters. Not all combinations are supported, in particular event
5822 * type has to be specified. Matching the kind of event packet is
5823 * not supported, with the exception of "all V2 events regardless of
5827 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5828 struct ifreq *ifr, int cmd)
5830 struct igb_adapter *adapter = netdev_priv(netdev);
5831 struct e1000_hw *hw = &adapter->hw;
5832 struct hwtstamp_config config;
5833 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5834 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5835 u32 tsync_rx_cfg = 0;
5840 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5843 /* reserved for future extensions */
5847 switch (config.tx_type) {
5848 case HWTSTAMP_TX_OFF:
5850 case HWTSTAMP_TX_ON:
5856 switch (config.rx_filter) {
5857 case HWTSTAMP_FILTER_NONE:
5860 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5861 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5862 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5863 case HWTSTAMP_FILTER_ALL:
5865 * register TSYNCRXCFG must be set, therefore it is not
5866 * possible to time stamp both Sync and Delay_Req messages
5867 * => fall back to time stamping all packets
5869 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5870 config.rx_filter = HWTSTAMP_FILTER_ALL;
5872 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5873 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5874 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5877 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5878 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5879 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5882 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5883 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5884 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5885 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5888 config.rx_filter = HWTSTAMP_FILTER_SOME;
5890 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5891 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5892 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5893 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5896 config.rx_filter = HWTSTAMP_FILTER_SOME;
5898 case HWTSTAMP_FILTER_PTP_V2_EVENT:
5899 case HWTSTAMP_FILTER_PTP_V2_SYNC:
5900 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5901 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5902 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5909 if (hw->mac.type == e1000_82575) {
5910 if (tsync_rx_ctl | tsync_tx_ctl)
5916 * Per-packet timestamping only works if all packets are
5917 * timestamped, so enable timestamping in all packets as
5918 * long as one rx filter was configured.
5920 if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
5921 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5922 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5925 /* enable/disable TX */
5926 regval = rd32(E1000_TSYNCTXCTL);
5927 regval &= ~E1000_TSYNCTXCTL_ENABLED;
5928 regval |= tsync_tx_ctl;
5929 wr32(E1000_TSYNCTXCTL, regval);
5931 /* enable/disable RX */
5932 regval = rd32(E1000_TSYNCRXCTL);
5933 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5934 regval |= tsync_rx_ctl;
5935 wr32(E1000_TSYNCRXCTL, regval);
5937 /* define which PTP packets are time stamped */
5938 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5940 /* define ethertype filter for timestamped packets */
5943 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5944 E1000_ETQF_1588 | /* enable timestamping */
5945 ETH_P_1588)); /* 1588 eth protocol type */
5947 wr32(E1000_ETQF(3), 0);
5949 #define PTP_PORT 319
5950 /* L4 Queue Filter[3]: filter by destination port and protocol */
5952 u32 ftqf = (IPPROTO_UDP /* UDP */
5953 | E1000_FTQF_VF_BP /* VF not compared */
5954 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5955 | E1000_FTQF_MASK); /* mask all inputs */
5956 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5958 wr32(E1000_IMIR(3), htons(PTP_PORT));
5959 wr32(E1000_IMIREXT(3),
5960 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5961 if (hw->mac.type == e1000_82576) {
5962 /* enable source port check */
5963 wr32(E1000_SPQF(3), htons(PTP_PORT));
5964 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5966 wr32(E1000_FTQF(3), ftqf);
5968 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5972 adapter->hwtstamp_config = config;
5974 /* clear TX/RX time stamp registers, just to be sure */
5975 regval = rd32(E1000_TXSTMPH);
5976 regval = rd32(E1000_RXSTMPH);
5978 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5988 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5994 return igb_mii_ioctl(netdev, ifr, cmd);
5996 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6002 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6004 struct igb_adapter *adapter = hw->back;
6007 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6009 return -E1000_ERR_CONFIG;
6011 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6016 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6018 struct igb_adapter *adapter = hw->back;
6021 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6023 return -E1000_ERR_CONFIG;
6025 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6030 static void igb_vlan_rx_register(struct net_device *netdev,
6031 struct vlan_group *grp)
6033 struct igb_adapter *adapter = netdev_priv(netdev);
6034 struct e1000_hw *hw = &adapter->hw;
6037 igb_irq_disable(adapter);
6038 adapter->vlgrp = grp;
6041 /* enable VLAN tag insert/strip */
6042 ctrl = rd32(E1000_CTRL);
6043 ctrl |= E1000_CTRL_VME;
6044 wr32(E1000_CTRL, ctrl);
6046 /* Disable CFI check */
6047 rctl = rd32(E1000_RCTL);
6048 rctl &= ~E1000_RCTL_CFIEN;
6049 wr32(E1000_RCTL, rctl);
6051 /* disable VLAN tag insert/strip */
6052 ctrl = rd32(E1000_CTRL);
6053 ctrl &= ~E1000_CTRL_VME;
6054 wr32(E1000_CTRL, ctrl);
6057 igb_rlpml_set(adapter);
6059 if (!test_bit(__IGB_DOWN, &adapter->state))
6060 igb_irq_enable(adapter);
6063 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6065 struct igb_adapter *adapter = netdev_priv(netdev);
6066 struct e1000_hw *hw = &adapter->hw;
6067 int pf_id = adapter->vfs_allocated_count;
6069 /* attempt to add filter to vlvf array */
6070 igb_vlvf_set(adapter, vid, true, pf_id);
6072 /* add the filter since PF can receive vlans w/o entry in vlvf */
6073 igb_vfta_set(hw, vid, true);
6076 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6078 struct igb_adapter *adapter = netdev_priv(netdev);
6079 struct e1000_hw *hw = &adapter->hw;
6080 int pf_id = adapter->vfs_allocated_count;
6083 igb_irq_disable(adapter);
6084 vlan_group_set_device(adapter->vlgrp, vid, NULL);
6086 if (!test_bit(__IGB_DOWN, &adapter->state))
6087 igb_irq_enable(adapter);
6089 /* remove vlan from VLVF table array */
6090 err = igb_vlvf_set(adapter, vid, false, pf_id);
6092 /* if vid was not present in VLVF just remove it from table */
6094 igb_vfta_set(hw, vid, false);
6097 static void igb_restore_vlan(struct igb_adapter *adapter)
6099 igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6101 if (adapter->vlgrp) {
6103 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
6104 if (!vlan_group_get_device(adapter->vlgrp, vid))
6106 igb_vlan_rx_add_vid(adapter->netdev, vid);
6111 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
6113 struct pci_dev *pdev = adapter->pdev;
6114 struct e1000_mac_info *mac = &adapter->hw.mac;
6118 /* Fiber NIC's only allow 1000 Gbps Full duplex */
6119 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6120 spddplx != (SPEED_1000 + DUPLEX_FULL)) {
6121 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6126 case SPEED_10 + DUPLEX_HALF:
6127 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6129 case SPEED_10 + DUPLEX_FULL:
6130 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6132 case SPEED_100 + DUPLEX_HALF:
6133 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6135 case SPEED_100 + DUPLEX_FULL:
6136 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6138 case SPEED_1000 + DUPLEX_FULL:
6140 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6142 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6144 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6150 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6152 struct net_device *netdev = pci_get_drvdata(pdev);
6153 struct igb_adapter *adapter = netdev_priv(netdev);
6154 struct e1000_hw *hw = &adapter->hw;
6155 u32 ctrl, rctl, status;
6156 u32 wufc = adapter->wol;
6161 netif_device_detach(netdev);
6163 if (netif_running(netdev))
6166 igb_clear_interrupt_scheme(adapter);
6169 retval = pci_save_state(pdev);
6174 status = rd32(E1000_STATUS);
6175 if (status & E1000_STATUS_LU)
6176 wufc &= ~E1000_WUFC_LNKC;
6179 igb_setup_rctl(adapter);
6180 igb_set_rx_mode(netdev);
6182 /* turn on all-multi mode if wake on multicast is enabled */
6183 if (wufc & E1000_WUFC_MC) {
6184 rctl = rd32(E1000_RCTL);
6185 rctl |= E1000_RCTL_MPE;
6186 wr32(E1000_RCTL, rctl);
6189 ctrl = rd32(E1000_CTRL);
6190 /* advertise wake from D3Cold */
6191 #define E1000_CTRL_ADVD3WUC 0x00100000
6192 /* phy power management enable */
6193 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6194 ctrl |= E1000_CTRL_ADVD3WUC;
6195 wr32(E1000_CTRL, ctrl);
6197 /* Allow time for pending master requests to run */
6198 igb_disable_pcie_master(hw);
6200 wr32(E1000_WUC, E1000_WUC_PME_EN);
6201 wr32(E1000_WUFC, wufc);
6204 wr32(E1000_WUFC, 0);
6207 *enable_wake = wufc || adapter->en_mng_pt;
6209 igb_power_down_link(adapter);
6211 igb_power_up_link(adapter);
6213 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6214 * would have already happened in close and is redundant. */
6215 igb_release_hw_control(adapter);
6217 pci_disable_device(pdev);
6223 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6228 retval = __igb_shutdown(pdev, &wake);
6233 pci_prepare_to_sleep(pdev);
6235 pci_wake_from_d3(pdev, false);
6236 pci_set_power_state(pdev, PCI_D3hot);
6242 static int igb_resume(struct pci_dev *pdev)
6244 struct net_device *netdev = pci_get_drvdata(pdev);
6245 struct igb_adapter *adapter = netdev_priv(netdev);
6246 struct e1000_hw *hw = &adapter->hw;
6249 pci_set_power_state(pdev, PCI_D0);
6250 pci_restore_state(pdev);
6251 pci_save_state(pdev);
6253 err = pci_enable_device_mem(pdev);
6256 "igb: Cannot enable PCI device from suspend\n");
6259 pci_set_master(pdev);
6261 pci_enable_wake(pdev, PCI_D3hot, 0);
6262 pci_enable_wake(pdev, PCI_D3cold, 0);
6264 if (igb_init_interrupt_scheme(adapter)) {
6265 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6271 /* let the f/w know that the h/w is now under the control of the
6273 igb_get_hw_control(adapter);
6275 wr32(E1000_WUS, ~0);
6277 if (netif_running(netdev)) {
6278 err = igb_open(netdev);
6283 netif_device_attach(netdev);
6289 static void igb_shutdown(struct pci_dev *pdev)
6293 __igb_shutdown(pdev, &wake);
6295 if (system_state == SYSTEM_POWER_OFF) {
6296 pci_wake_from_d3(pdev, wake);
6297 pci_set_power_state(pdev, PCI_D3hot);
6301 #ifdef CONFIG_NET_POLL_CONTROLLER
6303 * Polling 'interrupt' - used by things like netconsole to send skbs
6304 * without having to re-enable interrupts. It's not called while
6305 * the interrupt routine is executing.
6307 static void igb_netpoll(struct net_device *netdev)
6309 struct igb_adapter *adapter = netdev_priv(netdev);
6310 struct e1000_hw *hw = &adapter->hw;
6313 if (!adapter->msix_entries) {
6314 struct igb_q_vector *q_vector = adapter->q_vector[0];
6315 igb_irq_disable(adapter);
6316 napi_schedule(&q_vector->napi);
6320 for (i = 0; i < adapter->num_q_vectors; i++) {
6321 struct igb_q_vector *q_vector = adapter->q_vector[i];
6322 wr32(E1000_EIMC, q_vector->eims_value);
6323 napi_schedule(&q_vector->napi);
6326 #endif /* CONFIG_NET_POLL_CONTROLLER */
6329 * igb_io_error_detected - called when PCI error is detected
6330 * @pdev: Pointer to PCI device
6331 * @state: The current pci connection state
6333 * This function is called after a PCI bus error affecting
6334 * this device has been detected.
6336 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6337 pci_channel_state_t state)
6339 struct net_device *netdev = pci_get_drvdata(pdev);
6340 struct igb_adapter *adapter = netdev_priv(netdev);
6342 netif_device_detach(netdev);
6344 if (state == pci_channel_io_perm_failure)
6345 return PCI_ERS_RESULT_DISCONNECT;
6347 if (netif_running(netdev))
6349 pci_disable_device(pdev);
6351 /* Request a slot slot reset. */
6352 return PCI_ERS_RESULT_NEED_RESET;
6356 * igb_io_slot_reset - called after the pci bus has been reset.
6357 * @pdev: Pointer to PCI device
6359 * Restart the card from scratch, as if from a cold-boot. Implementation
6360 * resembles the first-half of the igb_resume routine.
6362 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6364 struct net_device *netdev = pci_get_drvdata(pdev);
6365 struct igb_adapter *adapter = netdev_priv(netdev);
6366 struct e1000_hw *hw = &adapter->hw;
6367 pci_ers_result_t result;
6370 if (pci_enable_device_mem(pdev)) {
6372 "Cannot re-enable PCI device after reset.\n");
6373 result = PCI_ERS_RESULT_DISCONNECT;
6375 pci_set_master(pdev);
6376 pci_restore_state(pdev);
6377 pci_save_state(pdev);
6379 pci_enable_wake(pdev, PCI_D3hot, 0);
6380 pci_enable_wake(pdev, PCI_D3cold, 0);
6383 wr32(E1000_WUS, ~0);
6384 result = PCI_ERS_RESULT_RECOVERED;
6387 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6389 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6390 "failed 0x%0x\n", err);
6391 /* non-fatal, continue */
6398 * igb_io_resume - called when traffic can start flowing again.
6399 * @pdev: Pointer to PCI device
6401 * This callback is called when the error recovery driver tells us that
6402 * its OK to resume normal operation. Implementation resembles the
6403 * second-half of the igb_resume routine.
6405 static void igb_io_resume(struct pci_dev *pdev)
6407 struct net_device *netdev = pci_get_drvdata(pdev);
6408 struct igb_adapter *adapter = netdev_priv(netdev);
6410 if (netif_running(netdev)) {
6411 if (igb_up(adapter)) {
6412 dev_err(&pdev->dev, "igb_up failed after reset\n");
6417 netif_device_attach(netdev);
6419 /* let the f/w know that the h/w is now under the control of the
6421 igb_get_hw_control(adapter);
6424 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6427 u32 rar_low, rar_high;
6428 struct e1000_hw *hw = &adapter->hw;
6430 /* HW expects these in little endian so we reverse the byte order
6431 * from network order (big endian) to little endian
6433 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6434 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6435 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6437 /* Indicate to hardware the Address is Valid. */
6438 rar_high |= E1000_RAH_AV;
6440 if (hw->mac.type == e1000_82575)
6441 rar_high |= E1000_RAH_POOL_1 * qsel;
6443 rar_high |= E1000_RAH_POOL_1 << qsel;
6445 wr32(E1000_RAL(index), rar_low);
6447 wr32(E1000_RAH(index), rar_high);
6451 static int igb_set_vf_mac(struct igb_adapter *adapter,
6452 int vf, unsigned char *mac_addr)
6454 struct e1000_hw *hw = &adapter->hw;
6455 /* VF MAC addresses start at end of receive addresses and moves
6456 * torwards the first, as a result a collision should not be possible */
6457 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6459 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6461 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6466 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6468 struct igb_adapter *adapter = netdev_priv(netdev);
6469 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6471 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6472 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6473 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6474 " change effective.");
6475 if (test_bit(__IGB_DOWN, &adapter->state)) {
6476 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6477 " but the PF device is not up.\n");
6478 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6479 " attempting to use the VF device.\n");
6481 return igb_set_vf_mac(adapter, vf, mac);
6484 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6489 static int igb_ndo_get_vf_config(struct net_device *netdev,
6490 int vf, struct ifla_vf_info *ivi)
6492 struct igb_adapter *adapter = netdev_priv(netdev);
6493 if (vf >= adapter->vfs_allocated_count)
6496 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6498 ivi->vlan = adapter->vf_data[vf].pf_vlan;
6499 ivi->qos = adapter->vf_data[vf].pf_qos;
6503 static void igb_vmm_control(struct igb_adapter *adapter)
6505 struct e1000_hw *hw = &adapter->hw;
6508 switch (hw->mac.type) {
6511 /* replication is not supported for 82575 */
6514 /* notify HW that the MAC is adding vlan tags */
6515 reg = rd32(E1000_DTXCTL);
6516 reg |= E1000_DTXCTL_VLAN_ADDED;
6517 wr32(E1000_DTXCTL, reg);
6519 /* enable replication vlan tag stripping */
6520 reg = rd32(E1000_RPLOLR);
6521 reg |= E1000_RPLOLR_STRVLAN;
6522 wr32(E1000_RPLOLR, reg);
6524 /* none of the above registers are supported by i350 */
6528 if (adapter->vfs_allocated_count) {
6529 igb_vmdq_set_loopback_pf(hw, true);
6530 igb_vmdq_set_replication_pf(hw, true);
6532 igb_vmdq_set_loopback_pf(hw, false);
6533 igb_vmdq_set_replication_pf(hw, false);