1 /*******************************************************************************
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2009 Intel Corporation.
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
26 *******************************************************************************/
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
49 #include <linux/dca.h>
53 #define DRV_VERSION "2.1.0-k2"
54 char igb_driver_name[] = "igb";
55 char igb_driver_version[] = DRV_VERSION;
56 static const char igb_driver_string[] =
57 "Intel(R) Gigabit Ethernet Network Driver";
58 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
60 static const struct e1000_info *igb_info_tbl[] = {
61 [board_82575] = &e1000_82575_info,
64 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
65 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
66 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
67 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
68 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
69 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
70 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
71 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
72 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
73 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
74 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
75 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
76 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
77 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
81 /* required last entry */
85 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
87 void igb_reset(struct igb_adapter *);
88 static int igb_setup_all_tx_resources(struct igb_adapter *);
89 static int igb_setup_all_rx_resources(struct igb_adapter *);
90 static void igb_free_all_tx_resources(struct igb_adapter *);
91 static void igb_free_all_rx_resources(struct igb_adapter *);
92 static void igb_setup_mrqc(struct igb_adapter *);
93 void igb_update_stats(struct igb_adapter *);
94 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
95 static void __devexit igb_remove(struct pci_dev *pdev);
96 static int igb_sw_init(struct igb_adapter *);
97 static int igb_open(struct net_device *);
98 static int igb_close(struct net_device *);
99 static void igb_configure_tx(struct igb_adapter *);
100 static void igb_configure_rx(struct igb_adapter *);
101 static void igb_clean_all_tx_rings(struct igb_adapter *);
102 static void igb_clean_all_rx_rings(struct igb_adapter *);
103 static void igb_clean_tx_ring(struct igb_ring *);
104 static void igb_clean_rx_ring(struct igb_ring *);
105 static void igb_set_rx_mode(struct net_device *);
106 static void igb_update_phy_info(unsigned long);
107 static void igb_watchdog(unsigned long);
108 static void igb_watchdog_task(struct work_struct *);
109 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
110 static struct net_device_stats *igb_get_stats(struct net_device *);
111 static int igb_change_mtu(struct net_device *, int);
112 static int igb_set_mac(struct net_device *, void *);
113 static void igb_set_uta(struct igb_adapter *adapter);
114 static irqreturn_t igb_intr(int irq, void *);
115 static irqreturn_t igb_intr_msi(int irq, void *);
116 static irqreturn_t igb_msix_other(int irq, void *);
117 static irqreturn_t igb_msix_ring(int irq, void *);
118 #ifdef CONFIG_IGB_DCA
119 static void igb_update_dca(struct igb_q_vector *);
120 static void igb_setup_dca(struct igb_adapter *);
121 #endif /* CONFIG_IGB_DCA */
122 static bool igb_clean_tx_irq(struct igb_q_vector *);
123 static int igb_poll(struct napi_struct *, int);
124 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
125 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
126 static void igb_tx_timeout(struct net_device *);
127 static void igb_reset_task(struct work_struct *);
128 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
129 static void igb_vlan_rx_add_vid(struct net_device *, u16);
130 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
131 static void igb_restore_vlan(struct igb_adapter *);
132 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
133 static void igb_ping_all_vfs(struct igb_adapter *);
134 static void igb_msg_task(struct igb_adapter *);
135 static void igb_vmm_control(struct igb_adapter *);
136 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
137 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
138 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
139 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
140 int vf, u16 vlan, u8 qos);
141 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
142 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
143 struct ifla_vf_info *ivi);
146 static int igb_suspend(struct pci_dev *, pm_message_t);
147 static int igb_resume(struct pci_dev *);
149 static void igb_shutdown(struct pci_dev *);
150 #ifdef CONFIG_IGB_DCA
151 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
152 static struct notifier_block dca_notifier = {
153 .notifier_call = igb_notify_dca,
158 #ifdef CONFIG_NET_POLL_CONTROLLER
159 /* for netdump / net console */
160 static void igb_netpoll(struct net_device *);
162 #ifdef CONFIG_PCI_IOV
163 static unsigned int max_vfs = 0;
164 module_param(max_vfs, uint, 0);
165 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
166 "per physical function");
167 #endif /* CONFIG_PCI_IOV */
169 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
170 pci_channel_state_t);
171 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
172 static void igb_io_resume(struct pci_dev *);
174 static struct pci_error_handlers igb_err_handler = {
175 .error_detected = igb_io_error_detected,
176 .slot_reset = igb_io_slot_reset,
177 .resume = igb_io_resume,
181 static struct pci_driver igb_driver = {
182 .name = igb_driver_name,
183 .id_table = igb_pci_tbl,
185 .remove = __devexit_p(igb_remove),
187 /* Power Managment Hooks */
188 .suspend = igb_suspend,
189 .resume = igb_resume,
191 .shutdown = igb_shutdown,
192 .err_handler = &igb_err_handler
195 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
196 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
197 MODULE_LICENSE("GPL");
198 MODULE_VERSION(DRV_VERSION);
201 * igb_read_clock - read raw cycle counter (to be used by time counter)
203 static cycle_t igb_read_clock(const struct cyclecounter *tc)
205 struct igb_adapter *adapter =
206 container_of(tc, struct igb_adapter, cycles);
207 struct e1000_hw *hw = &adapter->hw;
212 * The timestamp latches on lowest register read. For the 82580
213 * the lowest register is SYSTIMR instead of SYSTIML. However we never
214 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
216 if (hw->mac.type == e1000_82580) {
217 stamp = rd32(E1000_SYSTIMR) >> 8;
218 shift = IGB_82580_TSYNC_SHIFT;
221 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
222 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
228 * igb_get_hw_dev_name - return device name string
229 * used by hardware layer to print debugging information
231 char *igb_get_hw_dev_name(struct e1000_hw *hw)
233 struct igb_adapter *adapter = hw->back;
234 return adapter->netdev->name;
238 * igb_get_time_str - format current NIC and system time as string
240 static char *igb_get_time_str(struct igb_adapter *adapter,
243 cycle_t hw = adapter->cycles.read(&adapter->cycles);
244 struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
246 struct timespec delta;
247 getnstimeofday(&sys);
249 delta = timespec_sub(nic, sys);
252 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
254 (long)nic.tv_sec, nic.tv_nsec,
255 (long)sys.tv_sec, sys.tv_nsec,
256 (long)delta.tv_sec, delta.tv_nsec);
263 * igb_init_module - Driver Registration Routine
265 * igb_init_module is the first routine called when the driver is
266 * loaded. All it does is register with the PCI subsystem.
268 static int __init igb_init_module(void)
271 printk(KERN_INFO "%s - version %s\n",
272 igb_driver_string, igb_driver_version);
274 printk(KERN_INFO "%s\n", igb_copyright);
276 #ifdef CONFIG_IGB_DCA
277 dca_register_notify(&dca_notifier);
279 ret = pci_register_driver(&igb_driver);
283 module_init(igb_init_module);
286 * igb_exit_module - Driver Exit Cleanup Routine
288 * igb_exit_module is called just before the driver is removed
291 static void __exit igb_exit_module(void)
293 #ifdef CONFIG_IGB_DCA
294 dca_unregister_notify(&dca_notifier);
296 pci_unregister_driver(&igb_driver);
299 module_exit(igb_exit_module);
301 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
303 * igb_cache_ring_register - Descriptor ring to register mapping
304 * @adapter: board private structure to initialize
306 * Once we know the feature-set enabled for the device, we'll cache
307 * the register offset the descriptor ring is assigned to.
309 static void igb_cache_ring_register(struct igb_adapter *adapter)
312 u32 rbase_offset = adapter->vfs_allocated_count;
314 switch (adapter->hw.mac.type) {
316 /* The queues are allocated for virtualization such that VF 0
317 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
318 * In order to avoid collision we start at the first free queue
319 * and continue consuming queues in the same sequence
321 if (adapter->vfs_allocated_count) {
322 for (; i < adapter->rss_queues; i++)
323 adapter->rx_ring[i]->reg_idx = rbase_offset +
325 for (; j < adapter->rss_queues; j++)
326 adapter->tx_ring[j]->reg_idx = rbase_offset +
332 for (; i < adapter->num_rx_queues; i++)
333 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
334 for (; j < adapter->num_tx_queues; j++)
335 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
340 static void igb_free_queues(struct igb_adapter *adapter)
344 for (i = 0; i < adapter->num_tx_queues; i++) {
345 kfree(adapter->tx_ring[i]);
346 adapter->tx_ring[i] = NULL;
348 for (i = 0; i < adapter->num_rx_queues; i++) {
349 kfree(adapter->rx_ring[i]);
350 adapter->rx_ring[i] = NULL;
352 adapter->num_rx_queues = 0;
353 adapter->num_tx_queues = 0;
357 * igb_alloc_queues - Allocate memory for all rings
358 * @adapter: board private structure to initialize
360 * We allocate one ring per queue at run-time since we don't know the
361 * number of queues at compile-time.
363 static int igb_alloc_queues(struct igb_adapter *adapter)
365 struct igb_ring *ring;
368 for (i = 0; i < adapter->num_tx_queues; i++) {
369 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
372 ring->count = adapter->tx_ring_count;
373 ring->queue_index = i;
374 ring->pdev = adapter->pdev;
375 ring->netdev = adapter->netdev;
376 /* For 82575, context index must be unique per ring. */
377 if (adapter->hw.mac.type == e1000_82575)
378 ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
379 adapter->tx_ring[i] = ring;
382 for (i = 0; i < adapter->num_rx_queues; i++) {
383 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
386 ring->count = adapter->rx_ring_count;
387 ring->queue_index = i;
388 ring->pdev = adapter->pdev;
389 ring->netdev = adapter->netdev;
390 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
391 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
392 /* set flag indicating ring supports SCTP checksum offload */
393 if (adapter->hw.mac.type >= e1000_82576)
394 ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
395 adapter->rx_ring[i] = ring;
398 igb_cache_ring_register(adapter);
403 igb_free_queues(adapter);
408 #define IGB_N0_QUEUE -1
409 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
412 struct igb_adapter *adapter = q_vector->adapter;
413 struct e1000_hw *hw = &adapter->hw;
415 int rx_queue = IGB_N0_QUEUE;
416 int tx_queue = IGB_N0_QUEUE;
418 if (q_vector->rx_ring)
419 rx_queue = q_vector->rx_ring->reg_idx;
420 if (q_vector->tx_ring)
421 tx_queue = q_vector->tx_ring->reg_idx;
423 switch (hw->mac.type) {
425 /* The 82575 assigns vectors using a bitmask, which matches the
426 bitmask for the EICR/EIMS/EIMC registers. To assign one
427 or more queues to a vector, we write the appropriate bits
428 into the MSIXBM register for that vector. */
429 if (rx_queue > IGB_N0_QUEUE)
430 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
431 if (tx_queue > IGB_N0_QUEUE)
432 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
433 if (!adapter->msix_entries && msix_vector == 0)
434 msixbm |= E1000_EIMS_OTHER;
435 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
436 q_vector->eims_value = msixbm;
439 /* 82576 uses a table-based method for assigning vectors.
440 Each queue has a single entry in the table to which we write
441 a vector number along with a "valid" bit. Sadly, the layout
442 of the table is somewhat counterintuitive. */
443 if (rx_queue > IGB_N0_QUEUE) {
444 index = (rx_queue & 0x7);
445 ivar = array_rd32(E1000_IVAR0, index);
447 /* vector goes into low byte of register */
448 ivar = ivar & 0xFFFFFF00;
449 ivar |= msix_vector | E1000_IVAR_VALID;
451 /* vector goes into third byte of register */
452 ivar = ivar & 0xFF00FFFF;
453 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
455 array_wr32(E1000_IVAR0, index, ivar);
457 if (tx_queue > IGB_N0_QUEUE) {
458 index = (tx_queue & 0x7);
459 ivar = array_rd32(E1000_IVAR0, index);
461 /* vector goes into second byte of register */
462 ivar = ivar & 0xFFFF00FF;
463 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
465 /* vector goes into high byte of register */
466 ivar = ivar & 0x00FFFFFF;
467 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
469 array_wr32(E1000_IVAR0, index, ivar);
471 q_vector->eims_value = 1 << msix_vector;
474 /* 82580 uses the same table-based approach as 82576 but has fewer
475 entries as a result we carry over for queues greater than 4. */
476 if (rx_queue > IGB_N0_QUEUE) {
477 index = (rx_queue >> 1);
478 ivar = array_rd32(E1000_IVAR0, index);
479 if (rx_queue & 0x1) {
480 /* vector goes into third byte of register */
481 ivar = ivar & 0xFF00FFFF;
482 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
484 /* vector goes into low byte of register */
485 ivar = ivar & 0xFFFFFF00;
486 ivar |= msix_vector | E1000_IVAR_VALID;
488 array_wr32(E1000_IVAR0, index, ivar);
490 if (tx_queue > IGB_N0_QUEUE) {
491 index = (tx_queue >> 1);
492 ivar = array_rd32(E1000_IVAR0, index);
493 if (tx_queue & 0x1) {
494 /* vector goes into high byte of register */
495 ivar = ivar & 0x00FFFFFF;
496 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
498 /* vector goes into second byte of register */
499 ivar = ivar & 0xFFFF00FF;
500 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
502 array_wr32(E1000_IVAR0, index, ivar);
504 q_vector->eims_value = 1 << msix_vector;
511 /* add q_vector eims value to global eims_enable_mask */
512 adapter->eims_enable_mask |= q_vector->eims_value;
514 /* configure q_vector to set itr on first interrupt */
515 q_vector->set_itr = 1;
519 * igb_configure_msix - Configure MSI-X hardware
521 * igb_configure_msix sets up the hardware to properly
522 * generate MSI-X interrupts.
524 static void igb_configure_msix(struct igb_adapter *adapter)
528 struct e1000_hw *hw = &adapter->hw;
530 adapter->eims_enable_mask = 0;
532 /* set vector for other causes, i.e. link changes */
533 switch (hw->mac.type) {
535 tmp = rd32(E1000_CTRL_EXT);
536 /* enable MSI-X PBA support*/
537 tmp |= E1000_CTRL_EXT_PBA_CLR;
539 /* Auto-Mask interrupts upon ICR read. */
540 tmp |= E1000_CTRL_EXT_EIAME;
541 tmp |= E1000_CTRL_EXT_IRCA;
543 wr32(E1000_CTRL_EXT, tmp);
545 /* enable msix_other interrupt */
546 array_wr32(E1000_MSIXBM(0), vector++,
548 adapter->eims_other = E1000_EIMS_OTHER;
554 /* Turn on MSI-X capability first, or our settings
555 * won't stick. And it will take days to debug. */
556 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
557 E1000_GPIE_PBA | E1000_GPIE_EIAME |
560 /* enable msix_other interrupt */
561 adapter->eims_other = 1 << vector;
562 tmp = (vector++ | E1000_IVAR_VALID) << 8;
564 wr32(E1000_IVAR_MISC, tmp);
567 /* do nothing, since nothing else supports MSI-X */
569 } /* switch (hw->mac.type) */
571 adapter->eims_enable_mask |= adapter->eims_other;
573 for (i = 0; i < adapter->num_q_vectors; i++)
574 igb_assign_vector(adapter->q_vector[i], vector++);
580 * igb_request_msix - Initialize MSI-X interrupts
582 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
585 static int igb_request_msix(struct igb_adapter *adapter)
587 struct net_device *netdev = adapter->netdev;
588 struct e1000_hw *hw = &adapter->hw;
589 int i, err = 0, vector = 0;
591 err = request_irq(adapter->msix_entries[vector].vector,
592 igb_msix_other, 0, netdev->name, adapter);
597 for (i = 0; i < adapter->num_q_vectors; i++) {
598 struct igb_q_vector *q_vector = adapter->q_vector[i];
600 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
602 if (q_vector->rx_ring && q_vector->tx_ring)
603 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
604 q_vector->rx_ring->queue_index);
605 else if (q_vector->tx_ring)
606 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
607 q_vector->tx_ring->queue_index);
608 else if (q_vector->rx_ring)
609 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
610 q_vector->rx_ring->queue_index);
612 sprintf(q_vector->name, "%s-unused", netdev->name);
614 err = request_irq(adapter->msix_entries[vector].vector,
615 igb_msix_ring, 0, q_vector->name,
622 igb_configure_msix(adapter);
628 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
630 if (adapter->msix_entries) {
631 pci_disable_msix(adapter->pdev);
632 kfree(adapter->msix_entries);
633 adapter->msix_entries = NULL;
634 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
635 pci_disable_msi(adapter->pdev);
640 * igb_free_q_vectors - Free memory allocated for interrupt vectors
641 * @adapter: board private structure to initialize
643 * This function frees the memory allocated to the q_vectors. In addition if
644 * NAPI is enabled it will delete any references to the NAPI struct prior
645 * to freeing the q_vector.
647 static void igb_free_q_vectors(struct igb_adapter *adapter)
651 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
652 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
653 adapter->q_vector[v_idx] = NULL;
656 netif_napi_del(&q_vector->napi);
659 adapter->num_q_vectors = 0;
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}
676 * igb_set_interrupt_capability - set MSI or MSI-X if supported
678 * Attempt to configure interrupts using the best available
679 * capabilities of the hardware and kernel.
681 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
686 /* Number of supported queues. */
687 adapter->num_rx_queues = adapter->rss_queues;
688 adapter->num_tx_queues = adapter->rss_queues;
690 /* start with one vector for every rx queue */
691 numvecs = adapter->num_rx_queues;
693 /* if tx handler is separate add 1 for every tx queue */
694 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
695 numvecs += adapter->num_tx_queues;
697 /* store the number of vectors reserved for queues */
698 adapter->num_q_vectors = numvecs;
700 /* add 1 vector for link status interrupts */
702 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
704 if (!adapter->msix_entries)
707 for (i = 0; i < numvecs; i++)
708 adapter->msix_entries[i].entry = i;
710 err = pci_enable_msix(adapter->pdev,
711 adapter->msix_entries,
716 igb_reset_interrupt_capability(adapter);
718 /* If we can't do MSI-X, try MSI */
720 #ifdef CONFIG_PCI_IOV
721 /* disable SR-IOV for non MSI-X configurations */
722 if (adapter->vf_data) {
723 struct e1000_hw *hw = &adapter->hw;
724 /* disable iov and allow time for transactions to clear */
725 pci_disable_sriov(adapter->pdev);
728 kfree(adapter->vf_data);
729 adapter->vf_data = NULL;
730 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
732 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
735 adapter->vfs_allocated_count = 0;
736 adapter->rss_queues = 1;
737 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
738 adapter->num_rx_queues = 1;
739 adapter->num_tx_queues = 1;
740 adapter->num_q_vectors = 1;
741 if (!pci_enable_msi(adapter->pdev))
742 adapter->flags |= IGB_FLAG_HAS_MSI;
744 /* Notify the stack of the (possibly) reduced Tx Queue count. */
745 adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
750 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
751 * @adapter: board private structure to initialize
753 * We allocate one q_vector per queue interrupt. If allocation fails we
756 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
758 struct igb_q_vector *q_vector;
759 struct e1000_hw *hw = &adapter->hw;
762 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
763 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
766 q_vector->adapter = adapter;
767 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
768 q_vector->itr_val = IGB_START_ITR;
769 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
770 adapter->q_vector[v_idx] = q_vector;
775 igb_free_q_vectors(adapter);
779 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
780 int ring_idx, int v_idx)
782 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
784 q_vector->rx_ring = adapter->rx_ring[ring_idx];
785 q_vector->rx_ring->q_vector = q_vector;
786 q_vector->itr_val = adapter->rx_itr_setting;
787 if (q_vector->itr_val && q_vector->itr_val <= 3)
788 q_vector->itr_val = IGB_START_ITR;
791 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
792 int ring_idx, int v_idx)
794 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
796 q_vector->tx_ring = adapter->tx_ring[ring_idx];
797 q_vector->tx_ring->q_vector = q_vector;
798 q_vector->itr_val = adapter->tx_itr_setting;
799 if (q_vector->itr_val && q_vector->itr_val <= 3)
800 q_vector->itr_val = IGB_START_ITR;
804 * igb_map_ring_to_vector - maps allocated queues to vectors
806 * This function maps the recently allocated queues to vectors.
808 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
813 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
814 (adapter->num_q_vectors < adapter->num_tx_queues))
817 if (adapter->num_q_vectors >=
818 (adapter->num_rx_queues + adapter->num_tx_queues)) {
819 for (i = 0; i < adapter->num_rx_queues; i++)
820 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
821 for (i = 0; i < adapter->num_tx_queues; i++)
822 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
824 for (i = 0; i < adapter->num_rx_queues; i++) {
825 if (i < adapter->num_tx_queues)
826 igb_map_tx_ring_to_vector(adapter, i, v_idx);
827 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
829 for (; i < adapter->num_tx_queues; i++)
830 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
836 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
838 * This function initializes the interrupts and allocates all of the queues.
840 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
842 struct pci_dev *pdev = adapter->pdev;
845 igb_set_interrupt_capability(adapter);
847 err = igb_alloc_q_vectors(adapter);
849 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
850 goto err_alloc_q_vectors;
853 err = igb_alloc_queues(adapter);
855 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
856 goto err_alloc_queues;
859 err = igb_map_ring_to_vector(adapter);
861 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
868 igb_free_queues(adapter);
870 igb_free_q_vectors(adapter);
872 igb_reset_interrupt_capability(adapter);
877 * igb_request_irq - initialize interrupts
879 * Attempts to configure interrupts using the best available
880 * capabilities of the hardware and kernel.
882 static int igb_request_irq(struct igb_adapter *adapter)
884 struct net_device *netdev = adapter->netdev;
885 struct pci_dev *pdev = adapter->pdev;
888 if (adapter->msix_entries) {
889 err = igb_request_msix(adapter);
892 /* fall back to MSI */
893 igb_clear_interrupt_scheme(adapter);
894 if (!pci_enable_msi(adapter->pdev))
895 adapter->flags |= IGB_FLAG_HAS_MSI;
896 igb_free_all_tx_resources(adapter);
897 igb_free_all_rx_resources(adapter);
898 adapter->num_tx_queues = 1;
899 adapter->num_rx_queues = 1;
900 adapter->num_q_vectors = 1;
901 err = igb_alloc_q_vectors(adapter);
904 "Unable to allocate memory for vectors\n");
907 err = igb_alloc_queues(adapter);
910 "Unable to allocate memory for queues\n");
911 igb_free_q_vectors(adapter);
914 igb_setup_all_tx_resources(adapter);
915 igb_setup_all_rx_resources(adapter);
917 igb_assign_vector(adapter->q_vector[0], 0);
920 if (adapter->flags & IGB_FLAG_HAS_MSI) {
921 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
922 netdev->name, adapter);
926 /* fall back to legacy interrupts */
927 igb_reset_interrupt_capability(adapter);
928 adapter->flags &= ~IGB_FLAG_HAS_MSI;
931 err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
932 netdev->name, adapter);
935 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
942 static void igb_free_irq(struct igb_adapter *adapter)
944 if (adapter->msix_entries) {
947 free_irq(adapter->msix_entries[vector++].vector, adapter);
949 for (i = 0; i < adapter->num_q_vectors; i++) {
950 struct igb_q_vector *q_vector = adapter->q_vector[i];
951 free_irq(adapter->msix_entries[vector++].vector,
955 free_irq(adapter->pdev->irq, adapter);
960 * igb_irq_disable - Mask off interrupt generation on the NIC
961 * @adapter: board private structure
963 static void igb_irq_disable(struct igb_adapter *adapter)
965 struct e1000_hw *hw = &adapter->hw;
968 * we need to be careful when disabling interrupts. The VFs are also
969 * mapped into these registers and so clearing the bits can cause
970 * issues on the VF drivers so we only need to clear what we set
972 if (adapter->msix_entries) {
973 u32 regval = rd32(E1000_EIAM);
974 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
975 wr32(E1000_EIMC, adapter->eims_enable_mask);
976 regval = rd32(E1000_EIAC);
977 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
983 synchronize_irq(adapter->pdev->irq);
987 * igb_irq_enable - Enable default interrupt generation settings
988 * @adapter: board private structure
990 static void igb_irq_enable(struct igb_adapter *adapter)
992 struct e1000_hw *hw = &adapter->hw;
994 if (adapter->msix_entries) {
995 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
996 u32 regval = rd32(E1000_EIAC);
997 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
998 regval = rd32(E1000_EIAM);
999 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1000 wr32(E1000_EIMS, adapter->eims_enable_mask);
1001 if (adapter->vfs_allocated_count) {
1002 wr32(E1000_MBVFIMR, 0xFF);
1003 ims |= E1000_IMS_VMMB;
1005 if (adapter->hw.mac.type == e1000_82580)
1006 ims |= E1000_IMS_DRSTA;
1008 wr32(E1000_IMS, ims);
1010 wr32(E1000_IMS, IMS_ENABLE_MASK |
1012 wr32(E1000_IAM, IMS_ENABLE_MASK |
1017 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1019 struct e1000_hw *hw = &adapter->hw;
1020 u16 vid = adapter->hw.mng_cookie.vlan_id;
1021 u16 old_vid = adapter->mng_vlan_id;
1023 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1024 /* add VID to filter table */
1025 igb_vfta_set(hw, vid, true);
1026 adapter->mng_vlan_id = vid;
1028 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1031 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1033 !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1034 /* remove VID from filter table */
1035 igb_vfta_set(hw, old_vid, false);
1040 * igb_release_hw_control - release control of the h/w to f/w
1041 * @adapter: address of board private structure
1043 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1044 * For ASF and Pass Through versions of f/w this means that the
1045 * driver is no longer loaded.
1048 static void igb_release_hw_control(struct igb_adapter *adapter)
1050 struct e1000_hw *hw = &adapter->hw;
1053 /* Let firmware take over control of h/w */
1054 ctrl_ext = rd32(E1000_CTRL_EXT);
1055 wr32(E1000_CTRL_EXT,
1056 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1060 * igb_get_hw_control - get control of the h/w from f/w
1061 * @adapter: address of board private structure
1063 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1064 * For ASF and Pass Through versions of f/w this means that
1065 * the driver is loaded.
1068 static void igb_get_hw_control(struct igb_adapter *adapter)
1070 struct e1000_hw *hw = &adapter->hw;
1073 /* Let firmware know the driver has taken over */
1074 ctrl_ext = rd32(E1000_CTRL_EXT);
1075 wr32(E1000_CTRL_EXT,
1076 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1080 * igb_configure - configure the hardware for RX and TX
1081 * @adapter: private board structure
1083 static void igb_configure(struct igb_adapter *adapter)
1085 struct net_device *netdev = adapter->netdev;
1088 igb_get_hw_control(adapter);
1089 igb_set_rx_mode(netdev);
1091 igb_restore_vlan(adapter);
1093 igb_setup_tctl(adapter);
1094 igb_setup_mrqc(adapter);
1095 igb_setup_rctl(adapter);
1097 igb_configure_tx(adapter);
1098 igb_configure_rx(adapter);
1100 igb_rx_fifo_flush_82575(&adapter->hw);
1102 /* call igb_desc_unused which always leaves
1103 * at least 1 descriptor unused to make sure
1104 * next_to_use != next_to_clean */
1105 for (i = 0; i < adapter->num_rx_queues; i++) {
1106 struct igb_ring *ring = adapter->rx_ring[i];
1107 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1112 * igb_power_up_link - Power up the phy/serdes link
1113 * @adapter: address of board private structure
1115 void igb_power_up_link(struct igb_adapter *adapter)
1117 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1118 igb_power_up_phy_copper(&adapter->hw);
1120 igb_power_up_serdes_link_82575(&adapter->hw);
1124 * igb_power_down_link - Power down the phy/serdes link
1125 * @adapter: address of board private structure
/* Power down the link: the copper PHY for copper media, otherwise shut
 * down the 82575 serdes link.
 * NOTE(review): the `else` line between the two calls is not visible —
 * file appears truncated; confirm against upstream. */
1127 static void igb_power_down_link(struct igb_adapter *adapter)
1129 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1130 igb_power_down_phy_copper_82575(&adapter->hw);
1132 igb_shutdown_serdes_link_82575(&adapter->hw);
1136 * igb_up - Open the interface and prepare it to handle traffic
1137 * @adapter: board private structure
/* Bring the interface up after a reset: reconfigure the hardware, clear the
 * DOWN state bit, enable NAPI on every q_vector, (re)configure interrupts,
 * notify any VFs that the PF reset is done, start the Tx queues and kick the
 * watchdog to pick up link state.
 * NOTE(review): braces and the `int i;` declaration are missing from view;
 * the return statement is also not visible although the function returns int. */
1139 int igb_up(struct igb_adapter *adapter)
1141 struct e1000_hw *hw = &adapter->hw;
1144 /* hardware has been reset, we need to reload some things */
1145 igb_configure(adapter);
1147 clear_bit(__IGB_DOWN, &adapter->state);
1149 for (i = 0; i < adapter->num_q_vectors; i++) {
1150 struct igb_q_vector *q_vector = adapter->q_vector[i];
1151 napi_enable(&q_vector->napi);
/* MSI-X gets per-vector routing; legacy/MSI uses vector 0 only. */
1153 if (adapter->msix_entries)
1154 igb_configure_msix(adapter);
1156 igb_assign_vector(adapter->q_vector[0], 0);
1158 /* Clear any pending interrupts. */
1160 igb_irq_enable(adapter);
1162 /* notify VFs that reset has been completed */
1163 if (adapter->vfs_allocated_count) {
1164 u32 reg_data = rd32(E1000_CTRL_EXT);
1165 reg_data |= E1000_CTRL_EXT_PFRSTD;
1166 wr32(E1000_CTRL_EXT, reg_data);
1169 netif_tx_start_all_queues(adapter->netdev);
1171 /* start the watchdog. */
1172 hw->mac.get_link_status = 1;
1173 schedule_work(&adapter->watchdog_task);
/* Take the interface down: mark the adapter DOWN so timers/IRQ handlers stop
 * rescheduling work, disable Rx then Tx in hardware, quiesce NAPI, disable
 * interrupts, stop the timers, record final stats and clean all rings.
 * NOTE(review): braces, local declarations (rctl/tctl/i) and several lines
 * (e.g. the reset call guarded by pci_channel_offline at original line 1223)
 * are not visible — file appears truncated; verify against upstream. */
1178 void igb_down(struct igb_adapter *adapter)
1180 struct net_device *netdev = adapter->netdev;
1181 struct e1000_hw *hw = &adapter->hw;
1185 /* signal that we're down so the interrupt handler does not
1186 * reschedule our watchdog timer */
1187 set_bit(__IGB_DOWN, &adapter->state);
1189 /* disable receives in the hardware */
1190 rctl = rd32(E1000_RCTL);
1191 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1192 /* flush and sleep below */
1194 netif_tx_stop_all_queues(netdev);
1196 /* disable transmits in the hardware */
1197 tctl = rd32(E1000_TCTL);
1198 tctl &= ~E1000_TCTL_EN;
1199 wr32(E1000_TCTL, tctl);
1200 /* flush both disables and wait for them to finish */
1204 for (i = 0; i < adapter->num_q_vectors; i++) {
1205 struct igb_q_vector *q_vector = adapter->q_vector[i];
1206 napi_disable(&q_vector->napi);
1209 igb_irq_disable(adapter);
1211 del_timer_sync(&adapter->watchdog_timer);
1212 del_timer_sync(&adapter->phy_info_timer);
1214 netif_carrier_off(netdev);
1216 /* record the stats before reset*/
1217 igb_update_stats(adapter);
1219 adapter->link_speed = 0;
1220 adapter->link_duplex = 0;
1222 if (!pci_channel_offline(adapter->pdev))
1224 igb_clean_all_tx_rings(adapter);
1225 igb_clean_all_rx_rings(adapter);
1226 #ifdef CONFIG_IGB_DCA
1228 /* since we reset the hardware DCA settings were cleared */
1229 igb_setup_dca(adapter);
/* Serialize a full down/up cycle against concurrent resets using the
 * __IGB_RESETTING state bit (spin-waits until the bit is free).
 * NOTE(review): the wait-loop body and the igb_down()/igb_up() calls are
 * not visible — file appears truncated; verify against upstream. */
1233 void igb_reinit_locked(struct igb_adapter *adapter)
1235 WARN_ON(in_interrupt());
1236 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1240 clear_bit(__IGB_RESETTING, &adapter->state);
/* Full adapter reset: repartition the packet buffer (PBA) for jumbo frames,
 * derive flow-control watermarks from the resulting Rx FIFO size, quiesce any
 * VFs, reset and re-init the MAC, then restore power/VLAN/PHY state.
 * NOTE(review): braces, several declarations (hwm, i) and some case labels /
 * statements are missing from view — file appears truncated; verify against
 * upstream igb_main.c before acting on line-level details. */
1243 void igb_reset(struct igb_adapter *adapter)
1245 struct pci_dev *pdev = adapter->pdev;
1246 struct e1000_hw *hw = &adapter->hw;
1247 struct e1000_mac_info *mac = &hw->mac;
1248 struct e1000_fc_info *fc = &hw->fc;
1249 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1252 /* Repartition Pba for greater than 9k mtu
1253 * To take effect CTRL.RST is required.
/* NOTE(review): the case labels for this switch are not visible. */
1255 switch (mac->type) {
1257 pba = rd32(E1000_RXPBS);
1258 pba = igb_rxpbs_adjust_82580(pba);
1261 pba = rd32(E1000_RXPBS);
1262 pba &= E1000_RXPBS_SIZE_MASK_82576;
1266 pba = E1000_PBA_34K;
/* Jumbo-frame PBA rebalancing only applies to pre-82576 parts. */
1270 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1271 (mac->type < e1000_82576)) {
1272 /* adjust PBA for jumbo frames */
1273 wr32(E1000_PBA, pba);
1275 /* To maintain wire speed transmits, the Tx FIFO should be
1276 * large enough to accommodate two full transmit packets,
1277 * rounded up to the next 1KB and expressed in KB. Likewise,
1278 * the Rx FIFO should be large enough to accommodate at least
1279 * one full receive packet and is similarly rounded up and
1280 * expressed in KB. */
1281 pba = rd32(E1000_PBA);
1282 /* upper 16 bits has Tx packet buffer allocation size in KB */
1283 tx_space = pba >> 16;
1284 /* lower 16 bits has Rx packet buffer allocation size in KB */
1286 /* the tx fifo also stores 16 bytes of information about the tx
1287 * but don't include ethernet FCS because hardware appends it */
1288 min_tx_space = (adapter->max_frame_size +
1289 sizeof(union e1000_adv_tx_desc) -
1291 min_tx_space = ALIGN(min_tx_space, 1024);
1292 min_tx_space >>= 10;
1293 /* software strips receive CRC, so leave room for it */
1294 min_rx_space = adapter->max_frame_size;
1295 min_rx_space = ALIGN(min_rx_space, 1024);
1296 min_rx_space >>= 10;
1298 /* If current Tx allocation is less than the min Tx FIFO size,
1299 * and the min Tx FIFO size is less than the current Rx FIFO
1300 * allocation, take space away from current Rx allocation */
1301 if (tx_space < min_tx_space &&
1302 ((min_tx_space - tx_space) < pba)) {
1303 pba = pba - (min_tx_space - tx_space);
1305 /* if short on rx space, rx wins and must trump tx
1307 if (pba < min_rx_space)
1310 wr32(E1000_PBA, pba);
1313 /* flow control settings */
1314 /* The high water mark must be low enough to fit one full frame
1315 * (or the size used for early receive) above it in the Rx FIFO.
1316 * Set it to the lower of:
1317 * - 90% of the Rx FIFO size, or
1318 * - the full Rx FIFO size minus one full frame */
1319 hwm = min(((pba << 10) * 9 / 10),
1320 ((pba << 10) - 2 * adapter->max_frame_size));
1322 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1323 fc->low_water = fc->high_water - 16;
1324 fc->pause_time = 0xFFFF;
1326 fc->current_mode = fc->requested_mode;
1328 /* disable receive for all VFs and wait one second */
1329 if (adapter->vfs_allocated_count) {
1331 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1332 adapter->vf_data[i].flags = 0;
1334 /* ping all the active vfs to let them know we are going down */
1335 igb_ping_all_vfs(adapter);
1337 /* disable transmits and receives */
1338 wr32(E1000_VFRE, 0);
1339 wr32(E1000_VFTE, 0);
1342 /* Allow time for pending master requests to run */
1343 hw->mac.ops.reset_hw(hw);
1346 if (hw->mac.ops.init_hw(hw))
1347 dev_err(&pdev->dev, "Hardware Error\n");
/* 82580 erratum workaround: clear the LX power-decision bit. */
1349 if (hw->mac.type == e1000_82580) {
1350 u32 reg = rd32(E1000_PCIEMISC);
1351 wr32(E1000_PCIEMISC,
1352 reg & ~E1000_PCIEMISC_LX_DECISION);
1354 if (!netif_running(adapter->netdev))
1355 igb_power_down_link(adapter);
1357 igb_update_mng_vlan(adapter);
1359 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1360 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1362 igb_get_phy_info(hw);
/* net_device_ops dispatch table wiring the network stack's entry points to
 * this driver's handlers (open/close, xmit, MTU, ioctl, VLAN and SR-IOV VF
 * configuration, plus netpoll when enabled).
 * NOTE(review): the closing `};` and the #endif matching CONFIG_NET_POLL_CONTROLLER
 * are not visible — file appears truncated. */
1365 static const struct net_device_ops igb_netdev_ops = {
1366 .ndo_open = igb_open,
1367 .ndo_stop = igb_close,
1368 .ndo_start_xmit = igb_xmit_frame_adv,
1369 .ndo_get_stats = igb_get_stats,
1370 .ndo_set_rx_mode = igb_set_rx_mode,
1371 .ndo_set_multicast_list = igb_set_rx_mode,
1372 .ndo_set_mac_address = igb_set_mac,
1373 .ndo_change_mtu = igb_change_mtu,
1374 .ndo_do_ioctl = igb_ioctl,
1375 .ndo_tx_timeout = igb_tx_timeout,
1376 .ndo_validate_addr = eth_validate_addr,
1377 .ndo_vlan_rx_register = igb_vlan_rx_register,
1378 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1379 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1380 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1381 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1382 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1383 .ndo_get_vf_config = igb_ndo_get_vf_config,
1384 #ifdef CONFIG_NET_POLL_CONTROLLER
1385 .ndo_poll_controller = igb_netpoll,
1390 * igb_probe - Device Initialization Routine
1391 * @pdev: PCI device information struct
1392 * @ent: entry in igb_pci_tbl
1394 * Returns 0 on success, negative on failure
1396 * igb_probe initializes an adapter identified by a pci_dev structure.
1397 * The OS initialization, configuring of the adapter private structure,
1398 * and a hardware reset occur.
/* PCI probe: enable the device, set up 64/32-bit DMA masks, map BAR0,
 * allocate the netdev, copy MAC/PHY/NVM ops for this part, read the MAC
 * address from NVM, resolve Wake-on-LAN quirks per device ID, register the
 * netdev and (optionally) enable DCA. Error paths unwind in reverse order.
 * NOTE(review): braces, goto labels and several statements are not visible —
 * this file appears truncated relative to the embedded original line numbers;
 * verify the unwind labels against upstream igb_main.c. */
1400 static int __devinit igb_probe(struct pci_dev *pdev,
1401 const struct pci_device_id *ent)
1403 struct net_device *netdev;
1404 struct igb_adapter *adapter;
1405 struct e1000_hw *hw;
1406 u16 eeprom_data = 0;
1407 static int global_quad_port_a; /* global quad port a indication */
1408 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1409 unsigned long mmio_start, mmio_len;
1410 int err, pci_using_dac;
1411 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1414 err = pci_enable_device_mem(pdev);
/* Try 64-bit DMA first; fall back to 32-bit if unsupported. */
1419 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1421 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1425 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1427 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1429 dev_err(&pdev->dev, "No usable DMA "
1430 "configuration, aborting\n");
1436 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1442 pci_enable_pcie_error_reporting(pdev);
1444 pci_set_master(pdev);
1445 pci_save_state(pdev);
1448 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1449 IGB_ABS_MAX_TX_QUEUES);
1451 goto err_alloc_etherdev;
1453 SET_NETDEV_DEV(netdev, &pdev->dev);
1455 pci_set_drvdata(pdev, netdev);
1456 adapter = netdev_priv(netdev);
1457 adapter->netdev = netdev;
1458 adapter->pdev = pdev;
1461 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1463 mmio_start = pci_resource_start(pdev, 0);
1464 mmio_len = pci_resource_len(pdev, 0);
1467 hw->hw_addr = ioremap(mmio_start, mmio_len);
1471 netdev->netdev_ops = &igb_netdev_ops;
1472 igb_set_ethtool_ops(netdev);
1473 netdev->watchdog_timeo = 5 * HZ;
1475 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1477 netdev->mem_start = mmio_start;
1478 netdev->mem_end = mmio_start + mmio_len;
1480 /* PCI config space info */
1481 hw->vendor_id = pdev->vendor;
1482 hw->device_id = pdev->device;
1483 hw->revision_id = pdev->revision;
1484 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1485 hw->subsystem_device_id = pdev->subsystem_device;
1487 /* Copy the default MAC, PHY and NVM function pointers */
1488 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1489 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1490 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1491 /* Initialize skew-specific constants */
1492 err = ei->get_invariants(hw);
1496 /* setup the private structure */
1497 err = igb_sw_init(adapter);
1501 igb_get_bus_info_pcie(hw);
1503 hw->phy.autoneg_wait_to_complete = false;
1505 /* Copper options */
1506 if (hw->phy.media_type == e1000_media_type_copper) {
1507 hw->phy.mdix = AUTO_ALL_MODES;
1508 hw->phy.disable_polarity_correction = false;
1509 hw->phy.ms_type = e1000_ms_hw_default;
1512 if (igb_check_reset_block(hw))
1513 dev_info(&pdev->dev,
1514 "PHY reset is blocked due to SOL/IDER session.\n");
1516 netdev->features = NETIF_F_SG |
1518 NETIF_F_HW_VLAN_TX |
1519 NETIF_F_HW_VLAN_RX |
1520 NETIF_F_HW_VLAN_FILTER;
1522 netdev->features |= NETIF_F_IPV6_CSUM;
1523 netdev->features |= NETIF_F_TSO;
1524 netdev->features |= NETIF_F_TSO6;
1525 netdev->features |= NETIF_F_GRO;
1527 netdev->vlan_features |= NETIF_F_TSO;
1528 netdev->vlan_features |= NETIF_F_TSO6;
1529 netdev->vlan_features |= NETIF_F_IP_CSUM;
1530 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1531 netdev->vlan_features |= NETIF_F_SG;
1534 netdev->features |= NETIF_F_HIGHDMA;
/* SCTP checksum offload exists on 82576 and later only. */
1536 if (hw->mac.type >= e1000_82576)
1537 netdev->features |= NETIF_F_SCTP_CSUM;
1539 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1541 /* before reading the NVM, reset the controller to put the device in a
1542 * known good starting state */
1543 hw->mac.ops.reset_hw(hw);
1545 /* make sure the NVM is good */
1546 if (igb_validate_nvm_checksum(hw) < 0) {
1547 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1552 /* copy the MAC address out of the NVM */
1553 if (hw->mac.ops.read_mac_addr(hw))
1554 dev_err(&pdev->dev, "NVM Read Error\n");
1556 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1557 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1559 if (!is_valid_ether_addr(netdev->perm_addr)) {
1560 dev_err(&pdev->dev, "Invalid MAC Address\n");
1565 setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1566 (unsigned long) adapter);
1567 setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1568 (unsigned long) adapter);
1570 INIT_WORK(&adapter->reset_task, igb_reset_task);
1571 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1573 /* Initialize link properties that are user-changeable */
1574 adapter->fc_autoneg = true;
1575 hw->mac.autoneg = true;
1576 hw->phy.autoneg_advertised = 0x2f;
1578 hw->fc.requested_mode = e1000_fc_default;
1579 hw->fc.current_mode = e1000_fc_default;
1581 igb_validate_mdi_setting(hw);
1583 /* Initial Wake on LAN setting. If APM wake is enabled in the EEPROM,
1584 * enable the ACPI Magic Packet filter
1587 if (hw->bus.func == 0)
1588 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1589 else if (hw->mac.type == e1000_82580)
1590 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1591 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1593 else if (hw->bus.func == 1)
1594 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1596 if (eeprom_data & eeprom_apme_mask)
1597 adapter->eeprom_wol |= E1000_WUFC_MAG;
1599 /* now that we have the eeprom settings, apply the special cases where
1600 * the eeprom may be wrong or the board simply won't support wake on
1601 * lan on a particular port */
1602 switch (pdev->device) {
1603 case E1000_DEV_ID_82575GB_QUAD_COPPER:
1604 adapter->eeprom_wol = 0;
1606 case E1000_DEV_ID_82575EB_FIBER_SERDES:
1607 case E1000_DEV_ID_82576_FIBER:
1608 case E1000_DEV_ID_82576_SERDES:
1609 /* Wake events only supported on port A for dual fiber
1610 * regardless of eeprom setting */
1611 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1612 adapter->eeprom_wol = 0;
1614 case E1000_DEV_ID_82576_QUAD_COPPER:
1615 /* if quad port adapter, disable WoL on all but port A */
1616 if (global_quad_port_a != 0)
1617 adapter->eeprom_wol = 0;
1619 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1620 /* Reset for multiple quad port adapters */
1621 if (++global_quad_port_a == 4)
1622 global_quad_port_a = 0;
1626 /* initialize the wol settings based on the eeprom settings */
1627 adapter->wol = adapter->eeprom_wol;
1628 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1630 /* reset the hardware with the new settings */
1633 /* let the f/w know that the h/w is now under the control of the
1635 igb_get_hw_control(adapter);
1637 strcpy(netdev->name, "eth%d");
1638 err = register_netdev(netdev);
1642 /* carrier off reporting is important to ethtool even BEFORE open */
1643 netif_carrier_off(netdev);
1645 #ifdef CONFIG_IGB_DCA
1646 if (dca_add_requester(&pdev->dev) == 0) {
1647 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1648 dev_info(&pdev->dev, "DCA enabled\n");
1649 igb_setup_dca(adapter);
1653 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1654 /* print bus type/speed/width info */
1655 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1657 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1659 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1660 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1661 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1665 igb_read_part_num(hw, &part_num);
1666 dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1667 (part_num >> 8), (part_num & 0xff));
1669 dev_info(&pdev->dev,
1670 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1671 adapter->msix_entries ? "MSI-X" :
1672 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1673 adapter->num_rx_queues, adapter->num_tx_queues);
/* Error unwind: release h/w control, unmap BARs, free netdev, release
 * PCI regions and disable the device, in reverse order of acquisition. */
1678 igb_release_hw_control(adapter);
1680 if (!igb_check_reset_block(hw))
1683 if (hw->flash_address)
1684 iounmap(hw->flash_address);
1686 igb_clear_interrupt_scheme(adapter);
1687 iounmap(hw->hw_addr);
1689 free_netdev(netdev);
1691 pci_release_selected_regions(pdev,
1692 pci_select_bars(pdev, IORESOURCE_MEM));
1695 pci_disable_device(pdev);
1700 * igb_remove - Device Removal Routine
1701 * @pdev: PCI device information struct
1703 * igb_remove is called by the PCI subsystem to alert the driver
1704 * that it should release a PCI device. This could be caused by a
1705 * Hot-Plug event, or because the driver is going to be removed from
/* PCI remove: stop the watchdog/phy timers and scheduled work, tear down
 * DCA, hand hardware control back to firmware, unregister the netdev,
 * reclaim SR-IOV resources, then unmap and release all PCI resources.
 * NOTE(review): braces and some statements (e.g. the msleep between
 * pci_disable_sriov and kfree) are not visible — file appears truncated. */
1708 static void __devexit igb_remove(struct pci_dev *pdev)
1710 struct net_device *netdev = pci_get_drvdata(pdev);
1711 struct igb_adapter *adapter = netdev_priv(netdev);
1712 struct e1000_hw *hw = &adapter->hw;
1714 /* flush_scheduled work may reschedule our watchdog task, so
1715 * explicitly disable watchdog tasks from being rescheduled */
1716 set_bit(__IGB_DOWN, &adapter->state);
1717 del_timer_sync(&adapter->watchdog_timer);
1718 del_timer_sync(&adapter->phy_info_timer);
1720 flush_scheduled_work();
1722 #ifdef CONFIG_IGB_DCA
1723 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1724 dev_info(&pdev->dev, "DCA disabled\n");
1725 dca_remove_requester(&pdev->dev);
1726 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1727 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1731 /* Release control of h/w to f/w. If f/w is AMT enabled, this
1732 * would have already happened in close and is redundant. */
1733 igb_release_hw_control(adapter);
1735 unregister_netdev(netdev);
1737 igb_clear_interrupt_scheme(adapter);
1739 #ifdef CONFIG_PCI_IOV
1740 /* reclaim resources allocated to VFs */
1741 if (adapter->vf_data) {
1742 /* disable iov and allow time for transactions to clear */
1743 pci_disable_sriov(pdev);
1746 kfree(adapter->vf_data);
1747 adapter->vf_data = NULL;
1748 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1750 dev_info(&pdev->dev, "IOV Disabled\n");
1754 iounmap(hw->hw_addr);
1755 if (hw->flash_address)
1756 iounmap(hw->flash_address);
1757 pci_release_selected_regions(pdev,
1758 pci_select_bars(pdev, IORESOURCE_MEM));
1760 free_netdev(netdev);
1762 pci_disable_pcie_error_reporting(pdev);
1764 pci_disable_device(pdev);
1768 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1769 * @adapter: board private structure to initialize
1771 * This function initializes the vf specific data storage and then attempts to
1772 * allocate the VFs. The reason for ordering it this way is because it is much
1773 * more expensive time wise to disable SR-IOV than it is to allocate and free
1774 * the memory for the VFs.
/* Allocate per-VF bookkeeping (capped at 7 VFs) and enable SR-IOV; on
 * failure, free the vf_data array and fall back to zero VFs. Each enabled
 * VF gets a random MAC address.
 * NOTE(review): braces and some statements are not visible — file appears
 * truncated; verify the #ifdef structure against upstream igb_main.c. */
1776 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
1778 #ifdef CONFIG_PCI_IOV
1779 struct pci_dev *pdev = adapter->pdev;
/* Hardware supports at most 7 VFs (queue 0 stays with the PF). */
1781 if (adapter->vfs_allocated_count > 7)
1782 adapter->vfs_allocated_count = 7;
1784 if (adapter->vfs_allocated_count) {
1785 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1786 sizeof(struct vf_data_storage),
1788 /* if allocation failed then we do not support SR-IOV */
1789 if (!adapter->vf_data) {
1790 adapter->vfs_allocated_count = 0;
1791 dev_err(&pdev->dev, "Unable to allocate memory for VF "
1796 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1797 kfree(adapter->vf_data);
1798 adapter->vf_data = NULL;
1799 #endif /* CONFIG_PCI_IOV */
1800 adapter->vfs_allocated_count = 0;
1801 #ifdef CONFIG_PCI_IOV
1803 unsigned char mac_addr[ETH_ALEN];
1805 dev_info(&pdev->dev, "%d vfs allocated\n",
1806 adapter->vfs_allocated_count);
1807 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1808 random_ether_addr(mac_addr);
1809 igb_set_vf_mac(adapter, i, mac_addr);
1812 #endif /* CONFIG_PCI_IOV */
1817 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
1818 * @adapter: board private structure to initialize
1820 * igb_init_hw_timer initializes the function pointer and values for the hw
1821 * timer found in hardware.
/* Initialize the IEEE 1588 hardware timer: set up the cyclecounter
 * (read fn, mask, mult, shift) per MAC type, start the hardware clock,
 * and synchronize the timecounter/timecompare against the system clock.
 * 82580 uses a shifted 64-bit timer; 82576 scales via TIMINCA; 82575 has
 * no timesync support.
 * NOTE(review): braces, case labels and some register writes are not
 * visible — file appears truncated; verify against upstream igb_main.c. */
1823 static void igb_init_hw_timer(struct igb_adapter *adapter)
1825 struct e1000_hw *hw = &adapter->hw;
1827 switch (hw->mac.type) {
1829 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1830 adapter->cycles.read = igb_read_clock;
1831 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1832 adapter->cycles.mult = 1;
1834 * The 82580 timesync updates the system timer every 8ns by 8ns
1835 * and the value cannot be shifted. Instead we need to shift
1836 * the registers to generate a 64bit timer value. As a result
1837 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
1838 * 24 in order to generate a larger value for synchronization.
1840 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
1841 /* disable system timer temporarily by setting bit 31 */
1842 wr32(E1000_TSAUXC, 0x80000000);
1845 /* Set registers so that rollover occurs soon to test this. */
1846 wr32(E1000_SYSTIMR, 0x00000000);
1847 wr32(E1000_SYSTIML, 0x80000000);
1848 wr32(E1000_SYSTIMH, 0x000000FF);
1851 /* enable system timer by clearing bit 31 */
1852 wr32(E1000_TSAUXC, 0x0);
1855 timecounter_init(&adapter->clock,
1857 ktime_to_ns(ktime_get_real()));
1859 * Synchronize our NIC clock against system wall clock. NIC
1860 * time stamp reading requires ~3us per sample, each sample
1861 * was pretty stable even under load => only require 10
1862 * samples for each offset comparison.
1864 memset(&adapter->compare, 0, sizeof(adapter->compare));
1865 adapter->compare.source = &adapter->clock;
1866 adapter->compare.target = ktime_get_real;
1867 adapter->compare.num_samples = 10;
1868 timecompare_update(&adapter->compare, 0);
1872 * Initialize hardware timer: we keep it running just in case
1873 * that some program needs it later on.
1875 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1876 adapter->cycles.read = igb_read_clock;
1877 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1878 adapter->cycles.mult = 1;
1880 * Scale the NIC clock cycle by a large factor so that
1881 * relatively small clock corrections can be added or
1882 * subtracted at each clock tick. The drawbacks of a large
1883 * factor are a) that the clock register overflows more quickly
1884 * (not such a big deal) and b) that the increment per tick has
1885 * to fit into 24 bits. As a result we need to use a shift of
1886 * 19 so we can fit a value of 16 into the TIMINCA register.
1888 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1890 (1 << E1000_TIMINCA_16NS_SHIFT) |
1891 (16 << IGB_82576_TSYNC_SHIFT));
1893 /* Set registers so that rollover occurs soon to test this. */
1894 wr32(E1000_SYSTIML, 0x00000000);
1895 wr32(E1000_SYSTIMH, 0xFF800000);
1898 timecounter_init(&adapter->clock,
1900 ktime_to_ns(ktime_get_real()));
1902 * Synchronize our NIC clock against system wall clock. NIC
1903 * time stamp reading requires ~3us per sample, each sample
1904 * was pretty stable even under load => only require 10
1905 * samples for each offset comparison.
1907 memset(&adapter->compare, 0, sizeof(adapter->compare));
1908 adapter->compare.source = &adapter->clock;
1909 adapter->compare.target = ktime_get_real;
1910 adapter->compare.num_samples = 10;
1911 timecompare_update(&adapter->compare, 0);
1914 /* 82575 does not support timesync */
1922 * igb_sw_init - Initialize general software structures (struct igb_adapter)
1923 * @adapter: board private structure to initialize
1925 * igb_sw_init initializes the Adapter private data structure.
1926 * Fields are initialized based on PCI device information and
1927 * OS network device settings (MTU size).
/* Initialize the driver-private structure from PCI/netdev info: default
 * ring sizes and ITR, frame-size limits, SR-IOV VF count (82576 only),
 * RSS queue count (capped by online CPUs), queue pairing, interrupt
 * scheme, hw timer and VFs. Leaves the adapter marked DOWN.
 * NOTE(review): braces and the error/return paths are not visible —
 * file appears truncated; verify against upstream igb_main.c. */
1929 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1931 struct e1000_hw *hw = &adapter->hw;
1932 struct net_device *netdev = adapter->netdev;
1933 struct pci_dev *pdev = adapter->pdev;
1935 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1937 adapter->tx_ring_count = IGB_DEFAULT_TXD;
1938 adapter->rx_ring_count = IGB_DEFAULT_RXD;
1939 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1940 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1942 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1943 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1945 #ifdef CONFIG_PCI_IOV
1946 if (hw->mac.type == e1000_82576)
1947 adapter->vfs_allocated_count = max_vfs;
1949 #endif /* CONFIG_PCI_IOV */
1950 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
1953 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
1954 * then we should combine the queues into a queue pair in order to
1955 * conserve interrupts due to limited supply
1957 if ((adapter->rss_queues > 4) ||
1958 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
1959 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1961 /* This call may decrease the number of queues */
1962 if (igb_init_interrupt_scheme(adapter)) {
1963 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1967 igb_init_hw_timer(adapter);
1968 igb_probe_vfs(adapter);
1970 /* Explicitly disable IRQ since the NIC can be in any state. */
1971 igb_irq_disable(adapter);
1973 set_bit(__IGB_DOWN, &adapter->state);
1978 * igb_open - Called when a network interface is made active
1979 * @netdev: network interface device structure
1981 * Returns 0 on success, negative value on failure
1983 * The open entry point is called when a network interface is made
1984 * active by the system (IFF_UP). At this point all resources needed
1985 * for transmit and receive operations are allocated, the interrupt
1986 * handler is registered with the OS, the watchdog timer is started,
1987 * and the stack is notified that the interface is ready.
/* ndo_open: allocate Tx then Rx descriptor resources, power up the link,
 * configure the hardware before requesting the IRQ (so a DEBUG_SHIRQ test
 * interrupt is safe), enable NAPI and interrupts, notify VFs, start the Tx
 * queues and kick the watchdog. Error paths free in reverse order.
 * NOTE(review): braces, goto labels and the final return are not visible —
 * file appears truncated; verify the unwind labels against upstream. */
1989 static int igb_open(struct net_device *netdev)
1991 struct igb_adapter *adapter = netdev_priv(netdev);
1992 struct e1000_hw *hw = &adapter->hw;
1996 /* disallow open during test */
1997 if (test_bit(__IGB_TESTING, &adapter->state))
2000 netif_carrier_off(netdev);
2002 /* allocate transmit descriptors */
2003 err = igb_setup_all_tx_resources(adapter);
2007 /* allocate receive descriptors */
2008 err = igb_setup_all_rx_resources(adapter);
2012 igb_power_up_link(adapter);
2014 /* before we allocate an interrupt, we must be ready to handle it.
2015 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2016 * as soon as we call pci_request_irq, so we have to setup our
2017 * clean_rx handler before we do so. */
2018 igb_configure(adapter);
2020 err = igb_request_irq(adapter);
2024 /* From here on the code is the same as igb_up() */
2025 clear_bit(__IGB_DOWN, &adapter->state);
2027 for (i = 0; i < adapter->num_q_vectors; i++) {
2028 struct igb_q_vector *q_vector = adapter->q_vector[i];
2029 napi_enable(&q_vector->napi);
2032 /* Clear any pending interrupts. */
2035 igb_irq_enable(adapter);
2037 /* notify VFs that reset has been completed */
2038 if (adapter->vfs_allocated_count) {
2039 u32 reg_data = rd32(E1000_CTRL_EXT);
2040 reg_data |= E1000_CTRL_EXT_PFRSTD;
2041 wr32(E1000_CTRL_EXT, reg_data);
2044 netif_tx_start_all_queues(netdev);
2046 /* start the watchdog. */
2047 hw->mac.get_link_status = 1;
2048 schedule_work(&adapter->watchdog_task);
/* Error unwind: release h/w control, power down link, free Rx then Tx. */
2053 igb_release_hw_control(adapter);
2054 igb_power_down_link(adapter);
2055 igb_free_all_rx_resources(adapter);
2057 igb_free_all_tx_resources(adapter);
2065 * igb_close - Disables a network interface
2066 * @netdev: network interface device structure
2068 * Returns 0, this is not allowed to fail
2070 * The close entry point is called when an interface is de-activated
2071 * by the OS. The hardware is still under the driver's control, but
2072 * needs to be disabled. A global MAC reset is issued to stop the
2073 * hardware, and all transmit and receive resources are freed.
/* ndo_close: must not run concurrently with a reset (WARN_ON guards that),
 * then free the IRQ and all Tx/Rx descriptor resources.
 * NOTE(review): the igb_down() call and `return 0;` expected between these
 * lines are not visible — file appears truncated; verify against upstream. */
2075 static int igb_close(struct net_device *netdev)
2077 struct igb_adapter *adapter = netdev_priv(netdev);
2079 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2082 igb_free_irq(adapter);
2084 igb_free_all_tx_resources(adapter);
2085 igb_free_all_rx_resources(adapter);
2091 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2092 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2094 * Return 0 on success, negative on failure
/* Allocate one Tx ring's software buffer_info array (vmalloc, zeroed) and
 * its DMA-coherent descriptor area (rounded up to 4K), then reset the ring
 * indices. On failure the buffer_info array is vfreed and an error logged.
 * NOTE(review): braces, the desc-allocation failure check and the return
 * statements are not visible — file appears truncated. */
2096 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2098 struct pci_dev *pdev = tx_ring->pdev;
2101 size = sizeof(struct igb_buffer) * tx_ring->count;
2102 tx_ring->buffer_info = vmalloc(size);
2103 if (!tx_ring->buffer_info)
2105 memset(tx_ring->buffer_info, 0, size);
2107 /* round up to nearest 4K */
2108 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2109 tx_ring->size = ALIGN(tx_ring->size, 4096);
2111 tx_ring->desc = pci_alloc_consistent(pdev,
2118 tx_ring->next_to_use = 0;
2119 tx_ring->next_to_clean = 0;
2123 vfree(tx_ring->buffer_info);
2125 "Unable to allocate memory for the transmit descriptor ring\n");
2130 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2131 * (Descriptors) for all queues
2132 * @adapter: board private structure
2134 * Return 0 on success, negative on failure
/* Allocate resources for every Tx ring; on failure free the rings already
 * allocated (in reverse). Then fill multi_tx_table so every possible queue
 * index maps onto a real ring (round-robin over num_tx_queues). */
2136 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2138 struct pci_dev *pdev = adapter->pdev;
2141 for (i = 0; i < adapter->num_tx_queues; i++) {
2142 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2145 "Allocation for Tx Queue %u failed\n", i);
2146 for (i--; i >= 0; i--)
2147 igb_free_tx_resources(adapter->tx_ring[i]);
2152 for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2153 int r_idx = i % adapter->num_tx_queues;
2154 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2160 * igb_setup_tctl - configure the transmit control registers
2161 * @adapter: Board private structure
/* Program the Transmit Control Register: disable the default-enabled
 * queue 0 TXDCTL, set pad-short-packets / retransmit-on-late-collision and
 * the collision threshold, program collision distance, then enable Tx. */
2163 void igb_setup_tctl(struct igb_adapter *adapter)
2165 struct e1000_hw *hw = &adapter->hw;
2168 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2169 wr32(E1000_TXDCTL(0), 0);
2171 /* Program the Transmit Control Register */
2172 tctl = rd32(E1000_TCTL);
2173 tctl &= ~E1000_TCTL_CT;
2174 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2175 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2177 igb_config_collision_dist(hw);
2179 /* Enable transmits */
2180 tctl |= E1000_TCTL_EN;
2182 wr32(E1000_TCTL, tctl);
2186 * igb_configure_tx_ring - Configure transmit ring after Reset
2187 * @adapter: board private structure
2188 * @ring: tx ring to configure
2190 * Configure a transmit ring after a reset.
/* Program one Tx ring after reset: disable the queue, write the descriptor
 * length and 64-bit base address, zero the head/tail pointers, set the
 * prefetch/host/write-back thresholds in TXDCTL and re-enable the queue.
 * NOTE(review): braces, `u32 txdctl;` and a flush/delay between disable and
 * re-program are not visible — file appears truncated. */
2192 void igb_configure_tx_ring(struct igb_adapter *adapter,
2193 struct igb_ring *ring)
2195 struct e1000_hw *hw = &adapter->hw;
2197 u64 tdba = ring->dma;
2198 int reg_idx = ring->reg_idx;
2200 /* disable the queue */
2201 txdctl = rd32(E1000_TXDCTL(reg_idx));
2202 wr32(E1000_TXDCTL(reg_idx),
2203 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2207 wr32(E1000_TDLEN(reg_idx),
2208 ring->count * sizeof(union e1000_adv_tx_desc));
2209 wr32(E1000_TDBAL(reg_idx),
2210 tdba & 0x00000000ffffffffULL);
2211 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2213 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2214 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2215 writel(0, ring->head);
2216 writel(0, ring->tail);
2218 txdctl |= IGB_TX_PTHRESH;
2219 txdctl |= IGB_TX_HTHRESH << 8;
2220 txdctl |= IGB_TX_WTHRESH << 16;
2222 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2223 wr32(E1000_TXDCTL(reg_idx), txdctl);
2227 * igb_configure_tx - Configure transmit Unit after Reset
2228 * @adapter: board private structure
2230 * Configure the Tx unit of the MAC after a reset.
/* Configure every Tx ring of the MAC after a reset (see igb_configure_tx_ring). */
2232 static void igb_configure_tx(struct igb_adapter *adapter)
2236 for (i = 0; i < adapter->num_tx_queues; i++)
2237 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2241 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2242 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2244 * Returns 0 on success, negative on failure
/* Allocate one Rx ring's software buffer_info array (vmalloc, zeroed) and
 * its DMA-coherent descriptor area (rounded up to 4K), then reset the ring
 * indices. On failure the buffer_info array is vfreed and an error logged.
 * NOTE(review): braces, the desc-allocation failure check and the return
 * statements are not visible — file appears truncated. */
2246 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2248 struct pci_dev *pdev = rx_ring->pdev;
2251 size = sizeof(struct igb_buffer) * rx_ring->count;
2252 rx_ring->buffer_info = vmalloc(size);
2253 if (!rx_ring->buffer_info)
2255 memset(rx_ring->buffer_info, 0, size);
2257 desc_len = sizeof(union e1000_adv_rx_desc);
2259 /* Round up to nearest 4K */
2260 rx_ring->size = rx_ring->count * desc_len;
2261 rx_ring->size = ALIGN(rx_ring->size, 4096);
2263 rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2269 rx_ring->next_to_clean = 0;
2270 rx_ring->next_to_use = 0;
2275 vfree(rx_ring->buffer_info);
2276 rx_ring->buffer_info = NULL;
2277 dev_err(&pdev->dev, "Unable to allocate memory for "
2278 "the receive descriptor ring\n");
2283 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2284 * (Descriptors) for all queues
2285 * @adapter: board private structure
2287 * Return 0 on success, negative on failure
/* Allocate resources for every Rx ring; on failure free the rings already
 * allocated (in reverse) before returning the error. */
2289 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2291 struct pci_dev *pdev = adapter->pdev;
2294 for (i = 0; i < adapter->num_rx_queues; i++) {
2295 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2298 "Allocation for Rx Queue %u failed\n", i);
2299 for (i--; i >= 0; i--)
2300 igb_free_rx_resources(adapter->rx_ring[i]);
2309 * igb_setup_mrqc - configure the multiple receive queue control registers
2310 * @adapter: Board private structure
/*
 * Programs the RSS key, the redirection table (RETA), RXCSUM and the MRQC
 * mode (plain RSS vs. VMDq/VMDq+RSS when VFs are allocated).
 */
2312 static void igb_setup_mrqc(struct igb_adapter *adapter)
2314 struct e1000_hw *hw = &adapter->hw;
2316 u32 j, num_rx_queues, shift = 0, shift2 = 0;
/* Fixed 40-byte RSS seed, written 4 bytes at a time into RSSRK below. */
2321 static const u8 rsshash[40] = {
2322 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2323 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2324 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2325 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2327 /* Fill out hash function seeds */
2328 for (j = 0; j < 10; j++) {
2329 u32 rsskey = rsshash[(j * 4)];
2330 rsskey |= rsshash[(j * 4) + 1] << 8;
2331 rsskey |= rsshash[(j * 4) + 2] << 16;
2332 rsskey |= rsshash[(j * 4) + 3] << 24;
2333 array_wr32(E1000_RSSRK(0), j, rsskey);
2336 num_rx_queues = adapter->rss_queues;
2338 if (adapter->vfs_allocated_count) {
2339 /* 82575 and 82576 supports 2 RSS queues for VMDq */
/* NOTE(review): mac.type switch cases elided in this extract. */
2340 switch (hw->mac.type) {
2356 if (hw->mac.type == e1000_82575)
/* Fill the 128-entry redirection table, four entries per RETA register. */
2360 for (j = 0; j < (32 * 4); j++) {
2361 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2363 reta.bytes[j & 3] |= num_rx_queues << shift2;
2365 wr32(E1000_RETA(j >> 2), reta.dword);
2369 * Disable raw packet checksumming so that RSS hash is placed in
2370 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2371 * offloads as they are enabled by default
2373 rxcsum = rd32(E1000_RXCSUM);
2374 rxcsum |= E1000_RXCSUM_PCSD;
2376 if (adapter->hw.mac.type >= e1000_82576)
2377 /* Enable Receive Checksum Offload for SCTP */
2378 rxcsum |= E1000_RXCSUM_CRCOFL;
2380 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2381 wr32(E1000_RXCSUM, rxcsum);
2383 /* If VMDq is enabled then we set the appropriate mode for that, else
2384 * we default to RSS so that an RSS hash is calculated per packet even
2385 * if we are only using one queue */
2386 if (adapter->vfs_allocated_count) {
2387 if (hw->mac.type > e1000_82575) {
2388 /* Set the default pool for the PF's first queue */
2389 u32 vtctl = rd32(E1000_VT_CTL);
2390 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2391 E1000_VT_CTL_DISABLE_DEF_POOL);
2392 vtctl |= adapter->vfs_allocated_count <<
2393 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2394 wr32(E1000_VT_CTL, vtctl);
2396 if (adapter->rss_queues > 1)
2397 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2399 mrqc = E1000_MRQC_ENABLE_VMDQ;
2401 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2403 igb_vmm_control(adapter);
/* Hash on IPv4/IPv6 addresses plus TCP/UDP ports (incl. IPv6 ext hdrs). */
2405 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2406 E1000_MRQC_RSS_FIELD_IPV4_TCP);
2407 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2408 E1000_MRQC_RSS_FIELD_IPV6_TCP);
2409 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2410 E1000_MRQC_RSS_FIELD_IPV6_UDP);
2411 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2412 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2414 wr32(E1000_MRQC, mrqc);
2418 * igb_setup_rctl - configure the receive control registers
2419 * @adapter: Board private structure
2421 void igb_setup_rctl(struct igb_adapter *adapter)
2423 struct e1000_hw *hw = &adapter->hw;
2426 rctl = rd32(E1000_RCTL);
/* Clear multicast-offset and loopback bits before setting our policy. */
2428 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2429 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2431 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2432 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2435 * enable stripping of CRC. It's unlikely this will break BMC
2436 * redirection as it did with e1000. Newer features require
2437 * that the HW strips the CRC.
2439 rctl |= E1000_RCTL_SECRC;
2441 /* disable store bad packets and clear size bits. */
2442 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2444 /* enable LPE to prevent packets larger than max_frame_size */
2445 rctl |= E1000_RCTL_LPE;
2447 /* disable queue 0 to prevent tail write w/o re-config */
2448 wr32(E1000_RXDCTL(0), 0);
2450 /* Attention!!! For SR-IOV PF driver operations you must enable
2451 * queue drop for all VF and PF queues to prevent head of line blocking
2452 * if an un-trusted VF does not provide descriptors to hardware.
2454 if (adapter->vfs_allocated_count) {
2455 /* set all queue drop enable bits */
2456 wr32(E1000_QDE, ALL_QUEUES);
2459 wr32(E1000_RCTL, rctl);
/*
 * igb_set_vf_rlpml - program the per-pool long packet size (VMOLR.RLPML)
 * for pool @vfn; @size is grown by VLAN_TAG_SIZE when the VF has VLANs.
 */
2462 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2465 struct e1000_hw *hw = &adapter->hw;
2468 /* if it isn't the PF check to see if VFs are enabled and
2469 * increase the size to support vlan tags */
2470 if (vfn < adapter->vfs_allocated_count &&
2471 adapter->vf_data[vfn].vlans_enabled)
2472 size += VLAN_TAG_SIZE;
/* Replace the RLPML field and enable long-packet reception for the pool. */
2474 vmolr = rd32(E1000_VMOLR(vfn));
2475 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2476 vmolr |= size | E1000_VMOLR_LPE;
2477 wr32(E1000_VMOLR(vfn), vmolr);
2483 * igb_rlpml_set - set maximum receive packet size
2484 * @adapter: board private structure
2486 * Configure maximum receivable packet size.
2488 static void igb_rlpml_set(struct igb_adapter *adapter)
2490 u32 max_frame_size = adapter->max_frame_size;
2491 struct e1000_hw *hw = &adapter->hw;
/* PF's pool index equals the number of allocated VFs. */
2492 u16 pf_id = adapter->vfs_allocated_count;
2495 max_frame_size += VLAN_TAG_SIZE;
2497 /* if vfs are enabled we set RLPML to the largest possible request
2498 * size and set the VMOLR RLPML to the size we need */
2500 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2501 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2504 wr32(E1000_RLPML, max_frame_size);
/*
 * igb_set_vmolr - configure per-pool offload register (VMOLR) for pool @vfn:
 * VLAN stripping, tagged/untagged acceptance, RSS enable and broadcast accept.
 */
2507 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2510 struct e1000_hw *hw = &adapter->hw;
2514 * This register exists only on 82576 and newer so if we are older then
2515 * we should exit and do nothing
2517 if (hw->mac.type < e1000_82576)
2520 vmolr = rd32(E1000_VMOLR(vfn));
2521 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
2523 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
2525 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2527 /* clear all bits that might not be set */
2528 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
/* RSS only makes sense on the PF pool when multiple queues are in use. */
2530 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2531 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2533 * for VMDq only allow the VFs and pool 0 to accept broadcast and
2536 if (vfn <= adapter->vfs_allocated_count)
2537 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
2539 wr32(E1000_VMOLR(vfn), vmolr);
2543 * igb_configure_rx_ring - Configure a receive ring after Reset
2544 * @adapter: board private structure
2545 * @ring: receive ring to be configured
2547 * Configure the Rx unit of the MAC after a reset.
2549 void igb_configure_rx_ring(struct igb_adapter *adapter,
2550 struct igb_ring *ring)
2552 struct e1000_hw *hw = &adapter->hw;
2553 u64 rdba = ring->dma;
2554 int reg_idx = ring->reg_idx;
2557 /* disable the queue */
2558 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2559 wr32(E1000_RXDCTL(reg_idx),
2560 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2562 /* Set DMA base address registers */
2563 wr32(E1000_RDBAL(reg_idx),
2564 rdba & 0x00000000ffffffffULL);
2565 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2566 wr32(E1000_RDLEN(reg_idx),
2567 ring->count * sizeof(union e1000_adv_rx_desc));
2569 /* initialize head and tail */
2570 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2571 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2572 writel(0, ring->head);
2573 writel(0, ring->tail);
2575 /* set descriptor configuration */
/* Small buffers: use header-split descriptors with a half-page data area. */
2576 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2577 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2578 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2579 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2580 srrctl |= IGB_RXBUFFER_16384 >>
2581 E1000_SRRCTL_BSIZEPKT_SHIFT;
2583 srrctl |= (PAGE_SIZE / 2) >>
2584 E1000_SRRCTL_BSIZEPKT_SHIFT;
2586 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
/* Large buffers: one-buffer advanced descriptors, size in 1KB units. */
2588 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2589 E1000_SRRCTL_BSIZEPKT_SHIFT;
2590 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2592 /* Only set Drop Enable if we are supporting multiple queues */
2593 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2594 srrctl |= E1000_SRRCTL_DROP_EN;
2596 wr32(E1000_SRRCTL(reg_idx), srrctl);
2598 /* set filtering for VMDQ pools */
2599 igb_set_vmolr(adapter, reg_idx & 0x7, true);
2601 /* enable receive descriptor fetching */
2602 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2603 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
/* Keep reserved high bits, then set prefetch/host/writeback thresholds. */
2604 rxdctl &= 0xFFF00000;
2605 rxdctl |= IGB_RX_PTHRESH;
2606 rxdctl |= IGB_RX_HTHRESH << 8;
2607 rxdctl |= IGB_RX_WTHRESH << 16;
2608 wr32(E1000_RXDCTL(reg_idx), rxdctl);
2612 * igb_configure_rx - Configure receive Unit after Reset
2613 * @adapter: board private structure
2615 * Configure the Rx unit of the MAC after a reset.
2617 static void igb_configure_rx(struct igb_adapter *adapter)
2621 /* set UTA to appropriate mode */
2622 igb_set_uta(adapter);
2624 /* set the correct pool for the PF default MAC address in entry 0 */
2625 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2626 adapter->vfs_allocated_count);
2628 /* Setup the HW Rx Head and Tail Descriptor Pointers and
2629 * the Base and Length of the Rx Descriptor Ring */
2630 for (i = 0; i < adapter->num_rx_queues; i++)
2631 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2635 * igb_free_tx_resources - Free Tx Resources per Queue
2636 * @tx_ring: Tx descriptor ring for a specific queue
2638 * Free all transmit software resources
2640 void igb_free_tx_resources(struct igb_ring *tx_ring)
/* Unmap/free all outstanding buffers first, then release the tracking array. */
2642 igb_clean_tx_ring(tx_ring);
2644 vfree(tx_ring->buffer_info);
2645 tx_ring->buffer_info = NULL;
2647 /* if not set, then don't free */
2651 pci_free_consistent(tx_ring->pdev, tx_ring->size,
2652 tx_ring->desc, tx_ring->dma);
2654 tx_ring->desc = NULL;
2658 * igb_free_all_tx_resources - Free Tx Resources for All Queues
2659 * @adapter: board private structure
2661 * Free all transmit software resources
2663 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2667 for (i = 0; i < adapter->num_tx_queues; i++)
2668 igb_free_tx_resources(adapter->tx_ring[i]);
/*
 * igb_unmap_and_free_tx_resource - release DMA mapping and skb for one
 * Tx buffer_info entry, then reset its bookkeeping fields for reuse.
 */
2671 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2672 struct igb_buffer *buffer_info)
2674 if (buffer_info->dma) {
/* Page fragments and linear data were mapped differently; unmap to match. */
2675 if (buffer_info->mapped_as_page)
2676 pci_unmap_page(tx_ring->pdev,
2678 buffer_info->length,
2681 pci_unmap_single(tx_ring->pdev,
2683 buffer_info->length,
2685 buffer_info->dma = 0;
2687 if (buffer_info->skb) {
/* Safe from any context, hence the _any variant. */
2688 dev_kfree_skb_any(buffer_info->skb);
2689 buffer_info->skb = NULL;
2691 buffer_info->time_stamp = 0;
2692 buffer_info->length = 0;
2693 buffer_info->next_to_watch = 0;
2694 buffer_info->mapped_as_page = false;
2698 * igb_clean_tx_ring - Free Tx Buffers
2699 * @tx_ring: ring to be cleaned
2701 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2703 struct igb_buffer *buffer_info;
/* Nothing to do if resources were never allocated (or already freed). */
2707 if (!tx_ring->buffer_info)
2709 /* Free all the Tx ring sk_buffs */
2711 for (i = 0; i < tx_ring->count; i++) {
2712 buffer_info = &tx_ring->buffer_info[i];
2713 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2716 size = sizeof(struct igb_buffer) * tx_ring->count;
2717 memset(tx_ring->buffer_info, 0, size);
2719 /* Zero out the descriptor ring */
2720 memset(tx_ring->desc, 0, tx_ring->size);
2722 tx_ring->next_to_use = 0;
2723 tx_ring->next_to_clean = 0;
2727 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2728 * @adapter: board private structure
2730 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2734 for (i = 0; i < adapter->num_tx_queues; i++)
2735 igb_clean_tx_ring(adapter->tx_ring[i]);
2739 * igb_free_rx_resources - Free Rx Resources
2740 * @rx_ring: ring to clean the resources from
2742 * Free all receive software resources
2744 void igb_free_rx_resources(struct igb_ring *rx_ring)
/* Release all buffers/mappings first, then the tracking array and the ring. */
2746 igb_clean_rx_ring(rx_ring);
2748 vfree(rx_ring->buffer_info);
2749 rx_ring->buffer_info = NULL;
2751 /* if not set, then don't free */
2755 pci_free_consistent(rx_ring->pdev, rx_ring->size,
2756 rx_ring->desc, rx_ring->dma);
2758 rx_ring->desc = NULL;
2762 * igb_free_all_rx_resources - Free Rx Resources for All Queues
2763 * @adapter: board private structure
2765 * Free all receive software resources
2767 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2771 for (i = 0; i < adapter->num_rx_queues; i++)
2772 igb_free_rx_resources(adapter->rx_ring[i]);
2776 * igb_clean_rx_ring - Free Rx Buffers per Queue
2777 * @rx_ring: ring to free buffers from
2779 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2781 struct igb_buffer *buffer_info;
/* Nothing to do if resources were never allocated (or already freed). */
2785 if (!rx_ring->buffer_info)
2788 /* Free all the Rx ring sk_buffs */
2789 for (i = 0; i < rx_ring->count; i++) {
2790 buffer_info = &rx_ring->buffer_info[i];
/* Linear buffer mapping for the skb data area. */
2791 if (buffer_info->dma) {
2792 pci_unmap_single(rx_ring->pdev,
2794 rx_ring->rx_buffer_len,
2795 PCI_DMA_FROMDEVICE);
2796 buffer_info->dma = 0;
2799 if (buffer_info->skb) {
2800 dev_kfree_skb(buffer_info->skb);
2801 buffer_info->skb = NULL;
/* Half-page mapping used by the header-split receive path. */
2803 if (buffer_info->page_dma) {
2804 pci_unmap_page(rx_ring->pdev,
2805 buffer_info->page_dma,
2807 PCI_DMA_FROMDEVICE);
2808 buffer_info->page_dma = 0;
2810 if (buffer_info->page) {
2811 put_page(buffer_info->page);
2812 buffer_info->page = NULL;
2813 buffer_info->page_offset = 0;
2817 size = sizeof(struct igb_buffer) * rx_ring->count;
2818 memset(rx_ring->buffer_info, 0, size);
2820 /* Zero out the descriptor ring */
2821 memset(rx_ring->desc, 0, rx_ring->size);
2823 rx_ring->next_to_clean = 0;
2824 rx_ring->next_to_use = 0;
2828 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2829 * @adapter: board private structure
2831 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2835 for (i = 0; i < adapter->num_rx_queues; i++)
2836 igb_clean_rx_ring(adapter->rx_ring[i]);
2840 * igb_set_mac - Change the Ethernet Address of the NIC
2841 * @netdev: network interface device structure
2842 * @p: pointer to an address structure
2844 * Returns 0 on success, negative on failure
2846 static int igb_set_mac(struct net_device *netdev, void *p)
2848 struct igb_adapter *adapter = netdev_priv(netdev);
2849 struct e1000_hw *hw = &adapter->hw;
2850 struct sockaddr *addr = p;
/* Reject multicast/zero addresses before touching any state. */
2852 if (!is_valid_ether_addr(addr->sa_data))
2853 return -EADDRNOTAVAIL;
/* Keep the netdev and hw copies of the MAC in sync. */
2855 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2856 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2858 /* set the correct pool for the new PF MAC address in entry 0 */
2859 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2860 adapter->vfs_allocated_count);
2866 * igb_write_mc_addr_list - write multicast addresses to MTA
2867 * @netdev: network interface device structure
2869 * Writes multicast address list to the MTA hash table.
2870 * Returns: -ENOMEM on failure
2871 * 0 on no addresses written
2872 * X on writing X addresses to MTA
2874 static int igb_write_mc_addr_list(struct net_device *netdev)
2876 struct igb_adapter *adapter = netdev_priv(netdev);
2877 struct e1000_hw *hw = &adapter->hw;
2878 struct dev_mc_list *mc_ptr;
2882 if (netdev_mc_empty(netdev)) {
2883 /* nothing to program, so clear mc list */
2884 igb_update_mc_addr_list(hw, NULL, 0);
2885 igb_restore_vf_multicasts(adapter);
/* 6 == ETH_ALEN; temporary packed array of all multicast addresses. */
2889 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
2893 /* The shared function expects a packed array of only addresses. */
2895 netdev_for_each_mc_addr(mc_ptr, netdev)
2896 memcpy(mta_list + (i++ * ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2898 igb_update_mc_addr_list(hw, mta_list, i);
2901 return netdev_mc_count(netdev);
2905 * igb_write_uc_addr_list - write unicast addresses to RAR table
2906 * @netdev: network interface device structure
2908 * Writes unicast address list to the RAR table.
2909 * Returns: -ENOMEM on failure/insufficient address space
2910 * 0 on no addresses written
2911 * X on writing X addresses to the RAR table
2913 static int igb_write_uc_addr_list(struct net_device *netdev)
2915 struct igb_adapter *adapter = netdev_priv(netdev);
2916 struct e1000_hw *hw = &adapter->hw;
2917 unsigned int vfn = adapter->vfs_allocated_count;
/* RAR entries left for the PF after reserving one per VF + the default. */
2918 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2921 /* return ENOMEM indicating insufficient memory for addresses */
2922 if (netdev_uc_count(netdev) > rar_entries)
2925 if (!netdev_uc_empty(netdev) && rar_entries) {
2926 struct netdev_hw_addr *ha;
2928 netdev_for_each_uc_addr(ha, netdev) {
2931 igb_rar_set_qsel(adapter, ha->addr,
2937 /* write the addresses in reverse order to avoid write combining */
/* Clear any leftover entries beyond the addresses just programmed. */
2938 for (; rar_entries > 0 ; rar_entries--) {
2939 wr32(E1000_RAH(rar_entries), 0);
2940 wr32(E1000_RAL(rar_entries), 0);
2948 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2949 * @netdev: network interface device structure
2951 * The set_rx_mode entry point is called whenever the unicast or multicast
2952 * address lists or the network interface flags are updated. This routine is
2953 * responsible for configuring the hardware for proper unicast, multicast,
2954 * promiscuous mode, and all-multi behavior.
2956 static void igb_set_rx_mode(struct net_device *netdev)
2958 struct igb_adapter *adapter = netdev_priv(netdev);
2959 struct e1000_hw *hw = &adapter->hw;
2960 unsigned int vfn = adapter->vfs_allocated_count;
2961 u32 rctl, vmolr = 0;
2964 /* Check for Promiscuous and All Multicast modes */
2965 rctl = rd32(E1000_RCTL);
2967 /* clear the effected bits */
2968 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2970 if (netdev->flags & IFF_PROMISC) {
2971 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2972 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2974 if (netdev->flags & IFF_ALLMULTI) {
2975 rctl |= E1000_RCTL_MPE;
2976 vmolr |= E1000_VMOLR_MPME;
2979 * Write addresses to the MTA, if the attempt fails
2980 * then we should just turn on promiscous mode so
2981 * that we can at least receive multicast traffic
2983 count = igb_write_mc_addr_list(netdev);
2985 rctl |= E1000_RCTL_MPE;
2986 vmolr |= E1000_VMOLR_MPME;
2988 vmolr |= E1000_VMOLR_ROMPE;
2992 * Write addresses to available RAR registers, if there is not
2993 * sufficient space to store all the addresses then enable
2994 * unicast promiscous mode
2996 count = igb_write_uc_addr_list(netdev);
2998 rctl |= E1000_RCTL_UPE;
2999 vmolr |= E1000_VMOLR_ROPE;
3001 rctl |= E1000_RCTL_VFE;
3003 wr32(E1000_RCTL, rctl);
3006 * In order to support SR-IOV and eventually VMDq it is necessary to set
3007 * the VMOLR to enable the appropriate modes. Without this workaround
3008 * we will have issues with VLAN tag stripping not being done for frames
3009 * that are only arriving because we are the default pool
3011 if (hw->mac.type < e1000_82576)
/* Merge our mode bits with the pool's existing VMOLR settings. */
3014 vmolr |= rd32(E1000_VMOLR(vfn)) &
3015 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3016 wr32(E1000_VMOLR(vfn), vmolr);
3017 igb_restore_vf_multicasts(adapter);
3020 /* Need to wait a few seconds after link up to get diagnostic information from
/* Timer callback: @data is the adapter pointer cast to unsigned long. */
3022 static void igb_update_phy_info(unsigned long data)
3024 struct igb_adapter *adapter = (struct igb_adapter *) data;
3025 igb_get_phy_info(&adapter->hw);
3029 * igb_has_link - check shared code for link and determine up/down
3030 * @adapter: pointer to driver private info
3032 bool igb_has_link(struct igb_adapter *adapter)
3034 struct e1000_hw *hw = &adapter->hw;
3035 bool link_active = false;
3038 /* get_link_status is set on LSC (link status) interrupt or
3039 * rx sequence error interrupt. get_link_status will stay
3040 * false until the e1000_check_for_link establishes link
3041 * for copper adapters ONLY
3043 switch (hw->phy.media_type) {
3044 case e1000_media_type_copper:
3045 if (hw->mac.get_link_status) {
3046 ret_val = hw->mac.ops.check_for_link(hw);
/* check_for_link() clears get_link_status once link is established. */
3047 link_active = !hw->mac.get_link_status;
3052 case e1000_media_type_internal_serdes:
3053 ret_val = hw->mac.ops.check_for_link(hw);
3054 link_active = hw->mac.serdes_has_link;
/* Unknown media: fall through to report no link. */
3057 case e1000_media_type_unknown:
3065 * igb_watchdog - Timer Call-back
3066 * @data: pointer to adapter cast into an unsigned long
3068 static void igb_watchdog(unsigned long data)
3070 struct igb_adapter *adapter = (struct igb_adapter *)data;
3071 /* Do the rest outside of interrupt context */
3072 schedule_work(&adapter->watchdog_task);
/*
 * igb_watchdog_task - periodic link/stats/hang monitoring (process context).
 * Rescheduled every 2s via watchdog_timer while the interface is up.
 */
3075 static void igb_watchdog_task(struct work_struct *work)
3077 struct igb_adapter *adapter = container_of(work,
3080 struct e1000_hw *hw = &adapter->hw;
3081 struct net_device *netdev = adapter->netdev;
3085 link = igb_has_link(adapter);
/* Link just came up: report speed/duplex/flow control and start the queues. */
3087 if (!netif_carrier_ok(netdev)) {
3089 hw->mac.ops.get_speed_and_duplex(hw,
3090 &adapter->link_speed,
3091 &adapter->link_duplex);
3093 ctrl = rd32(E1000_CTRL);
3094 /* Links status message must follow this format */
3095 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3096 "Flow Control: %s\n",
3098 adapter->link_speed,
3099 adapter->link_duplex == FULL_DUPLEX ?
3100 "Full Duplex" : "Half Duplex",
3101 ((ctrl & E1000_CTRL_TFCE) &&
3102 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3103 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3104 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3106 /* adjust timeout factor according to speed/duplex */
3107 adapter->tx_timeout_factor = 1;
3108 switch (adapter->link_speed) {
3110 adapter->tx_timeout_factor = 14;
3113 /* maybe add some timeout factor ? */
3117 netif_carrier_on(netdev);
3119 igb_ping_all_vfs(adapter);
3121 /* link state has changed, schedule phy info update */
3122 if (!test_bit(__IGB_DOWN, &adapter->state))
3123 mod_timer(&adapter->phy_info_timer,
3124 round_jiffies(jiffies + 2 * HZ));
/* Link just went down: clear cached speed/duplex and stop the carrier. */
3127 if (netif_carrier_ok(netdev)) {
3128 adapter->link_speed = 0;
3129 adapter->link_duplex = 0;
3130 /* Links status message must follow this format */
3131 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3133 netif_carrier_off(netdev);
3135 igb_ping_all_vfs(adapter);
3137 /* link state has changed, schedule phy info update */
3138 if (!test_bit(__IGB_DOWN, &adapter->state))
3139 mod_timer(&adapter->phy_info_timer,
3140 round_jiffies(jiffies + 2 * HZ));
3144 igb_update_stats(adapter);
3146 for (i = 0; i < adapter->num_tx_queues; i++) {
3147 struct igb_ring *tx_ring = adapter->tx_ring[i];
3148 if (!netif_carrier_ok(netdev)) {
3149 /* We've lost link, so the controller stops DMA,
3150 * but we've got queued Tx work that's never going
3151 * to get done, so reset controller to flush Tx.
3152 * (Do the reset outside of interrupt context). */
3153 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3154 adapter->tx_timeout_count++;
3155 schedule_work(&adapter->reset_task);
3156 /* return immediately since reset is imminent */
3161 /* Force detection of hung controller every watchdog period */
3162 tx_ring->detect_tx_hung = true;
3165 /* Cause software interrupt to ensure rx ring is cleaned */
3166 if (adapter->msix_entries) {
/* MSI-X: fire one software interrupt per q_vector via EICS. */
3168 for (i = 0; i < adapter->num_q_vectors; i++) {
3169 struct igb_q_vector *q_vector = adapter->q_vector[i];
3170 eics |= q_vector->eims_value;
3172 wr32(E1000_EICS, eics);
3174 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3177 /* Reset the timer */
3178 if (!test_bit(__IGB_DOWN, &adapter->state))
3179 mod_timer(&adapter->watchdog_timer,
3180 round_jiffies(jiffies + 2 * HZ));
/* Interrupt-moderation latency classes used by the dynamic ITR algorithms.
 * NOTE(review): the named members are elided in this extract. */
3183 enum latency_range {
3187 latency_invalid = 255
3191 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3193 * Stores a new ITR value based on strictly on packet size. This
3194 * algorithm is less sophisticated than that used in igb_update_itr,
3195 * due to the difficulty of synchronizing statistics across multiple
3196 * receive rings. The divisors and thresholds used by this fuction
3197 * were determined based on theoretical maximum wire speed and testing
3198 * data, in order to minimize response time while increasing bulk
3200 * This functionality is controlled by the InterruptThrottleRate module
3201 * parameter (see igb_param.c)
3202 * NOTE: This function is called only when operating in a multiqueue
3203 * receive environment.
3204 * @q_vector: pointer to q_vector
3206 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3208 int new_val = q_vector->itr_val;
3209 int avg_wire_size = 0;
3210 struct igb_adapter *adapter = q_vector->adapter;
3212 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3213 * ints/sec - ITR timer value of 120 ticks.
3215 if (adapter->link_speed != SPEED_1000) {
/* Average frame size across this vector's Rx and Tx rings. */
3220 if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3221 struct igb_ring *ring = q_vector->rx_ring;
3222 avg_wire_size = ring->total_bytes / ring->total_packets;
3225 if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3226 struct igb_ring *ring = q_vector->tx_ring;
3227 avg_wire_size = max_t(u32, avg_wire_size,
3228 (ring->total_bytes /
3229 ring->total_packets));
3232 /* if avg_wire_size isn't set no work was done */
3236 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3237 avg_wire_size += 24;
3239 /* Don't starve jumbo frames */
3240 avg_wire_size = min(avg_wire_size, 3000);
3242 /* Give a little boost to mid-size frames */
3243 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3244 new_val = avg_wire_size / 3;
3246 new_val = avg_wire_size / 2;
3248 /* when in itr mode 3 do not exceed 20K ints/sec */
3249 if (adapter->rx_itr_setting == 3 && new_val < 196)
/* Defer the register write: set_itr flags the ISR to program it. */
3253 if (new_val != q_vector->itr_val) {
3254 q_vector->itr_val = new_val;
3255 q_vector->set_itr = 1;
/* Reset the per-interval byte/packet counters for the next window. */
3258 if (q_vector->rx_ring) {
3259 q_vector->rx_ring->total_bytes = 0;
3260 q_vector->rx_ring->total_packets = 0;
3262 if (q_vector->tx_ring) {
3263 q_vector->tx_ring->total_bytes = 0;
3264 q_vector->tx_ring->total_packets = 0;
3269 * igb_update_itr - update the dynamic ITR value based on statistics
3270 * Stores a new ITR value based on packets and byte
3271 * counts during the last interrupt. The advantage of per interrupt
3272 * computation is faster updates and more accurate ITR for the current
3273 * traffic pattern. Constants in this function were computed
3274 * based on theoretical maximum wire speed and thresholds were set based
3275 * on testing data as well as attempting to minimize response time
3276 * while increasing bulk throughput.
3277 * this functionality is controlled by the InterruptThrottleRate module
3278 * parameter (see igb_param.c)
3279 * NOTE: These calculations are only valid when operating in a single-
3280 * queue environment.
3281 * @adapter: pointer to adapter
3282 * @itr_setting: current q_vector->itr_val
3283 * @packets: the number of packets during this measurement interval
3284 * @bytes: the number of bytes during this measurement interval
3286 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3287 int packets, int bytes)
3289 unsigned int retval = itr_setting;
/* No traffic this interval: keep the current setting. */
3292 goto update_itr_done;
/* State machine: move between latency classes based on traffic profile. */
3294 switch (itr_setting) {
3295 case lowest_latency:
3296 /* handle TSO and jumbo frames */
3297 if (bytes/packets > 8000)
3298 retval = bulk_latency;
3299 else if ((packets < 5) && (bytes > 512))
3300 retval = low_latency;
3302 case low_latency: /* 50 usec aka 20000 ints/s */
3303 if (bytes > 10000) {
3304 /* this if handles the TSO accounting */
3305 if (bytes/packets > 8000) {
3306 retval = bulk_latency;
3307 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3308 retval = bulk_latency;
3309 } else if ((packets > 35)) {
3310 retval = lowest_latency;
3312 } else if (bytes/packets > 2000) {
3313 retval = bulk_latency;
3314 } else if (packets <= 2 && bytes < 512) {
3315 retval = lowest_latency;
3318 case bulk_latency: /* 250 usec aka 4000 ints/s */
3319 if (bytes > 25000) {
3321 retval = low_latency;
3322 } else if (bytes < 1500) {
3323 retval = low_latency;
/*
 * igb_set_itr - recompute the ITR for the single-queue case using
 * igb_update_itr() on both Rx and Tx stats, then stage the new value
 * for the ISR to write (writing here would reset the hardware timer).
 */
3332 static void igb_set_itr(struct igb_adapter *adapter)
3334 struct igb_q_vector *q_vector = adapter->q_vector[0];
3336 u32 new_itr = q_vector->itr_val;
3338 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3339 if (adapter->link_speed != SPEED_1000) {
3345 adapter->rx_itr = igb_update_itr(adapter,
3347 q_vector->rx_ring->total_packets,
3348 q_vector->rx_ring->total_bytes);
3350 adapter->tx_itr = igb_update_itr(adapter,
3352 q_vector->tx_ring->total_packets,
3353 q_vector->tx_ring->total_bytes);
/* Use the more latency-sensitive of the two directions. */
3354 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3356 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3357 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3358 current_itr = low_latency;
3360 switch (current_itr) {
3361 /* counts and packets in update_itr are dependent on these numbers */
3362 case lowest_latency:
3363 new_itr = 56; /* aka 70,000 ints/sec */
3366 new_itr = 196; /* aka 20,000 ints/sec */
3369 new_itr = 980; /* aka 4,000 ints/sec */
/* Clear the measurement window for the next interval. */
3376 q_vector->rx_ring->total_bytes = 0;
3377 q_vector->rx_ring->total_packets = 0;
3378 q_vector->tx_ring->total_bytes = 0;
3379 q_vector->tx_ring->total_packets = 0;
3381 if (new_itr != q_vector->itr_val) {
3382 /* this attempts to bias the interrupt rate towards Bulk
3383 * by adding intermediate steps when interrupt rate is
3385 new_itr = new_itr > q_vector->itr_val ?
3386 max((new_itr * q_vector->itr_val) /
3387 (new_itr + (q_vector->itr_val >> 2)),
3390 /* Don't write the value here; it resets the adapter's
3391 * internal timer, and causes us to delay far longer than
3392 * we should between interrupts. Instead, we write the ITR
3393 * value at the beginning of the next interrupt so the timing
3394 * ends up being correct.
3396 q_vector->itr_val = new_itr;
3397 q_vector->set_itr = 1;
/* Per-skb transmit flags; the VLAN tag occupies the upper 16 bits. */
3403 #define IGB_TX_FLAGS_CSUM 0x00000001
3404 #define IGB_TX_FLAGS_VLAN 0x00000002
3405 #define IGB_TX_FLAGS_TSO 0x00000004
3406 #define IGB_TX_FLAGS_IPV4 0x00000008
3407 #define IGB_TX_FLAGS_TSTAMP 0x00000010
3408 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
3409 #define IGB_TX_FLAGS_VLAN_SHIFT 16
/*
 * igb_tso_adv - set up a TSO context descriptor for @skb.
 * Returns via *hdr_len the total header length the hardware replicates
 * per segment. NOTE(review): some lines are elided in this extract.
 */
3411 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3412 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3414 struct e1000_adv_tx_context_desc *context_desc;
3417 struct igb_buffer *buffer_info;
3418 u32 info = 0, tu_cmd = 0;
/* TSO rewrites headers in place, so un-share a cloned header first. */
3422 if (skb_header_cloned(skb)) {
3423 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3428 l4len = tcp_hdrlen(skb);
/* Pre-compute the pseudo-header checksum the hardware expects. */
3431 if (skb->protocol == htons(ETH_P_IP)) {
3432 struct iphdr *iph = ip_hdr(skb);
3435 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3439 } else if (skb_is_gso_v6(skb)) {
3440 ipv6_hdr(skb)->payload_len = 0;
3441 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3442 &ipv6_hdr(skb)->daddr,
3446 i = tx_ring->next_to_use;
3448 buffer_info = &tx_ring->buffer_info[i];
3449 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3450 /* VLAN MACLEN IPLEN */
3451 if (tx_flags & IGB_TX_FLAGS_VLAN)
3452 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3453 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3454 *hdr_len += skb_network_offset(skb);
3455 info |= skb_network_header_len(skb);
3456 *hdr_len += skb_network_header_len(skb);
3457 context_desc->vlan_macip_lens = cpu_to_le32(info);
3459 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3460 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3462 if (skb->protocol == htons(ETH_P_IP))
3463 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3464 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3466 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
/* MSS and L4 header length for segmentation. */
3469 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3470 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3472 /* For 82575, context index must be unique per ring. */
3473 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3474 mss_l4len_idx |= tx_ring->reg_idx << 4;
3476 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3477 context_desc->seqnum_seed = 0;
/* Context descriptors carry no data buffer, hence dma = 0. */
3479 buffer_info->time_stamp = jiffies;
3480 buffer_info->next_to_watch = i;
3481 buffer_info->dma = 0;
/* Advance next_to_use, wrapping at the end of the ring. */
3483 if (i == tx_ring->count)
3486 tx_ring->next_to_use = i;
/*
 * igb_tx_csum_adv - set up a checksum-offload context descriptor for @skb.
 * Also emitted for VLAN-tagged frames even without CHECKSUM_PARTIAL so the
 * tag reaches the hardware. NOTE(review): some lines are elided here.
 */
3491 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3492 struct sk_buff *skb, u32 tx_flags)
3494 struct e1000_adv_tx_context_desc *context_desc;
3495 struct pci_dev *pdev = tx_ring->pdev;
3496 struct igb_buffer *buffer_info;
3497 u32 info = 0, tu_cmd = 0;
3500 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3501 (tx_flags & IGB_TX_FLAGS_VLAN)) {
3502 i = tx_ring->next_to_use;
3503 buffer_info = &tx_ring->buffer_info[i];
3504 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3506 if (tx_flags & IGB_TX_FLAGS_VLAN)
3507 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3509 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3510 if (skb->ip_summed == CHECKSUM_PARTIAL)
3511 info |= skb_network_header_len(skb);
3513 context_desc->vlan_macip_lens = cpu_to_le32(info);
3515 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3517 if (skb->ip_summed == CHECKSUM_PARTIAL) {
/* Look through an 802.1Q header to find the real L3 protocol. */
3520 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3521 const struct vlan_ethhdr *vhdr =
3522 (const struct vlan_ethhdr*)skb->data;
3524 protocol = vhdr->h_vlan_encapsulated_proto;
3526 protocol = skb->protocol;
/* Select L3/L4 offload bits: TCP and SCTP over IPv4 or IPv6. */
3530 case cpu_to_be16(ETH_P_IP):
3531 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3532 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3533 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3534 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3535 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3537 case cpu_to_be16(ETH_P_IPV6):
3538 /* XXX what about other V6 headers?? */
3539 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3540 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3541 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3542 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3545 if (unlikely(net_ratelimit()))
3546 dev_warn(&pdev->dev,
3547 "partial checksum but proto=%x!\n",
3553 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3554 context_desc->seqnum_seed = 0;
/* 82575 needs a per-ring context index (see igb_tso_adv). */
3555 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3556 context_desc->mss_l4len_idx =
3557 cpu_to_le32(tx_ring->reg_idx << 4);
3559 buffer_info->time_stamp = jiffies;
3560 buffer_info->next_to_watch = i;
3561 buffer_info->dma = 0;
/* Advance next_to_use, wrapping at the end of the ring. */
3564 if (i == tx_ring->count)
3566 tx_ring->next_to_use = i;
3573 #define IGB_MAX_TXD_PWR 16
3574 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
/*
 * igb_tx_map_adv - DMA-map an skb (head + page fragments) onto the TX ring
 * @tx_ring: ring to map the buffers onto
 * @skb:     packet to map
 *
 * Maps skb->data with pci_map_single() and each page fragment with
 * pci_map_page(), recording length/timestamp/dma per igb_buffer.  On a
 * mapping failure the error path clears the failed entry and unwinds the
 * already-mapped portion via igb_unmap_and_free_tx_resource().
 * NOTE(review): the return statements are outside this visible span;
 * callers treat <= 0 as a mapping error (see igb_xmit_frame_ring_adv).
 */
3576 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3579 struct igb_buffer *buffer_info;
3580 struct pci_dev *pdev = tx_ring->pdev;
3581 unsigned int len = skb_headlen(skb);
3582 unsigned int count = 0, i;
3585 i = tx_ring->next_to_use;
/* map the linear (head) portion of the skb */
3587 buffer_info = &tx_ring->buffer_info[i];
3588 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3589 buffer_info->length = len;
3590 /* set time_stamp *before* dma to help avoid a possible race */
3591 buffer_info->time_stamp = jiffies;
3592 buffer_info->next_to_watch = i;
3593 buffer_info->dma = pci_map_single(pdev, skb->data, len,
3595 if (pci_dma_mapping_error(pdev, buffer_info->dma))
/* map each paged fragment */
3598 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3599 struct skb_frag_struct *frag;
3603 if (i == tx_ring->count)
3606 frag = &skb_shinfo(skb)->frags[f];
3609 buffer_info = &tx_ring->buffer_info[i];
3610 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3611 buffer_info->length = len;
3612 buffer_info->time_stamp = jiffies;
3613 buffer_info->next_to_watch = i;
3614 buffer_info->mapped_as_page = true;
3615 buffer_info->dma = pci_map_page(pdev,
3620 if (pci_dma_mapping_error(pdev, buffer_info->dma))
/* last descriptor owns the skb; first descriptor watches the last */
3625 tx_ring->buffer_info[i].skb = skb;
3626 tx_ring->buffer_info[i].gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3627 tx_ring->buffer_info[first].next_to_watch = i;
/* error unwind path */
3632 dev_err(&pdev->dev, "TX DMA map failed\n");
3634 /* clear timestamp and dma mappings for failed buffer_info mapping */
3635 buffer_info->dma = 0;
3636 buffer_info->time_stamp = 0;
3637 buffer_info->length = 0;
3638 buffer_info->next_to_watch = 0;
3639 buffer_info->mapped_as_page = false;
3641 /* clear timestamp and dma mappings for remaining portion of packet */
3646 buffer_info = &tx_ring->buffer_info[i];
3647 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
/*
 * igb_tx_queue_adv - write the data descriptors and kick the hardware
 * @tx_ring:  ring the packet was mapped onto
 * @tx_flags: IGB_TX_FLAGS_* describing offloads for this packet
 * @count:    number of mapped buffers to turn into descriptors
 * @paylen:   total packet length (skb->len)
 *
 * Builds cmd_type_len/olinfo_status from the offload flags, fills one
 * advanced data descriptor per mapped buffer, marks the final descriptor
 * (EOP etc. via IGB_ADVTXD_DCMD), then advances next_to_use and writes
 * the tail register to notify the hardware.
 */
3653 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3654 u32 tx_flags, int count, u32 paylen,
3657 union e1000_adv_tx_desc *tx_desc;
3658 struct igb_buffer *buffer_info;
3659 u32 olinfo_status = 0, cmd_type_len;
3660 unsigned int i = tx_ring->next_to_use;
/* base command: advanced data descriptor, insert FCS, extended format */
3662 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3663 E1000_ADVTXD_DCMD_DEXT);
3665 if (tx_flags & IGB_TX_FLAGS_VLAN)
3666 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3668 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3669 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3671 if (tx_flags & IGB_TX_FLAGS_TSO) {
3672 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3674 /* insert tcp checksum */
3675 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3677 /* insert ip checksum */
3678 if (tx_flags & IGB_TX_FLAGS_IPV4)
3679 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3681 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3682 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
/* rings that need a context index (82575) encode it in olinfo */
3685 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3686 (tx_flags & (IGB_TX_FLAGS_CSUM |
3688 IGB_TX_FLAGS_VLAN)))
3689 olinfo_status |= tx_ring->reg_idx << 4;
/* payload length excludes the offloaded header bytes */
3691 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
/* one data descriptor per mapped buffer */
3694 buffer_info = &tx_ring->buffer_info[i];
3695 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3696 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3697 tx_desc->read.cmd_type_len =
3698 cpu_to_le32(cmd_type_len | buffer_info->length);
3699 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3702 if (i == tx_ring->count)
3704 } while (count > 0);
/* final descriptor gets the end-of-packet command bits */
3706 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3707 /* Force memory writes to complete before letting h/w
3708 * know there are new descriptors to fetch. (Only
3709 * applicable for weak-ordered memory model archs,
3710 * such as IA-64). */
3713 tx_ring->next_to_use = i;
3714 writel(i, tx_ring->tail);
3715 /* we need this if more than one processor can write to our tail
3716 * at a time, it syncronizes IO on IA64/Altix systems */
/*
 * __igb_maybe_stop_tx - stop the subqueue, then re-check for free space
 * @tx_ring: ring that may not have room for the next packet
 * @size:    number of descriptors needed
 *
 * Slow path of igb_maybe_stop_tx(): stops the queue, then re-checks
 * igb_desc_unused() in case another CPU freed descriptors in the window
 * between the caller's check and the stop; if room appeared, the queue
 * is woken again and restart_queue is counted.
 */
3720 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3722 struct net_device *netdev = tx_ring->netdev;
3724 netif_stop_subqueue(netdev, tx_ring->queue_index);
3726 /* Herbert's original patch had:
3727 * smp_mb__after_netif_stop_queue();
3728 * but since that doesn't exist yet, just open code it. */
3731 /* We need to check again in a case another CPU has just
3732 * made room available. */
3733 if (igb_desc_unused(tx_ring) < size)
/* a reclaim raced with us; undo the stop and count the restart */
3737 netif_wake_subqueue(netdev, tx_ring->queue_index);
3738 tx_ring->tx_stats.restart_queue++;
/*
 * igb_maybe_stop_tx - fast-path check that the ring can hold @size descriptors
 *
 * Returns immediately when enough descriptors are free; otherwise falls
 * through to the stop-and-recheck slow path in __igb_maybe_stop_tx().
 */
3742 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3744 if (igb_desc_unused(tx_ring) >= size)
3746 return __igb_maybe_stop_tx(tx_ring, size);
/*
 * igb_xmit_frame_ring_adv - transmit one skb on a specific TX ring
 * @skb:     packet to transmit
 * @tx_ring: ring chosen by the caller
 *
 * Hot transmit path: checks ring space, collects offload flags
 * (HW timestamp, VLAN, IPv4), programs TSO/checksum context descriptors,
 * DMA-maps the skb and queues the data descriptors.  On mapping failure
 * the descriptor queue is rewound and the skb is dropped (NETDEV_TX_OK).
 */
3749 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3750 struct igb_ring *tx_ring)
3752 struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3757 union skb_shared_tx *shtx = skb_tx(skb);
3759 /* need: 1 descriptor per page,
3760 * + 2 desc gap to keep tail from touching head,
3761 * + 1 desc for skb->data,
3762 * + 1 desc for context descriptor,
3763 * otherwise try next time */
3764 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3765 /* this is a hard error */
3766 return NETDEV_TX_BUSY;
/* hardware TX timestamp requested for this skb */
3769 if (unlikely(shtx->hardware)) {
3770 shtx->in_progress = 1;
3771 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3774 if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3775 tx_flags |= IGB_TX_FLAGS_VLAN;
3776 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3779 if (skb->protocol == htons(ETH_P_IP))
3780 tx_flags |= IGB_TX_FLAGS_IPV4;
/* remember the first descriptor so a failed map can rewind to it */
3782 first = tx_ring->next_to_use;
3783 if (skb_is_gso(skb)) {
3784 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
/* TSO setup failed: drop the packet */
3787 dev_kfree_skb_any(skb);
3788 return NETDEV_TX_OK;
3793 tx_flags |= IGB_TX_FLAGS_TSO;
3794 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3795 (skb->ip_summed == CHECKSUM_PARTIAL))
3796 tx_flags |= IGB_TX_FLAGS_CSUM;
/*
3799 * count reflects descriptors mapped, if 0 or less then mapping error
3800 * has occured and we need to rewind the descriptor queue
 */
3802 count = igb_tx_map_adv(tx_ring, skb, first);
3804 dev_kfree_skb_any(skb);
3805 tx_ring->buffer_info[first].time_stamp = 0;
3806 tx_ring->next_to_use = first;
3807 return NETDEV_TX_OK;
3810 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3812 /* Make sure there is space in the ring for the next send. */
3813 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3815 return NETDEV_TX_OK;
/*
 * igb_xmit_frame_adv - net_device hard_start_xmit entry point
 * @skb:    packet handed down by the stack
 * @netdev: the igb net device
 *
 * Drops the packet if the interface is going down or the skb is empty,
 * picks a TX ring from the skb's queue mapping, and hands off to
 * igb_xmit_frame_ring_adv().
 */
3818 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3819 struct net_device *netdev)
3821 struct igb_adapter *adapter = netdev_priv(netdev);
3822 struct igb_ring *tx_ring;
3825 if (test_bit(__IGB_DOWN, &adapter->state)) {
3826 dev_kfree_skb_any(skb);
3827 return NETDEV_TX_OK;
3830 if (skb->len <= 0) {
3831 dev_kfree_skb_any(skb);
3832 return NETDEV_TX_OK;
/* mask assumes IGB_ABS_MAX_TX_QUEUES is a power of two */
3835 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3836 tx_ring = adapter->multi_tx_table[r_idx];
3838 /* This goes back to the question of how to logically map a tx queue
3839 * to a flow. Right now, performance is impacted slightly negatively
3840 * if using multiple tx queues. If the stack breaks away from a
3841 * single qdisc implementation, we can look at this again. */
3842 return igb_xmit_frame_ring_adv(skb, tx_ring);
/**
3846 * igb_tx_timeout - Respond to a Tx Hang
3847 * @netdev: network interface device structure
 *
 * Counts the hang, requests a global device reset on 82580 parts, and
 * defers the actual reset to the reset_task workqueue (resets must not
 * run in interrupt context).
 */
3849 static void igb_tx_timeout(struct net_device *netdev)
3851 struct igb_adapter *adapter = netdev_priv(netdev);
3852 struct e1000_hw *hw = &adapter->hw;
3854 /* Do the reset outside of interrupt context */
3855 adapter->tx_timeout_count++;
3857 if (hw->mac.type == e1000_82580)
3858 hw->dev_spec._82575.global_device_reset = true;
3860 schedule_work(&adapter->reset_task);
/* re-arm the queue interrupts (all EIMS bits except the "other" cause) */
3862 (adapter->eims_enable_mask & ~adapter->eims_other));
/*
 * igb_reset_task - workqueue handler that performs the deferred reset
 * scheduled by igb_tx_timeout() / igb_msix_other().
 */
3865 static void igb_reset_task(struct work_struct *work)
3867 struct igb_adapter *adapter;
3868 adapter = container_of(work, struct igb_adapter, reset_task);
3870 igb_reinit_locked(adapter);
/**
3874 * igb_get_stats - Get System Network Statistics
3875 * @netdev: network interface device structure
 *
3877 * Returns the address of the device statistics structure.
3878 * The statistics are actually updated from the timer callback.
 */
3880 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3882 /* only return the current stats */
3883 return &netdev->stats;
/**
3887 * igb_change_mtu - Change the Maximum Transfer Unit
3888 * @netdev: network interface device structure
3889 * @new_mtu: new value for maximum frame size
 *
3891 * Returns 0 on success, negative on failure
 *
 * Validates the requested MTU against the 68-byte minimum and jumbo
 * frame limits, serializes against other resets via __IGB_RESETTING,
 * recomputes the per-ring RX buffer length, and restarts the interface
 * if it was running.
 */
3893 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3895 struct igb_adapter *adapter = netdev_priv(netdev);
3896 struct pci_dev *pdev = adapter->pdev;
3897 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3898 u32 rx_buffer_len, i;
3900 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3901 dev_err(&pdev->dev, "Invalid MTU setting\n");
3905 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3906 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
/* wait for any in-progress reset to finish before reconfiguring */
3910 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3913 /* igb_down has a dependency on max_frame_size */
3914 adapter->max_frame_size = max_frame;
3916 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3917 * means we reserve 2 more, this pushes us to allocate from the next
3919 * i.e. RXBUFFER_2048 --> size-4096 slab
 */
3922 if (max_frame <= IGB_RXBUFFER_1024)
3923 rx_buffer_len = IGB_RXBUFFER_1024;
3924 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3925 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
/* larger frames use small header buffers with page-based receive */
3927 rx_buffer_len = IGB_RXBUFFER_128;
3929 if (netif_running(netdev))
3932 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3933 netdev->mtu, new_mtu);
3934 netdev->mtu = new_mtu;
3936 for (i = 0; i < adapter->num_rx_queues; i++)
3937 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
3939 if (netif_running(netdev))
3944 clear_bit(__IGB_RESETTING, &adapter->state);
/**
3950 * igb_update_stats - Update the board statistics counters
3951 * @adapter: board private structure
 *
 * Aggregates per-ring software TX/RX byte/packet counts into the netdev
 * stats, then reads the hardware statistics registers (which clear on
 * read) into the adapter's accumulators and derives the OS-visible
 * error counters from them.  Skipped entirely while the link is down or
 * the PCI channel is offline, since register reads would be unreliable.
 */
3954 void igb_update_stats(struct igb_adapter *adapter)
3956 struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3957 struct e1000_hw *hw = &adapter->hw;
3958 struct pci_dev *pdev = adapter->pdev;
3964 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
/*
3967 * Prevent stats update while adapter is being reset, or if the pci
3968 * connection is down.
 */
3970 if (adapter->link_speed == 0)
3972 if (pci_channel_offline(pdev))
/* gather per-RX-ring software stats and HW queue drop counters */
3977 for (i = 0; i < adapter->num_rx_queues; i++) {
3978 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3979 struct igb_ring *ring = adapter->rx_ring[i];
3980 ring->rx_stats.drops += rqdpc_tmp;
3981 net_stats->rx_fifo_errors += rqdpc_tmp;
3982 bytes += ring->rx_stats.bytes;
3983 packets += ring->rx_stats.packets;
3986 net_stats->rx_bytes = bytes;
3987 net_stats->rx_packets = packets;
/* gather per-TX-ring software stats */
3991 for (i = 0; i < adapter->num_tx_queues; i++) {
3992 struct igb_ring *ring = adapter->tx_ring[i];
3993 bytes += ring->tx_stats.bytes;
3994 packets += ring->tx_stats.packets;
3996 net_stats->tx_bytes = bytes;
3997 net_stats->tx_packets = packets;
3999 /* read stats registers */
4000 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4001 adapter->stats.gprc += rd32(E1000_GPRC);
4002 adapter->stats.gorc += rd32(E1000_GORCL);
4003 rd32(E1000_GORCH); /* clear GORCL */
4004 adapter->stats.bprc += rd32(E1000_BPRC);
4005 adapter->stats.mprc += rd32(E1000_MPRC);
4006 adapter->stats.roc += rd32(E1000_ROC);
4008 adapter->stats.prc64 += rd32(E1000_PRC64);
4009 adapter->stats.prc127 += rd32(E1000_PRC127);
4010 adapter->stats.prc255 += rd32(E1000_PRC255);
4011 adapter->stats.prc511 += rd32(E1000_PRC511);
4012 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4013 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4014 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4015 adapter->stats.sec += rd32(E1000_SEC);
4017 mpc = rd32(E1000_MPC);
4018 adapter->stats.mpc += mpc;
4019 net_stats->rx_fifo_errors += mpc;
4020 adapter->stats.scc += rd32(E1000_SCC);
4021 adapter->stats.ecol += rd32(E1000_ECOL);
4022 adapter->stats.mcc += rd32(E1000_MCC);
4023 adapter->stats.latecol += rd32(E1000_LATECOL);
4024 adapter->stats.dc += rd32(E1000_DC);
4025 adapter->stats.rlec += rd32(E1000_RLEC);
4026 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4027 adapter->stats.xontxc += rd32(E1000_XONTXC);
4028 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4029 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4030 adapter->stats.fcruc += rd32(E1000_FCRUC);
4031 adapter->stats.gptc += rd32(E1000_GPTC);
4032 adapter->stats.gotc += rd32(E1000_GOTCL);
4033 rd32(E1000_GOTCH); /* clear GOTCL */
4034 adapter->stats.rnbc += rd32(E1000_RNBC);
4035 adapter->stats.ruc += rd32(E1000_RUC);
4036 adapter->stats.rfc += rd32(E1000_RFC);
4037 adapter->stats.rjc += rd32(E1000_RJC);
4038 adapter->stats.tor += rd32(E1000_TORH);
4039 adapter->stats.tot += rd32(E1000_TOTH);
4040 adapter->stats.tpr += rd32(E1000_TPR);
4042 adapter->stats.ptc64 += rd32(E1000_PTC64);
4043 adapter->stats.ptc127 += rd32(E1000_PTC127);
4044 adapter->stats.ptc255 += rd32(E1000_PTC255);
4045 adapter->stats.ptc511 += rd32(E1000_PTC511);
4046 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4047 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4049 adapter->stats.mptc += rd32(E1000_MPTC);
4050 adapter->stats.bptc += rd32(E1000_BPTC);
4052 adapter->stats.tpt += rd32(E1000_TPT);
4053 adapter->stats.colc += rd32(E1000_COLC);
4055 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4056 /* read internal phy specific stats */
4057 reg = rd32(E1000_CTRL_EXT);
4058 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4059 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4060 adapter->stats.tncrs += rd32(E1000_TNCRS);
4063 adapter->stats.tsctc += rd32(E1000_TSCTC);
4064 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4066 adapter->stats.iac += rd32(E1000_IAC);
4067 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4068 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4069 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4070 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4071 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4072 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4073 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4074 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4076 /* Fill out the OS statistics structure */
4077 net_stats->multicast = adapter->stats.mprc;
4078 net_stats->collisions = adapter->stats.colc;
/* Rx error counters */
4082 /* RLEC on some newer hardware can be incorrect so build
4083 * our own version based on RUC and ROC */
4084 net_stats->rx_errors = adapter->stats.rxerrc +
4085 adapter->stats.crcerrs + adapter->stats.algnerrc +
4086 adapter->stats.ruc + adapter->stats.roc +
4087 adapter->stats.cexterr;
4088 net_stats->rx_length_errors = adapter->stats.ruc +
4090 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4091 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4092 net_stats->rx_missed_errors = adapter->stats.mpc;
/* Tx error counters */
4095 net_stats->tx_errors = adapter->stats.ecol +
4096 adapter->stats.latecol;
4097 net_stats->tx_aborted_errors = adapter->stats.ecol;
4098 net_stats->tx_window_errors = adapter->stats.latecol;
4099 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4101 /* Tx Dropped needs to be maintained elsewhere */
/* PHY idle error counter (copper at gigabit only) */
4104 if (hw->phy.media_type == e1000_media_type_copper) {
4105 if ((adapter->link_speed == SPEED_1000) &&
4106 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4107 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4108 adapter->phy_stats.idle_errors += phy_tmp;
4112 /* Management Stats */
4113 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4114 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4115 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
/*
 * igb_msix_other - MSI-X handler for non-queue ("other") interrupt causes
 * @irq:  interrupt number (unused)
 * @data: the igb_adapter
 *
 * Handles device-reset assertion, DMA-out-of-sync, VF mailbox events and
 * link status changes, then re-enables the relevant IMS/EIMS bits.
 */
4118 static irqreturn_t igb_msix_other(int irq, void *data)
4120 struct igb_adapter *adapter = data;
4121 struct e1000_hw *hw = &adapter->hw;
4122 u32 icr = rd32(E1000_ICR);
4123 /* reading ICR causes bit 31 of EICR to be cleared */
4125 if (icr & E1000_ICR_DRSTA)
4126 schedule_work(&adapter->reset_task);
4128 if (icr & E1000_ICR_DOUTSYNC) {
4129 /* HW is reporting DMA is out of sync */
4130 adapter->stats.doosync++;
4133 /* Check for a mailbox event */
4134 if (icr & E1000_ICR_VMMB)
4135 igb_msg_task(adapter);
4137 if (icr & E1000_ICR_LSC) {
4138 hw->mac.get_link_status = 1;
4139 /* guard against interrupt when we're going down */
4140 if (!test_bit(__IGB_DOWN, &adapter->state))
4141 mod_timer(&adapter->watchdog_timer, jiffies + 1);
/* re-arm: include the mailbox interrupt only when VFs are in use */
4144 if (adapter->vfs_allocated_count)
4145 wr32(E1000_IMS, E1000_IMS_LSC |
4147 E1000_IMS_DOUTSYNC);
4149 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4150 wr32(E1000_EIMS, adapter->eims_other);
/*
 * igb_write_itr - push a pending interrupt throttle rate to the hardware
 * @q_vector: queue vector whose EITR register should be updated
 *
 * No-op unless set_itr is pending.  82575 parts need the value mirrored
 * into the upper 16 bits; later parts set bit 27 instead (per-vector
 * counter behavior).
 */
4155 static void igb_write_itr(struct igb_q_vector *q_vector)
4157 struct igb_adapter *adapter = q_vector->adapter;
4158 u32 itr_val = q_vector->itr_val & 0x7FFC;
4160 if (!q_vector->set_itr)
4166 if (adapter->hw.mac.type == e1000_82575)
4167 itr_val |= itr_val << 16;
4169 itr_val |= 0x8000000;
4171 writel(itr_val, q_vector->itr_register);
4172 q_vector->set_itr = 0;
/*
 * igb_msix_ring - MSI-X handler for a TX/RX queue vector: update the ITR
 * and schedule NAPI to do the real work.
 */
4175 static irqreturn_t igb_msix_ring(int irq, void *data)
4177 struct igb_q_vector *q_vector = data;
4179 /* Write the ITR value calculated from the previous interrupt. */
4180 igb_write_itr(q_vector);
4182 napi_schedule(&q_vector->napi);
4187 #ifdef CONFIG_IGB_DCA
/*
 * igb_update_dca - retarget a queue vector's DCA hints at the current CPU
 * @q_vector: vector whose TX/RX DCA_CTRL registers may need updating
 *
 * Skips the register writes when the vector is already targeted at this
 * CPU.  The CPUID field layout differs between 82575 and later parts,
 * hence the two encodings.
 */
4188 static void igb_update_dca(struct igb_q_vector *q_vector)
4190 struct igb_adapter *adapter = q_vector->adapter;
4191 struct e1000_hw *hw = &adapter->hw;
4192 int cpu = get_cpu();
4194 if (q_vector->cpu == cpu)
4197 if (q_vector->tx_ring) {
4198 int q = q_vector->tx_ring->reg_idx;
4199 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4200 if (hw->mac.type == e1000_82575) {
4201 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4202 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4204 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4205 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4206 E1000_DCA_TXCTRL_CPUID_SHIFT;
4208 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4209 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4211 if (q_vector->rx_ring) {
4212 int q = q_vector->rx_ring->reg_idx;
4213 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4214 if (hw->mac.type == e1000_82575) {
4215 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4216 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4218 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4219 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4220 E1000_DCA_RXCTRL_CPUID_SHIFT;
/* RX side also prefetches header and payload into the CPU cache */
4222 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4223 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4224 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4225 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4227 q_vector->cpu = cpu;
/*
 * igb_setup_dca - enable CB2-mode DCA and (re)target every queue vector
 * at its current CPU.  No-op unless IGB_FLAG_DCA_ENABLED is set.
 */
4232 static void igb_setup_dca(struct igb_adapter *adapter)
4234 struct e1000_hw *hw = &adapter->hw;
4237 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4240 /* Always use CB2 mode, difference is masked in the CB driver. */
4241 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
/* cpu = -1 forces igb_update_dca() to reprogram each vector */
4243 for (i = 0; i < adapter->num_q_vectors; i++) {
4244 adapter->q_vector[i]->cpu = -1;
4245 igb_update_dca(adapter->q_vector[i]);
/*
 * __igb_notify_dca - per-device handler for DCA provider add/remove events
 * @dev:  device whose driver data is the netdev
 * @data: pointer to the event code (unsigned long)
 *
 * On provider add, registers this device as a DCA requester and enables
 * DCA; on provider remove, unregisters and disables it.
 */
4249 static int __igb_notify_dca(struct device *dev, void *data)
4251 struct net_device *netdev = dev_get_drvdata(dev);
4252 struct igb_adapter *adapter = netdev_priv(netdev);
4253 struct pci_dev *pdev = adapter->pdev;
4254 struct e1000_hw *hw = &adapter->hw;
4255 unsigned long event = *(unsigned long *)data;
4258 case DCA_PROVIDER_ADD:
4259 /* if already enabled, don't do it again */
4260 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4262 if (dca_add_requester(dev) == 0) {
4263 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4264 dev_info(&pdev->dev, "DCA enabled\n");
4265 igb_setup_dca(adapter);
4268 /* Fall Through since DCA is disabled. */
4269 case DCA_PROVIDER_REMOVE:
4270 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4271 /* without this a class_device is left
4272 * hanging around in the sysfs model */
4273 dca_remove_requester(dev);
4274 dev_info(&pdev->dev, "DCA disabled\n");
4275 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4276 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
/*
 * igb_notify_dca - DCA notifier entry point; fans the event out to every
 * igb device via __igb_notify_dca().
 */
4284 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4289 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4292 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4294 #endif /* CONFIG_IGB_DCA */
/*
 * igb_ping_all_vfs - send a PF control (ping) message to every allocated VF,
 * adding the clear-to-send bit for VFs that have completed reset.
 */
4296 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4298 struct e1000_hw *hw = &adapter->hw;
4302 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4303 ping = E1000_PF_CONTROL_MSG;
4304 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4305 ping |= E1000_VT_MSGTYPE_CTS;
4306 igb_write_mbx(hw, &ping, 1, i);
4310 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4312 struct e1000_hw *hw = &adapter->hw;
4313 u32 vmolr = rd32(E1000_VMOLR(vf));
4314 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4316 vf_data->flags |= ~(IGB_VF_FLAG_UNI_PROMISC |
4317 IGB_VF_FLAG_MULTI_PROMISC);
4318 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4320 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4321 vmolr |= E1000_VMOLR_MPME;
4322 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4325 * if we have hashes and we are clearing a multicast promisc
4326 * flag we need to write the hashes to the MTA as this step
4327 * was previously skipped
4329 if (vf_data->num_vf_mc_hashes > 30) {
4330 vmolr |= E1000_VMOLR_MPME;
4331 } else if (vf_data->num_vf_mc_hashes) {
4333 vmolr |= E1000_VMOLR_ROMPE;
4334 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4335 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4339 wr32(E1000_VMOLR(vf), vmolr);
4341 /* there are flags left unprocessed, likely not supported */
4342 if (*msgbuf & E1000_VT_MSGINFO_MASK)
/*
 * igb_set_vf_multicasts - store a VF's multicast hash list from a mailbox
 * message and refresh the RX filters
 * @adapter: board private structure
 * @msgbuf:  mailbox message; count is in msgbuf[0], hashes follow
 * @vf:      index of the requesting VF
 */
4349 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4350 u32 *msgbuf, u32 vf)
4352 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4353 u16 *hash_list = (u16 *)&msgbuf[1];
4354 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4357 /* salt away the number of multicast addresses assigned
4358 * to this VF for later use to restore when the PF multi cast
 */
4361 vf_data->num_vf_mc_hashes = n;
4363 /* only up to 30 hash values supported */
4367 /* store the hashes for later use */
4368 for (i = 0; i < n; i++)
4369 vf_data->vf_mc_hashes[i] = hash_list[i];
4371 /* Flush and reset the mta with the new values */
4372 igb_set_rx_mode(adapter->netdev);
/*
 * igb_restore_vf_multicasts - rewrite every VF's multicast filters after
 * the MTA has been flushed (e.g. by a PF rx_mode change).  VFs with more
 * than 30 hashes, or with multicast promisc requested, get MPME instead.
 */
4377 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4379 struct e1000_hw *hw = &adapter->hw;
4380 struct vf_data_storage *vf_data;
4383 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4384 u32 vmolr = rd32(E1000_VMOLR(i));
4385 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4387 vf_data = &adapter->vf_data[i];
4389 if ((vf_data->num_vf_mc_hashes > 30) ||
4390 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4391 vmolr |= E1000_VMOLR_MPME;
4392 } else if (vf_data->num_vf_mc_hashes) {
4393 vmolr |= E1000_VMOLR_ROMPE;
4394 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4395 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4397 wr32(E1000_VMOLR(i), vmolr);
/*
 * igb_clear_vf_vfta - remove a VF from every VLVF pool it belongs to
 * @adapter: board private structure
 * @vf:      VF index whose pool bit is cleared
 *
 * Walks the VLVF array, clears this VF's pool-select bit in each entry,
 * and when an entry's pool becomes empty also removes the VLAN ID from
 * the VFTA and disables the entry.
 */
4401 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4403 struct e1000_hw *hw = &adapter->hw;
4404 u32 pool_mask, reg, vid;
4407 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4409 /* Find the vlan filter for this id */
4410 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4411 reg = rd32(E1000_VLVF(i));
4413 /* remove the vf from the pool */
4416 /* if pool is empty then remove entry from vfta */
4417 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4418 (reg & E1000_VLVF_VLANID_ENABLE)) {
4420 vid = reg & E1000_VLVF_VLANID_MASK;
4421 igb_vfta_set(hw, vid, false);
4424 wr32(E1000_VLVF(i), reg);
4427 adapter->vf_data[vf].vlans_enabled = 0;
/*
 * igb_vlvf_set - add or remove a (vlan, vf) membership in the VLVF table
 * @adapter: board private structure
 * @vid:     VLAN ID to add/remove
 * @add:     true to add the VF to the VLAN's pool, false to remove
 * @vf:      VF index (may also be the PF's own pool index)
 *
 * Only meaningful on 82576+ with VFs allocated.  Adding finds (or
 * allocates) the VLVF entry for @vid, sets the VF's pool bit, and grows
 * the VF's RLPML allowance by 4 bytes for the first enabled VLAN.
 * Removing clears the pool bit, frees the entry when the pool empties,
 * and shrinks RLPML when the VF's last VLAN goes away.
 */
4430 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4432 struct e1000_hw *hw = &adapter->hw;
4435 /* The vlvf table only exists on 82576 hardware and newer */
4436 if (hw->mac.type < e1000_82576)
4439 /* we only need to do this if VMDq is enabled */
4440 if (!adapter->vfs_allocated_count)
4443 /* Find the vlan filter for this id */
4444 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4445 reg = rd32(E1000_VLVF(i));
4446 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4447 vid == (reg & E1000_VLVF_VLANID_MASK))
4452 if (i == E1000_VLVF_ARRAY_SIZE) {
4453 /* Did not find a matching VLAN ID entry that was
4454 * enabled. Search for a free filter entry, i.e.
4455 * one without the enable bit set
 */
4457 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4458 reg = rd32(E1000_VLVF(i));
4459 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4463 if (i < E1000_VLVF_ARRAY_SIZE) {
4464 /* Found an enabled/available entry */
4465 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4467 /* if !enabled we need to set this up in vfta */
4468 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4469 /* add VID to filter table */
4470 igb_vfta_set(hw, vid, true);
4471 reg |= E1000_VLVF_VLANID_ENABLE;
4473 reg &= ~E1000_VLVF_VLANID_MASK;
4475 wr32(E1000_VLVF(i), reg);
4477 /* do not modify RLPML for PF devices */
4478 if (vf >= adapter->vfs_allocated_count)
/* first VLAN for this VF: grow its max receive packet length */
4481 if (!adapter->vf_data[vf].vlans_enabled) {
4483 reg = rd32(E1000_VMOLR(vf));
4484 size = reg & E1000_VMOLR_RLPML_MASK;
4486 reg &= ~E1000_VMOLR_RLPML_MASK;
4488 wr32(E1000_VMOLR(vf), reg);
4491 adapter->vf_data[vf].vlans_enabled++;
/* removal path */
4495 if (i < E1000_VLVF_ARRAY_SIZE) {
4496 /* remove vf from the pool */
4497 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4498 /* if pool is empty then remove entry from vfta */
4499 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4501 igb_vfta_set(hw, vid, false);
4503 wr32(E1000_VLVF(i), reg);
4505 /* do not modify RLPML for PF devices */
4506 if (vf >= adapter->vfs_allocated_count)
4509 adapter->vf_data[vf].vlans_enabled--;
/* last VLAN removed: shrink the max receive packet length again */
4510 if (!adapter->vf_data[vf].vlans_enabled) {
4512 reg = rd32(E1000_VMOLR(vf));
4513 size = reg & E1000_VMOLR_RLPML_MASK;
4515 reg &= ~E1000_VMOLR_RLPML_MASK;
4517 wr32(E1000_VMOLR(vf), reg);
/*
 * igb_set_vmvir - program (or clear, when vid is 0) the default VLAN
 * insertion register for a VF.
 */
4524 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4526 struct e1000_hw *hw = &adapter->hw;
4529 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4531 wr32(E1000_VMVIR(vf), 0);
/*
 * igb_ndo_set_vf_vlan - ndo callback: set or clear a VF's port VLAN
 * @netdev: the PF net device
 * @vf:     VF index
 * @vlan:   VLAN ID (0 clears the port VLAN); must be <= 4095
 * @qos:    priority bits; must be <= 7
 *
 * Setting programs the VLVF/VMVIR and records pf_vlan/pf_qos; clearing
 * (vlan == 0) removes the previous filter and resets the stored values.
 */
4534 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4535 int vf, u16 vlan, u8 qos)
4538 struct igb_adapter *adapter = netdev_priv(netdev);
4540 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4543 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4546 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4547 igb_set_vmolr(adapter, vf, !vlan);
4548 adapter->vf_data[vf].pf_vlan = vlan;
4549 adapter->vf_data[vf].pf_qos = qos;
4550 dev_info(&adapter->pdev->dev,
4551 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4552 if (test_bit(__IGB_DOWN, &adapter->state)) {
4553 dev_warn(&adapter->pdev->dev,
4554 "The VF VLAN has been set,"
4555 " but the PF device is not up.\n");
4556 dev_warn(&adapter->pdev->dev,
4557 "Bring the PF device up before"
4558 " attempting to use the VF device.\n");
/* clearing path: remove the previous port VLAN and reset stored state */
4561 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4563 igb_set_vmvir(adapter, vlan, vf);
4564 igb_set_vmolr(adapter, vf, true);
4565 adapter->vf_data[vf].pf_vlan = 0;
4566 adapter->vf_data[vf].pf_qos = 0;
/*
 * igb_set_vf_vlan - handle a VF's mailbox VLAN add/remove request by
 * decoding the add flag and VLAN ID and forwarding to igb_vlvf_set().
 */
4572 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4574 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4575 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4577 return igb_vlvf_set(adapter, vid, add, vf);
/*
 * igb_vf_reset - restore a VF's offloads, VLANs and multicast state to
 * their defaults (common work for both reset events and reset messages)
 * @adapter: board private structure
 * @vf:      VF index being reset
 *
 * If an administrator assigned a port VLAN (pf_vlan), it is reapplied
 * after the VLAN filters are cleared.
 */
4580 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
/* clear flags */
4583 adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4584 adapter->vf_data[vf].last_nack = jiffies;
4586 /* reset offloads to defaults */
4587 igb_set_vmolr(adapter, vf, true);
4589 /* reset vlans for device */
4590 igb_clear_vf_vfta(adapter, vf);
4591 if (adapter->vf_data[vf].pf_vlan)
4592 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4593 adapter->vf_data[vf].pf_vlan,
4594 adapter->vf_data[vf].pf_qos);
4596 igb_clear_vf_vfta(adapter, vf);
4598 /* reset multicast table array for vf */
4599 adapter->vf_data[vf].num_vf_mc_hashes = 0;
4601 /* Flush and reset the mta with the new values */
4602 igb_set_rx_mode(adapter->netdev);
/*
 * igb_vf_reset_event - handle a VF function-level reset detected via the
 * mailbox: regenerate the MAC (unless the PF pinned one) and reset state.
 */
4605 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4607 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4609 /* generate a new mac address as we were hotplug removed/added */
4610 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4611 random_ether_addr(vf_mac);
4613 /* process remaining reset events */
4614 igb_vf_reset(adapter, vf);
/*
 * igb_vf_reset_msg - complete a VF-initiated reset handshake
 * @adapter: board private structure
 * @vf:      index of the VF that sent E1000_VF_RESET
 *
 * Resets the VF's state, reprograms its MAC into a dedicated RAR entry
 * (allocated from the top of the RAR table), re-enables its TX/RX queue
 * enables, marks it clear-to-send, and replies with ACK + MAC address.
 */
4617 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4619 struct e1000_hw *hw = &adapter->hw;
4620 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4621 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4623 u8 *addr = (u8 *)(&msgbuf[1]);
4625 /* process all the same items cleared in a function level reset */
4626 igb_vf_reset(adapter, vf);
4628 /* set vf mac address */
4629 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4631 /* enable transmit and receive for vf */
4632 reg = rd32(E1000_VFTE);
4633 wr32(E1000_VFTE, reg | (1 << vf));
4634 reg = rd32(E1000_VFRE);
4635 wr32(E1000_VFRE, reg | (1 << vf));
4637 adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4639 /* reply to reset with ack and vf mac address */
4640 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4641 memcpy(addr, vf_mac, 6);
4642 igb_write_mbx(hw, msgbuf, 3, vf);
/*
 * igb_set_vf_mac_addr - handle a VF's SET_MAC_ADDR mailbox request;
 * applies the address only if it is a valid unicast ethernet address.
 */
4645 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4647 unsigned char *addr = (char *)&msg[1];
4650 if (is_valid_ether_addr(addr))
4651 err = igb_set_vf_mac(adapter, vf, addr);
/*
 * igb_rcv_ack_from_vf - respond to an ACK from a VF that is not yet
 * clear-to-send by NACKing it, rate-limited to once per 2 seconds.
 */
4656 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4658 struct e1000_hw *hw = &adapter->hw;
4659 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4660 u32 msg = E1000_VT_MSGTYPE_NACK;
4662 /* if device isn't clear to send it shouldn't be reading either */
4663 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4664 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4665 igb_write_mbx(hw, &msg, 1, vf);
4666 vf_data->last_nack = jiffies;
/*
 * igb_rcv_msg_from_vf - read and dispatch one mailbox message from a VF
 * @adapter: board private structure
 * @vf:      index of the VF with a pending message
 *
 * Reads the message, handles the reset handshake first (a VF may not
 * configure anything until it has completed a reset), then dispatches
 * SET_MAC_ADDR / SET_PROMISC / SET_MULTICAST / SET_LPE / SET_VLAN
 * handlers and replies with ACK or NACK according to the result.
 */
4670 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4672 struct pci_dev *pdev = adapter->pdev;
4673 u32 msgbuf[E1000_VFMAILBOX_SIZE];
4674 struct e1000_hw *hw = &adapter->hw;
4675 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4678 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4681 /* if receive failed revoke VF CTS stats and restart init */
4682 dev_err(&pdev->dev, "Error receiving message from VF\n");
4683 vf_data->flags &= ~IGB_VF_FLAG_CTS;
4684 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4689 /* this is a message we already processed, do nothing */
4690 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
/*
4694 * until the vf completes a reset it should not be
4695 * allowed to start any configuration.
 */
4698 if (msgbuf[0] == E1000_VF_RESET) {
4699 igb_vf_reset_msg(adapter, vf);
/* not clear-to-send yet: NACK (rate-limited) instead of processing */
4703 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4704 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4710 switch ((msgbuf[0] & 0xFFFF)) {
4711 case E1000_VF_SET_MAC_ADDR:
4712 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4714 case E1000_VF_SET_PROMISC:
4715 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4717 case E1000_VF_SET_MULTICAST:
4718 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4720 case E1000_VF_SET_LPE:
4721 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4723 case E1000_VF_SET_VLAN:
/* a PF-assigned port VLAN overrides VF VLAN requests */
4724 if (adapter->vf_data[vf].pf_vlan)
4727 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4730 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4735 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4737 /* notify the VF of the results of what it sent us */
4739 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4741 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4743 igb_write_mbx(hw, msgbuf, 1, vf);
4746 static void igb_msg_task(struct igb_adapter *adapter)
4748 struct e1000_hw *hw = &adapter->hw;
4751 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4752 /* process any reset requests */
4753 if (!igb_check_for_rst(hw, vf))
4754 igb_vf_reset_event(adapter, vf);
4756 /* process any messages pending */
4757 if (!igb_check_for_msg(hw, vf))
4758 igb_rcv_msg_from_vf(adapter, vf);
4760 /* process any acks */
4761 if (!igb_check_for_ack(hw, vf))
4762 igb_rcv_ack_from_vf(adapter, vf);
4767 * igb_set_uta - Set unicast filter table address
4768 * @adapter: board private structure
4770 * The unicast table address is a register array of 32-bit registers.
4771 * The table is meant to be used in a way similar to how the MTA is used
4772 * however due to certain limitations in the hardware it is necessary to
4773  * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4774  * enable bit to allow vlan tag stripping when promiscuous mode is enabled
4776 static void igb_set_uta(struct igb_adapter *adapter)
4778 struct e1000_hw *hw = &adapter->hw;
4781 /* The UTA table only exists on 82576 hardware and newer */
4782 if (hw->mac.type < e1000_82576)
4785 /* we only need to do this if VMDq is enabled */
4786 if (!adapter->vfs_allocated_count)
4789 for (i = 0; i < hw->mac.uta_reg_count; i++)
4790 array_wr32(E1000_UTA, i, ~0);
4794 * igb_intr_msi - Interrupt Handler
4795 * @irq: interrupt number
4796 * @data: pointer to a network interface device structure
4798 static irqreturn_t igb_intr_msi(int irq, void *data)
4800 struct igb_adapter *adapter = data;
4801 struct igb_q_vector *q_vector = adapter->q_vector[0];
4802 struct e1000_hw *hw = &adapter->hw;
4803 /* read ICR disables interrupts using IAM */
4804 u32 icr = rd32(E1000_ICR);
4806 igb_write_itr(q_vector);
4808 if (icr & E1000_ICR_DRSTA)
4809 schedule_work(&adapter->reset_task);
4811 if (icr & E1000_ICR_DOUTSYNC) {
4812 /* HW is reporting DMA is out of sync */
4813 adapter->stats.doosync++;
4816 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4817 hw->mac.get_link_status = 1;
4818 if (!test_bit(__IGB_DOWN, &adapter->state))
4819 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4822 napi_schedule(&q_vector->napi);
4828 * igb_intr - Legacy Interrupt Handler
4829 * @irq: interrupt number
4830 * @data: pointer to a network interface device structure
4832 static irqreturn_t igb_intr(int irq, void *data)
4834 struct igb_adapter *adapter = data;
4835 struct igb_q_vector *q_vector = adapter->q_vector[0];
4836 struct e1000_hw *hw = &adapter->hw;
4837 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
4838 * need for the IMC write */
4839 u32 icr = rd32(E1000_ICR);
4841 return IRQ_NONE; /* Not our interrupt */
4843 igb_write_itr(q_vector);
4845 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4846 * not set, then the adapter didn't send an interrupt */
4847 if (!(icr & E1000_ICR_INT_ASSERTED))
4850 if (icr & E1000_ICR_DRSTA)
4851 schedule_work(&adapter->reset_task);
4853 if (icr & E1000_ICR_DOUTSYNC) {
4854 /* HW is reporting DMA is out of sync */
4855 adapter->stats.doosync++;
4858 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4859 hw->mac.get_link_status = 1;
4860 /* guard against interrupt when we're going down */
4861 if (!test_bit(__IGB_DOWN, &adapter->state))
4862 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4865 napi_schedule(&q_vector->napi);
4870 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4872 struct igb_adapter *adapter = q_vector->adapter;
4873 struct e1000_hw *hw = &adapter->hw;
4875 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4876 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4877 if (!adapter->msix_entries)
4878 igb_set_itr(adapter);
4880 igb_update_ring_itr(q_vector);
4883 if (!test_bit(__IGB_DOWN, &adapter->state)) {
4884 if (adapter->msix_entries)
4885 wr32(E1000_EIMS, q_vector->eims_value);
4887 igb_irq_enable(adapter);
4892 * igb_poll - NAPI Rx polling callback
4893 * @napi: napi polling structure
4894 * @budget: count of how many packets we should handle
4896 static int igb_poll(struct napi_struct *napi, int budget)
4898 struct igb_q_vector *q_vector = container_of(napi,
4899 struct igb_q_vector,
4901 int tx_clean_complete = 1, work_done = 0;
4903 #ifdef CONFIG_IGB_DCA
4904 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4905 igb_update_dca(q_vector);
4907 if (q_vector->tx_ring)
4908 tx_clean_complete = igb_clean_tx_irq(q_vector);
4910 if (q_vector->rx_ring)
4911 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4913 if (!tx_clean_complete)
4916 /* If not enough Rx work done, exit the polling mode */
4917 if (work_done < budget) {
4918 napi_complete(napi);
4919 igb_ring_irq_enable(q_vector);
4926 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4927 * @adapter: board private structure
4928 * @shhwtstamps: timestamp structure to update
4929 * @regval: unsigned 64bit system time value.
4931 * We need to convert the system time value stored in the RX/TXSTMP registers
4932 * into a hwtstamp which can be used by the upper level timestamping functions
4934 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4935 struct skb_shared_hwtstamps *shhwtstamps,
4941 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
4942 * 24 to match clock shift we setup earlier.
4944 if (adapter->hw.mac.type == e1000_82580)
4945 regval <<= IGB_82580_TSYNC_SHIFT;
4947 ns = timecounter_cyc2time(&adapter->clock, regval);
4948 timecompare_update(&adapter->compare, ns);
4949 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4950 shhwtstamps->hwtstamp = ns_to_ktime(ns);
4951 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4955 * igb_tx_hwtstamp - utility function which checks for TX time stamp
4956 * @q_vector: pointer to q_vector containing needed info
4957 * @skb: packet that was just sent
4959 * If we were asked to do hardware stamping and such a time stamp is
4960 * available, then it must have been for this skb here because we only
4961 * allow only one such packet into the queue.
4963 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4965 struct igb_adapter *adapter = q_vector->adapter;
4966 union skb_shared_tx *shtx = skb_tx(skb);
4967 struct e1000_hw *hw = &adapter->hw;
4968 struct skb_shared_hwtstamps shhwtstamps;
4971 /* if skb does not support hw timestamp or TX stamp not valid exit */
4972 if (likely(!shtx->hardware) ||
4973 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4976 regval = rd32(E1000_TXSTMPL);
4977 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4979 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4980 skb_tstamp_tx(skb, &shhwtstamps);
4984 * igb_clean_tx_irq - Reclaim resources after transmit completes
4985 * @q_vector: pointer to q_vector containing needed info
4986 * returns true if ring is completely cleaned
4988 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4990 struct igb_adapter *adapter = q_vector->adapter;
4991 struct igb_ring *tx_ring = q_vector->tx_ring;
4992 struct net_device *netdev = tx_ring->netdev;
4993 struct e1000_hw *hw = &adapter->hw;
4994 struct igb_buffer *buffer_info;
4995 struct sk_buff *skb;
4996 union e1000_adv_tx_desc *tx_desc, *eop_desc;
4997 unsigned int total_bytes = 0, total_packets = 0;
4998 unsigned int i, eop, count = 0;
4999 bool cleaned = false;
5001 i = tx_ring->next_to_clean;
5002 eop = tx_ring->buffer_info[i].next_to_watch;
5003 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5005 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5006 (count < tx_ring->count)) {
5007 for (cleaned = false; !cleaned; count++) {
5008 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5009 buffer_info = &tx_ring->buffer_info[i];
5010 cleaned = (i == eop);
5011 skb = buffer_info->skb;
5014 unsigned int segs, bytecount;
5015 /* gso_segs is currently only valid for tcp */
5016 segs = buffer_info->gso_segs;
5017 /* multiply data chunks by size of headers */
5018 bytecount = ((segs - 1) * skb_headlen(skb)) +
5020 total_packets += segs;
5021 total_bytes += bytecount;
5023 igb_tx_hwtstamp(q_vector, skb);
5026 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5027 tx_desc->wb.status = 0;
5030 if (i == tx_ring->count)
5033 eop = tx_ring->buffer_info[i].next_to_watch;
5034 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5037 tx_ring->next_to_clean = i;
5039 if (unlikely(count &&
5040 netif_carrier_ok(netdev) &&
5041 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5042 /* Make sure that anybody stopping the queue after this
5043 * sees the new next_to_clean.
5046 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5047 !(test_bit(__IGB_DOWN, &adapter->state))) {
5048 netif_wake_subqueue(netdev, tx_ring->queue_index);
5049 tx_ring->tx_stats.restart_queue++;
5053 if (tx_ring->detect_tx_hung) {
5054 /* Detect a transmit hang in hardware, this serializes the
5055 * check with the clearing of time_stamp and movement of i */
5056 tx_ring->detect_tx_hung = false;
5057 if (tx_ring->buffer_info[i].time_stamp &&
5058 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5059 (adapter->tx_timeout_factor * HZ)) &&
5060 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5062 /* detected Tx unit hang */
5063 dev_err(&tx_ring->pdev->dev,
5064 "Detected Tx Unit Hang\n"
5068 " next_to_use <%x>\n"
5069 " next_to_clean <%x>\n"
5070 "buffer_info[next_to_clean]\n"
5071 " time_stamp <%lx>\n"
5072 " next_to_watch <%x>\n"
5074 " desc.status <%x>\n",
5075 tx_ring->queue_index,
5076 readl(tx_ring->head),
5077 readl(tx_ring->tail),
5078 tx_ring->next_to_use,
5079 tx_ring->next_to_clean,
5080 tx_ring->buffer_info[eop].time_stamp,
5083 eop_desc->wb.status);
5084 netif_stop_subqueue(netdev, tx_ring->queue_index);
5087 tx_ring->total_bytes += total_bytes;
5088 tx_ring->total_packets += total_packets;
5089 tx_ring->tx_stats.bytes += total_bytes;
5090 tx_ring->tx_stats.packets += total_packets;
5091 return (count < tx_ring->count);
5095 * igb_receive_skb - helper function to handle rx indications
5096 * @q_vector: structure containing interrupt and ring information
5097 * @skb: packet to send up
5098 * @vlan_tag: vlan tag for packet
5100 static void igb_receive_skb(struct igb_q_vector *q_vector,
5101 struct sk_buff *skb,
5104 struct igb_adapter *adapter = q_vector->adapter;
5106 if (vlan_tag && adapter->vlgrp)
5107 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5110 napi_gro_receive(&q_vector->napi, skb);
5113 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5114 u32 status_err, struct sk_buff *skb)
5116 skb->ip_summed = CHECKSUM_NONE;
5118 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5119 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5120 (status_err & E1000_RXD_STAT_IXSM))
5123 /* TCP/UDP checksum error bit is set */
5125 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5127 * work around errata with sctp packets where the TCPE aka
5128 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5129 * packets, (aka let the stack check the crc32c)
5131 if ((skb->len == 60) &&
5132 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5133 ring->rx_stats.csum_err++;
5135 /* let the stack verify checksum errors */
5138 /* It must be a TCP or UDP packet with a valid checksum */
5139 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5140 skb->ip_summed = CHECKSUM_UNNECESSARY;
5142 dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
5145 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5146 struct sk_buff *skb)
5148 struct igb_adapter *adapter = q_vector->adapter;
5149 struct e1000_hw *hw = &adapter->hw;
5153 * If this bit is set, then the RX registers contain the time stamp. No
5154 * other packet will be time stamped until we read these registers, so
5155 * read the registers to make them available again. Because only one
5156 * packet can be time stamped at a time, we know that the register
5157 * values must belong to this one here and therefore we don't need to
5158 * compare any of the additional attributes stored for it.
5160 * If nothing went wrong, then it should have a skb_shared_tx that we
5161 * can turn into a skb_shared_hwtstamps.
5163 if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
5165 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5168 regval = rd32(E1000_RXSTMPL);
5169 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5171 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5173 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5174 union e1000_adv_rx_desc *rx_desc)
5176 /* HW will not DMA in data larger than the given buffer, even if it
5177 * parses the (NFS, of course) header to be larger. In that case, it
5178 * fills the header buffer and spills the rest into the page.
5180 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5181 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5182 if (hlen > rx_ring->rx_buffer_len)
5183 hlen = rx_ring->rx_buffer_len;
5187 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5188 int *work_done, int budget)
5190 struct igb_ring *rx_ring = q_vector->rx_ring;
5191 struct net_device *netdev = rx_ring->netdev;
5192 struct pci_dev *pdev = rx_ring->pdev;
5193 union e1000_adv_rx_desc *rx_desc , *next_rxd;
5194 struct igb_buffer *buffer_info , *next_buffer;
5195 struct sk_buff *skb;
5196 bool cleaned = false;
5197 int cleaned_count = 0;
5198 int current_node = numa_node_id();
5199 unsigned int total_bytes = 0, total_packets = 0;
5205 i = rx_ring->next_to_clean;
5206 buffer_info = &rx_ring->buffer_info[i];
5207 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5208 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5210 while (staterr & E1000_RXD_STAT_DD) {
5211 if (*work_done >= budget)
5215 skb = buffer_info->skb;
5216 prefetch(skb->data - NET_IP_ALIGN);
5217 buffer_info->skb = NULL;
5220 if (i == rx_ring->count)
5223 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5225 next_buffer = &rx_ring->buffer_info[i];
5227 length = le16_to_cpu(rx_desc->wb.upper.length);
5231 if (buffer_info->dma) {
5232 pci_unmap_single(pdev, buffer_info->dma,
5233 rx_ring->rx_buffer_len,
5234 PCI_DMA_FROMDEVICE);
5235 buffer_info->dma = 0;
5236 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5237 skb_put(skb, length);
5240 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5244 pci_unmap_page(pdev, buffer_info->page_dma,
5245 PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5246 buffer_info->page_dma = 0;
5248 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
5250 buffer_info->page_offset,
5253 if ((page_count(buffer_info->page) != 1) ||
5254 (page_to_nid(buffer_info->page) != current_node))
5255 buffer_info->page = NULL;
5257 get_page(buffer_info->page);
5260 skb->data_len += length;
5261 skb->truesize += length;
5264 if (!(staterr & E1000_RXD_STAT_EOP)) {
5265 buffer_info->skb = next_buffer->skb;
5266 buffer_info->dma = next_buffer->dma;
5267 next_buffer->skb = skb;
5268 next_buffer->dma = 0;
5272 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5273 dev_kfree_skb_irq(skb);
5277 igb_rx_hwtstamp(q_vector, staterr, skb);
5278 total_bytes += skb->len;
5281 igb_rx_checksum_adv(rx_ring, staterr, skb);
5283 skb->protocol = eth_type_trans(skb, netdev);
5284 skb_record_rx_queue(skb, rx_ring->queue_index);
5286 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5287 le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5289 igb_receive_skb(q_vector, skb, vlan_tag);
5292 rx_desc->wb.upper.status_error = 0;
5294 /* return some buffers to hardware, one at a time is too slow */
5295 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5296 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5300 /* use prefetched values */
5302 buffer_info = next_buffer;
5303 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5306 rx_ring->next_to_clean = i;
5307 cleaned_count = igb_desc_unused(rx_ring);
5310 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5312 rx_ring->total_packets += total_packets;
5313 rx_ring->total_bytes += total_bytes;
5314 rx_ring->rx_stats.packets += total_packets;
5315 rx_ring->rx_stats.bytes += total_bytes;
5320 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5321 * @adapter: address of board private structure
5323 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5325 struct net_device *netdev = rx_ring->netdev;
5326 union e1000_adv_rx_desc *rx_desc;
5327 struct igb_buffer *buffer_info;
5328 struct sk_buff *skb;
5332 i = rx_ring->next_to_use;
5333 buffer_info = &rx_ring->buffer_info[i];
5335 bufsz = rx_ring->rx_buffer_len;
5337 while (cleaned_count--) {
5338 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5340 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5341 if (!buffer_info->page) {
5342 buffer_info->page = netdev_alloc_page(netdev);
5343 if (!buffer_info->page) {
5344 rx_ring->rx_stats.alloc_failed++;
5347 buffer_info->page_offset = 0;
5349 buffer_info->page_offset ^= PAGE_SIZE / 2;
5351 buffer_info->page_dma =
5352 pci_map_page(rx_ring->pdev, buffer_info->page,
5353 buffer_info->page_offset,
5355 PCI_DMA_FROMDEVICE);
5356 if (pci_dma_mapping_error(rx_ring->pdev,
5357 buffer_info->page_dma)) {
5358 buffer_info->page_dma = 0;
5359 rx_ring->rx_stats.alloc_failed++;
5364 skb = buffer_info->skb;
5366 skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5368 rx_ring->rx_stats.alloc_failed++;
5372 buffer_info->skb = skb;
5374 if (!buffer_info->dma) {
5375 buffer_info->dma = pci_map_single(rx_ring->pdev,
5378 PCI_DMA_FROMDEVICE);
5379 if (pci_dma_mapping_error(rx_ring->pdev,
5380 buffer_info->dma)) {
5381 buffer_info->dma = 0;
5382 rx_ring->rx_stats.alloc_failed++;
5386 /* Refresh the desc even if buffer_addrs didn't change because
5387 * each write-back erases this info. */
5388 if (bufsz < IGB_RXBUFFER_1024) {
5389 rx_desc->read.pkt_addr =
5390 cpu_to_le64(buffer_info->page_dma);
5391 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5393 rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5394 rx_desc->read.hdr_addr = 0;
5398 if (i == rx_ring->count)
5400 buffer_info = &rx_ring->buffer_info[i];
5404 if (rx_ring->next_to_use != i) {
5405 rx_ring->next_to_use = i;
5407 i = (rx_ring->count - 1);
5411 /* Force memory writes to complete before letting h/w
5412 * know there are new descriptors to fetch. (Only
5413 * applicable for weak-ordered memory model archs,
5414 * such as IA-64). */
5416 writel(i, rx_ring->tail);
5426 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5428 struct igb_adapter *adapter = netdev_priv(netdev);
5429 struct mii_ioctl_data *data = if_mii(ifr);
5431 if (adapter->hw.phy.media_type != e1000_media_type_copper)
5436 data->phy_id = adapter->hw.phy.addr;
5439 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5451 * igb_hwtstamp_ioctl - control hardware time stamping
5456 * Outgoing time stamping can be enabled and disabled. Play nice and
5457  * disable it when requested, although it shouldn't cause any overhead
5458 * when no packet needs it. At most one packet in the queue may be
5459 * marked for time stamping, otherwise it would be impossible to tell
5460 * for sure to which packet the hardware time stamp belongs.
5462 * Incoming time stamping has to be configured via the hardware
5463 * filters. Not all combinations are supported, in particular event
5464 * type has to be specified. Matching the kind of event packet is
5465 * not supported, with the exception of "all V2 events regardless of
5469 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5470 struct ifreq *ifr, int cmd)
5472 struct igb_adapter *adapter = netdev_priv(netdev);
5473 struct e1000_hw *hw = &adapter->hw;
5474 struct hwtstamp_config config;
5475 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5476 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5477 u32 tsync_rx_cfg = 0;
5482 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5485 /* reserved for future extensions */
5489 switch (config.tx_type) {
5490 case HWTSTAMP_TX_OFF:
5492 case HWTSTAMP_TX_ON:
5498 switch (config.rx_filter) {
5499 case HWTSTAMP_FILTER_NONE:
5502 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5503 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5504 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5505 case HWTSTAMP_FILTER_ALL:
5507 * register TSYNCRXCFG must be set, therefore it is not
5508 * possible to time stamp both Sync and Delay_Req messages
5509 * => fall back to time stamping all packets
5511 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5512 config.rx_filter = HWTSTAMP_FILTER_ALL;
5514 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5515 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5516 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5519 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5520 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5521 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5524 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5525 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5526 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5527 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5530 config.rx_filter = HWTSTAMP_FILTER_SOME;
5532 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5533 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5534 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5535 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5538 config.rx_filter = HWTSTAMP_FILTER_SOME;
5540 case HWTSTAMP_FILTER_PTP_V2_EVENT:
5541 case HWTSTAMP_FILTER_PTP_V2_SYNC:
5542 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5543 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5544 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5551 if (hw->mac.type == e1000_82575) {
5552 if (tsync_rx_ctl | tsync_tx_ctl)
5557 /* enable/disable TX */
5558 regval = rd32(E1000_TSYNCTXCTL);
5559 regval &= ~E1000_TSYNCTXCTL_ENABLED;
5560 regval |= tsync_tx_ctl;
5561 wr32(E1000_TSYNCTXCTL, regval);
5563 /* enable/disable RX */
5564 regval = rd32(E1000_TSYNCRXCTL);
5565 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5566 regval |= tsync_rx_ctl;
5567 wr32(E1000_TSYNCRXCTL, regval);
5569 /* define which PTP packets are time stamped */
5570 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5572 /* define ethertype filter for timestamped packets */
5575 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5576 E1000_ETQF_1588 | /* enable timestamping */
5577 ETH_P_1588)); /* 1588 eth protocol type */
5579 wr32(E1000_ETQF(3), 0);
5581 #define PTP_PORT 319
5582 /* L4 Queue Filter[3]: filter by destination port and protocol */
5584 u32 ftqf = (IPPROTO_UDP /* UDP */
5585 | E1000_FTQF_VF_BP /* VF not compared */
5586 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5587 | E1000_FTQF_MASK); /* mask all inputs */
5588 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5590 wr32(E1000_IMIR(3), htons(PTP_PORT));
5591 wr32(E1000_IMIREXT(3),
5592 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5593 if (hw->mac.type == e1000_82576) {
5594 /* enable source port check */
5595 wr32(E1000_SPQF(3), htons(PTP_PORT));
5596 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5598 wr32(E1000_FTQF(3), ftqf);
5600 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5604 adapter->hwtstamp_config = config;
5606 /* clear TX/RX time stamp registers, just to be sure */
5607 regval = rd32(E1000_TXSTMPH);
5608 regval = rd32(E1000_RXSTMPH);
5610 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5620 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5626 return igb_mii_ioctl(netdev, ifr, cmd);
5628 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5634 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5636 struct igb_adapter *adapter = hw->back;
5639 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5641 return -E1000_ERR_CONFIG;
5643 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5648 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5650 struct igb_adapter *adapter = hw->back;
5653 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5655 return -E1000_ERR_CONFIG;
5657 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5662 static void igb_vlan_rx_register(struct net_device *netdev,
5663 struct vlan_group *grp)
5665 struct igb_adapter *adapter = netdev_priv(netdev);
5666 struct e1000_hw *hw = &adapter->hw;
5669 igb_irq_disable(adapter);
5670 adapter->vlgrp = grp;
5673 /* enable VLAN tag insert/strip */
5674 ctrl = rd32(E1000_CTRL);
5675 ctrl |= E1000_CTRL_VME;
5676 wr32(E1000_CTRL, ctrl);
5678 /* Disable CFI check */
5679 rctl = rd32(E1000_RCTL);
5680 rctl &= ~E1000_RCTL_CFIEN;
5681 wr32(E1000_RCTL, rctl);
5683 /* disable VLAN tag insert/strip */
5684 ctrl = rd32(E1000_CTRL);
5685 ctrl &= ~E1000_CTRL_VME;
5686 wr32(E1000_CTRL, ctrl);
5689 igb_rlpml_set(adapter);
5691 if (!test_bit(__IGB_DOWN, &adapter->state))
5692 igb_irq_enable(adapter);
5695 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5697 struct igb_adapter *adapter = netdev_priv(netdev);
5698 struct e1000_hw *hw = &adapter->hw;
5699 int pf_id = adapter->vfs_allocated_count;
5701 /* attempt to add filter to vlvf array */
5702 igb_vlvf_set(adapter, vid, true, pf_id);
5704 /* add the filter since PF can receive vlans w/o entry in vlvf */
5705 igb_vfta_set(hw, vid, true);
5708 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5710 struct igb_adapter *adapter = netdev_priv(netdev);
5711 struct e1000_hw *hw = &adapter->hw;
5712 int pf_id = adapter->vfs_allocated_count;
5715 igb_irq_disable(adapter);
5716 vlan_group_set_device(adapter->vlgrp, vid, NULL);
5718 if (!test_bit(__IGB_DOWN, &adapter->state))
5719 igb_irq_enable(adapter);
5721 /* remove vlan from VLVF table array */
5722 err = igb_vlvf_set(adapter, vid, false, pf_id);
5724 /* if vid was not present in VLVF just remove it from table */
5726 igb_vfta_set(hw, vid, false);
5729 static void igb_restore_vlan(struct igb_adapter *adapter)
5731 igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5733 if (adapter->vlgrp) {
5735 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5736 if (!vlan_group_get_device(adapter->vlgrp, vid))
5738 igb_vlan_rx_add_vid(adapter->netdev, vid);
5743 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5745 struct pci_dev *pdev = adapter->pdev;
5746 struct e1000_mac_info *mac = &adapter->hw.mac;
5751 case SPEED_10 + DUPLEX_HALF:
5752 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5754 case SPEED_10 + DUPLEX_FULL:
5755 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5757 case SPEED_100 + DUPLEX_HALF:
5758 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5760 case SPEED_100 + DUPLEX_FULL:
5761 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5763 case SPEED_1000 + DUPLEX_FULL:
5765 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5767 case SPEED_1000 + DUPLEX_HALF: /* not supported */
5769 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5775 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5777 struct net_device *netdev = pci_get_drvdata(pdev);
5778 struct igb_adapter *adapter = netdev_priv(netdev);
5779 struct e1000_hw *hw = &adapter->hw;
5780 u32 ctrl, rctl, status;
5781 u32 wufc = adapter->wol;
5786 netif_device_detach(netdev);
5788 if (netif_running(netdev))
5791 igb_clear_interrupt_scheme(adapter);
5794 retval = pci_save_state(pdev);
5799 status = rd32(E1000_STATUS);
5800 if (status & E1000_STATUS_LU)
5801 wufc &= ~E1000_WUFC_LNKC;
5804 igb_setup_rctl(adapter);
5805 igb_set_rx_mode(netdev);
5807 /* turn on all-multi mode if wake on multicast is enabled */
5808 if (wufc & E1000_WUFC_MC) {
5809 rctl = rd32(E1000_RCTL);
5810 rctl |= E1000_RCTL_MPE;
5811 wr32(E1000_RCTL, rctl);
5814 ctrl = rd32(E1000_CTRL);
5815 /* advertise wake from D3Cold */
5816 #define E1000_CTRL_ADVD3WUC 0x00100000
5817 /* phy power management enable */
5818 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5819 ctrl |= E1000_CTRL_ADVD3WUC;
5820 wr32(E1000_CTRL, ctrl);
5822 /* Allow time for pending master requests to run */
5823 igb_disable_pcie_master(hw);
5825 wr32(E1000_WUC, E1000_WUC_PME_EN);
5826 wr32(E1000_WUFC, wufc);
5829 wr32(E1000_WUFC, 0);
5832 *enable_wake = wufc || adapter->en_mng_pt;
5834 igb_power_down_link(adapter);
5836 igb_power_up_link(adapter);
5838 /* Release control of h/w to f/w. If f/w is AMT enabled, this
5839 * would have already happened in close and is redundant. */
5840 igb_release_hw_control(adapter);
5842 pci_disable_device(pdev);
5848 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5853 retval = __igb_shutdown(pdev, &wake);
5858 pci_prepare_to_sleep(pdev);
5860 pci_wake_from_d3(pdev, false);
5861 pci_set_power_state(pdev, PCI_D3hot);
5867 static int igb_resume(struct pci_dev *pdev)
5869 struct net_device *netdev = pci_get_drvdata(pdev);
5870 struct igb_adapter *adapter = netdev_priv(netdev);
5871 struct e1000_hw *hw = &adapter->hw;
5874 pci_set_power_state(pdev, PCI_D0);
5875 pci_restore_state(pdev);
5876 pci_save_state(pdev);
5878 err = pci_enable_device_mem(pdev);
5881 "igb: Cannot enable PCI device from suspend\n");
5884 pci_set_master(pdev);
5886 pci_enable_wake(pdev, PCI_D3hot, 0);
5887 pci_enable_wake(pdev, PCI_D3cold, 0);
5889 if (igb_init_interrupt_scheme(adapter)) {
5890 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5896 /* let the f/w know that the h/w is now under the control of the
5898 igb_get_hw_control(adapter);
5900 wr32(E1000_WUS, ~0);
5902 if (netif_running(netdev)) {
5903 err = igb_open(netdev);
5908 netif_device_attach(netdev);
5914 static void igb_shutdown(struct pci_dev *pdev)
5918 __igb_shutdown(pdev, &wake);
5920 if (system_state == SYSTEM_POWER_OFF) {
5921 pci_wake_from_d3(pdev, wake);
5922 pci_set_power_state(pdev, PCI_D3hot);
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Polling 'interrupt' - used by things like netconsole to send skbs
 * without having to re-enable interrupts. It's not called while
 * the interrupt routine is executing.
 */
static void igb_netpoll(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* legacy/MSI: a single vector, so mask everything and poll it */
	if (!adapter->msix_entries) {
		struct igb_q_vector *q_vector = adapter->q_vector[0];
		igb_irq_disable(adapter);
		napi_schedule(&q_vector->napi);
		return;
	}

	/* MSI-X: mask and poll each queue vector individually */
	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		wr32(E1000_EIMC, q_vector->eims_value);
		napi_schedule(&q_vector->napi);
	}
}
#endif /* CONFIG_NET_POLL_CONTROLLER */
/**
 * igb_io_error_detected - called when PCI error is detected
 * @pdev: Pointer to PCI device
 * @state: The current pci connection state
 *
 * This function is called after a PCI bus error affecting
 * this device has been detected.
 *
 * Returns PCI_ERS_RESULT_DISCONNECT if the device is unrecoverable,
 * otherwise PCI_ERS_RESULT_NEED_RESET to request a slot reset.
 */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	netif_device_detach(netdev);

	/* permanent failure: nothing to recover, tell the core to give up */
	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	if (netif_running(netdev))
		igb_down(adapter);
	pci_disable_device(pdev);

	/* Request a slot reset. */
	return PCI_ERS_RESULT_NEED_RESET;
}
/**
 * igb_io_slot_reset - called after the pci bus has been reset.
 * @pdev: Pointer to PCI device
 *
 * Restart the card from scratch, as if from a cold-boot. Implementation
 * resembles the first-half of the igb_resume routine.
 *
 * Returns PCI_ERS_RESULT_RECOVERED on success, otherwise
 * PCI_ERS_RESULT_DISCONNECT.
 */
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	pci_ers_result_t result;
	int err;

	if (pci_enable_device_mem(pdev)) {
		dev_err(&pdev->dev,
			"Cannot re-enable PCI device after reset.\n");
		result = PCI_ERS_RESULT_DISCONNECT;
	} else {
		pci_set_master(pdev);
		/* restore config space, then re-save a clean snapshot */
		pci_restore_state(pdev);
		pci_save_state(pdev);

		pci_enable_wake(pdev, PCI_D3hot, 0);
		pci_enable_wake(pdev, PCI_D3cold, 0);

		igb_reset(adapter);
		/* clear any stale wake-up status */
		wr32(E1000_WUS, ~0);
		result = PCI_ERS_RESULT_RECOVERED;
	}

	err = pci_cleanup_aer_uncorrect_error_status(pdev);
	if (err) {
		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
			"failed 0x%0x\n", err);
		/* non-fatal, continue */
	}

	return result;
}
/**
 * igb_io_resume - called when traffic can start flowing again.
 * @pdev: Pointer to PCI device
 *
 * This callback is called when the error recovery driver tells us that
 * its OK to resume normal operation. Implementation resembles the
 * second-half of the igb_resume routine.
 */
static void igb_io_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (netif_running(netdev)) {
		if (igb_up(adapter)) {
			dev_err(&pdev->dev, "igb_up failed after reset\n");
			return;
		}
	}

	netif_device_attach(netdev);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);
}
/**
 * igb_rar_set_qsel - program a receive address register with queue select
 * @adapter: board private structure
 * @addr: 6-byte MAC address to program
 * @index: RAR index to write
 * @qsel: pool/queue (VF) this address is associated with
 *
 * Writes @addr into RAL/RAH at @index, marks it valid and tags it with
 * the pool bits for @qsel so VMDq steers matching frames to that pool.
 */
static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
			     u8 qsel)
{
	u32 rar_low, rar_high;
	struct e1000_hw *hw = &adapter->hw;

	/* HW expects these in little endian so we reverse the byte order
	 * from network order (big endian) to little endian
	 */
	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));

	/* Indicate to hardware the Address is Valid. */
	rar_high |= E1000_RAH_AV;

	/* 82575 encodes the pool as a multiplier, later parts as a shift */
	if (hw->mac.type == e1000_82575)
		rar_high |= E1000_RAH_POOL_1 * qsel;
	else
		rar_high |= E1000_RAH_POOL_1 << qsel;

	/* RAL must be written (and flushed) before RAH sets the valid bit */
	wr32(E1000_RAL(index), rar_low);
	wrfl();
	wr32(E1000_RAH(index), rar_high);
	wrfl();
}
/**
 * igb_set_vf_mac - program a VF's MAC address into the hardware
 * @adapter: board private structure
 * @vf: VF index
 * @mac_addr: MAC address to assign
 *
 * Caches @mac_addr in the VF's data and writes it to the RAR slot
 * reserved for this VF. Returns 0.
 */
static int igb_set_vf_mac(struct igb_adapter *adapter,
                          int vf, unsigned char *mac_addr)
{
	struct e1000_hw *hw = &adapter->hw;
	/* VF MAC addresses start at end of receive addresses and moves
	 * towards the first, as a result a collision should not be possible */
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);

	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);

	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);

	return 0;
}
6091 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6093 struct igb_adapter *adapter = netdev_priv(netdev);
6094 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6096 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6097 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6098 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6099 " change effective.");
6100 if (test_bit(__IGB_DOWN, &adapter->state)) {
6101 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6102 " but the PF device is not up.\n");
6103 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6104 " attempting to use the VF device.\n");
6106 return igb_set_vf_mac(adapter, vf, mac);
/* VF transmit rate limiting is not supported on this hardware/driver
 * version; reject the request. */
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
{
	return -EOPNOTSUPP;
}
/**
 * igb_ndo_get_vf_config - report a VF's configuration to the PF admin
 * @netdev: PF network interface
 * @vf: VF index
 * @ivi: output structure filled with the VF's MAC, vlan and qos
 *
 * Returns 0 on success, -EINVAL if @vf is out of range.
 */
static int igb_ndo_get_vf_config(struct net_device *netdev,
				 int vf, struct ifla_vf_info *ivi)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	if (vf >= adapter->vfs_allocated_count)
		return -EINVAL;
	ivi->vf = vf;
	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
	/* tx rate limiting is not supported — see igb_ndo_set_vf_bw */
	ivi->tx_rate = 0;
	ivi->vlan = adapter->vf_data[vf].pf_vlan;
	ivi->qos = adapter->vf_data[vf].pf_qos;
	return 0;
}
/**
 * igb_vmm_control - configure virtual machine (VMDq/SR-IOV) offloads
 * @adapter: board private structure
 *
 * Enables vlan-tag stripping for replicated frames and turns pool
 * loopback/replication on when VFs are allocated, off otherwise.
 * No-op on 82575, which does not support replication.
 */
static void igb_vmm_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg;

	/* replication is not supported for 82575 */
	if (hw->mac.type == e1000_82575)
		return;

	/* enable replication vlan tag stripping */
	reg = rd32(E1000_RPLOLR);
	reg |= E1000_RPLOLR_STRVLAN;
	wr32(E1000_RPLOLR, reg);

	/* notify HW that the MAC is adding vlan tags */
	reg = rd32(E1000_DTXCTL);
	reg |= E1000_DTXCTL_VLAN_ADDED;
	wr32(E1000_DTXCTL, reg);

	if (adapter->vfs_allocated_count) {
		igb_vmdq_set_loopback_pf(hw, true);
		igb_vmdq_set_replication_pf(hw, true);
	} else {
		igb_vmdq_set_loopback_pf(hw, false);
		igb_vmdq_set_replication_pf(hw, false);
	}
}