/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2009 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/

#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"

#define DRV_VERSION "2.1.0-k2"
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
                                "Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";

static const struct e1000_info *igb_info_tbl[] = {
        [board_82575] = &e1000_82575_info,
};

static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
        /* required last entry */
        {0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);

void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
void igb_update_stats(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
static struct net_device_stats *igb_get_stats(struct net_device *);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static bool igb_clean_tx_irq(struct igb_q_vector *);
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
                               int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
                                 struct ifla_vf_info *ivi);

#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
        .notifier_call  = igb_notify_dca,
        .next           = NULL,
        .priority       = 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */

static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
                     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
        .error_detected = igb_io_error_detected,
        .slot_reset = igb_io_slot_reset,
        .resume = igb_io_resume,
};


static struct pci_driver igb_driver = {
        .name     = igb_driver_name,
        .id_table = igb_pci_tbl,
        .probe    = igb_probe,
        .remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
        /* Power Management Hooks */
        .suspend  = igb_suspend,
        .resume   = igb_resume,
#endif
        .shutdown = igb_shutdown,
        .err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);

/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 */
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
        struct igb_adapter *adapter =
                container_of(tc, struct igb_adapter, cycles);
        struct e1000_hw *hw = &adapter->hw;
        u64 stamp = 0;
        int shift = 0;

        /*
         * The timestamp latches on the lowest register read. For the 82580
         * the lowest register is SYSTIMR instead of SYSTIML.  However, we
         * never adjusted TIMINCA, so SYSTIMR will just read as all 0s; ignore it.
         */
        if (hw->mac.type == e1000_82580) {
                stamp = rd32(E1000_SYSTIMR) >> 8;
                shift = IGB_82580_TSYNC_SHIFT;
        }

        stamp |= (u64)rd32(E1000_SYSTIML) << shift;
        stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
        return stamp;
}

#ifdef DEBUG
/**
 * igb_get_hw_dev_name - return device name string
 * used by hardware layer to print debugging information
 **/
char *igb_get_hw_dev_name(struct e1000_hw *hw)
{
        struct igb_adapter *adapter = hw->back;
        return adapter->netdev->name;
}

/**
 * igb_get_time_str - format current NIC and system time as string
 */
static char *igb_get_time_str(struct igb_adapter *adapter,
                              char buffer[160])
{
        cycle_t hw = adapter->cycles.read(&adapter->cycles);
        struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
        struct timespec sys;
        struct timespec delta;
        getnstimeofday(&sys);

        delta = timespec_sub(nic, sys);

        sprintf(buffer,
                "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
                hw,
                (long)nic.tv_sec, nic.tv_nsec,
                (long)sys.tv_sec, sys.tv_nsec,
                (long)delta.tv_sec, delta.tv_nsec);

        return buffer;
}
#endif

/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
        int ret;
        printk(KERN_INFO "%s - version %s\n",
               igb_driver_string, igb_driver_version);

        printk(KERN_INFO "%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
        dca_register_notify(&dca_notifier);
#endif
        ret = pci_register_driver(&igb_driver);
        return ret;
}

module_init(igb_init_module);

/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
        dca_unregister_notify(&dca_notifier);
#endif
        pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);

#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
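/*
 * Maps ring index i to an 82576 queue register index, e.g.
 * 0->0, 1->8, 2->1, 3->9, ..., 14->7, 15->15.  This interleaving is what
 * lets the PF skip the queue registers already handed out to VFs (see
 * igb_cache_ring_register() below).
 */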
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
        int i = 0, j = 0;
        u32 rbase_offset = adapter->vfs_allocated_count;

        switch (adapter->hw.mac.type) {
        case e1000_82576:
                /* The queues are allocated for virtualization such that VF 0
                 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
                 * In order to avoid collision we start at the first free queue
                 * and continue consuming queues in the same sequence
                 */
                if (adapter->vfs_allocated_count) {
                        for (; i < adapter->rss_queues; i++)
                                adapter->rx_ring[i]->reg_idx = rbase_offset +
                                                               Q_IDX_82576(i);
                        for (; j < adapter->rss_queues; j++)
                                adapter->tx_ring[j]->reg_idx = rbase_offset +
                                                               Q_IDX_82576(j);
                }
        case e1000_82575:
        case e1000_82580:
        default:
                for (; i < adapter->num_rx_queues; i++)
                        adapter->rx_ring[i]->reg_idx = rbase_offset + i;
                for (; j < adapter->num_tx_queues; j++)
                        adapter->tx_ring[j]->reg_idx = rbase_offset + j;
                break;
        }
}

static void igb_free_queues(struct igb_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++) {
                kfree(adapter->tx_ring[i]);
                adapter->tx_ring[i] = NULL;
        }
        for (i = 0; i < adapter->num_rx_queues; i++) {
                kfree(adapter->rx_ring[i]);
                adapter->rx_ring[i] = NULL;
        }
        adapter->num_rx_queues = 0;
        adapter->num_tx_queues = 0;
}

/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
        struct igb_ring *ring;
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++) {
                ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
                if (!ring)
                        goto err;
                ring->count = adapter->tx_ring_count;
                ring->queue_index = i;
                ring->pdev = adapter->pdev;
                ring->netdev = adapter->netdev;
                /* For 82575, context index must be unique per ring. */
                if (adapter->hw.mac.type == e1000_82575)
                        ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
                adapter->tx_ring[i] = ring;
        }

        for (i = 0; i < adapter->num_rx_queues; i++) {
                ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
                if (!ring)
                        goto err;
                ring->count = adapter->rx_ring_count;
                ring->queue_index = i;
                ring->pdev = adapter->pdev;
                ring->netdev = adapter->netdev;
                ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
                ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
                /* set flag indicating ring supports SCTP checksum offload */
                if (adapter->hw.mac.type >= e1000_82576)
                        ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
                adapter->rx_ring[i] = ring;
        }

        igb_cache_ring_register(adapter);

        return 0;

err:
        igb_free_queues(adapter);

        return -ENOMEM;
}

#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
        u32 msixbm = 0;
        struct igb_adapter *adapter = q_vector->adapter;
        struct e1000_hw *hw = &adapter->hw;
        u32 ivar, index;
        int rx_queue = IGB_N0_QUEUE;
        int tx_queue = IGB_N0_QUEUE;

        if (q_vector->rx_ring)
                rx_queue = q_vector->rx_ring->reg_idx;
        if (q_vector->tx_ring)
                tx_queue = q_vector->tx_ring->reg_idx;

        switch (hw->mac.type) {
        case e1000_82575:
                /* The 82575 assigns vectors using a bitmask, which matches the
                   bitmask for the EICR/EIMS/EIMC registers.  To assign one
                   or more queues to a vector, we write the appropriate bits
                   into the MSIXBM register for that vector. */
                if (rx_queue > IGB_N0_QUEUE)
                        msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
                if (tx_queue > IGB_N0_QUEUE)
                        msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
                if (!adapter->msix_entries && msix_vector == 0)
                        msixbm |= E1000_EIMS_OTHER;
                array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
                q_vector->eims_value = msixbm;
                break;
        case e1000_82576:
                /* 82576 uses a table-based method for assigning vectors.
                   Each queue has a single entry in the table to which we write
                   a vector number along with a "valid" bit.  Sadly, the layout
                   of the table is somewhat counterintuitive. */
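                /* For reference, each IVAR0[n & 0x7] register holds four
                   8-bit entries: Rx queue n uses byte 0 (n < 8) or byte 2
                   (n >= 8), and Tx queue n uses byte 1 (n < 8) or byte 3. */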
                if (rx_queue > IGB_N0_QUEUE) {
                        index = (rx_queue & 0x7);
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (rx_queue < 8) {
                                /* vector goes into low byte of register */
                                ivar = ivar & 0xFFFFFF00;
                                ivar |= msix_vector | E1000_IVAR_VALID;
                        } else {
                                /* vector goes into third byte of register */
                                ivar = ivar & 0xFF00FFFF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
                        }
                        array_wr32(E1000_IVAR0, index, ivar);
                }
                if (tx_queue > IGB_N0_QUEUE) {
                        index = (tx_queue & 0x7);
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (tx_queue < 8) {
                                /* vector goes into second byte of register */
                                ivar = ivar & 0xFFFF00FF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
                        } else {
                                /* vector goes into high byte of register */
                                ivar = ivar & 0x00FFFFFF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
                        }
                        array_wr32(E1000_IVAR0, index, ivar);
                }
                q_vector->eims_value = 1 << msix_vector;
                break;
        case e1000_82580:
                /* 82580 uses the same table-based approach as 82576 but has fewer
                   entries; as a result we carry over for queues greater than 4. */
                if (rx_queue > IGB_N0_QUEUE) {
                        index = (rx_queue >> 1);
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (rx_queue & 0x1) {
                                /* vector goes into third byte of register */
                                ivar = ivar & 0xFF00FFFF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
                        } else {
                                /* vector goes into low byte of register */
                                ivar = ivar & 0xFFFFFF00;
                                ivar |= msix_vector | E1000_IVAR_VALID;
                        }
                        array_wr32(E1000_IVAR0, index, ivar);
                }
                if (tx_queue > IGB_N0_QUEUE) {
                        index = (tx_queue >> 1);
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (tx_queue & 0x1) {
                                /* vector goes into high byte of register */
                                ivar = ivar & 0x00FFFFFF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
                        } else {
                                /* vector goes into second byte of register */
                                ivar = ivar & 0xFFFF00FF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
                        }
                        array_wr32(E1000_IVAR0, index, ivar);
                }
                q_vector->eims_value = 1 << msix_vector;
                break;
        default:
                BUG();
                break;
        }

        /* add q_vector eims value to global eims_enable_mask */
        adapter->eims_enable_mask |= q_vector->eims_value;

        /* configure q_vector to set itr on first interrupt */
        q_vector->set_itr = 1;
}

/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
        u32 tmp;
        int i, vector = 0;
        struct e1000_hw *hw = &adapter->hw;

        adapter->eims_enable_mask = 0;

        /* set vector for other causes, i.e. link changes */
        switch (hw->mac.type) {
        case e1000_82575:
                tmp = rd32(E1000_CTRL_EXT);
                /* enable MSI-X PBA support */
                tmp |= E1000_CTRL_EXT_PBA_CLR;

                /* Auto-Mask interrupts upon ICR read. */
                tmp |= E1000_CTRL_EXT_EIAME;
                tmp |= E1000_CTRL_EXT_IRCA;

                wr32(E1000_CTRL_EXT, tmp);

                /* enable msix_other interrupt */
                array_wr32(E1000_MSIXBM(0), vector++,
                                      E1000_EIMS_OTHER);
                adapter->eims_other = E1000_EIMS_OTHER;

                break;

        case e1000_82576:
        case e1000_82580:
                /* Turn on MSI-X capability first, or our settings
                 * won't stick.  And it will take days to debug. */
                wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
                                E1000_GPIE_PBA | E1000_GPIE_EIAME |
                                E1000_GPIE_NSICR);

                /* enable msix_other interrupt */
                adapter->eims_other = 1 << vector;
                tmp = (vector++ | E1000_IVAR_VALID) << 8;

                wr32(E1000_IVAR_MISC, tmp);
                break;
        default:
                /* do nothing, since nothing else supports MSI-X */
                break;
        } /* switch (hw->mac.type) */

        adapter->eims_enable_mask |= adapter->eims_other;

        for (i = 0; i < adapter->num_q_vectors; i++)
                igb_assign_vector(adapter->q_vector[i], vector++);

        wrfl();
}

/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        int i, err = 0, vector = 0;

        err = request_irq(adapter->msix_entries[vector].vector,
                          igb_msix_other, 0, netdev->name, adapter);
        if (err)
                goto out;
        vector++;

        for (i = 0; i < adapter->num_q_vectors; i++) {
                struct igb_q_vector *q_vector = adapter->q_vector[i];

                q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

                if (q_vector->rx_ring && q_vector->tx_ring)
                        sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
                                q_vector->rx_ring->queue_index);
                else if (q_vector->tx_ring)
                        sprintf(q_vector->name, "%s-tx-%u", netdev->name,
                                q_vector->tx_ring->queue_index);
                else if (q_vector->rx_ring)
                        sprintf(q_vector->name, "%s-rx-%u", netdev->name,
                                q_vector->rx_ring->queue_index);
                else
                        sprintf(q_vector->name, "%s-unused", netdev->name);

                err = request_irq(adapter->msix_entries[vector].vector,
                                  igb_msix_ring, 0, q_vector->name,
                                  q_vector);
                if (err)
                        goto out;
                vector++;
        }

        igb_configure_msix(adapter);
        return 0;
out:
        return err;
}

static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
        if (adapter->msix_entries) {
                pci_disable_msix(adapter->pdev);
                kfree(adapter->msix_entries);
                adapter->msix_entries = NULL;
        } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
                pci_disable_msi(adapter->pdev);
        }
}

/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
        int v_idx;

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
                struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
                adapter->q_vector[v_idx] = NULL;
                netif_napi_del(&q_vector->napi);
                kfree(q_vector);
        }
        adapter->num_q_vectors = 0;
}

/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
        igb_free_queues(adapter);
        igb_free_q_vectors(adapter);
        igb_reset_interrupt_capability(adapter);
}

/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static void igb_set_interrupt_capability(struct igb_adapter *adapter)
{
        int err;
        int numvecs, i;

        /* Number of supported queues. */
        adapter->num_rx_queues = adapter->rss_queues;
        adapter->num_tx_queues = adapter->rss_queues;

        /* start with one vector for every rx queue */
        numvecs = adapter->num_rx_queues;

        /* if tx handler is separate, add 1 for every tx queue */
        if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
                numvecs += adapter->num_tx_queues;

        /* store the number of vectors reserved for queues */
        adapter->num_q_vectors = numvecs;

        /* add 1 vector for link status interrupts */
        numvecs++;
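        /* e.g. 4 RSS queues without queue pairing gives 4 Rx + 4 Tx
         * q_vectors plus the link vector = 9 MSI-X vectors requested;
         * with queue pairing it would be 4 + 1 = 5. */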
        adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
                                        GFP_KERNEL);
        if (!adapter->msix_entries)
                goto msi_only;

        for (i = 0; i < numvecs; i++)
                adapter->msix_entries[i].entry = i;

        err = pci_enable_msix(adapter->pdev,
                              adapter->msix_entries,
                              numvecs);
        if (err == 0)
                goto out;

        igb_reset_interrupt_capability(adapter);

        /* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
        /* disable SR-IOV for non MSI-X configurations */
        if (adapter->vf_data) {
                struct e1000_hw *hw = &adapter->hw;
                /* disable iov and allow time for transactions to clear */
                pci_disable_sriov(adapter->pdev);
                msleep(500);

                kfree(adapter->vf_data);
                adapter->vf_data = NULL;
                wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
                msleep(100);
                dev_info(&adapter->pdev->dev, "IOV Disabled\n");
        }
#endif
        adapter->vfs_allocated_count = 0;
        adapter->rss_queues = 1;
        adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
        adapter->num_rx_queues = 1;
        adapter->num_tx_queues = 1;
        adapter->num_q_vectors = 1;
        if (!pci_enable_msi(adapter->pdev))
                adapter->flags |= IGB_FLAG_HAS_MSI;
out:
        /* Notify the stack of the (possibly) reduced Tx Queue count. */
        adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
        return;
}

/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
        struct igb_q_vector *q_vector;
        struct e1000_hw *hw = &adapter->hw;
        int v_idx;

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
                q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
                if (!q_vector)
                        goto err_out;
                q_vector->adapter = adapter;
                q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
                q_vector->itr_val = IGB_START_ITR;
                netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
                adapter->q_vector[v_idx] = q_vector;
        }
        return 0;

err_out:
        while (v_idx) {
                v_idx--;
                q_vector = adapter->q_vector[v_idx];
                netif_napi_del(&q_vector->napi);
                kfree(q_vector);
                adapter->q_vector[v_idx] = NULL;
        }
        return -ENOMEM;
}

static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

        q_vector->rx_ring = adapter->rx_ring[ring_idx];
        q_vector->rx_ring->q_vector = q_vector;
        q_vector->itr_val = adapter->rx_itr_setting;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

        q_vector->tx_ring = adapter->tx_ring[ring_idx];
        q_vector->tx_ring->q_vector = q_vector;
        q_vector->itr_val = adapter->tx_itr_setting;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}

/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
        int i;
        int v_idx = 0;

        if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
            (adapter->num_q_vectors < adapter->num_tx_queues))
                return -ENOMEM;

        if (adapter->num_q_vectors >=
            (adapter->num_rx_queues + adapter->num_tx_queues)) {
                for (i = 0; i < adapter->num_rx_queues; i++)
                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
                for (i = 0; i < adapter->num_tx_queues; i++)
                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
        } else {
                for (i = 0; i < adapter->num_rx_queues; i++) {
                        if (i < adapter->num_tx_queues)
                                igb_map_tx_ring_to_vector(adapter, i, v_idx);
                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
                }
                for (; i < adapter->num_tx_queues; i++)
                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
        }
        return 0;
}

/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
        struct pci_dev *pdev = adapter->pdev;
        int err;

        igb_set_interrupt_capability(adapter);

        err = igb_alloc_q_vectors(adapter);
        if (err) {
                dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
                goto err_alloc_q_vectors;
        }

        err = igb_alloc_queues(adapter);
        if (err) {
                dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
                goto err_alloc_queues;
        }

        err = igb_map_ring_to_vector(adapter);
        if (err) {
                dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
                goto err_map_queues;
        }


        return 0;
err_map_queues:
        igb_free_queues(adapter);
err_alloc_queues:
        igb_free_q_vectors(adapter);
err_alloc_q_vectors:
        igb_reset_interrupt_capability(adapter);
        return err;
}

/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct pci_dev *pdev = adapter->pdev;
        int err = 0;

        if (adapter->msix_entries) {
                err = igb_request_msix(adapter);
                if (!err)
                        goto request_done;
                /* fall back to MSI */
                igb_clear_interrupt_scheme(adapter);
                if (!pci_enable_msi(adapter->pdev))
                        adapter->flags |= IGB_FLAG_HAS_MSI;
                igb_free_all_tx_resources(adapter);
                igb_free_all_rx_resources(adapter);
                adapter->num_tx_queues = 1;
                adapter->num_rx_queues = 1;
                adapter->num_q_vectors = 1;
                err = igb_alloc_q_vectors(adapter);
                if (err) {
                        dev_err(&pdev->dev,
                                "Unable to allocate memory for vectors\n");
                        goto request_done;
                }
                err = igb_alloc_queues(adapter);
                if (err) {
                        dev_err(&pdev->dev,
                                "Unable to allocate memory for queues\n");
                        igb_free_q_vectors(adapter);
                        goto request_done;
                }
                igb_setup_all_tx_resources(adapter);
                igb_setup_all_rx_resources(adapter);
        } else {
                igb_assign_vector(adapter->q_vector[0], 0);
        }

        if (adapter->flags & IGB_FLAG_HAS_MSI) {
                err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
                                  netdev->name, adapter);
                if (!err)
                        goto request_done;

                /* fall back to legacy interrupts */
                igb_reset_interrupt_capability(adapter);
                adapter->flags &= ~IGB_FLAG_HAS_MSI;
        }

        err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
                          netdev->name, adapter);

        if (err)
                dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
                        err);

request_done:
        return err;
}

static void igb_free_irq(struct igb_adapter *adapter)
{
        if (adapter->msix_entries) {
                int vector = 0, i;

                free_irq(adapter->msix_entries[vector++].vector, adapter);

                for (i = 0; i < adapter->num_q_vectors; i++) {
                        struct igb_q_vector *q_vector = adapter->q_vector[i];
                        free_irq(adapter->msix_entries[vector++].vector,
                                 q_vector);
                }
        } else {
                free_irq(adapter->pdev->irq, adapter);
        }
}

/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;

        /*
         * we need to be careful when disabling interrupts.  The VFs are also
         * mapped into these registers and so clearing the bits can cause
         * issues on the VF drivers so we only need to clear what we set
         */
        if (adapter->msix_entries) {
                u32 regval = rd32(E1000_EIAM);
                wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
                wr32(E1000_EIMC, adapter->eims_enable_mask);
                regval = rd32(E1000_EIAC);
                wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
        }

        wr32(E1000_IAM, 0);
        wr32(E1000_IMC, ~0);
        wrfl();
        synchronize_irq(adapter->pdev->irq);
}

/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;

        if (adapter->msix_entries) {
                u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
                u32 regval = rd32(E1000_EIAC);
                wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
                regval = rd32(E1000_EIAM);
                wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
                wr32(E1000_EIMS, adapter->eims_enable_mask);
                if (adapter->vfs_allocated_count) {
                        wr32(E1000_MBVFIMR, 0xFF);
                        ims |= E1000_IMS_VMMB;
                }
                if (adapter->hw.mac.type == e1000_82580)
                        ims |= E1000_IMS_DRSTA;

                wr32(E1000_IMS, ims);
        } else {
                wr32(E1000_IMS, IMS_ENABLE_MASK |
                                E1000_IMS_DRSTA);
                wr32(E1000_IAM, IMS_ENABLE_MASK |
                                E1000_IMS_DRSTA);
        }
}

static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u16 vid = adapter->hw.mng_cookie.vlan_id;
        u16 old_vid = adapter->mng_vlan_id;

        if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
                /* add VID to filter table */
                igb_vfta_set(hw, vid, true);
                adapter->mng_vlan_id = vid;
        } else {
                adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
        }

        if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
            (vid != old_vid) &&
            !vlan_group_get_device(adapter->vlgrp, old_vid)) {
                /* remove VID from filter table */
                igb_vfta_set(hw, old_vid, false);
        }
}

/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 *
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 ctrl_ext;

        /* Let firmware take over control of h/w */
        ctrl_ext = rd32(E1000_CTRL_EXT);
        wr32(E1000_CTRL_EXT,
                        ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 *
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 ctrl_ext;

        /* Let firmware know the driver has taken over */
        ctrl_ext = rd32(E1000_CTRL_EXT);
        wr32(E1000_CTRL_EXT,
                        ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        int i;

        igb_get_hw_control(adapter);
        igb_set_rx_mode(netdev);

        igb_restore_vlan(adapter);

        igb_setup_tctl(adapter);
        igb_setup_mrqc(adapter);
        igb_setup_rctl(adapter);

        igb_configure_tx(adapter);
        igb_configure_rx(adapter);

        igb_rx_fifo_flush_82575(&adapter->hw);

        /* call igb_desc_unused which always leaves
         * at least 1 descriptor unused to make sure
         * next_to_use != next_to_clean */
        for (i = 0; i < adapter->num_rx_queues; i++) {
                struct igb_ring *ring = adapter->rx_ring[i];
                igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
        }


        adapter->tx_queue_len = netdev->tx_queue_len;
}

/**
 * igb_power_up_link - Power up the phy/serdes link
 * @adapter: address of board private structure
 **/
void igb_power_up_link(struct igb_adapter *adapter)
{
        if (adapter->hw.phy.media_type == e1000_media_type_copper)
                igb_power_up_phy_copper(&adapter->hw);
        else
                igb_power_up_serdes_link_82575(&adapter->hw);
}

/**
 * igb_power_down_link - Power down the phy/serdes link
 * @adapter: address of board private structure
 */
static void igb_power_down_link(struct igb_adapter *adapter)
{
        if (adapter->hw.phy.media_type == e1000_media_type_copper)
                igb_power_down_phy_copper_82575(&adapter->hw);
        else
                igb_shutdown_serdes_link_82575(&adapter->hw);
}

/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        int i;

        /* hardware has been reset, we need to reload some things */
        igb_configure(adapter);

        clear_bit(__IGB_DOWN, &adapter->state);

        for (i = 0; i < adapter->num_q_vectors; i++) {
                struct igb_q_vector *q_vector = adapter->q_vector[i];
                napi_enable(&q_vector->napi);
        }
        if (adapter->msix_entries)
                igb_configure_msix(adapter);
        else
                igb_assign_vector(adapter->q_vector[0], 0);

        /* Clear any pending interrupts. */
        rd32(E1000_ICR);
        igb_irq_enable(adapter);

        /* notify VFs that reset has been completed */
        if (adapter->vfs_allocated_count) {
                u32 reg_data = rd32(E1000_CTRL_EXT);
                reg_data |= E1000_CTRL_EXT_PFRSTD;
                wr32(E1000_CTRL_EXT, reg_data);
        }

        netif_tx_start_all_queues(adapter->netdev);

        /* start the watchdog. */
        hw->mac.get_link_status = 1;
        schedule_work(&adapter->watchdog_task);

        return 0;
}

void igb_down(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        u32 tctl, rctl;
        int i;

        /* signal that we're down so the interrupt handler does not
         * reschedule our watchdog timer */
        set_bit(__IGB_DOWN, &adapter->state);

        /* disable receives in the hardware */
        rctl = rd32(E1000_RCTL);
        wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
        /* flush and sleep below */

        netif_tx_stop_all_queues(netdev);

        /* disable transmits in the hardware */
        tctl = rd32(E1000_TCTL);
        tctl &= ~E1000_TCTL_EN;
        wr32(E1000_TCTL, tctl);
        /* flush both disables and wait for them to finish */
        wrfl();
        msleep(10);

        for (i = 0; i < adapter->num_q_vectors; i++) {
                struct igb_q_vector *q_vector = adapter->q_vector[i];
                napi_disable(&q_vector->napi);
        }

        igb_irq_disable(adapter);

        del_timer_sync(&adapter->watchdog_timer);
        del_timer_sync(&adapter->phy_info_timer);

        netdev->tx_queue_len = adapter->tx_queue_len;
        netif_carrier_off(netdev);

        /* record the stats before reset */
        igb_update_stats(adapter);

        adapter->link_speed = 0;
        adapter->link_duplex = 0;

        if (!pci_channel_offline(adapter->pdev))
                igb_reset(adapter);
        igb_clean_all_tx_rings(adapter);
        igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA

        /* since we reset the hardware DCA settings were cleared */
        igb_setup_dca(adapter);
#endif
}

void igb_reinit_locked(struct igb_adapter *adapter)
{
        WARN_ON(in_interrupt());
        while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
                msleep(1);
        igb_down(adapter);
        igb_up(adapter);
        clear_bit(__IGB_RESETTING, &adapter->state);
}

void igb_reset(struct igb_adapter *adapter)
{
        struct pci_dev *pdev = adapter->pdev;
        struct e1000_hw *hw = &adapter->hw;
        struct e1000_mac_info *mac = &hw->mac;
        struct e1000_fc_info *fc = &hw->fc;
        u32 pba = 0, tx_space, min_tx_space, min_rx_space;
        u16 hwm;

        /* Repartition PBA for greater than 9k mtu
         * To take effect CTRL.RST is required.
         */
        switch (mac->type) {
        case e1000_82580:
                pba = rd32(E1000_RXPBS);
                pba = igb_rxpbs_adjust_82580(pba);
                break;
        case e1000_82576:
                pba = rd32(E1000_RXPBS);
                pba &= E1000_RXPBS_SIZE_MASK_82576;
                break;
        case e1000_82575:
        default:
                pba = E1000_PBA_34K;
                break;
        }

        if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
            (mac->type < e1000_82576)) {
                /* adjust PBA for jumbo frames */
                wr32(E1000_PBA, pba);

                /* To maintain wire speed transmits, the Tx FIFO should be
                 * large enough to accommodate two full transmit packets,
                 * rounded up to the next 1KB and expressed in KB.  Likewise,
                 * the Rx FIFO should be large enough to accommodate at least
                 * one full receive packet and is similarly rounded up and
                 * expressed in KB. */
                pba = rd32(E1000_PBA);
                /* upper 16 bits has Tx packet buffer allocation size in KB */
                tx_space = pba >> 16;
                /* lower 16 bits has Rx packet buffer allocation size in KB */
                pba &= 0xffff;
                /* the tx fifo also stores 16 bytes of information about the tx
                 * but don't include ethernet FCS because hardware appends it */
                min_tx_space = (adapter->max_frame_size +
                                sizeof(union e1000_adv_tx_desc) -
                                ETH_FCS_LEN) * 2;
                min_tx_space = ALIGN(min_tx_space, 1024);
1298                 min_tx_space >>= 10;
1299                 /* software strips receive CRC, so leave room for it */
1300                 min_rx_space = adapter->max_frame_size;
1301                 min_rx_space = ALIGN(min_rx_space, 1024);
1302                 min_rx_space >>= 10;
1303
1304                 /* If current Tx allocation is less than the min Tx FIFO size,
1305                  * and the min Tx FIFO size is less than the current Rx FIFO
1306                  * allocation, take space away from current Rx allocation */
1307                 if (tx_space < min_tx_space &&
1308                     ((min_tx_space - tx_space) < pba)) {
1309                         pba = pba - (min_tx_space - tx_space);
1310
1311                         /* if short on rx space, rx wins and must trump tx
1312                          * adjustment */
1313                         if (pba < min_rx_space)
1314                                 pba = min_rx_space;
1315                 }
1316                 wr32(E1000_PBA, pba);
1317         }
1318
1319         /* flow control settings */
1320         /* The high water mark must be low enough to fit one full frame
1321          * (or the size used for early receive) above it in the Rx FIFO.
1322          * Set it to the lower of:
1323          * - 90% of the Rx FIFO size, or
1324          * - the full Rx FIFO size minus one full frame */
1325         hwm = min(((pba << 10) * 9 / 10),
1326                         ((pba << 10) - 2 * adapter->max_frame_size));
1327
1328         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1329         fc->low_water = fc->high_water - 16;
1330         fc->pause_time = 0xFFFF;
1331         fc->send_xon = 1;
1332         fc->current_mode = fc->requested_mode;
1333
1334         /* disable receive for all VFs and wait one second */
1335         if (adapter->vfs_allocated_count) {
1336                 int i;
1337                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1338                         adapter->vf_data[i].flags = 0;
1339
1340                 /* ping all the active vfs to let them know we are going down */
1341                 igb_ping_all_vfs(adapter);
1342
1343                 /* disable transmits and receives */
1344                 wr32(E1000_VFRE, 0);
1345                 wr32(E1000_VFTE, 0);
1346         }
1347
1348         /* Allow time for pending master requests to run */
1349         hw->mac.ops.reset_hw(hw);
1350         wr32(E1000_WUC, 0);
1351
1352         if (hw->mac.ops.init_hw(hw))
1353                 dev_err(&pdev->dev, "Hardware Error\n");
1354
1355         if (hw->mac.type == e1000_82580) {
1356                 u32 reg = rd32(E1000_PCIEMISC);
1357                 wr32(E1000_PCIEMISC,
1358                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1359         }
1360         if (!netif_running(adapter->netdev))
1361                 igb_power_down_link(adapter);
1362
1363         igb_update_mng_vlan(adapter);
1364
1365         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1366         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1367
1368         igb_get_phy_info(hw);
1369 }
1370
1371 static const struct net_device_ops igb_netdev_ops = {
1372         .ndo_open               = igb_open,
1373         .ndo_stop               = igb_close,
1374         .ndo_start_xmit         = igb_xmit_frame_adv,
1375         .ndo_get_stats          = igb_get_stats,
1376         .ndo_set_rx_mode        = igb_set_rx_mode,
1377         .ndo_set_multicast_list = igb_set_rx_mode,
1378         .ndo_set_mac_address    = igb_set_mac,
1379         .ndo_change_mtu         = igb_change_mtu,
1380         .ndo_do_ioctl           = igb_ioctl,
1381         .ndo_tx_timeout         = igb_tx_timeout,
1382         .ndo_validate_addr      = eth_validate_addr,
1383         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1384         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1385         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1386         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1387         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1388         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1389         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1390 #ifdef CONFIG_NET_POLL_CONTROLLER
1391         .ndo_poll_controller    = igb_netpoll,
1392 #endif
1393 };
1394
1395 /**
1396  * igb_probe - Device Initialization Routine
1397  * @pdev: PCI device information struct
1398  * @ent: entry in igb_pci_tbl
1399  *
1400  * Returns 0 on success, negative on failure
1401  *
1402  * igb_probe initializes an adapter identified by a pci_dev structure.
1403  * The OS initialization, configuring of the adapter private structure,
1404  * and a hardware reset occur.
1405  **/
1406 static int __devinit igb_probe(struct pci_dev *pdev,
1407                                const struct pci_device_id *ent)
1408 {
1409         struct net_device *netdev;
1410         struct igb_adapter *adapter;
1411         struct e1000_hw *hw;
1412         u16 eeprom_data = 0;
1413         static int global_quad_port_a; /* global quad port a indication */
1414         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1415         unsigned long mmio_start, mmio_len;
1416         int err, pci_using_dac;
1417         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1418         u32 part_num;
1419
1420         err = pci_enable_device_mem(pdev);
1421         if (err)
1422                 return err;
1423
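        /* Try 64-bit DMA first and fall back to a 32-bit mask; pci_using_dac
         * later gates NETIF_F_HIGHDMA on the netdev. */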
1424         pci_using_dac = 0;
1425         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1426         if (!err) {
1427                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1428                 if (!err)
1429                         pci_using_dac = 1;
1430         } else {
1431                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1432                 if (err) {
1433                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1434                         if (err) {
1435                                 dev_err(&pdev->dev, "No usable DMA "
1436                                         "configuration, aborting\n");
1437                                 goto err_dma;
1438                         }
1439                 }
1440         }
1441
1442         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1443                                            IORESOURCE_MEM),
1444                                            igb_driver_name);
1445         if (err)
1446                 goto err_pci_reg;
1447
1448         pci_enable_pcie_error_reporting(pdev);
1449
1450         pci_set_master(pdev);
1451         pci_save_state(pdev);
1452
1453         err = -ENOMEM;
1454         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1455                                    IGB_ABS_MAX_TX_QUEUES);
1456         if (!netdev)
1457                 goto err_alloc_etherdev;
1458
1459         SET_NETDEV_DEV(netdev, &pdev->dev);
1460
1461         pci_set_drvdata(pdev, netdev);
1462         adapter = netdev_priv(netdev);
1463         adapter->netdev = netdev;
1464         adapter->pdev = pdev;
1465         hw = &adapter->hw;
1466         hw->back = adapter;
1467         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1468
1469         mmio_start = pci_resource_start(pdev, 0);
1470         mmio_len = pci_resource_len(pdev, 0);
1471
1472         err = -EIO;
1473         hw->hw_addr = ioremap(mmio_start, mmio_len);
1474         if (!hw->hw_addr)
1475                 goto err_ioremap;
1476
1477         netdev->netdev_ops = &igb_netdev_ops;
1478         igb_set_ethtool_ops(netdev);
1479         netdev->watchdog_timeo = 5 * HZ;
1480
1481         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1482
1483         netdev->mem_start = mmio_start;
1484         netdev->mem_end = mmio_start + mmio_len;
1485
1486         /* PCI config space info */
1487         hw->vendor_id = pdev->vendor;
1488         hw->device_id = pdev->device;
1489         hw->revision_id = pdev->revision;
1490         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1491         hw->subsystem_device_id = pdev->subsystem_device;
1492
1493         /* Copy the default MAC, PHY and NVM function pointers */
1494         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1495         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1496         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1497         /* Initialize skew-specific constants */
1498         err = ei->get_invariants(hw);
1499         if (err)
1500                 goto err_sw_init;
1501
1502         /* setup the private structure */
1503         err = igb_sw_init(adapter);
1504         if (err)
1505                 goto err_sw_init;
1506
1507         igb_get_bus_info_pcie(hw);
1508
1509         hw->phy.autoneg_wait_to_complete = false;
1510
1511         /* Copper options */
1512         if (hw->phy.media_type == e1000_media_type_copper) {
1513                 hw->phy.mdix = AUTO_ALL_MODES;
1514                 hw->phy.disable_polarity_correction = false;
1515                 hw->phy.ms_type = e1000_ms_hw_default;
1516         }
1517
1518         if (igb_check_reset_block(hw))
1519                 dev_info(&pdev->dev,
1520                         "PHY reset is blocked due to SOL/IDER session.\n");
1521
1522         netdev->features = NETIF_F_SG |
1523                            NETIF_F_IP_CSUM |
1524                            NETIF_F_HW_VLAN_TX |
1525                            NETIF_F_HW_VLAN_RX |
1526                            NETIF_F_HW_VLAN_FILTER;
1527
1528         netdev->features |= NETIF_F_IPV6_CSUM;
1529         netdev->features |= NETIF_F_TSO;
1530         netdev->features |= NETIF_F_TSO6;
1531         netdev->features |= NETIF_F_GRO;
1532
1533         netdev->vlan_features |= NETIF_F_TSO;
1534         netdev->vlan_features |= NETIF_F_TSO6;
1535         netdev->vlan_features |= NETIF_F_IP_CSUM;
1536         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1537         netdev->vlan_features |= NETIF_F_SG;
1538
1539         if (pci_using_dac)
1540                 netdev->features |= NETIF_F_HIGHDMA;
1541
1542         if (hw->mac.type >= e1000_82576)
1543                 netdev->features |= NETIF_F_SCTP_CSUM;
1544
1545         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1546
1547         /* before reading the NVM, reset the controller to put the device in a
1548          * known good starting state */
1549         hw->mac.ops.reset_hw(hw);
1550
1551         /* make sure the NVM is good */
1552         if (igb_validate_nvm_checksum(hw) < 0) {
1553                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1554                 err = -EIO;
1555                 goto err_eeprom;
1556         }
1557
1558         /* copy the MAC address out of the NVM */
1559         if (hw->mac.ops.read_mac_addr(hw))
1560                 dev_err(&pdev->dev, "NVM Read Error\n");
1561
1562         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1563         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1564
1565         if (!is_valid_ether_addr(netdev->perm_addr)) {
1566                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1567                 err = -EIO;
1568                 goto err_eeprom;
1569         }
1570
1571         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1572                     (unsigned long) adapter);
1573         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1574                     (unsigned long) adapter);
1575
1576         INIT_WORK(&adapter->reset_task, igb_reset_task);
1577         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1578
1579         /* Initialize link properties that are user-changeable */
1580         adapter->fc_autoneg = true;
1581         hw->mac.autoneg = true;
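        /* 0x2f advertises 10/100 half and full duplex plus 1000 Mb/s full duplex */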
1582         hw->phy.autoneg_advertised = 0x2f;
1583
1584         hw->fc.requested_mode = e1000_fc_default;
1585         hw->fc.current_mode = e1000_fc_default;
1586
1587         igb_validate_mdi_setting(hw);
1588
1589         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1590          * enable the ACPI Magic Packet filter
1591          */
1592
1593         if (hw->bus.func == 0)
1594                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1595         else if (hw->mac.type == e1000_82580)
1596                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1597                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1598                                  &eeprom_data);
1599         else if (hw->bus.func == 1)
1600                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1601
1602         if (eeprom_data & eeprom_apme_mask)
1603                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1604
1605         /* now that we have the eeprom settings, apply the special cases where
1606          * the eeprom may be wrong or the board simply won't support wake on
1607          * lan on a particular port */
1608         switch (pdev->device) {
1609         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1610                 adapter->eeprom_wol = 0;
1611                 break;
1612         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1613         case E1000_DEV_ID_82576_FIBER:
1614         case E1000_DEV_ID_82576_SERDES:
1615                 /* Wake events only supported on port A for dual fiber
1616                  * regardless of eeprom setting */
1617                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1618                         adapter->eeprom_wol = 0;
1619                 break;
1620         case E1000_DEV_ID_82576_QUAD_COPPER:
1621                 /* if quad port adapter, disable WoL on all but port A */
1622                 if (global_quad_port_a != 0)
1623                         adapter->eeprom_wol = 0;
1624                 else
1625                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1626                 /* Reset for multiple quad port adapters */
1627                 if (++global_quad_port_a == 4)
1628                         global_quad_port_a = 0;
1629                 break;
1630         }
1631
1632         /* initialize the wol settings based on the eeprom settings */
1633         adapter->wol = adapter->eeprom_wol;
1634         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1635
1636         /* reset the hardware with the new settings */
1637         igb_reset(adapter);
1638
1639         /* let the f/w know that the h/w is now under the control of the
1640          * driver. */
1641         igb_get_hw_control(adapter);
1642
1643         strcpy(netdev->name, "eth%d");
1644         err = register_netdev(netdev);
1645         if (err)
1646                 goto err_register;
1647
1648         /* carrier off reporting is important to ethtool even BEFORE open */
1649         netif_carrier_off(netdev);
1650
1651 #ifdef CONFIG_IGB_DCA
1652         if (dca_add_requester(&pdev->dev) == 0) {
1653                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1654                 dev_info(&pdev->dev, "DCA enabled\n");
1655                 igb_setup_dca(adapter);
1656         }
1657
1658 #endif
1659         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1660         /* print bus type/speed/width info */
1661         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1662                  netdev->name,
1663                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1664                                                             "unknown"),
1665                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1666                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1667                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1668                    "unknown"),
1669                  netdev->dev_addr);
1670
1671         igb_read_part_num(hw, &part_num);
1672         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1673                 (part_num >> 8), (part_num & 0xff));
1674
1675         dev_info(&pdev->dev,
1676                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1677                 adapter->msix_entries ? "MSI-X" :
1678                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1679                 adapter->num_rx_queues, adapter->num_tx_queues);
1680
1681         return 0;
1682
1683 err_register:
1684         igb_release_hw_control(adapter);
1685 err_eeprom:
1686         if (!igb_check_reset_block(hw))
1687                 igb_reset_phy(hw);
1688
1689         if (hw->flash_address)
1690                 iounmap(hw->flash_address);
1691 err_sw_init:
1692         igb_clear_interrupt_scheme(adapter);
1693         iounmap(hw->hw_addr);
1694 err_ioremap:
1695         free_netdev(netdev);
1696 err_alloc_etherdev:
1697         pci_release_selected_regions(pdev,
1698                                      pci_select_bars(pdev, IORESOURCE_MEM));
1699 err_pci_reg:
1700 err_dma:
1701         pci_disable_device(pdev);
1702         return err;
1703 }
1704
1705 /**
1706  * igb_remove - Device Removal Routine
1707  * @pdev: PCI device information struct
1708  *
1709  * igb_remove is called by the PCI subsystem to alert the driver
1710  * that it should release a PCI device.  This could be caused by a
1711  * Hot-Plug event, or because the driver is going to be removed from
1712  * memory.
1713  **/
1714 static void __devexit igb_remove(struct pci_dev *pdev)
1715 {
1716         struct net_device *netdev = pci_get_drvdata(pdev);
1717         struct igb_adapter *adapter = netdev_priv(netdev);
1718         struct e1000_hw *hw = &adapter->hw;
1719
1720         /* flush_scheduled_work() may reschedule our watchdog task, so
1721          * explicitly disable the watchdog tasks from being rescheduled */
1722         set_bit(__IGB_DOWN, &adapter->state);
1723         del_timer_sync(&adapter->watchdog_timer);
1724         del_timer_sync(&adapter->phy_info_timer);
1725
1726         flush_scheduled_work();
1727
1728 #ifdef CONFIG_IGB_DCA
1729         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1730                 dev_info(&pdev->dev, "DCA disabled\n");
1731                 dca_remove_requester(&pdev->dev);
1732                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1733                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1734         }
1735 #endif
1736
1737         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1738          * would have already happened in close and is redundant. */
1739         igb_release_hw_control(adapter);
1740
1741         unregister_netdev(netdev);
1742
1743         igb_clear_interrupt_scheme(adapter);
1744
1745 #ifdef CONFIG_PCI_IOV
1746         /* reclaim resources allocated to VFs */
1747         if (adapter->vf_data) {
1748                 /* disable iov and allow time for transactions to clear */
1749                 pci_disable_sriov(pdev);
1750                 msleep(500);
1751
1752                 kfree(adapter->vf_data);
1753                 adapter->vf_data = NULL;
1754                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1755                 msleep(100);
1756                 dev_info(&pdev->dev, "IOV Disabled\n");
1757         }
1758 #endif
1759
1760         iounmap(hw->hw_addr);
1761         if (hw->flash_address)
1762                 iounmap(hw->flash_address);
1763         pci_release_selected_regions(pdev,
1764                                      pci_select_bars(pdev, IORESOURCE_MEM));
1765
1766         free_netdev(netdev);
1767
1768         pci_disable_pcie_error_reporting(pdev);
1769
1770         pci_disable_device(pdev);
1771 }
1772
1773 /**
1774  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1775  * @adapter: board private structure to initialize
1776  *
1777  * This function initializes the vf specific data storage and then attempts to
1778  * allocate the VFs.  The reason for this ordering is that it is much more
1779  * expensive time-wise to disable SR-IOV than it is to allocate and free
1780  * the memory for the VFs.
1781  **/
1782 static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
1783 {
1784 #ifdef CONFIG_PCI_IOV
1785         struct pci_dev *pdev = adapter->pdev;
1786
1787         if (adapter->vfs_allocated_count > 7)
1788                 adapter->vfs_allocated_count = 7;
1789
1790         if (adapter->vfs_allocated_count) {
1791                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1792                                            sizeof(struct vf_data_storage),
1793                                            GFP_KERNEL);
1794                 /* if allocation failed then we do not support SR-IOV */
1795                 if (!adapter->vf_data) {
1796                         adapter->vfs_allocated_count = 0;
1797                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
1798                                 "Data Storage\n");
1799                 }
1800         }
1801
1802         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1803                 kfree(adapter->vf_data);
1804                 adapter->vf_data = NULL;
1805 #endif /* CONFIG_PCI_IOV */
1806                 adapter->vfs_allocated_count = 0;
1807 #ifdef CONFIG_PCI_IOV
1808         } else {
1809                 unsigned char mac_addr[ETH_ALEN];
1810                 int i;
1811                 dev_info(&pdev->dev, "%d vfs allocated\n",
1812                          adapter->vfs_allocated_count);
1813                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1814                         random_ether_addr(mac_addr);
1815                         igb_set_vf_mac(adapter, i, mac_addr);
1816                 }
1817         }
1818 #endif /* CONFIG_PCI_IOV */
1819 }
1820
1821
1822 /**
1823  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
1824  * @adapter: board private structure to initialize
1825  *
1826  * igb_init_hw_timer initializes the function pointer and values for the hw
1827  * timer found in hardware.
1828  **/
1829 static void igb_init_hw_timer(struct igb_adapter *adapter)
1830 {
1831         struct e1000_hw *hw = &adapter->hw;
1832
1833         switch (hw->mac.type) {
1834         case e1000_82580:
1835                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1836                 adapter->cycles.read = igb_read_clock;
1837                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1838                 adapter->cycles.mult = 1;
1839                 /*
1840                  * The 82580 timesync updates the system timer in 8 ns increments
1841                  * and the value cannot be shifted.  Instead we need to shift
1842                  * the registers to generate a 64bit timer value.  As a result
1843                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
1844                  * 24 in order to generate a larger value for synchronization.
1845                  */
1846                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
1847                 /* disable system timer temporarily by setting bit 31 */
1848                 wr32(E1000_TSAUXC, 0x80000000);
1849                 wrfl();
1850
1851                 /* Set registers so that rollover occurs soon to test this. */
1852                 wr32(E1000_SYSTIMR, 0x00000000);
1853                 wr32(E1000_SYSTIML, 0x80000000);
1854                 wr32(E1000_SYSTIMH, 0x000000FF);
1855                 wrfl();
1856
1857                 /* enable system timer by clearing bit 31 */
1858                 wr32(E1000_TSAUXC, 0x0);
1859                 wrfl();
1860
1861                 timecounter_init(&adapter->clock,
1862                                  &adapter->cycles,
1863                                  ktime_to_ns(ktime_get_real()));
1864                 /*
1865                  * Synchronize our NIC clock against system wall clock. NIC
1866                  * time stamp reading requires ~3us per sample, each sample
1867                  * was pretty stable even under load => only require 10
1868                  * samples for each offset comparison.
1869                  */
1870                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1871                 adapter->compare.source = &adapter->clock;
1872                 adapter->compare.target = ktime_get_real;
1873                 adapter->compare.num_samples = 10;
1874                 timecompare_update(&adapter->compare, 0);
1875                 break;
1876         case e1000_82576:
1877                 /*
1878                  * Initialize hardware timer: we keep it running just in case
1879                  * that some program needs it later on.
1880                  */
1881                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1882                 adapter->cycles.read = igb_read_clock;
1883                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1884                 adapter->cycles.mult = 1;
1885                 /*
1886                  * Scale the NIC clock cycle by a large factor so that
1887                  * relatively small clock corrections can be added or
1888                  * subtracted at each clock tick. The drawbacks of a large
1889                  * factor are a) that the clock register overflows more quickly
1890                  * (not such a big deal) and b) that the increment per tick has
1891                  * to fit into 24 bits.  As a result we need to use a shift of
1892                  * 19 so we can fit a value of 16 into the TIMINCA register.
1893                  */
1894                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1895                 wr32(E1000_TIMINCA,
1896                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1897                                 (16 << IGB_82576_TSYNC_SHIFT));
1898
1899                 /* Set registers so that rollover occurs soon to test this. */
1900                 wr32(E1000_SYSTIML, 0x00000000);
1901                 wr32(E1000_SYSTIMH, 0xFF800000);
1902                 wrfl();
1903
1904                 timecounter_init(&adapter->clock,
1905                                  &adapter->cycles,
1906                                  ktime_to_ns(ktime_get_real()));
1907                 /*
1908                  * Synchronize our NIC clock against system wall clock. NIC
1909                  * time stamp reading requires ~3us per sample, each sample
1910                  * was pretty stable even under load => only require 10
1911                  * samples for each offset comparison.
1912                  */
1913                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1914                 adapter->compare.source = &adapter->clock;
1915                 adapter->compare.target = ktime_get_real;
1916                 adapter->compare.num_samples = 10;
1917                 timecompare_update(&adapter->compare, 0);
1918                 break;
1919         case e1000_82575:
1920                 /* 82575 does not support timesync */
1921         default:
1922                 break;
1923         }
1924
1925 }
1926
1927 /**
1928  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1929  * @adapter: board private structure to initialize
1930  *
1931  * igb_sw_init initializes the Adapter private data structure.
1932  * Fields are initialized based on PCI device information and
1933  * OS network device settings (MTU size).
1934  **/
1935 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1936 {
1937         struct e1000_hw *hw = &adapter->hw;
1938         struct net_device *netdev = adapter->netdev;
1939         struct pci_dev *pdev = adapter->pdev;
1940
1941         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1942
1943         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1944         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1945         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1946         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1947
1948         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1949         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1950
1951 #ifdef CONFIG_PCI_IOV
1952         if (hw->mac.type == e1000_82576)
1953                 adapter->vfs_allocated_count = max_vfs;
1954
1955 #endif /* CONFIG_PCI_IOV */
1956         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
1957
1958         /*
1959          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
1960          * then we should combine the queues into a queue pair in order to
1961          * conserve interrupts due to limited supply
1962          */
1963         if ((adapter->rss_queues > 4) ||
1964             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
1965                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1966
1967         /* This call may decrease the number of queues */
1968         if (igb_init_interrupt_scheme(adapter)) {
1969                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1970                 return -ENOMEM;
1971         }
1972
1973         igb_init_hw_timer(adapter);
1974         igb_probe_vfs(adapter);
1975
1976         /* Explicitly disable IRQ since the NIC can be in any state. */
1977         igb_irq_disable(adapter);
1978
1979         set_bit(__IGB_DOWN, &adapter->state);
1980         return 0;
1981 }
1982
1983 /**
1984  * igb_open - Called when a network interface is made active
1985  * @netdev: network interface device structure
1986  *
1987  * Returns 0 on success, negative value on failure
1988  *
1989  * The open entry point is called when a network interface is made
1990  * active by the system (IFF_UP).  At this point all resources needed
1991  * for transmit and receive operations are allocated, the interrupt
1992  * handler is registered with the OS, the watchdog timer is started,
1993  * and the stack is notified that the interface is ready.
1994  **/
1995 static int igb_open(struct net_device *netdev)
1996 {
1997         struct igb_adapter *adapter = netdev_priv(netdev);
1998         struct e1000_hw *hw = &adapter->hw;
1999         int err;
2000         int i;
2001
2002         /* disallow open during test */
2003         if (test_bit(__IGB_TESTING, &adapter->state))
2004                 return -EBUSY;
2005
2006         netif_carrier_off(netdev);
2007
2008         /* allocate transmit descriptors */
2009         err = igb_setup_all_tx_resources(adapter);
2010         if (err)
2011                 goto err_setup_tx;
2012
2013         /* allocate receive descriptors */
2014         err = igb_setup_all_rx_resources(adapter);
2015         if (err)
2016                 goto err_setup_rx;
2017
2018         igb_power_up_link(adapter);
2019
2020         /* before we allocate an interrupt, we must be ready to handle it.
2021          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2022          * as soon as we call pci_request_irq, so we have to set up our
2023          * clean_rx handler before we do so.  */
2024         igb_configure(adapter);
2025
2026         err = igb_request_irq(adapter);
2027         if (err)
2028                 goto err_req_irq;
2029
2030         /* From here on the code is the same as igb_up() */
2031         clear_bit(__IGB_DOWN, &adapter->state);
2032
2033         for (i = 0; i < adapter->num_q_vectors; i++) {
2034                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2035                 napi_enable(&q_vector->napi);
2036         }
2037
2038         /* Clear any pending interrupts. */
2039         rd32(E1000_ICR);
2040
2041         igb_irq_enable(adapter);
2042
2043         /* notify VFs that reset has been completed */
2044         if (adapter->vfs_allocated_count) {
2045                 u32 reg_data = rd32(E1000_CTRL_EXT);
2046                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2047                 wr32(E1000_CTRL_EXT, reg_data);
2048         }
2049
2050         netif_tx_start_all_queues(netdev);
2051
2052         /* start the watchdog. */
2053         hw->mac.get_link_status = 1;
2054         schedule_work(&adapter->watchdog_task);
2055
2056         return 0;
2057
2058 err_req_irq:
2059         igb_release_hw_control(adapter);
2060         igb_power_down_link(adapter);
2061         igb_free_all_rx_resources(adapter);
2062 err_setup_rx:
2063         igb_free_all_tx_resources(adapter);
2064 err_setup_tx:
2065         igb_reset(adapter);
2066
2067         return err;
2068 }
2069
2070 /**
2071  * igb_close - Disables a network interface
2072  * @netdev: network interface device structure
2073  *
2074  * Returns 0, this is not allowed to fail
2075  *
2076  * The close entry point is called when an interface is de-activated
2077  * by the OS.  The hardware is still under the driver's control, but
2078  * needs to be disabled.  A global MAC reset is issued to stop the
2079  * hardware, and all transmit and receive resources are freed.
2080  **/
2081 static int igb_close(struct net_device *netdev)
2082 {
2083         struct igb_adapter *adapter = netdev_priv(netdev);
2084
2085         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2086         igb_down(adapter);
2087
2088         igb_free_irq(adapter);
2089
2090         igb_free_all_tx_resources(adapter);
2091         igb_free_all_rx_resources(adapter);
2092
2093         return 0;
2094 }
2095
2096 /**
2097  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2098  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2099  *
2100  * Return 0 on success, negative on failure
2101  **/
2102 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2103 {
2104         struct pci_dev *pdev = tx_ring->pdev;
2105         int size;
2106
2107         size = sizeof(struct igb_buffer) * tx_ring->count;
2108         tx_ring->buffer_info = vmalloc(size);
2109         if (!tx_ring->buffer_info)
2110                 goto err;
2111         memset(tx_ring->buffer_info, 0, size);
2112
2113         /* round up to nearest 4K */
2114         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2115         tx_ring->size = ALIGN(tx_ring->size, 4096);
2116
2117         tx_ring->desc = pci_alloc_consistent(pdev,
2118                                              tx_ring->size,
2119                                              &tx_ring->dma);
2120
2121         if (!tx_ring->desc)
2122                 goto err;
2123
2124         tx_ring->next_to_use = 0;
2125         tx_ring->next_to_clean = 0;
2126         return 0;
2127
2128 err:
2129         vfree(tx_ring->buffer_info);
2130         dev_err(&pdev->dev,
2131                 "Unable to allocate memory for the transmit descriptor ring\n");
2132         return -ENOMEM;
2133 }
2134
2135 /**
2136  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2137  *                                (Descriptors) for all queues
2138  * @adapter: board private structure
2139  *
2140  * Return 0 on success, negative on failure
2141  **/
2142 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2143 {
2144         struct pci_dev *pdev = adapter->pdev;
2145         int i, err = 0;
2146
2147         for (i = 0; i < adapter->num_tx_queues; i++) {
2148                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2149                 if (err) {
2150                         dev_err(&pdev->dev,
2151                                 "Allocation for Tx Queue %u failed\n", i);
2152                         for (i--; i >= 0; i--)
2153                                 igb_free_tx_resources(adapter->tx_ring[i]);
2154                         break;
2155                 }
2156         }
2157
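        /* map every possible Tx queue index onto an allocated ring, wrapping round-robin */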
2158         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2159                 int r_idx = i % adapter->num_tx_queues;
2160                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2161         }
2162         return err;
2163 }
2164
2165 /**
2166  * igb_setup_tctl - configure the transmit control registers
2167  * @adapter: Board private structure
2168  **/
2169 void igb_setup_tctl(struct igb_adapter *adapter)
2170 {
2171         struct e1000_hw *hw = &adapter->hw;
2172         u32 tctl;
2173
2174         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2175         wr32(E1000_TXDCTL(0), 0);
2176
2177         /* Program the Transmit Control Register */
2178         tctl = rd32(E1000_TCTL);
2179         tctl &= ~E1000_TCTL_CT;
2180         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2181                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2182
2183         igb_config_collision_dist(hw);
2184
2185         /* Enable transmits */
2186         tctl |= E1000_TCTL_EN;
2187
2188         wr32(E1000_TCTL, tctl);
2189 }
2190
2191 /**
2192  * igb_configure_tx_ring - Configure transmit ring after Reset
2193  * @adapter: board private structure
2194  * @ring: tx ring to configure
2195  *
2196  * Configure a transmit ring after a reset.
2197  **/
2198 void igb_configure_tx_ring(struct igb_adapter *adapter,
2199                            struct igb_ring *ring)
2200 {
2201         struct e1000_hw *hw = &adapter->hw;
2202         u32 txdctl;
2203         u64 tdba = ring->dma;
2204         int reg_idx = ring->reg_idx;
2205
2206         /* disable the queue */
2207         txdctl = rd32(E1000_TXDCTL(reg_idx));
2208         wr32(E1000_TXDCTL(reg_idx),
2209                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2210         wrfl();
2211         mdelay(10);
2212
2213         wr32(E1000_TDLEN(reg_idx),
2214                         ring->count * sizeof(union e1000_adv_tx_desc));
2215         wr32(E1000_TDBAL(reg_idx),
2216                         tdba & 0x00000000ffffffffULL);
2217         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2218
2219         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2220         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2221         writel(0, ring->head);
2222         writel(0, ring->tail);
2223
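        /* merge the prefetch, host and write-back thresholds into their byte lanes of TXDCTL */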
2224         txdctl |= IGB_TX_PTHRESH;
2225         txdctl |= IGB_TX_HTHRESH << 8;
2226         txdctl |= IGB_TX_WTHRESH << 16;
2227
2228         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2229         wr32(E1000_TXDCTL(reg_idx), txdctl);
2230 }
2231
2232 /**
2233  * igb_configure_tx - Configure transmit Unit after Reset
2234  * @adapter: board private structure
2235  *
2236  * Configure the Tx unit of the MAC after a reset.
2237  **/
2238 static void igb_configure_tx(struct igb_adapter *adapter)
2239 {
2240         int i;
2241
2242         for (i = 0; i < adapter->num_tx_queues; i++)
2243                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2244 }
2245
2246 /**
2247  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2248  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2249  *
2250  * Returns 0 on success, negative on failure
2251  **/
2252 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2253 {
2254         struct pci_dev *pdev = rx_ring->pdev;
2255         int size, desc_len;
2256
2257         size = sizeof(struct igb_buffer) * rx_ring->count;
2258         rx_ring->buffer_info = vmalloc(size);
2259         if (!rx_ring->buffer_info)
2260                 goto err;
2261         memset(rx_ring->buffer_info, 0, size);
2262
2263         desc_len = sizeof(union e1000_adv_rx_desc);
2264
2265         /* Round up to nearest 4K */
2266         rx_ring->size = rx_ring->count * desc_len;
2267         rx_ring->size = ALIGN(rx_ring->size, 4096);
2268
2269         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2270                                              &rx_ring->dma);
2271
2272         if (!rx_ring->desc)
2273                 goto err;
2274
2275         rx_ring->next_to_clean = 0;
2276         rx_ring->next_to_use = 0;
2277
2278         return 0;
2279
2280 err:
2281         vfree(rx_ring->buffer_info);
2282         rx_ring->buffer_info = NULL;
2283         dev_err(&pdev->dev, "Unable to allocate memory for "
2284                 "the receive descriptor ring\n");
2285         return -ENOMEM;
2286 }
2287
2288 /**
2289  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2290  *                                (Descriptors) for all queues
2291  * @adapter: board private structure
2292  *
2293  * Return 0 on success, negative on failure
2294  **/
2295 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2296 {
2297         struct pci_dev *pdev = adapter->pdev;
2298         int i, err = 0;
2299
2300         for (i = 0; i < adapter->num_rx_queues; i++) {
2301                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2302                 if (err) {
2303                         dev_err(&pdev->dev,
2304                                 "Allocation for Rx Queue %u failed\n", i);
2305                         for (i--; i >= 0; i--)
2306                                 igb_free_rx_resources(adapter->rx_ring[i]);
2307                         break;
2308                 }
2309         }
2310
2311         return err;
2312 }
2313
2314 /**
2315  * igb_setup_mrqc - configure the multiple receive queue control registers
2316  * @adapter: Board private structure
2317  **/
2318 static void igb_setup_mrqc(struct igb_adapter *adapter)
2319 {
2320         struct e1000_hw *hw = &adapter->hw;
2321         u32 mrqc, rxcsum;
2322         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2323         union e1000_reta {
2324                 u32 dword;
2325                 u8  bytes[4];
2326         } reta;
2327         static const u8 rsshash[40] = {
2328                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2329                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2330                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2331                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2332
2333         /* Fill out hash function seeds */
2334         for (j = 0; j < 10; j++) {
2335                 u32 rsskey = rsshash[(j * 4)];
2336                 rsskey |= rsshash[(j * 4) + 1] << 8;
2337                 rsskey |= rsshash[(j * 4) + 2] << 16;
2338                 rsskey |= rsshash[(j * 4) + 3] << 24;
2339                 array_wr32(E1000_RSSRK(0), j, rsskey);
2340         }
2341
2342         num_rx_queues = adapter->rss_queues;
2343
2344         if (adapter->vfs_allocated_count) {
2345                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2346                 switch (hw->mac.type) {
2347                 case e1000_82580:
2348                         num_rx_queues = 1;
2349                         shift = 0;
2350                         break;
2351                 case e1000_82576:
2352                         shift = 3;
2353                         num_rx_queues = 2;
2354                         break;
2355                 case e1000_82575:
2356                         shift = 2;
2357                         shift2 = 6;
2358                 default:
2359                         break;
2360                 }
2361         } else {
2362                 if (hw->mac.type == e1000_82575)
2363                         shift = 6;
2364         }
2365
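        /* program the 128-entry redirection table, packing four one-byte queue indices per RETA register */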
2366         for (j = 0; j < (32 * 4); j++) {
2367                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2368                 if (shift2)
2369                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2370                 if ((j & 3) == 3)
2371                         wr32(E1000_RETA(j >> 2), reta.dword);
2372         }
2373
2374         /*
2375          * Disable raw packet checksumming so that RSS hash is placed in
2376          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2377          * offloads as they are enabled by default
2378          */
2379         rxcsum = rd32(E1000_RXCSUM);
2380         rxcsum |= E1000_RXCSUM_PCSD;
2381
2382         if (adapter->hw.mac.type >= e1000_82576)
2383                 /* Enable Receive Checksum Offload for SCTP */
2384                 rxcsum |= E1000_RXCSUM_CRCOFL;
2385
2386         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2387         wr32(E1000_RXCSUM, rxcsum);
2388
2389         /* If VMDq is enabled then we set the appropriate mode for that, else
2390          * we default to RSS so that an RSS hash is calculated per packet even
2391          * if we are only using one queue */
2392         if (adapter->vfs_allocated_count) {
2393                 if (hw->mac.type > e1000_82575) {
2394                         /* Set the default pool for the PF's first queue */
2395                         u32 vtctl = rd32(E1000_VT_CTL);
2396                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2397                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2398                         vtctl |= adapter->vfs_allocated_count <<
2399                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2400                         wr32(E1000_VT_CTL, vtctl);
2401                 }
2402                 if (adapter->rss_queues > 1)
2403                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2404                 else
2405                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2406         } else {
2407                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2408         }
2409         igb_vmm_control(adapter);
2410
2411         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2412                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2413         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2414                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2415         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2416                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2417         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2418                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2419
2420         wr32(E1000_MRQC, mrqc);
2421 }
2422
2423 /**
2424  * igb_setup_rctl - configure the receive control registers
2425  * @adapter: Board private structure
2426  **/
2427 void igb_setup_rctl(struct igb_adapter *adapter)
2428 {
2429         struct e1000_hw *hw = &adapter->hw;
2430         u32 rctl;
2431
2432         rctl = rd32(E1000_RCTL);
2433
2434         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2435         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2436
2437         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2438                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2439
2440         /*
2441          * enable stripping of CRC. It's unlikely this will break BMC
2442          * redirection as it did with e1000. Newer features require
2443          * that the HW strips the CRC.
2444          */
2445         rctl |= E1000_RCTL_SECRC;
2446
2447         /* disable store bad packets and clear size bits. */
2448         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2449
2450         /* enable LPE to prevent packets larger than max_frame_size */
2451         rctl |= E1000_RCTL_LPE;
2452
2453         /* disable queue 0 to prevent tail write w/o re-config */
2454         wr32(E1000_RXDCTL(0), 0);
2455
2456         /* Attention!!!  For SR-IOV PF driver operations you must enable
2457          * queue drop for all VF and PF queues to prevent head of line blocking
2458          * if an untrusted VF does not provide descriptors to hardware.
2459          */
2460         if (adapter->vfs_allocated_count) {
2461                 /* set all queue drop enable bits */
2462                 wr32(E1000_QDE, ALL_QUEUES);
2463         }
2464
2465         wr32(E1000_RCTL, rctl);
2466 }
2467
2468 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2469                                    int vfn)
2470 {
2471         struct e1000_hw *hw = &adapter->hw;
2472         u32 vmolr;
2473
2474         /* if it isn't the PF, check whether the VF has VLANs enabled and
2475          * increase the size to allow for the VLAN tag */
2476         if (vfn < adapter->vfs_allocated_count &&
2477             adapter->vf_data[vfn].vlans_enabled)
2478                 size += VLAN_TAG_SIZE;
2479
2480         vmolr = rd32(E1000_VMOLR(vfn));
2481         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2482         vmolr |= size | E1000_VMOLR_LPE;
2483         wr32(E1000_VMOLR(vfn), vmolr);
2484
2485         return 0;
2486 }
2487
2488 /**
2489  * igb_rlpml_set - set maximum receive packet size
2490  * @adapter: board private structure
2491  *
2492  * Configure maximum receivable packet size.
2493  **/
2494 static void igb_rlpml_set(struct igb_adapter *adapter)
2495 {
2496         u32 max_frame_size = adapter->max_frame_size;
2497         struct e1000_hw *hw = &adapter->hw;
2498         u16 pf_id = adapter->vfs_allocated_count;
2499
2500         if (adapter->vlgrp)
2501                 max_frame_size += VLAN_TAG_SIZE;
2502
2503         /* if vfs are enabled we set RLPML to the largest possible request
2504          * size and set the VMOLR RLPML to the size we need */
2505         if (pf_id) {
2506                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2507                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2508         }
2509
2510         wr32(E1000_RLPML, max_frame_size);
2511 }
2512
2513 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2514                                  int vfn, bool aupe)
2515 {
2516         struct e1000_hw *hw = &adapter->hw;
2517         u32 vmolr;
2518
2519         /*
2520          * This register exists only on the 82576 and newer, so on older
2521          * devices simply exit and do nothing
2522          */
2523         if (hw->mac.type < e1000_82576)
2524                 return;
2525
2526         vmolr = rd32(E1000_VMOLR(vfn));
2527         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2528         if (aupe)
2529                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2530         else
2531                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2532
2533         /* clear all bits that might not be set */
2534         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2535
2536         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2537                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2538         /*
2539          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2540          * multicast packets
2541          */
2542         if (vfn <= adapter->vfs_allocated_count)
2543                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2544
2545         wr32(E1000_VMOLR(vfn), vmolr);
2546 }
2547
2548 /**
2549  * igb_configure_rx_ring - Configure a receive ring after Reset
2550  * @adapter: board private structure
2551  * @ring: receive ring to be configured
2552  *
2553  * Configure the Rx unit of the MAC after a reset.
2554  **/
2555 void igb_configure_rx_ring(struct igb_adapter *adapter,
2556                            struct igb_ring *ring)
2557 {
2558         struct e1000_hw *hw = &adapter->hw;
2559         u64 rdba = ring->dma;
2560         int reg_idx = ring->reg_idx;
2561         u32 srrctl, rxdctl;
2562
2563         /* disable the queue */
2564         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2565         wr32(E1000_RXDCTL(reg_idx),
2566                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2567
2568         /* Set DMA base address registers */
2569         wr32(E1000_RDBAL(reg_idx),
2570              rdba & 0x00000000ffffffffULL);
2571         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2572         wr32(E1000_RDLEN(reg_idx),
2573                        ring->count * sizeof(union e1000_adv_rx_desc));
2574
2575         /* initialize head and tail */
2576         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2577         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2578         writel(0, ring->head);
2579         writel(0, ring->tail);
2580
2581         /* set descriptor configuration: header split for small buffers, single advanced buffer otherwise */
2582         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2583                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2584                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2585 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2586                 srrctl |= IGB_RXBUFFER_16384 >>
2587                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2588 #else
2589                 srrctl |= (PAGE_SIZE / 2) >>
2590                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2591 #endif
2592                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2593         } else {
2594                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2595                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2596                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2597         }
2598         /* Only set Drop Enable if we are supporting multiple queues */
2599         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2600                 srrctl |= E1000_SRRCTL_DROP_EN;
2601
2602         wr32(E1000_SRRCTL(reg_idx), srrctl);
2603
2604         /* set filtering for VMDQ pools */
2605         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2606
2607         /* enable receive descriptor fetching */
2608         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2609         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2610         rxdctl &= 0xFFF00000;
2611         rxdctl |= IGB_RX_PTHRESH;
2612         rxdctl |= IGB_RX_HTHRESH << 8;
2613         rxdctl |= IGB_RX_WTHRESH << 16;
2614         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2615 }
2616
2617 /**
2618  * igb_configure_rx - Configure receive Unit after Reset
2619  * @adapter: board private structure
2620  *
2621  * Configure the Rx unit of the MAC after a reset.
2622  **/
2623 static void igb_configure_rx(struct igb_adapter *adapter)
2624 {
2625         int i;
2626
2627         /* set UTA to appropriate mode */
2628         igb_set_uta(adapter);
2629
2630         /* set the correct pool for the PF default MAC address in entry 0 */
2631         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2632                          adapter->vfs_allocated_count);
2633
2634         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2635          * the Base and Length of the Rx Descriptor Ring */
2636         for (i = 0; i < adapter->num_rx_queues; i++)
2637                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2638 }
2639
2640 /**
2641  * igb_free_tx_resources - Free Tx Resources per Queue
2642  * @tx_ring: Tx descriptor ring for a specific queue
2643  *
2644  * Free all transmit software resources
2645  **/
2646 void igb_free_tx_resources(struct igb_ring *tx_ring)
2647 {
2648         igb_clean_tx_ring(tx_ring);
2649
2650         vfree(tx_ring->buffer_info);
2651         tx_ring->buffer_info = NULL;
2652
2653         /* if not set, then don't free */
2654         if (!tx_ring->desc)
2655                 return;
2656
2657         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2658                             tx_ring->desc, tx_ring->dma);
2659
2660         tx_ring->desc = NULL;
2661 }
2662
2663 /**
2664  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2665  * @adapter: board private structure
2666  *
2667  * Free all transmit software resources
2668  **/
2669 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2670 {
2671         int i;
2672
2673         for (i = 0; i < adapter->num_tx_queues; i++)
2674                 igb_free_tx_resources(adapter->tx_ring[i]);
2675 }
2676
2677 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2678                                     struct igb_buffer *buffer_info)
2679 {
2680         if (buffer_info->dma) {
2681                 if (buffer_info->mapped_as_page)
2682                         pci_unmap_page(tx_ring->pdev,
2683                                         buffer_info->dma,
2684                                         buffer_info->length,
2685                                         PCI_DMA_TODEVICE);
2686                 else
2687                         pci_unmap_single(tx_ring->pdev,
2688                                         buffer_info->dma,
2689                                         buffer_info->length,
2690                                         PCI_DMA_TODEVICE);
2691                 buffer_info->dma = 0;
2692         }
2693         if (buffer_info->skb) {
2694                 dev_kfree_skb_any(buffer_info->skb);
2695                 buffer_info->skb = NULL;
2696         }
2697         buffer_info->time_stamp = 0;
2698         buffer_info->length = 0;
2699         buffer_info->next_to_watch = 0;
2700         buffer_info->mapped_as_page = false;
2701 }
2702
2703 /**
2704  * igb_clean_tx_ring - Free Tx Buffers
2705  * @tx_ring: ring to be cleaned
2706  **/
2707 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2708 {
2709         struct igb_buffer *buffer_info;
2710         unsigned long size;
2711         unsigned int i;
2712
2713         if (!tx_ring->buffer_info)
2714                 return;
2715         /* Free all the Tx ring sk_buffs */
2716
2717         for (i = 0; i < tx_ring->count; i++) {
2718                 buffer_info = &tx_ring->buffer_info[i];
2719                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2720         }
2721
2722         size = sizeof(struct igb_buffer) * tx_ring->count;
2723         memset(tx_ring->buffer_info, 0, size);
2724
2725         /* Zero out the descriptor ring */
2726         memset(tx_ring->desc, 0, tx_ring->size);
2727
2728         tx_ring->next_to_use = 0;
2729         tx_ring->next_to_clean = 0;
2730 }
2731
2732 /**
2733  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2734  * @adapter: board private structure
2735  **/
2736 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2737 {
2738         int i;
2739
2740         for (i = 0; i < adapter->num_tx_queues; i++)
2741                 igb_clean_tx_ring(adapter->tx_ring[i]);
2742 }
2743
2744 /**
2745  * igb_free_rx_resources - Free Rx Resources
2746  * @rx_ring: ring to clean the resources from
2747  *
2748  * Free all receive software resources
2749  **/
2750 void igb_free_rx_resources(struct igb_ring *rx_ring)
2751 {
2752         igb_clean_rx_ring(rx_ring);
2753
2754         vfree(rx_ring->buffer_info);
2755         rx_ring->buffer_info = NULL;
2756
2757         /* if not set, then don't free */
2758         if (!rx_ring->desc)
2759                 return;
2760
2761         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2762                             rx_ring->desc, rx_ring->dma);
2763
2764         rx_ring->desc = NULL;
2765 }
2766
2767 /**
2768  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2769  * @adapter: board private structure
2770  *
2771  * Free all receive software resources
2772  **/
2773 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2774 {
2775         int i;
2776
2777         for (i = 0; i < adapter->num_rx_queues; i++)
2778                 igb_free_rx_resources(adapter->rx_ring[i]);
2779 }
2780
2781 /**
2782  * igb_clean_rx_ring - Free Rx Buffers per Queue
2783  * @rx_ring: ring to free buffers from
2784  **/
2785 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2786 {
2787         struct igb_buffer *buffer_info;
2788         unsigned long size;
2789         unsigned int i;
2790
2791         if (!rx_ring->buffer_info)
2792                 return;
2793
2794         /* Free all the Rx ring sk_buffs */
2795         for (i = 0; i < rx_ring->count; i++) {
2796                 buffer_info = &rx_ring->buffer_info[i];
2797                 if (buffer_info->dma) {
2798                         pci_unmap_single(rx_ring->pdev,
2799                                          buffer_info->dma,
2800                                          rx_ring->rx_buffer_len,
2801                                          PCI_DMA_FROMDEVICE);
2802                         buffer_info->dma = 0;
2803                 }
2804
2805                 if (buffer_info->skb) {
2806                         dev_kfree_skb(buffer_info->skb);
2807                         buffer_info->skb = NULL;
2808                 }
2809                 if (buffer_info->page_dma) {
2810                         pci_unmap_page(rx_ring->pdev,
2811                                        buffer_info->page_dma,
2812                                        PAGE_SIZE / 2,
2813                                        PCI_DMA_FROMDEVICE);
2814                         buffer_info->page_dma = 0;
2815                 }
2816                 if (buffer_info->page) {
2817                         put_page(buffer_info->page);
2818                         buffer_info->page = NULL;
2819                         buffer_info->page_offset = 0;
2820                 }
2821         }
2822
2823         size = sizeof(struct igb_buffer) * rx_ring->count;
2824         memset(rx_ring->buffer_info, 0, size);
2825
2826         /* Zero out the descriptor ring */
2827         memset(rx_ring->desc, 0, rx_ring->size);
2828
2829         rx_ring->next_to_clean = 0;
2830         rx_ring->next_to_use = 0;
2831 }
2832
2833 /**
2834  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2835  * @adapter: board private structure
2836  **/
2837 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2838 {
2839         int i;
2840
2841         for (i = 0; i < adapter->num_rx_queues; i++)
2842                 igb_clean_rx_ring(adapter->rx_ring[i]);
2843 }
2844
2845 /**
2846  * igb_set_mac - Change the Ethernet Address of the NIC
2847  * @netdev: network interface device structure
2848  * @p: pointer to an address structure
2849  *
2850  * Returns 0 on success, negative on failure
2851  **/
2852 static int igb_set_mac(struct net_device *netdev, void *p)
2853 {
2854         struct igb_adapter *adapter = netdev_priv(netdev);
2855         struct e1000_hw *hw = &adapter->hw;
2856         struct sockaddr *addr = p;
2857
2858         if (!is_valid_ether_addr(addr->sa_data))
2859                 return -EADDRNOTAVAIL;
2860
2861         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2862         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2863
2864         /* set the correct pool for the new PF MAC address in entry 0 */
2865         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2866                          adapter->vfs_allocated_count);
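        /* Note: the PF's queue pool index is taken to be vfs_allocated_count
         * here because pools 0..(vfs_allocated_count - 1) belong to the VFs;
         * this is an inference from how vfn is used throughout this file
         * rather than something stated in this function. */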
2867
2868         return 0;
2869 }
2870
2871 /**
2872  * igb_write_mc_addr_list - write multicast addresses to MTA
2873  * @netdev: network interface device structure
2874  *
2875  * Writes multicast address list to the MTA hash table.
2876  * Returns: -ENOMEM on failure
2877  *                0 on no addresses written
2878  *                X on writing X addresses to MTA
2879  **/
2880 static int igb_write_mc_addr_list(struct net_device *netdev)
2881 {
2882         struct igb_adapter *adapter = netdev_priv(netdev);
2883         struct e1000_hw *hw = &adapter->hw;
2884         struct dev_mc_list *mc_ptr = netdev->mc_list;
2885         u8  *mta_list;
2886         u32 vmolr = 0;
2887         int i;
2888
2889         if (netdev_mc_empty(netdev)) {
2890                 /* nothing to program, so clear mc list */
2891                 igb_update_mc_addr_list(hw, NULL, 0);
2892                 igb_restore_vf_multicasts(adapter);
2893                 return 0;
2894         }
2895
2896         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
2897         if (!mta_list)
2898                 return -ENOMEM;
2899
2900         /* set vmolr receive overflow multicast bit */
2901         vmolr |= E1000_VMOLR_ROMPE;
2902
2903         /* The shared function expects a packed array of only addresses. */
2904         mc_ptr = netdev->mc_list;
2905
2906         for (i = 0; i < netdev_mc_count(netdev); i++) {
2907                 if (!mc_ptr)
2908                         break;
2909                 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2910                 mc_ptr = mc_ptr->next;
2911         }
2912         igb_update_mc_addr_list(hw, mta_list, i);
2913         kfree(mta_list);
2914
2915         return netdev_mc_count(netdev);
2916 }
2917
2918 /**
2919  * igb_write_uc_addr_list - write unicast addresses to RAR table
2920  * @netdev: network interface device structure
2921  *
2922  * Writes unicast address list to the RAR table.
2923  * Returns: -ENOMEM on failure/insufficient address space
2924  *                0 on no addresses written
2925  *                X on writing X addresses to the RAR table
2926  **/
2927 static int igb_write_uc_addr_list(struct net_device *netdev)
2928 {
2929         struct igb_adapter *adapter = netdev_priv(netdev);
2930         struct e1000_hw *hw = &adapter->hw;
2931         unsigned int vfn = adapter->vfs_allocated_count;
2932         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
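        /* Entry 0 holds the PF MAC (see igb_set_mac) and one entry appears to
         * be reserved per allocated VF, hence the "vfn + 1" subtraction; only
         * the remaining RAR slots are available for extra unicast filters. */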
2933         int count = 0;
2934
2935         /* return ENOMEM indicating insufficient memory for addresses */
2936         if (netdev_uc_count(netdev) > rar_entries)
2937                 return -ENOMEM;
2938
2939         if (!netdev_uc_empty(netdev) && rar_entries) {
2940                 struct netdev_hw_addr *ha;
2941
2942                 netdev_for_each_uc_addr(ha, netdev) {
2943                         if (!rar_entries)
2944                                 break;
2945                         igb_rar_set_qsel(adapter, ha->addr,
2946                                          rar_entries--,
2947                                          vfn);
2948                         count++;
2949                 }
2950         }
2951         /* clear the unused RAR entries in reverse order to avoid write combining */
2952         for (; rar_entries > 0 ; rar_entries--) {
2953                 wr32(E1000_RAH(rar_entries), 0);
2954                 wr32(E1000_RAL(rar_entries), 0);
2955         }
2956         wrfl();
2957
2958         return count;
2959 }
2960
2961 /**
2962  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2963  * @netdev: network interface device structure
2964  *
2965  * The set_rx_mode entry point is called whenever the unicast or multicast
2966  * address lists or the network interface flags are updated.  This routine is
2967  * responsible for configuring the hardware for proper unicast, multicast,
2968  * promiscuous mode, and all-multi behavior.
2969  **/
2970 static void igb_set_rx_mode(struct net_device *netdev)
2971 {
2972         struct igb_adapter *adapter = netdev_priv(netdev);
2973         struct e1000_hw *hw = &adapter->hw;
2974         unsigned int vfn = adapter->vfs_allocated_count;
2975         u32 rctl, vmolr = 0;
2976         int count;
2977
2978         /* Check for Promiscuous and All Multicast modes */
2979         rctl = rd32(E1000_RCTL);
2980
2981         /* clear the affected bits */
2982         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2983
2984         if (netdev->flags & IFF_PROMISC) {
2985                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2986                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2987         } else {
2988                 if (netdev->flags & IFF_ALLMULTI) {
2989                         rctl |= E1000_RCTL_MPE;
2990                         vmolr |= E1000_VMOLR_MPME;
2991                 } else {
2992                         /*
2993                          * Write addresses to the MTA; if the attempt fails
2994                          * then we should just turn on promiscuous mode so
2995                          * that we can at least receive multicast traffic
2996                          */
2997                         count = igb_write_mc_addr_list(netdev);
2998                         if (count < 0) {
2999                                 rctl |= E1000_RCTL_MPE;
3000                                 vmolr |= E1000_VMOLR_MPME;
3001                         } else if (count) {
3002                                 vmolr |= E1000_VMOLR_ROMPE;
3003                         }
3004                 }
3005                 /*
3006                  * Write addresses to available RAR registers; if there is not
3007                  * sufficient space to store all the addresses, then enable
3008                  * unicast promiscuous mode
3009                  */
3010                 count = igb_write_uc_addr_list(netdev);
3011                 if (count < 0) {
3012                         rctl |= E1000_RCTL_UPE;
3013                         vmolr |= E1000_VMOLR_ROPE;
3014                 }
3015                 rctl |= E1000_RCTL_VFE;
3016         }
3017         wr32(E1000_RCTL, rctl);
3018
3019         /*
3020          * In order to support SR-IOV and eventually VMDq it is necessary to set
3021          * the VMOLR to enable the appropriate modes.  Without this workaround
3022          * we will have issues with VLAN tag stripping not being done for frames
3023          * that are only arriving because we are the default pool
3024          */
3025         if (hw->mac.type < e1000_82576)
3026                 return;
3027
3028         vmolr |= rd32(E1000_VMOLR(vfn)) &
3029                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3030         wr32(E1000_VMOLR(vfn), vmolr);
3031         igb_restore_vf_multicasts(adapter);
3032 }
3033
3034 /* Need to wait a few seconds after link up to get diagnostic information from
3035  * the phy */
3036 static void igb_update_phy_info(unsigned long data)
3037 {
3038         struct igb_adapter *adapter = (struct igb_adapter *) data;
3039         igb_get_phy_info(&adapter->hw);
3040 }
3041
3042 /**
3043  * igb_has_link - check shared code for link and determine up/down
3044  * @adapter: pointer to driver private info
3045  **/
3046 bool igb_has_link(struct igb_adapter *adapter)
3047 {
3048         struct e1000_hw *hw = &adapter->hw;
3049         bool link_active = false;
3050         s32 ret_val = 0;
3051
3052         /* get_link_status is set on LSC (link status change) interrupt or
3053          * rx sequence error interrupt.  get_link_status will stay
3054          * true until e1000_check_for_link establishes link, for
3055          * copper adapters ONLY
3056          */
3057         switch (hw->phy.media_type) {
3058         case e1000_media_type_copper:
3059                 if (hw->mac.get_link_status) {
3060                         ret_val = hw->mac.ops.check_for_link(hw);
3061                         link_active = !hw->mac.get_link_status;
3062                 } else {
3063                         link_active = true;
3064                 }
3065                 break;
3066         case e1000_media_type_internal_serdes:
3067                 ret_val = hw->mac.ops.check_for_link(hw);
3068                 link_active = hw->mac.serdes_has_link;
3069                 break;
3070         default:
3071         case e1000_media_type_unknown:
3072                 break;
3073         }
3074
3075         return link_active;
3076 }
3077
3078 /**
3079  * igb_watchdog - Timer Call-back
3080  * @data: pointer to adapter cast into an unsigned long
3081  **/
3082 static void igb_watchdog(unsigned long data)
3083 {
3084         struct igb_adapter *adapter = (struct igb_adapter *)data;
3085         /* Do the rest outside of interrupt context */
3086         schedule_work(&adapter->watchdog_task);
3087 }
3088
3089 static void igb_watchdog_task(struct work_struct *work)
3090 {
3091         struct igb_adapter *adapter = container_of(work,
3092                                                    struct igb_adapter,
3093                                                    watchdog_task);
3094         struct e1000_hw *hw = &adapter->hw;
3095         struct net_device *netdev = adapter->netdev;
3096         u32 link;
3097         int i;
3098
3099         link = igb_has_link(adapter);
3100         if (link) {
3101                 if (!netif_carrier_ok(netdev)) {
3102                         u32 ctrl;
3103                         hw->mac.ops.get_speed_and_duplex(hw,
3104                                                          &adapter->link_speed,
3105                                                          &adapter->link_duplex);
3106
3107                         ctrl = rd32(E1000_CTRL);
3108                         /* Link status message must follow this format */
3109                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3110                                  "Flow Control: %s\n",
3111                                netdev->name,
3112                                adapter->link_speed,
3113                                adapter->link_duplex == FULL_DUPLEX ?
3114                                  "Full Duplex" : "Half Duplex",
3115                                ((ctrl & E1000_CTRL_TFCE) &&
3116                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3117                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3118                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3119
3120                         /* tweak tx_queue_len according to speed/duplex and
3121                          * adjust the timeout factor */
3122                         netdev->tx_queue_len = adapter->tx_queue_len;
3123                         adapter->tx_timeout_factor = 1;
3124                         switch (adapter->link_speed) {
3125                         case SPEED_10:
3126                                 netdev->tx_queue_len = 10;
3127                                 adapter->tx_timeout_factor = 14;
3128                                 break;
3129                         case SPEED_100:
3130                                 netdev->tx_queue_len = 100;
3131                                 /* maybe add some timeout factor ? */
3132                                 break;
3133                         }
3134
3135                         netif_carrier_on(netdev);
3136
3137                         igb_ping_all_vfs(adapter);
3138
3139                         /* link state has changed, schedule phy info update */
3140                         if (!test_bit(__IGB_DOWN, &adapter->state))
3141                                 mod_timer(&adapter->phy_info_timer,
3142                                           round_jiffies(jiffies + 2 * HZ));
3143                 }
3144         } else {
3145                 if (netif_carrier_ok(netdev)) {
3146                         adapter->link_speed = 0;
3147                         adapter->link_duplex = 0;
3148                         /* Link status message must follow this format */
3149                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3150                                netdev->name);
3151                         netif_carrier_off(netdev);
3152
3153                         igb_ping_all_vfs(adapter);
3154
3155                         /* link state has changed, schedule phy info update */
3156                         if (!test_bit(__IGB_DOWN, &adapter->state))
3157                                 mod_timer(&adapter->phy_info_timer,
3158                                           round_jiffies(jiffies + 2 * HZ));
3159                 }
3160         }
3161
3162         igb_update_stats(adapter);
3163
3164         for (i = 0; i < adapter->num_tx_queues; i++) {
3165                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3166                 if (!netif_carrier_ok(netdev)) {
3167                         /* We've lost link, so the controller stops DMA,
3168                          * but we've got queued Tx work that's never going
3169                          * to get done, so reset controller to flush Tx.
3170                          * (Do the reset outside of interrupt context). */
3171                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3172                                 adapter->tx_timeout_count++;
3173                                 schedule_work(&adapter->reset_task);
3174                                 /* return immediately since reset is imminent */
3175                                 return;
3176                         }
3177                 }
3178
3179                 /* Force detection of hung controller every watchdog period */
3180                 tx_ring->detect_tx_hung = true;
3181         }
3182
3183         /* Cause software interrupt to ensure rx ring is cleaned */
3184         if (adapter->msix_entries) {
3185                 u32 eics = 0;
3186                 for (i = 0; i < adapter->num_q_vectors; i++) {
3187                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3188                         eics |= q_vector->eims_value;
3189                 }
3190                 wr32(E1000_EICS, eics);
3191         } else {
3192                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3193         }
3194
3195         /* Reset the timer */
3196         if (!test_bit(__IGB_DOWN, &adapter->state))
3197                 mod_timer(&adapter->watchdog_timer,
3198                           round_jiffies(jiffies + 2 * HZ));
3199 }
3200
3201 enum latency_range {
3202         lowest_latency = 0,
3203         low_latency = 1,
3204         bulk_latency = 2,
3205         latency_invalid = 255
3206 };
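
/*
 * Note on units (an assumption, but consistent with the constants used
 * below): the itr_val numbers are written to the EITR interval field, which
 * counts in roughly 0.25 usec increments, so for example:
 *
 *      980 ticks * ~0.25 usec ~= 250 usec  ->  ~4,000 ints/sec
 *      196 ticks * ~0.25 usec ~=  50 usec  -> ~20,000 ints/sec
 *       56 ticks * ~0.25 usec ~=  14 usec  -> ~70,000 ints/sec
 */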
3207
3208 /**
3209  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3210  *
3211  *      Stores a new ITR value based strictly on packet size.  This
3212  *      algorithm is less sophisticated than that used in igb_update_itr,
3213  *      due to the difficulty of synchronizing statistics across multiple
3214  *      receive rings.  The divisors and thresholds used by this function
3215  *      were determined based on theoretical maximum wire speed and testing
3216  *      data, in order to minimize response time while increasing bulk
3217  *      throughput.
3218  *      This functionality is controlled by the InterruptThrottleRate module
3219  *      parameter (see igb_param.c)
3220  *      NOTE:  This function is called only when operating in a multiqueue
3221  *             receive environment.
3222  * @q_vector: pointer to q_vector
3223  **/
3224 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3225 {
3226         int new_val = q_vector->itr_val;
3227         int avg_wire_size = 0;
3228         struct igb_adapter *adapter = q_vector->adapter;
3229
3230         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3231          * ints/sec - an ITR timer value of 976 ticks.
3232          */
3233         if (adapter->link_speed != SPEED_1000) {
3234                 new_val = 976;
3235                 goto set_itr_val;
3236         }
3237
3238         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3239                 struct igb_ring *ring = q_vector->rx_ring;
3240                 avg_wire_size = ring->total_bytes / ring->total_packets;
3241         }
3242
3243         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3244                 struct igb_ring *ring = q_vector->tx_ring;
3245                 avg_wire_size = max_t(u32, avg_wire_size,
3246                                       (ring->total_bytes /
3247                                        ring->total_packets));
3248         }
3249
3250         /* if avg_wire_size isn't set no work was done */
3251         if (!avg_wire_size)
3252                 goto clear_counts;
3253
3254         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3255         avg_wire_size += 24;
3256
3257         /* Don't starve jumbo frames */
3258         avg_wire_size = min(avg_wire_size, 3000);
3259
3260         /* Give a little boost to mid-size frames */
3261         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3262                 new_val = avg_wire_size / 3;
3263         else
3264                 new_val = avg_wire_size / 2;
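        /* Illustrative example only: a steady stream of ~600 byte frames
         * gives avg_wire_size = 624 after the +24 adjustment, which lands in
         * the mid-size bucket, so new_val = 624 / 3 = 208 ticks, i.e. roughly
         * 50 usec between interrupts (~20,000 ints/sec given the ~0.25 usec
         * tick assumed above). */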
3265
3266         /* when in itr mode 3 do not exceed 20K ints/sec */
3267         if (adapter->rx_itr_setting == 3 && new_val < 196)
3268                 new_val = 196;
3269
3270 set_itr_val:
3271         if (new_val != q_vector->itr_val) {
3272                 q_vector->itr_val = new_val;
3273                 q_vector->set_itr = 1;
3274         }
3275 clear_counts:
3276         if (q_vector->rx_ring) {
3277                 q_vector->rx_ring->total_bytes = 0;
3278                 q_vector->rx_ring->total_packets = 0;
3279         }
3280         if (q_vector->tx_ring) {
3281                 q_vector->tx_ring->total_bytes = 0;
3282                 q_vector->tx_ring->total_packets = 0;
3283         }
3284 }
3285
3286 /**
3287  * igb_update_itr - update the dynamic ITR value based on statistics
3288  *      Stores a new ITR value based on packets and byte
3289  *      counts during the last interrupt.  The advantage of per interrupt
3290  *      computation is faster updates and more accurate ITR for the current
3291  *      traffic pattern.  Constants in this function were computed
3292  *      based on theoretical maximum wire speed and thresholds were set based
3293  *      on testing data as well as attempting to minimize response time
3294  *      while increasing bulk throughput.
3295  *      This functionality is controlled by the InterruptThrottleRate module
3296  *      parameter (see igb_param.c)
3297  *      NOTE:  These calculations are only valid when operating in a single-
3298  *             queue environment.
3299  * @adapter: pointer to adapter
3300  * @itr_setting: current q_vector->itr_val
3301  * @packets: the number of packets during this measurement interval
3302  * @bytes: the number of bytes during this measurement interval
3303  **/
3304 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3305                                    int packets, int bytes)
3306 {
3307         unsigned int retval = itr_setting;
3308
3309         if (packets == 0)
3310                 goto update_itr_done;
3311
3312         switch (itr_setting) {
3313         case lowest_latency:
3314                 /* handle TSO and jumbo frames */
3315                 if (bytes/packets > 8000)
3316                         retval = bulk_latency;
3317                 else if ((packets < 5) && (bytes > 512))
3318                         retval = low_latency;
3319                 break;
3320         case low_latency:  /* 50 usec aka 20000 ints/s */
3321                 if (bytes > 10000) {
3322                         /* this if handles the TSO accounting */
3323                         if (bytes/packets > 8000) {
3324                                 retval = bulk_latency;
3325                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3326                                 retval = bulk_latency;
3327                         } else if ((packets > 35)) {
3328                                 retval = lowest_latency;
3329                         }
3330                 } else if (bytes/packets > 2000) {
3331                         retval = bulk_latency;
3332                 } else if (packets <= 2 && bytes < 512) {
3333                         retval = lowest_latency;
3334                 }
3335                 break;
3336         case bulk_latency: /* 250 usec aka 4000 ints/s */
3337                 if (bytes > 25000) {
3338                         if (packets > 35)
3339                                 retval = low_latency;
3340                 } else if (bytes < 1500) {
3341                         retval = low_latency;
3342                 }
3343                 break;
3344         }
3345
3346 update_itr_done:
3347         return retval;
3348 }
3349
3350 static void igb_set_itr(struct igb_adapter *adapter)
3351 {
3352         struct igb_q_vector *q_vector = adapter->q_vector[0];
3353         u16 current_itr;
3354         u32 new_itr = q_vector->itr_val;
3355
3356         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3357         if (adapter->link_speed != SPEED_1000) {
3358                 current_itr = 0;
3359                 new_itr = 4000;
3360                 goto set_itr_now;
3361         }
3362
3363         adapter->rx_itr = igb_update_itr(adapter,
3364                                     adapter->rx_itr,
3365                                     q_vector->rx_ring->total_packets,
3366                                     q_vector->rx_ring->total_bytes);
3367
3368         adapter->tx_itr = igb_update_itr(adapter,
3369                                     adapter->tx_itr,
3370                                     q_vector->tx_ring->total_packets,
3371                                     q_vector->tx_ring->total_bytes);
3372         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3373
3374         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3375         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3376                 current_itr = low_latency;
3377
3378         switch (current_itr) {
3379         /* counts and packets in update_itr are dependent on these numbers */
3380         case lowest_latency:
3381                 new_itr = 56;  /* aka 70,000 ints/sec */
3382                 break;
3383         case low_latency:
3384                 new_itr = 196; /* aka 20,000 ints/sec */
3385                 break;
3386         case bulk_latency:
3387                 new_itr = 980; /* aka 4,000 ints/sec */
3388                 break;
3389         default:
3390                 break;
3391         }
3392
3393 set_itr_now:
3394         q_vector->rx_ring->total_bytes = 0;
3395         q_vector->rx_ring->total_packets = 0;
3396         q_vector->tx_ring->total_bytes = 0;
3397         q_vector->tx_ring->total_packets = 0;
3398
3399         if (new_itr != q_vector->itr_val) {
3400                 /* this attempts to bias the interrupt rate towards Bulk
3401                  * by adding intermediate steps when interrupt rate is
3402                  * increasing */
3403                 new_itr = new_itr > q_vector->itr_val ?
3404                              max((new_itr * q_vector->itr_val) /
3405                                  (new_itr + (q_vector->itr_val >> 2)),
3406                                  new_itr) :
3407                              new_itr;
3408                 /* Don't write the value here; it resets the adapter's
3409                  * internal timer, and causes us to delay far longer than
3410                  * we should between interrupts.  Instead, we write the ITR
3411                  * value at the beginning of the next interrupt so the timing
3412                  * ends up being correct.
3413                  */
3414                 q_vector->itr_val = new_itr;
3415                 q_vector->set_itr = 1;
3416         }
3417
3418         return;
3419 }
3420
3421 #define IGB_TX_FLAGS_CSUM               0x00000001
3422 #define IGB_TX_FLAGS_VLAN               0x00000002
3423 #define IGB_TX_FLAGS_TSO                0x00000004
3424 #define IGB_TX_FLAGS_IPV4               0x00000008
3425 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3426 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3427 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
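/*
 * tx_flags packs the per-packet option bits in the low 16 bits and, when
 * IGB_TX_FLAGS_VLAN is set, carries the 802.1Q tag in the upper 16 bits
 * (see IGB_TX_FLAGS_VLAN_SHIFT and its use in igb_xmit_frame_ring_adv).
 */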
3428
3429 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3430                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3431 {
3432         struct e1000_adv_tx_context_desc *context_desc;
3433         unsigned int i;
3434         int err;
3435         struct igb_buffer *buffer_info;
3436         u32 info = 0, tu_cmd = 0;
3437         u32 mss_l4len_idx, l4len;
3438         *hdr_len = 0;
3439
3440         if (skb_header_cloned(skb)) {
3441                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3442                 if (err)
3443                         return err;
3444         }
3445
3446         l4len = tcp_hdrlen(skb);
3447         *hdr_len += l4len;
3448
3449         if (skb->protocol == htons(ETH_P_IP)) {
3450                 struct iphdr *iph = ip_hdr(skb);
3451                 iph->tot_len = 0;
3452                 iph->check = 0;
3453                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3454                                                          iph->daddr, 0,
3455                                                          IPPROTO_TCP,
3456                                                          0);
3457         } else if (skb_is_gso_v6(skb)) {
3458                 ipv6_hdr(skb)->payload_len = 0;
3459                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3460                                                        &ipv6_hdr(skb)->daddr,
3461                                                        0, IPPROTO_TCP, 0);
3462         }
3463
3464         i = tx_ring->next_to_use;
3465
3466         buffer_info = &tx_ring->buffer_info[i];
3467         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3468         /* VLAN MACLEN IPLEN */
3469         if (tx_flags & IGB_TX_FLAGS_VLAN)
3470                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3471         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3472         *hdr_len += skb_network_offset(skb);
3473         info |= skb_network_header_len(skb);
3474         *hdr_len += skb_network_header_len(skb);
3475         context_desc->vlan_macip_lens = cpu_to_le32(info);
3476
3477         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3478         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3479
3480         if (skb->protocol == htons(ETH_P_IP))
3481                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3482         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3483
3484         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3485
3486         /* MSS L4LEN IDX */
3487         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3488         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3489
3490         /* For 82575, context index must be unique per ring. */
3491         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3492                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3493
3494         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3495         context_desc->seqnum_seed = 0;
3496
3497         buffer_info->time_stamp = jiffies;
3498         buffer_info->next_to_watch = i;
3499         buffer_info->dma = 0;
3500         i++;
3501         if (i == tx_ring->count)
3502                 i = 0;
3503
3504         tx_ring->next_to_use = i;
3505
3506         return true;
3507 }
3508
3509 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3510                                    struct sk_buff *skb, u32 tx_flags)
3511 {
3512         struct e1000_adv_tx_context_desc *context_desc;
3513         struct pci_dev *pdev = tx_ring->pdev;
3514         struct igb_buffer *buffer_info;
3515         u32 info = 0, tu_cmd = 0;
3516         unsigned int i;
3517
3518         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3519             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3520                 i = tx_ring->next_to_use;
3521                 buffer_info = &tx_ring->buffer_info[i];
3522                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3523
3524                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3525                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3526
3527                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3528                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3529                         info |= skb_network_header_len(skb);
3530
3531                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3532
3533                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3534
3535                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3536                         __be16 protocol;
3537
3538                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3539                                 const struct vlan_ethhdr *vhdr =
3540                                           (const struct vlan_ethhdr*)skb->data;
3541
3542                                 protocol = vhdr->h_vlan_encapsulated_proto;
3543                         } else {
3544                                 protocol = skb->protocol;
3545                         }
3546
3547                         switch (protocol) {
3548                         case cpu_to_be16(ETH_P_IP):
3549                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3550                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3551                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3552                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3553                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3554                                 break;
3555                         case cpu_to_be16(ETH_P_IPV6):
3556                                 /* XXX what about other V6 headers?? */
3557                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3558                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3559                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3560                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3561                                 break;
3562                         default:
3563                                 if (unlikely(net_ratelimit()))
3564                                         dev_warn(&pdev->dev,
3565                                             "partial checksum but proto=%x!\n",
3566                                             skb->protocol);
3567                                 break;
3568                         }
3569                 }
3570
3571                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3572                 context_desc->seqnum_seed = 0;
3573                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3574                         context_desc->mss_l4len_idx =
3575                                 cpu_to_le32(tx_ring->reg_idx << 4);
3576
3577                 buffer_info->time_stamp = jiffies;
3578                 buffer_info->next_to_watch = i;
3579                 buffer_info->dma = 0;
3580
3581                 i++;
3582                 if (i == tx_ring->count)
3583                         i = 0;
3584                 tx_ring->next_to_use = i;
3585
3586                 return true;
3587         }
3588         return false;
3589 }
3590
3591 #define IGB_MAX_TXD_PWR 16
3592 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
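/* i.e. 64KB: an upper bound on the data a single advanced Tx descriptor may
 * carry, enforced by the BUG_ON() checks in igb_tx_map_adv() below. */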
3593
3594 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3595                                  unsigned int first)
3596 {
3597         struct igb_buffer *buffer_info;
3598         struct pci_dev *pdev = tx_ring->pdev;
3599         unsigned int len = skb_headlen(skb);
3600         unsigned int count = 0, i;
3601         unsigned int f;
3602
3603         i = tx_ring->next_to_use;
3604
3605         buffer_info = &tx_ring->buffer_info[i];
3606         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3607         buffer_info->length = len;
3608         /* set time_stamp *before* dma to help avoid a possible race */
3609         buffer_info->time_stamp = jiffies;
3610         buffer_info->next_to_watch = i;
3611         buffer_info->dma = pci_map_single(pdev, skb->data, len,
3612                                           PCI_DMA_TODEVICE);
3613         if (pci_dma_mapping_error(pdev, buffer_info->dma))
3614                 goto dma_error;
3615
3616         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3617                 struct skb_frag_struct *frag;
3618
3619                 count++;
3620                 i++;
3621                 if (i == tx_ring->count)
3622                         i = 0;
3623
3624                 frag = &skb_shinfo(skb)->frags[f];
3625                 len = frag->size;
3626
3627                 buffer_info = &tx_ring->buffer_info[i];
3628                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3629                 buffer_info->length = len;
3630                 buffer_info->time_stamp = jiffies;
3631                 buffer_info->next_to_watch = i;
3632                 buffer_info->mapped_as_page = true;
3633                 buffer_info->dma = pci_map_page(pdev,
3634                                                 frag->page,
3635                                                 frag->page_offset,
3636                                                 len,
3637                                                 PCI_DMA_TODEVICE);
3638                 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3639                         goto dma_error;
3640
3641         }
3642
3643         tx_ring->buffer_info[i].skb = skb;
3644         tx_ring->buffer_info[i].gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3645         tx_ring->buffer_info[first].next_to_watch = i;
3646
3647         return ++count;
3648
3649 dma_error:
3650         dev_err(&pdev->dev, "TX DMA map failed\n");
3651
3652         /* clear timestamp and dma mappings for failed buffer_info mapping */
3653         buffer_info->dma = 0;
3654         buffer_info->time_stamp = 0;
3655         buffer_info->length = 0;
3656         buffer_info->next_to_watch = 0;
3657         buffer_info->mapped_as_page = false;
3658         count--;
3659
3660         /* clear timestamp and dma mappings for remaining portion of packet */
3661         while (count >= 0) {
3662                 count--;
3663                 i--;
3664                 if (i < 0)
3665                         i += tx_ring->count;
3666                 buffer_info = &tx_ring->buffer_info[i];
3667                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3668         }
3669
3670         return 0;
3671 }
3672
3673 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3674                                     int tx_flags, int count, u32 paylen,
3675                                     u8 hdr_len)
3676 {
3677         union e1000_adv_tx_desc *tx_desc;
3678         struct igb_buffer *buffer_info;
3679         u32 olinfo_status = 0, cmd_type_len;
3680         unsigned int i = tx_ring->next_to_use;
3681
3682         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3683                         E1000_ADVTXD_DCMD_DEXT);
3684
3685         if (tx_flags & IGB_TX_FLAGS_VLAN)
3686                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3687
3688         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3689                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3690
3691         if (tx_flags & IGB_TX_FLAGS_TSO) {
3692                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3693
3694                 /* insert tcp checksum */
3695                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3696
3697                 /* insert ip checksum */
3698                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3699                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3700
3701         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3702                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3703         }
3704
3705         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3706             (tx_flags & (IGB_TX_FLAGS_CSUM |
3707                          IGB_TX_FLAGS_TSO |
3708                          IGB_TX_FLAGS_VLAN)))
3709                 olinfo_status |= tx_ring->reg_idx << 4;
3710
3711         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
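        /* PAYLEN is the frame length minus hdr_len; hdr_len is non-zero only
         * for TSO (computed in igb_tso_adv), so non-TSO frames report the
         * full skb->len passed in as paylen. */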
3712
3713         do {
3714                 buffer_info = &tx_ring->buffer_info[i];
3715                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3716                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3717                 tx_desc->read.cmd_type_len =
3718                         cpu_to_le32(cmd_type_len | buffer_info->length);
3719                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3720                 count--;
3721                 i++;
3722                 if (i == tx_ring->count)
3723                         i = 0;
3724         } while (count > 0);
3725
3726         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3727         /* Force memory writes to complete before letting h/w
3728          * know there are new descriptors to fetch.  (Only
3729          * applicable for weak-ordered memory model archs,
3730          * such as IA-64). */
3731         wmb();
3732
3733         tx_ring->next_to_use = i;
3734         writel(i, tx_ring->tail);
3735         /* we need this if more than one processor can write to our tail
3736          * at a time; it synchronizes IO on IA64/Altix systems */
3737         mmiowb();
3738 }
3739
3740 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3741 {
3742         struct net_device *netdev = tx_ring->netdev;
3743
3744         netif_stop_subqueue(netdev, tx_ring->queue_index);
3745
3746         /* Herbert's original patch had:
3747          *  smp_mb__after_netif_stop_queue();
3748          * but since that doesn't exist yet, just open code it. */
3749         smp_mb();
3750
3751         /* We need to check again in case another CPU has just
3752          * made room available. */
3753         if (igb_desc_unused(tx_ring) < size)
3754                 return -EBUSY;
3755
3756         /* A reprieve! */
3757         netif_wake_subqueue(netdev, tx_ring->queue_index);
3758         tx_ring->tx_stats.restart_queue++;
3759         return 0;
3760 }
3761
3762 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3763 {
3764         if (igb_desc_unused(tx_ring) >= size)
3765                 return 0;
3766         return __igb_maybe_stop_tx(tx_ring, size);
3767 }
3768
3769 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3770                                     struct igb_ring *tx_ring)
3771 {
3772         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3773         unsigned int first;
3774         unsigned int tx_flags = 0;
3775         u8 hdr_len = 0;
3776         int tso = 0, count;
3777         union skb_shared_tx *shtx = skb_tx(skb);
3778
3779         /* need: 1 descriptor per page,
3780          *       + 2 desc gap to keep tail from touching head,
3781          *       + 1 desc for skb->data,
3782          *       + 1 desc for context descriptor,
3783          * otherwise try next time */
3784         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3785                 /* this is a hard error */
3786                 return NETDEV_TX_BUSY;
3787         }
3788
3789         if (unlikely(shtx->hardware)) {
3790                 shtx->in_progress = 1;
3791                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3792         }
3793
3794         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3795                 tx_flags |= IGB_TX_FLAGS_VLAN;
3796                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3797         }
3798
3799         if (skb->protocol == htons(ETH_P_IP))
3800                 tx_flags |= IGB_TX_FLAGS_IPV4;
3801
3802         first = tx_ring->next_to_use;
3803         if (skb_is_gso(skb)) {
3804                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3805
3806                 if (tso < 0) {
3807                         dev_kfree_skb_any(skb);
3808                         return NETDEV_TX_OK;
3809                 }
3810         }
3811
3812         if (tso)
3813                 tx_flags |= IGB_TX_FLAGS_TSO;
3814         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3815                  (skb->ip_summed == CHECKSUM_PARTIAL))
3816                 tx_flags |= IGB_TX_FLAGS_CSUM;
3817
3818         /*
3819          * count reflects descriptors mapped, if 0 or less then mapping error
3820          * has occurred and we need to rewind the descriptor queue
3821          */
3822         count = igb_tx_map_adv(tx_ring, skb, first);
3823         if (!count) {
3824                 dev_kfree_skb_any(skb);
3825                 tx_ring->buffer_info[first].time_stamp = 0;
3826                 tx_ring->next_to_use = first;
3827                 return NETDEV_TX_OK;
3828         }
3829
3830         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3831
3832         /* Make sure there is space in the ring for the next send. */
3833         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3834
3835         return NETDEV_TX_OK;
3836 }
3837
3838 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3839                                       struct net_device *netdev)
3840 {
3841         struct igb_adapter *adapter = netdev_priv(netdev);
3842         struct igb_ring *tx_ring;
3843         int r_idx = 0;
3844
3845         if (test_bit(__IGB_DOWN, &adapter->state)) {
3846                 dev_kfree_skb_any(skb);
3847                 return NETDEV_TX_OK;
3848         }
3849
3850         if (skb->len <= 0) {
3851                 dev_kfree_skb_any(skb);
3852                 return NETDEV_TX_OK;
3853         }
3854
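        /* queue_mapping is reduced modulo IGB_ABS_MAX_TX_QUEUES (assumed to
         * be a power of two, hence the mask) so that whatever queue the stack
         * selected maps onto one of the rings in multi_tx_table. */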
3855         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3856         tx_ring = adapter->multi_tx_table[r_idx];
3857
3858         /* This goes back to the question of how to logically map a tx queue
3859          * to a flow.  Right now, performance is impacted slightly negatively
3860          * if using multiple tx queues.  If the stack breaks away from a
3861          * single qdisc implementation, we can look at this again. */
3862         return igb_xmit_frame_ring_adv(skb, tx_ring);
3863 }
3864
3865 /**
3866  * igb_tx_timeout - Respond to a Tx Hang
3867  * @netdev: network interface device structure
3868  **/
3869 static void igb_tx_timeout(struct net_device *netdev)
3870 {
3871         struct igb_adapter *adapter = netdev_priv(netdev);
3872         struct e1000_hw *hw = &adapter->hw;
3873
3874         /* Do the reset outside of interrupt context */
3875         adapter->tx_timeout_count++;
3876
3877         if (hw->mac.type == e1000_82580)
3878                 hw->dev_spec._82575.global_device_reset = true;
3879
3880         schedule_work(&adapter->reset_task);
3881         wr32(E1000_EICS,
3882              (adapter->eims_enable_mask & ~adapter->eims_other));
3883 }
3884
3885 static void igb_reset_task(struct work_struct *work)
3886 {
3887         struct igb_adapter *adapter;
3888         adapter = container_of(work, struct igb_adapter, reset_task);
3889
3890         igb_reinit_locked(adapter);
3891 }
3892
3893 /**
3894  * igb_get_stats - Get System Network Statistics
3895  * @netdev: network interface device structure
3896  *
3897  * Returns the address of the device statistics structure.
3898  * The statistics are actually updated from the timer callback.
3899  **/
3900 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3901 {
3902         /* only return the current stats */
3903         return &netdev->stats;
3904 }
3905
3906 /**
3907  * igb_change_mtu - Change the Maximum Transfer Unit
3908  * @netdev: network interface device structure
3909  * @new_mtu: new value for maximum frame size
3910  *
3911  * Returns 0 on success, negative on failure
3912  **/
3913 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3914 {
3915         struct igb_adapter *adapter = netdev_priv(netdev);
3916         struct pci_dev *pdev = adapter->pdev;
3917         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3918         u32 rx_buffer_len, i;
3919
3920         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3921                 dev_err(&pdev->dev, "Invalid MTU setting\n");
3922                 return -EINVAL;
3923         }
3924
3925         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3926                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3927                 return -EINVAL;
3928         }
3929
3930         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3931                 msleep(1);
3932
3933         /* igb_down has a dependency on max_frame_size */
3934         adapter->max_frame_size = max_frame;
3935
3936         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3937          * means we reserve 2 more; this pushes us to allocate from the next
3938          * larger slab size.
3939          * i.e. RXBUFFER_2048 --> size-4096 slab
3940          */
3941
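        /* Rough sketch of the policy below: frames that fit a standard MTU
         * get a 1024-byte or VLAN-sized skb buffer, while jumbo frames drop
         * to a small 128-byte buffer because the bulk of the data is then
         * placed in half-page buffers (packet split) rather than in the skb
         * itself - an inference from the rx_buffer_len handling elsewhere in
         * this driver, not from this function alone. */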
3942         if (max_frame <= IGB_RXBUFFER_1024)
3943                 rx_buffer_len = IGB_RXBUFFER_1024;
3944         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3945                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3946         else
3947                 rx_buffer_len = IGB_RXBUFFER_128;
3948
3949         if (netif_running(netdev))
3950                 igb_down(adapter);
3951
3952         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3953                  netdev->mtu, new_mtu);
3954         netdev->mtu = new_mtu;
3955
3956         for (i = 0; i < adapter->num_rx_queues; i++)
3957                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
3958
3959         if (netif_running(netdev))
3960                 igb_up(adapter);
3961         else
3962                 igb_reset(adapter);
3963
3964         clear_bit(__IGB_RESETTING, &adapter->state);
3965
3966         return 0;
3967 }
3968
3969 /**
3970  * igb_update_stats - Update the board statistics counters
3971  * @adapter: board private structure
3972  **/
3973
3974 void igb_update_stats(struct igb_adapter *adapter)
3975 {
3976         struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3977         struct e1000_hw *hw = &adapter->hw;
3978         struct pci_dev *pdev = adapter->pdev;
3979         u32 rnbc, reg;
3980         u16 phy_tmp;
3981         int i;
3982         u64 bytes, packets;
3983
3984 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3985
3986         /*
3987          * Prevent stats update while adapter is being reset, or if the pci
3988          * connection is down.
3989          */
3990         if (adapter->link_speed == 0)
3991                 return;
3992         if (pci_channel_offline(pdev))
3993                 return;
3994
3995         bytes = 0;
3996         packets = 0;
3997         for (i = 0; i < adapter->num_rx_queues; i++) {
3998                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3999                 struct igb_ring *ring = adapter->rx_ring[i];
4000                 ring->rx_stats.drops += rqdpc_tmp;
4001                 net_stats->rx_fifo_errors += rqdpc_tmp;
4002                 bytes += ring->rx_stats.bytes;
4003                 packets += ring->rx_stats.packets;
4004         }
4005
4006         net_stats->rx_bytes = bytes;
4007         net_stats->rx_packets = packets;
4008
4009         bytes = 0;
4010         packets = 0;
4011         for (i = 0; i < adapter->num_tx_queues; i++) {
4012                 struct igb_ring *ring = adapter->tx_ring[i];
4013                 bytes += ring->tx_stats.bytes;
4014                 packets += ring->tx_stats.packets;
4015         }
4016         net_stats->tx_bytes = bytes;
4017         net_stats->tx_packets = packets;
4018
4019         /* read stats registers */
4020         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4021         adapter->stats.gprc += rd32(E1000_GPRC);
4022         adapter->stats.gorc += rd32(E1000_GORCL);
4023         rd32(E1000_GORCH); /* clear GORCL */
4024         adapter->stats.bprc += rd32(E1000_BPRC);
4025         adapter->stats.mprc += rd32(E1000_MPRC);
4026         adapter->stats.roc += rd32(E1000_ROC);
4027
4028         adapter->stats.prc64 += rd32(E1000_PRC64);
4029         adapter->stats.prc127 += rd32(E1000_PRC127);
4030         adapter->stats.prc255 += rd32(E1000_PRC255);
4031         adapter->stats.prc511 += rd32(E1000_PRC511);
4032         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4033         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4034         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4035         adapter->stats.sec += rd32(E1000_SEC);
4036
4037         adapter->stats.mpc += rd32(E1000_MPC);
4038         adapter->stats.scc += rd32(E1000_SCC);
4039         adapter->stats.ecol += rd32(E1000_ECOL);
4040         adapter->stats.mcc += rd32(E1000_MCC);
4041         adapter->stats.latecol += rd32(E1000_LATECOL);
4042         adapter->stats.dc += rd32(E1000_DC);
4043         adapter->stats.rlec += rd32(E1000_RLEC);
4044         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4045         adapter->stats.xontxc += rd32(E1000_XONTXC);
4046         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4047         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4048         adapter->stats.fcruc += rd32(E1000_FCRUC);
4049         adapter->stats.gptc += rd32(E1000_GPTC);
4050         adapter->stats.gotc += rd32(E1000_GOTCL);
4051         rd32(E1000_GOTCH); /* clear GOTCL */
4052         rnbc = rd32(E1000_RNBC);
4053         adapter->stats.rnbc += rnbc;
4054         net_stats->rx_fifo_errors += rnbc;
4055         adapter->stats.ruc += rd32(E1000_RUC);
4056         adapter->stats.rfc += rd32(E1000_RFC);
4057         adapter->stats.rjc += rd32(E1000_RJC);
4058         adapter->stats.tor += rd32(E1000_TORH);
4059         adapter->stats.tot += rd32(E1000_TOTH);
4060         adapter->stats.tpr += rd32(E1000_TPR);
4061
4062         adapter->stats.ptc64 += rd32(E1000_PTC64);
4063         adapter->stats.ptc127 += rd32(E1000_PTC127);
4064         adapter->stats.ptc255 += rd32(E1000_PTC255);
4065         adapter->stats.ptc511 += rd32(E1000_PTC511);
4066         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4067         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4068
4069         adapter->stats.mptc += rd32(E1000_MPTC);
4070         adapter->stats.bptc += rd32(E1000_BPTC);
4071
4072         adapter->stats.tpt += rd32(E1000_TPT);
4073         adapter->stats.colc += rd32(E1000_COLC);
4074
4075         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4076         /* read internal phy specific stats */
4077         reg = rd32(E1000_CTRL_EXT);
4078         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4079                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4080                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4081         }
4082
4083         adapter->stats.tsctc += rd32(E1000_TSCTC);
4084         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4085
4086         adapter->stats.iac += rd32(E1000_IAC);
4087         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4088         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4089         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4090         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4091         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4092         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4093         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4094         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4095
4096         /* Fill out the OS statistics structure */
4097         net_stats->multicast = adapter->stats.mprc;
4098         net_stats->collisions = adapter->stats.colc;
4099
4100         /* Rx Errors */
4101
4102         /* RLEC on some newer hardware can be incorrect so build
4103          * our own version based on RUC and ROC */
4104         net_stats->rx_errors = adapter->stats.rxerrc +
4105                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4106                 adapter->stats.ruc + adapter->stats.roc +
4107                 adapter->stats.cexterr;
4108         net_stats->rx_length_errors = adapter->stats.ruc +
4109                                       adapter->stats.roc;
4110         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4111         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4112         net_stats->rx_missed_errors = adapter->stats.mpc;
4113
4114         /* Tx Errors */
4115         net_stats->tx_errors = adapter->stats.ecol +
4116                                adapter->stats.latecol;
4117         net_stats->tx_aborted_errors = adapter->stats.ecol;
4118         net_stats->tx_window_errors = adapter->stats.latecol;
4119         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4120
4121         /* Tx Dropped needs to be maintained elsewhere */
4122
4123         /* Phy Stats */
4124         if (hw->phy.media_type == e1000_media_type_copper) {
4125                 if ((adapter->link_speed == SPEED_1000) &&
4126                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4127                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4128                         adapter->phy_stats.idle_errors += phy_tmp;
4129                 }
4130         }
4131
4132         /* Management Stats */
4133         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4134         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4135         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4136 }
4137
4138 static irqreturn_t igb_msix_other(int irq, void *data)
4139 {
4140         struct igb_adapter *adapter = data;
4141         struct e1000_hw *hw = &adapter->hw;
4142         u32 icr = rd32(E1000_ICR);
4143         /* reading ICR causes bit 31 of EICR to be cleared */
4144
4145         if (icr & E1000_ICR_DRSTA)
4146                 schedule_work(&adapter->reset_task);
4147
4148         if (icr & E1000_ICR_DOUTSYNC) {
4149                 /* HW is reporting DMA is out of sync */
4150                 adapter->stats.doosync++;
4151         }
4152
4153         /* Check for a mailbox event */
4154         if (icr & E1000_ICR_VMMB)
4155                 igb_msg_task(adapter);
4156
4157         if (icr & E1000_ICR_LSC) {
4158                 hw->mac.get_link_status = 1;
4159                 /* guard against interrupt when we're going down */
4160                 if (!test_bit(__IGB_DOWN, &adapter->state))
4161                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4162         }
4163
4164         if (adapter->vfs_allocated_count)
4165                 wr32(E1000_IMS, E1000_IMS_LSC |
4166                                 E1000_IMS_VMMB |
4167                                 E1000_IMS_DOUTSYNC);
4168         else
4169                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4170         wr32(E1000_EIMS, adapter->eims_other);
4171
4172         return IRQ_HANDLED;
4173 }
4174
4175 static void igb_write_itr(struct igb_q_vector *q_vector)
4176 {
4177         struct igb_adapter *adapter = q_vector->adapter;
4178         u32 itr_val = q_vector->itr_val & 0x7FFC;
4179
4180         if (!q_vector->set_itr)
4181                 return;
4182
4183         if (!itr_val)
4184                 itr_val = 0x4;
4185
4186         if (adapter->hw.mac.type == e1000_82575)
4187                 itr_val |= itr_val << 16;
4188         else
4189                 itr_val |= 0x8000000;
4190
4191         writel(itr_val, q_vector->itr_register);
4192         q_vector->set_itr = 0;
4193 }
4194
4195 static irqreturn_t igb_msix_ring(int irq, void *data)
4196 {
4197         struct igb_q_vector *q_vector = data;
4198
4199         /* Write the ITR value calculated from the previous interrupt. */
4200         igb_write_itr(q_vector);
4201
4202         napi_schedule(&q_vector->napi);
4203
4204         return IRQ_HANDLED;
4205 }
4206
4207 #ifdef CONFIG_IGB_DCA
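/**
 * igb_update_dca - retarget DCA tags for a queue vector
 * @q_vector: vector whose rings should be updated
 *
 * Programs the Tx/Rx DCA control registers for this vector's rings so
 * that descriptor (and, for Rx, header/payload) write-backs are tagged
 * for the CPU currently servicing the vector.  The register writes are
 * skipped when the CPU has not changed since the last update.
 **/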
4208 static void igb_update_dca(struct igb_q_vector *q_vector)
4209 {
4210         struct igb_adapter *adapter = q_vector->adapter;
4211         struct e1000_hw *hw = &adapter->hw;
4212         int cpu = get_cpu();
4213
4214         if (q_vector->cpu == cpu)
4215                 goto out_no_update;
4216
4217         if (q_vector->tx_ring) {
4218                 int q = q_vector->tx_ring->reg_idx;
4219                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4220                 if (hw->mac.type == e1000_82575) {
4221                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4222                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4223                 } else {
4224                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4225                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4226                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4227                 }
4228                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4229                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4230         }
4231         if (q_vector->rx_ring) {
4232                 int q = q_vector->rx_ring->reg_idx;
4233                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4234                 if (hw->mac.type == e1000_82575) {
4235                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4236                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4237                 } else {
4238                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4239                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4240                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4241                 }
4242                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4243                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4244                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4245                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4246         }
4247         q_vector->cpu = cpu;
4248 out_no_update:
4249         put_cpu();
4250 }
4251
4252 static void igb_setup_dca(struct igb_adapter *adapter)
4253 {
4254         struct e1000_hw *hw = &adapter->hw;
4255         int i;
4256
4257         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4258                 return;
4259
4260         /* Always use CB2 mode, difference is masked in the CB driver. */
4261         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4262
4263         for (i = 0; i < adapter->num_q_vectors; i++) {
4264                 adapter->q_vector[i]->cpu = -1;
4265                 igb_update_dca(adapter->q_vector[i]);
4266         }
4267 }
4268
4269 static int __igb_notify_dca(struct device *dev, void *data)
4270 {
4271         struct net_device *netdev = dev_get_drvdata(dev);
4272         struct igb_adapter *adapter = netdev_priv(netdev);
4273         struct pci_dev *pdev = adapter->pdev;
4274         struct e1000_hw *hw = &adapter->hw;
4275         unsigned long event = *(unsigned long *)data;
4276
4277         switch (event) {
4278         case DCA_PROVIDER_ADD:
4279                 /* if already enabled, don't do it again */
4280                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4281                         break;
4282                 if (dca_add_requester(dev) == 0) {
4283                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4284                         dev_info(&pdev->dev, "DCA enabled\n");
4285                         igb_setup_dca(adapter);
4286                         break;
4287                 }
4288                 /* Fall Through since DCA is disabled. */
4289         case DCA_PROVIDER_REMOVE:
4290                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4291                         /* without this a class_device is left
4292                          * hanging around in the sysfs model */
4293                         dca_remove_requester(dev);
4294                         dev_info(&pdev->dev, "DCA disabled\n");
4295                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4296                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4297                 }
4298                 break;
4299         }
4300
4301         return 0;
4302 }
4303
4304 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4305                           void *p)
4306 {
4307         int ret_val;
4308
4309         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4310                                          __igb_notify_dca);
4311
4312         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4313 }
4314 #endif /* CONFIG_IGB_DCA */
4315
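/**
 * igb_ping_all_vfs - notify all VFs of a PF event
 * @adapter: board private structure
 *
 * Writes a control message to every allocated VF mailbox, flagging it
 * clear-to-send for VFs that have completed their reset handshake.
 **/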
4316 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4317 {
4318         struct e1000_hw *hw = &adapter->hw;
4319         u32 ping;
4320         int i;
4321
4322         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4323                 ping = E1000_PF_CONTROL_MSG;
4324                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4325                         ping |= E1000_VT_MSGTYPE_CTS;
4326                 igb_write_mbx(hw, &ping, 1, i);
4327         }
4328 }
4329
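/**
 * igb_set_vf_promisc - handle a VF multicast promiscuous request
 * @adapter: board private structure
 * @msgbuf: mailbox message received from the VF
 * @vf: VF index
 *
 * Enables or disables multicast promiscuous mode for the VF via VMOLR.
 * When the promiscuous flag is cleared, the VF's stored multicast hashes
 * are written back to the MTA, or promiscuous mode is retained if more
 * than 30 hashes are in use.
 **/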
4330 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4331 {
4332         struct e1000_hw *hw = &adapter->hw;
4333         u32 vmolr = rd32(E1000_VMOLR(vf));
4334         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4335
4336         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4337                             IGB_VF_FLAG_MULTI_PROMISC);
4338         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4339
4340         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4341                 vmolr |= E1000_VMOLR_MPME;
4342                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4343         } else {
4344                 /*
4345                  * if we have hashes and we are clearing a multicast promisc
4346                  * flag we need to write the hashes to the MTA as this step
4347                  * was previously skipped
4348                  */
4349                 if (vf_data->num_vf_mc_hashes > 30) {
4350                         vmolr |= E1000_VMOLR_MPME;
4351                 } else if (vf_data->num_vf_mc_hashes) {
4352                         int j;
4353                         vmolr |= E1000_VMOLR_ROMPE;
4354                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4355                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4356                 }
4357         }
4358
4359         wr32(E1000_VMOLR(vf), vmolr);
4360
4361         /* there are flags left unprocessed, likely not supported */
4362         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4363                 return -EINVAL;
4364
4365         return 0;
4366
4367 }
4368
4369 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4370                                   u32 *msgbuf, u32 vf)
4371 {
4372         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4373         u16 *hash_list = (u16 *)&msgbuf[1];
4374         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4375         int i;
4376
4377         /* salt away the number of multicast addresses assigned
4378          * to this VF for later use to restore when the PF multicast
4379          * list changes
4380          */
4381         vf_data->num_vf_mc_hashes = n;
4382
4383         /* only up to 30 hash values supported */
4384         if (n > 30)
4385                 n = 30;
4386
4387         /* store the hashes for later use */
4388         for (i = 0; i < n; i++)
4389                 vf_data->vf_mc_hashes[i] = hash_list[i];
4390
4391         /* Flush and reset the mta with the new values */
4392         igb_set_rx_mode(adapter->netdev);
4393
4394         return 0;
4395 }
4396
4397 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4398 {
4399         struct e1000_hw *hw = &adapter->hw;
4400         struct vf_data_storage *vf_data;
4401         int i, j;
4402
4403         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4404                 u32 vmolr = rd32(E1000_VMOLR(i));
4405                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4406
4407                 vf_data = &adapter->vf_data[i];
4408
4409                 if ((vf_data->num_vf_mc_hashes > 30) ||
4410                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4411                         vmolr |= E1000_VMOLR_MPME;
4412                 } else if (vf_data->num_vf_mc_hashes) {
4413                         vmolr |= E1000_VMOLR_ROMPE;
4414                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4415                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4416                 }
4417                 wr32(E1000_VMOLR(i), vmolr);
4418         }
4419 }
4420
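/**
 * igb_clear_vf_vfta - remove a VF from all VLAN filters
 * @adapter: board private structure
 * @vf: VF index
 *
 * Drops the VF from every VLVF pool entry and, when a pool becomes
 * empty, clears the corresponding VLAN id from the VFTA as well.
 **/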
4421 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4422 {
4423         struct e1000_hw *hw = &adapter->hw;
4424         u32 pool_mask, reg, vid;
4425         int i;
4426
4427         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4428
4429         /* Find the vlan filter for this id */
4430         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4431                 reg = rd32(E1000_VLVF(i));
4432
4433                 /* remove the vf from the pool */
4434                 reg &= ~pool_mask;
4435
4436                 /* if pool is empty then remove entry from vfta */
4437                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4438                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4439                         vid = reg & E1000_VLVF_VLANID_MASK;
4440                         igb_vfta_set(hw, vid, false);
4441                         reg = 0;
4442                 }
4443
4444                 wr32(E1000_VLVF(i), reg);
4445         }
4446
4447         adapter->vf_data[vf].vlans_enabled = 0;
4448 }
4449
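/**
 * igb_vlvf_set - add or remove a VLAN filter for a pool
 * @adapter: board private structure
 * @vid: VLAN id to add or remove
 * @add: true to add the filter, false to remove it
 * @vf: pool (VF) index
 *
 * Finds or allocates a VLVF entry for @vid and updates its pool mask.
 * For VF pools the VMOLR RLPML field is grown or shrunk by 4 bytes to
 * make room for the VLAN tag when the first VLAN is added or the last
 * one is removed.
 **/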
4450 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4451 {
4452         struct e1000_hw *hw = &adapter->hw;
4453         u32 reg, i;
4454
4455         /* The vlvf table only exists on 82576 hardware and newer */
4456         if (hw->mac.type < e1000_82576)
4457                 return -1;
4458
4459         /* we only need to do this if VMDq is enabled */
4460         if (!adapter->vfs_allocated_count)
4461                 return -1;
4462
4463         /* Find the vlan filter for this id */
4464         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4465                 reg = rd32(E1000_VLVF(i));
4466                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4467                     vid == (reg & E1000_VLVF_VLANID_MASK))
4468                         break;
4469         }
4470
4471         if (add) {
4472                 if (i == E1000_VLVF_ARRAY_SIZE) {
4473                         /* Did not find a matching VLAN ID entry that was
4474                          * enabled.  Search for a free filter entry, i.e.
4475                          * one without the enable bit set
4476                          */
4477                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4478                                 reg = rd32(E1000_VLVF(i));
4479                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4480                                         break;
4481                         }
4482                 }
4483                 if (i < E1000_VLVF_ARRAY_SIZE) {
4484                         /* Found an enabled/available entry */
4485                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4486
4487                         /* if !enabled we need to set this up in vfta */
4488                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4489                                 /* add VID to filter table */
4490                                 igb_vfta_set(hw, vid, true);
4491                                 reg |= E1000_VLVF_VLANID_ENABLE;
4492                         }
4493                         reg &= ~E1000_VLVF_VLANID_MASK;
4494                         reg |= vid;
4495                         wr32(E1000_VLVF(i), reg);
4496
4497                         /* do not modify RLPML for PF devices */
4498                         if (vf >= adapter->vfs_allocated_count)
4499                                 return 0;
4500
4501                         if (!adapter->vf_data[vf].vlans_enabled) {
4502                                 u32 size;
4503                                 reg = rd32(E1000_VMOLR(vf));
4504                                 size = reg & E1000_VMOLR_RLPML_MASK;
4505                                 size += 4;
4506                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4507                                 reg |= size;
4508                                 wr32(E1000_VMOLR(vf), reg);
4509                         }
4510
4511                         adapter->vf_data[vf].vlans_enabled++;
4512                         return 0;
4513                 }
4514         } else {
4515                 if (i < E1000_VLVF_ARRAY_SIZE) {
4516                         /* remove vf from the pool */
4517                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4518                         /* if pool is empty then remove entry from vfta */
4519                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4520                                 reg = 0;
4521                                 igb_vfta_set(hw, vid, false);
4522                         }
4523                         wr32(E1000_VLVF(i), reg);
4524
4525                         /* do not modify RLPML for PF devices */
4526                         if (vf >= adapter->vfs_allocated_count)
4527                                 return 0;
4528
4529                         adapter->vf_data[vf].vlans_enabled--;
4530                         if (!adapter->vf_data[vf].vlans_enabled) {
4531                                 u32 size;
4532                                 reg = rd32(E1000_VMOLR(vf));
4533                                 size = reg & E1000_VMOLR_RLPML_MASK;
4534                                 size -= 4;
4535                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4536                                 reg |= size;
4537                                 wr32(E1000_VMOLR(vf), reg);
4538                         }
4539                 }
4540         }
4541         return 0;
4542 }
4543
4544 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4545 {
4546         struct e1000_hw *hw = &adapter->hw;
4547
4548         if (vid)
4549                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4550         else
4551                 wr32(E1000_VMVIR(vf), 0);
4552 }
4553
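/**
 * igb_ndo_set_vf_vlan - assign a port VLAN to a VF from the PF
 * @netdev: network interface device structure
 * @vf: VF index
 * @vlan: VLAN id to assign
 * @qos: 802.1p priority to insert along with the VLAN tag
 *
 * Passing a VLAN id and priority of zero removes the port VLAN.
 **/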
4554 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4555                                int vf, u16 vlan, u8 qos)
4556 {
4557         int err = 0;
4558         struct igb_adapter *adapter = netdev_priv(netdev);
4559
4560         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4561                 return -EINVAL;
4562         if (vlan || qos) {
4563                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4564                 if (err)
4565                         goto out;
4566                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4567                 igb_set_vmolr(adapter, vf, !vlan);
4568                 adapter->vf_data[vf].pf_vlan = vlan;
4569                 adapter->vf_data[vf].pf_qos = qos;
4570                 dev_info(&adapter->pdev->dev,
4571                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4572                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4573                         dev_warn(&adapter->pdev->dev,
4574                                  "The VF VLAN has been set,"
4575                                  " but the PF device is not up.\n");
4576                         dev_warn(&adapter->pdev->dev,
4577                                  "Bring the PF device up before"
4578                                  " attempting to use the VF device.\n");
4579                 }
4580         } else {
4581                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4582                                    false, vf);
4583                 igb_set_vmvir(adapter, vlan, vf);
4584                 igb_set_vmolr(adapter, vf, true);
4585                 adapter->vf_data[vf].pf_vlan = 0;
4586                 adapter->vf_data[vf].pf_qos = 0;
4587         }
4588 out:
4589         return err;
4590 }
4591
4592 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4593 {
4594         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4595         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4596
4597         return igb_vlvf_set(adapter, vid, add, vf);
4598 }
4599
4600 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4601 {
4602         /* clear flags */
4603         adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4604         adapter->vf_data[vf].last_nack = jiffies;
4605
4606         /* reset offloads to defaults */
4607         igb_set_vmolr(adapter, vf, true);
4608
4609         /* reset vlans for device */
4610         igb_clear_vf_vfta(adapter, vf);
4611         if (adapter->vf_data[vf].pf_vlan)
4612                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4613                                     adapter->vf_data[vf].pf_vlan,
4614                                     adapter->vf_data[vf].pf_qos);
4615         else
4616                 igb_clear_vf_vfta(adapter, vf);
4617
4618         /* reset multicast table array for vf */
4619         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4620
4621         /* Flush and reset the mta with the new values */
4622         igb_set_rx_mode(adapter->netdev);
4623 }
4624
4625 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4626 {
4627         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4628
4629         /* generate a new mac address as we were hotplug removed/added */
4630         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4631                 random_ether_addr(vf_mac);
4632
4633         /* process remaining reset events */
4634         igb_vf_reset(adapter, vf);
4635 }
4636
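/**
 * igb_vf_reset_msg - complete a VF-initiated reset
 * @adapter: board private structure
 * @vf: VF index
 *
 * Performs the same cleanup as a function level reset, programs the VF
 * MAC address into a receive address register, enables the VF's Tx/Rx
 * queues and replies to the VF with an ACK containing its MAC address.
 **/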
4637 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4638 {
4639         struct e1000_hw *hw = &adapter->hw;
4640         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4641         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4642         u32 reg, msgbuf[3];
4643         u8 *addr = (u8 *)(&msgbuf[1]);
4644
4645         /* process all the same items cleared in a function level reset */
4646         igb_vf_reset(adapter, vf);
4647
4648         /* set vf mac address */
4649         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4650
4651         /* enable transmit and receive for vf */
4652         reg = rd32(E1000_VFTE);
4653         wr32(E1000_VFTE, reg | (1 << vf));
4654         reg = rd32(E1000_VFRE);
4655         wr32(E1000_VFRE, reg | (1 << vf));
4656
4657         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4658
4659         /* reply to reset with ack and vf mac address */
4660         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4661         memcpy(addr, vf_mac, 6);
4662         igb_write_mbx(hw, msgbuf, 3, vf);
4663 }
4664
4665 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4666 {
4667         unsigned char *addr = (char *)&msg[1];
4668         int err = -1;
4669
4670         if (is_valid_ether_addr(addr))
4671                 err = igb_set_vf_mac(adapter, vf, addr);
4672
4673         return err;
4674 }
4675
4676 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4677 {
4678         struct e1000_hw *hw = &adapter->hw;
4679         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4680         u32 msg = E1000_VT_MSGTYPE_NACK;
4681
4682         /* if device isn't clear to send it shouldn't be reading either */
4683         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4684             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4685                 igb_write_mbx(hw, &msg, 1, vf);
4686                 vf_data->last_nack = jiffies;
4687         }
4688 }
4689
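/**
 * igb_rcv_msg_from_vf - read and dispatch one VF mailbox message
 * @adapter: board private structure
 * @vf: VF index
 *
 * Handles reset, MAC, promiscuous, multicast, LPE and VLAN requests
 * from the VF.  Requests from a VF that has not completed its reset
 * handshake are NACKed, as are any message types we do not recognize.
 **/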
4690 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4691 {
4692         struct pci_dev *pdev = adapter->pdev;
4693         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4694         struct e1000_hw *hw = &adapter->hw;
4695         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4696         s32 retval;
4697
4698         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4699
4700         if (retval) {
4701                 /* if receive failed revoke VF CTS stats and restart init */
4702                 dev_err(&pdev->dev, "Error receiving message from VF\n");
4703                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
4704                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4705                         return;
4706                 goto out;
4707         }
4708
4709         /* this is a message we already processed, do nothing */
4710         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4711                 return;
4712
4713         /*
4714          * until the vf completes a reset it should not be
4715          * allowed to start any configuration.
4716          */
4717
4718         if (msgbuf[0] == E1000_VF_RESET) {
4719                 igb_vf_reset_msg(adapter, vf);
4720                 return;
4721         }
4722
4723         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4724                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4725                         return;
4726                 retval = -1;
4727                 goto out;
4728         }
4729
4730         switch ((msgbuf[0] & 0xFFFF)) {
4731         case E1000_VF_SET_MAC_ADDR:
4732                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4733                 break;
4734         case E1000_VF_SET_PROMISC:
4735                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4736                 break;
4737         case E1000_VF_SET_MULTICAST:
4738                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4739                 break;
4740         case E1000_VF_SET_LPE:
4741                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4742                 break;
4743         case E1000_VF_SET_VLAN:
4744                 if (adapter->vf_data[vf].pf_vlan)
4745                         retval = -1;
4746                 else
4747                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4748                 break;
4749         default:
4750                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4751                 retval = -1;
4752                 break;
4753         }
4754
4755         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4756 out:
4757         /* notify the VF of the results of what it sent us */
4758         if (retval)
4759                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4760         else
4761                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4762
4763         igb_write_mbx(hw, msgbuf, 1, vf);
4764 }
4765
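/**
 * igb_msg_task - service all VF mailboxes
 * @adapter: board private structure
 *
 * Walks the allocated VFs and processes any pending reset requests,
 * messages and acknowledgements.
 **/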
4766 static void igb_msg_task(struct igb_adapter *adapter)
4767 {
4768         struct e1000_hw *hw = &adapter->hw;
4769         u32 vf;
4770
4771         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4772                 /* process any reset requests */
4773                 if (!igb_check_for_rst(hw, vf))
4774                         igb_vf_reset_event(adapter, vf);
4775
4776                 /* process any messages pending */
4777                 if (!igb_check_for_msg(hw, vf))
4778                         igb_rcv_msg_from_vf(adapter, vf);
4779
4780                 /* process any acks */
4781                 if (!igb_check_for_ack(hw, vf))
4782                         igb_rcv_ack_from_vf(adapter, vf);
4783         }
4784 }
4785
4786 /**
4787  *  igb_set_uta - Set unicast filter table address
4788  *  @adapter: board private structure
4789  *
4790  *  The unicast table address is a register array of 32-bit registers.
4791  *  The table is meant to be used in a way similar to how the MTA is used;
4792  *  however, due to certain limitations in the hardware it is necessary to
4793  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4794  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
4795  **/
4796 static void igb_set_uta(struct igb_adapter *adapter)
4797 {
4798         struct e1000_hw *hw = &adapter->hw;
4799         int i;
4800
4801         /* The UTA table only exists on 82576 hardware and newer */
4802         if (hw->mac.type < e1000_82576)
4803                 return;
4804
4805         /* we only need to do this if VMDq is enabled */
4806         if (!adapter->vfs_allocated_count)
4807                 return;
4808
4809         for (i = 0; i < hw->mac.uta_reg_count; i++)
4810                 array_wr32(E1000_UTA, i, ~0);
4811 }
4812
4813 /**
4814  * igb_intr_msi - Interrupt Handler
4815  * @irq: interrupt number
4816  * @data: pointer to a network interface device structure
4817  **/
4818 static irqreturn_t igb_intr_msi(int irq, void *data)
4819 {
4820         struct igb_adapter *adapter = data;
4821         struct igb_q_vector *q_vector = adapter->q_vector[0];
4822         struct e1000_hw *hw = &adapter->hw;
4823         /* read ICR disables interrupts using IAM */
4824         u32 icr = rd32(E1000_ICR);
4825
4826         igb_write_itr(q_vector);
4827
4828         if (icr & E1000_ICR_DRSTA)
4829                 schedule_work(&adapter->reset_task);
4830
4831         if (icr & E1000_ICR_DOUTSYNC) {
4832                 /* HW is reporting DMA is out of sync */
4833                 adapter->stats.doosync++;
4834         }
4835
4836         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4837                 hw->mac.get_link_status = 1;
4838                 if (!test_bit(__IGB_DOWN, &adapter->state))
4839                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4840         }
4841
4842         napi_schedule(&q_vector->napi);
4843
4844         return IRQ_HANDLED;
4845 }
4846
4847 /**
4848  * igb_intr - Legacy Interrupt Handler
4849  * @irq: interrupt number
4850  * @data: pointer to a network interface device structure
4851  **/
4852 static irqreturn_t igb_intr(int irq, void *data)
4853 {
4854         struct igb_adapter *adapter = data;
4855         struct igb_q_vector *q_vector = adapter->q_vector[0];
4856         struct e1000_hw *hw = &adapter->hw;
4857         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4858          * need for the IMC write */
4859         u32 icr = rd32(E1000_ICR);
4860         if (!icr)
4861                 return IRQ_NONE;  /* Not our interrupt */
4862
4863         igb_write_itr(q_vector);
4864
4865         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4866          * not set, then the adapter didn't send an interrupt */
4867         if (!(icr & E1000_ICR_INT_ASSERTED))
4868                 return IRQ_NONE;
4869
4870         if (icr & E1000_ICR_DRSTA)
4871                 schedule_work(&adapter->reset_task);
4872
4873         if (icr & E1000_ICR_DOUTSYNC) {
4874                 /* HW is reporting DMA is out of sync */
4875                 adapter->stats.doosync++;
4876         }
4877
4878         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4879                 hw->mac.get_link_status = 1;
4880                 /* guard against interrupt when we're going down */
4881                 if (!test_bit(__IGB_DOWN, &adapter->state))
4882                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4883         }
4884
4885         napi_schedule(&q_vector->napi);
4886
4887         return IRQ_HANDLED;
4888 }
4889
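/**
 * igb_ring_irq_enable - re-enable interrupts for a queue vector
 * @q_vector: vector to re-arm after NAPI polling
 *
 * Updates the vector's ITR when a dynamic ITR setting is in use, then
 * re-enables either the vector's EIMS bit (MSI-X) or the legacy/MSI
 * interrupt mask, provided the interface is not going down.
 **/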
4890 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4891 {
4892         struct igb_adapter *adapter = q_vector->adapter;
4893         struct e1000_hw *hw = &adapter->hw;
4894
4895         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4896             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4897                 if (!adapter->msix_entries)
4898                         igb_set_itr(adapter);
4899                 else
4900                         igb_update_ring_itr(q_vector);
4901         }
4902
4903         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4904                 if (adapter->msix_entries)
4905                         wr32(E1000_EIMS, q_vector->eims_value);
4906                 else
4907                         igb_irq_enable(adapter);
4908         }
4909 }
4910
4911 /**
4912  * igb_poll - NAPI Rx polling callback
4913  * @napi: napi polling structure
4914  * @budget: count of how many packets we should handle
4915  **/
4916 static int igb_poll(struct napi_struct *napi, int budget)
4917 {
4918         struct igb_q_vector *q_vector = container_of(napi,
4919                                                      struct igb_q_vector,
4920                                                      napi);
4921         int tx_clean_complete = 1, work_done = 0;
4922
4923 #ifdef CONFIG_IGB_DCA
4924         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4925                 igb_update_dca(q_vector);
4926 #endif
4927         if (q_vector->tx_ring)
4928                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4929
4930         if (q_vector->rx_ring)
4931                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4932
4933         if (!tx_clean_complete)
4934                 work_done = budget;
4935
4936         /* If not enough Rx work done, exit the polling mode */
4937         if (work_done < budget) {
4938                 napi_complete(napi);
4939                 igb_ring_irq_enable(q_vector);
4940         }
4941
4942         return work_done;
4943 }
4944
4945 /**
4946  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4947  * @adapter: board private structure
4948  * @shhwtstamps: timestamp structure to update
4949  * @regval: unsigned 64bit system time value.
4950  *
4951  * We need to convert the system time value stored in the RX/TXSTMP registers
4952  * into a hwtstamp which can be used by the upper level timestamping functions
4953  */
4954 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4955                                    struct skb_shared_hwtstamps *shhwtstamps,
4956                                    u64 regval)
4957 {
4958         u64 ns;
4959
4960         /*
4961          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up by
4962          * 24 bits to match the clock shift we set up earlier.
4963          */
4964         if (adapter->hw.mac.type == e1000_82580)
4965                 regval <<= IGB_82580_TSYNC_SHIFT;
4966
4967         ns = timecounter_cyc2time(&adapter->clock, regval);
4968         timecompare_update(&adapter->compare, ns);
4969         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4970         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4971         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4972 }
4973
4974 /**
4975  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4976  * @q_vector: pointer to q_vector containing needed info
4977  * @skb: packet that was just sent
4978  *
4979  * If we were asked to do hardware stamping and such a time stamp is
4980  * available, then it must have been for this skb here because we
4981  * allow only one such packet into the queue.
4982  */
4983 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4984 {
4985         struct igb_adapter *adapter = q_vector->adapter;
4986         union skb_shared_tx *shtx = skb_tx(skb);
4987         struct e1000_hw *hw = &adapter->hw;
4988         struct skb_shared_hwtstamps shhwtstamps;
4989         u64 regval;
4990
4991         /* if skb does not support hw timestamp or TX stamp not valid exit */
4992         if (likely(!shtx->hardware) ||
4993             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4994                 return;
4995
4996         regval = rd32(E1000_TXSTMPL);
4997         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4998
4999         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5000         skb_tstamp_tx(skb, &shhwtstamps);
5001 }
5002
5003 /**
5004  * igb_clean_tx_irq - Reclaim resources after transmit completes
5005  * @q_vector: pointer to q_vector containing needed info
5006  * returns true if ring is completely cleaned
5007  **/
5008 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5009 {
5010         struct igb_adapter *adapter = q_vector->adapter;
5011         struct igb_ring *tx_ring = q_vector->tx_ring;
5012         struct net_device *netdev = tx_ring->netdev;
5013         struct e1000_hw *hw = &adapter->hw;
5014         struct igb_buffer *buffer_info;
5015         struct sk_buff *skb;
5016         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5017         unsigned int total_bytes = 0, total_packets = 0;
5018         unsigned int i, eop, count = 0;
5019         bool cleaned = false;
5020
5021         i = tx_ring->next_to_clean;
5022         eop = tx_ring->buffer_info[i].next_to_watch;
5023         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5024
5025         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5026                (count < tx_ring->count)) {
5027                 for (cleaned = false; !cleaned; count++) {
5028                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5029                         buffer_info = &tx_ring->buffer_info[i];
5030                         cleaned = (i == eop);
5031                         skb = buffer_info->skb;
5032
5033                         if (skb) {
5034                                 unsigned int segs, bytecount;
5035                                 /* gso_segs is currently only valid for tcp */
5036                                 segs = buffer_info->gso_segs;
5037                                 /* multiply data chunks by size of headers */
5038                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
5039                                             skb->len;
5040                                 total_packets += segs;
5041                                 total_bytes += bytecount;
5042
5043                                 igb_tx_hwtstamp(q_vector, skb);
5044                         }
5045
5046                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5047                         tx_desc->wb.status = 0;
5048
5049                         i++;
5050                         if (i == tx_ring->count)
5051                                 i = 0;
5052                 }
5053                 eop = tx_ring->buffer_info[i].next_to_watch;
5054                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5055         }
5056
5057         tx_ring->next_to_clean = i;
5058
5059         if (unlikely(count &&
5060                      netif_carrier_ok(netdev) &&
5061                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5062                 /* Make sure that anybody stopping the queue after this
5063                  * sees the new next_to_clean.
5064                  */
5065                 smp_mb();
5066                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5067                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5068                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5069                         tx_ring->tx_stats.restart_queue++;
5070                 }
5071         }
5072
5073         if (tx_ring->detect_tx_hung) {
5074                 /* Detect a transmit hang in hardware, this serializes the
5075                  * check with the clearing of time_stamp and movement of i */
5076                 tx_ring->detect_tx_hung = false;
5077                 if (tx_ring->buffer_info[i].time_stamp &&
5078                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5079                                (adapter->tx_timeout_factor * HZ)) &&
5080                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5081
5082                         /* detected Tx unit hang */
5083                         dev_err(&tx_ring->pdev->dev,
5084                                 "Detected Tx Unit Hang\n"
5085                                 "  Tx Queue             <%d>\n"
5086                                 "  TDH                  <%x>\n"
5087                                 "  TDT                  <%x>\n"
5088                                 "  next_to_use          <%x>\n"
5089                                 "  next_to_clean        <%x>\n"
5090                                 "buffer_info[next_to_clean]\n"
5091                                 "  time_stamp           <%lx>\n"
5092                                 "  next_to_watch        <%x>\n"
5093                                 "  jiffies              <%lx>\n"
5094                                 "  desc.status          <%x>\n",
5095                                 tx_ring->queue_index,
5096                                 readl(tx_ring->head),
5097                                 readl(tx_ring->tail),
5098                                 tx_ring->next_to_use,
5099                                 tx_ring->next_to_clean,
5100                                 tx_ring->buffer_info[eop].time_stamp,
5101                                 eop,
5102                                 jiffies,
5103                                 eop_desc->wb.status);
5104                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5105                 }
5106         }
5107         tx_ring->total_bytes += total_bytes;
5108         tx_ring->total_packets += total_packets;
5109         tx_ring->tx_stats.bytes += total_bytes;
5110         tx_ring->tx_stats.packets += total_packets;
5111         return (count < tx_ring->count);
5112 }
5113
5114 /**
5115  * igb_receive_skb - helper function to handle rx indications
5116  * @q_vector: structure containing interrupt and ring information
5117  * @skb: packet to send up
5118  * @vlan_tag: vlan tag for packet
5119  **/
5120 static void igb_receive_skb(struct igb_q_vector *q_vector,
5121                             struct sk_buff *skb,
5122                             u16 vlan_tag)
5123 {
5124         struct igb_adapter *adapter = q_vector->adapter;
5125
5126         if (vlan_tag)
5127                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5128                                  vlan_tag, skb);
5129         else
5130                 napi_gro_receive(&q_vector->napi, skb);
5131 }
5132
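/**
 * igb_rx_checksum_adv - set skb checksum state from Rx descriptor status
 * @ring: ring the packet was received on
 * @status_err: status/error bits from the advanced Rx descriptor
 * @skb: packet being processed
 *
 * Marks the skb CHECKSUM_UNNECESSARY when hardware validated the TCP/UDP
 * checksum, honoring the per-ring csum-disable flag and the short-frame
 * SCTP errata noted below.
 **/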
5133 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5134                                        u32 status_err, struct sk_buff *skb)
5135 {
5136         skb->ip_summed = CHECKSUM_NONE;
5137
5138         /* skip if the Ignore Checksum bit is set or checksum is disabled through ethtool */
5139         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5140              (status_err & E1000_RXD_STAT_IXSM))
5141                 return;
5142
5143         /* TCP/UDP checksum error bit is set */
5144         if (status_err &
5145             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5146                 /*
5147                  * work around errata with sctp packets where the TCPE aka
5148                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5149                  * packets, (aka let the stack check the crc32c)
5150                  */
5151                 if ((skb->len == 60) &&
5152                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5153                         ring->rx_stats.csum_err++;
5154
5155                 /* let the stack verify checksum errors */
5156                 return;
5157         }
5158         /* It must be a TCP or UDP packet with a valid checksum */
5159         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5160                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5161
5162         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
5163 }
5164
5165 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5166                                    struct sk_buff *skb)
5167 {
5168         struct igb_adapter *adapter = q_vector->adapter;
5169         struct e1000_hw *hw = &adapter->hw;
5170         u64 regval;
5171
5172         /*
5173          * If this bit is set, then the RX registers contain the time stamp. No
5174          * other packet will be time stamped until we read these registers, so
5175          * read the registers to make them available again. Because only one
5176          * packet can be time stamped at a time, we know that the register
5177          * values must belong to this one here and therefore we don't need to
5178          * compare any of the additional attributes stored for it.
5179          *
5180          * If nothing went wrong, then it should have a skb_shared_tx that we
5181          * can turn into a skb_shared_hwtstamps.
5182          */
5183         if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
5184                 return;
5185         if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5186                 return;
5187
5188         regval = rd32(E1000_RXSTMPL);
5189         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5190
5191         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5192 }
5193 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5194                                union e1000_adv_rx_desc *rx_desc)
5195 {
5196         /* HW will not DMA in data larger than the given buffer, even if it
5197          * parses the (NFS, of course) header to be larger.  In that case, it
5198          * fills the header buffer and spills the rest into the page.
5199          */
5200         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5201                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5202         if (hlen > rx_ring->rx_buffer_len)
5203                 hlen = rx_ring->rx_buffer_len;
5204         return hlen;
5205 }
5206
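/**
 * igb_clean_rx_irq_adv - clean completed advanced Rx descriptors
 * @q_vector: vector owning the Rx ring to clean
 * @work_done: incremented for every packet processed
 * @budget: maximum number of packets to process
 *
 * Walks the completed descriptors, reassembles header-split and
 * multi-descriptor packets, applies timestamp and checksum handling,
 * hands the resulting skbs to the stack and refills buffers as it goes.
 **/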
5207 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5208                                  int *work_done, int budget)
5209 {
5210         struct igb_ring *rx_ring = q_vector->rx_ring;
5211         struct net_device *netdev = rx_ring->netdev;
5212         struct pci_dev *pdev = rx_ring->pdev;
5213         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5214         struct igb_buffer *buffer_info, *next_buffer;
5215         struct sk_buff *skb;
5216         bool cleaned = false;
5217         int cleaned_count = 0;
5218         int current_node = numa_node_id();
5219         unsigned int total_bytes = 0, total_packets = 0;
5220         unsigned int i;
5221         u32 staterr;
5222         u16 length;
5223         u16 vlan_tag;
5224
5225         i = rx_ring->next_to_clean;
5226         buffer_info = &rx_ring->buffer_info[i];
5227         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5228         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5229
5230         while (staterr & E1000_RXD_STAT_DD) {
5231                 if (*work_done >= budget)
5232                         break;
5233                 (*work_done)++;
5234
5235                 skb = buffer_info->skb;
5236                 prefetch(skb->data - NET_IP_ALIGN);
5237                 buffer_info->skb = NULL;
5238
5239                 i++;
5240                 if (i == rx_ring->count)
5241                         i = 0;
5242
5243                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5244                 prefetch(next_rxd);
5245                 next_buffer = &rx_ring->buffer_info[i];
5246
5247                 length = le16_to_cpu(rx_desc->wb.upper.length);
5248                 cleaned = true;
5249                 cleaned_count++;
5250
5251                 if (buffer_info->dma) {
5252                         pci_unmap_single(pdev, buffer_info->dma,
5253                                          rx_ring->rx_buffer_len,
5254                                          PCI_DMA_FROMDEVICE);
5255                         buffer_info->dma = 0;
5256                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5257                                 skb_put(skb, length);
5258                                 goto send_up;
5259                         }
5260                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5261                 }
5262
5263                 if (length) {
5264                         pci_unmap_page(pdev, buffer_info->page_dma,
5265                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5266                         buffer_info->page_dma = 0;
5267
5268                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
5269                                                 buffer_info->page,
5270                                                 buffer_info->page_offset,
5271                                                 length);
5272
5273                         if ((page_count(buffer_info->page) != 1) ||
5274                             (page_to_nid(buffer_info->page) != current_node))
5275                                 buffer_info->page = NULL;
5276                         else
5277                                 get_page(buffer_info->page);
5278
5279                         skb->len += length;
5280                         skb->data_len += length;
5281                         skb->truesize += length;
5282                 }
5283
5284                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5285                         buffer_info->skb = next_buffer->skb;
5286                         buffer_info->dma = next_buffer->dma;
5287                         next_buffer->skb = skb;
5288                         next_buffer->dma = 0;
5289                         goto next_desc;
5290                 }
5291 send_up:
5292                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5293                         dev_kfree_skb_irq(skb);
5294                         goto next_desc;
5295                 }
5296
5297                 igb_rx_hwtstamp(q_vector, staterr, skb);
5298                 total_bytes += skb->len;
5299                 total_packets++;
5300
5301                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5302
5303                 skb->protocol = eth_type_trans(skb, netdev);
5304                 skb_record_rx_queue(skb, rx_ring->queue_index);
5305
5306                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5307                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5308
5309                 igb_receive_skb(q_vector, skb, vlan_tag);
5310
5311 next_desc:
5312                 rx_desc->wb.upper.status_error = 0;
5313
5314                 /* return some buffers to hardware, one at a time is too slow */
5315                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5316                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5317                         cleaned_count = 0;
5318                 }
5319
5320                 /* use prefetched values */
5321                 rx_desc = next_rxd;
5322                 buffer_info = next_buffer;
5323                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5324         }
5325
5326         rx_ring->next_to_clean = i;
5327         cleaned_count = igb_desc_unused(rx_ring);
5328
5329         if (cleaned_count)
5330                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5331
5332         rx_ring->total_packets += total_packets;
5333         rx_ring->total_bytes += total_bytes;
5334         rx_ring->rx_stats.packets += total_packets;
5335         rx_ring->rx_stats.bytes += total_bytes;
5336         return cleaned;
5337 }
5338
5339 /**
5340  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5341  * @rx_ring: rx descriptor ring to place buffers on
 * @cleaned_count: number of rx buffers to replace
5342  **/
5343 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5344 {
5345         struct net_device *netdev = rx_ring->netdev;
5346         union e1000_adv_rx_desc *rx_desc;
5347         struct igb_buffer *buffer_info;
5348         struct sk_buff *skb;
5349         unsigned int i;
5350         int bufsz;
5351
5352         i = rx_ring->next_to_use;
5353         buffer_info = &rx_ring->buffer_info[i];
5354
5355         bufsz = rx_ring->rx_buffer_len;
5356
5357         while (cleaned_count--) {
5358                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5359
5360                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5361                         if (!buffer_info->page) {
5362                                 buffer_info->page = netdev_alloc_page(netdev);
5363                                 if (!buffer_info->page) {
5364                                         rx_ring->rx_stats.alloc_failed++;
5365                                         goto no_buffers;
5366                                 }
5367                                 buffer_info->page_offset = 0;
5368                         } else {
5369                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5370                         }
5371                         buffer_info->page_dma =
5372                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5373                                              buffer_info->page_offset,
5374                                              PAGE_SIZE / 2,
5375                                              PCI_DMA_FROMDEVICE);
5376                         if (pci_dma_mapping_error(rx_ring->pdev,
5377                                                   buffer_info->page_dma)) {
5378                                 buffer_info->page_dma = 0;
5379                                 rx_ring->rx_stats.alloc_failed++;
5380                                 goto no_buffers;
5381                         }
5382                 }
5383
5384                 skb = buffer_info->skb;
5385                 if (!skb) {
5386                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5387                         if (!skb) {
5388                                 rx_ring->rx_stats.alloc_failed++;
5389                                 goto no_buffers;
5390                         }
5391
5392                         buffer_info->skb = skb;
5393                 }
5394                 if (!buffer_info->dma) {
5395                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5396                                                           skb->data,
5397                                                           bufsz,
5398                                                           PCI_DMA_FROMDEVICE);
5399                         if (pci_dma_mapping_error(rx_ring->pdev,
5400                                                   buffer_info->dma)) {
5401                                 buffer_info->dma = 0;
5402                                 rx_ring->rx_stats.alloc_failed++;
5403                                 goto no_buffers;
5404                         }
5405                 }
5406                 /* Refresh the desc even if buffer_addrs didn't change because
5407                  * each write-back erases this info. */
5408                 if (bufsz < IGB_RXBUFFER_1024) {
5409                         rx_desc->read.pkt_addr =
5410                              cpu_to_le64(buffer_info->page_dma);
5411                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5412                 } else {
5413                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5414                         rx_desc->read.hdr_addr = 0;
5415                 }
5416
5417                 i++;
5418                 if (i == rx_ring->count)
5419                         i = 0;
5420                 buffer_info = &rx_ring->buffer_info[i];
5421         }
5422
5423 no_buffers:
5424         if (rx_ring->next_to_use != i) {
5425                 rx_ring->next_to_use = i;
5426                 if (i == 0)
5427                         i = (rx_ring->count - 1);
5428                 else
5429                         i--;
5430
5431                 /* Force memory writes to complete before letting h/w
5432                  * know there are new descriptors to fetch.  (Only
5433                  * applicable for weak-ordered memory model archs,
5434                  * such as IA-64). */
5435                 wmb();
5436                 writel(i, rx_ring->tail);
5437         }
5438 }
5439
5440 /**
5441  * igb_mii_ioctl - read/write copper PHY registers via MII ioctls
5442  * @netdev: network interface device structure
5443  * @ifr: interface request containing the MII register data
5444  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
5445  **/
5446 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5447 {
5448         struct igb_adapter *adapter = netdev_priv(netdev);
5449         struct mii_ioctl_data *data = if_mii(ifr);
5450
5451         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5452                 return -EOPNOTSUPP;
5453
5454         switch (cmd) {
5455         case SIOCGMIIPHY:
5456                 data->phy_id = adapter->hw.phy.addr;
5457                 break;
5458         case SIOCGMIIREG:
5459                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5460                                      &data->val_out))
5461                         return -EIO;
5462                 break;
5463         case SIOCSMIIREG:
5464         default:
5465                 return -EOPNOTSUPP;
5466         }
5467         return 0;
5468 }
5469
5470 /**
5471  * igb_hwtstamp_ioctl - control hardware time stamping
5472  * @netdev: network interface device structure
5473  * @ifr: interface request containing the user's hwtstamp_config
5474  * @cmd: ioctl command (SIOCSHWTSTAMP)
5475  *
5476  * Outgoing time stamping can be enabled and disabled. Play nice and
5477  * disable it when requested, although it shouldn't cause any overhead
5478  * when no packet needs it. At most one packet in the queue may be
5479  * marked for time stamping, otherwise it would be impossible to tell
5480  * for sure to which packet the hardware time stamp belongs.
5481  *
5482  * Incoming time stamping has to be configured via the hardware
5483  * filters. Not all combinations are supported, in particular event
5484  * type has to be specified. Matching the kind of event packet is
5485  * not supported, with the exception of "all V2 events regardless of
5486  * level 2 or 4".
5487  *
5488  **/
5489 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5490                               struct ifreq *ifr, int cmd)
5491 {
5492         struct igb_adapter *adapter = netdev_priv(netdev);
5493         struct e1000_hw *hw = &adapter->hw;
5494         struct hwtstamp_config config;
5495         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5496         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5497         u32 tsync_rx_cfg = 0;
5498         bool is_l4 = false;
5499         bool is_l2 = false;
5500         u32 regval;
5501
5502         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5503                 return -EFAULT;
5504
5505         /* reserved for future extensions */
5506         if (config.flags)
5507                 return -EINVAL;
5508
5509         switch (config.tx_type) {
5510         case HWTSTAMP_TX_OFF:
5511                 tsync_tx_ctl = 0;
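                /* fall through */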
5512         case HWTSTAMP_TX_ON:
5513                 break;
5514         default:
5515                 return -ERANGE;
5516         }
5517
5518         switch (config.rx_filter) {
5519         case HWTSTAMP_FILTER_NONE:
5520                 tsync_rx_ctl = 0;
5521                 break;
5522         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5523         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5524         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5525         case HWTSTAMP_FILTER_ALL:
5526                 /*
5527                  * register TSYNCRXCFG must be set, therefore it is not
5528                  * possible to time stamp both Sync and Delay_Req messages
5529                  * => fall back to time stamping all packets
5530                  */
5531                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5532                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5533                 break;
5534         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5535                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5536                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5537                 is_l4 = true;
5538                 break;
5539         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5540                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5541                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5542                 is_l4 = true;
5543                 break;
5544         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5545         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5546                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5547                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5548                 is_l2 = true;
5549                 is_l4 = true;
5550                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5551                 break;
5552         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5553         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5554                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5555                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5556                 is_l2 = true;
5557                 is_l4 = true;
5558                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5559                 break;
5560         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5561         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5562         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5563                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5564                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5565                 is_l2 = true;
5566                 break;
5567         default:
5568                 return -ERANGE;
5569         }
5570
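        /*
         * The 82575 cannot do per-packet hardware time stamping, so any
         * request that would enable it on that part is rejected here.
         */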
5571         if (hw->mac.type == e1000_82575) {
5572                 if (tsync_rx_ctl || tsync_tx_ctl)
5573                         return -EINVAL;
5574                 return 0;
5575         }
5576
5577         /* enable/disable TX */
5578         regval = rd32(E1000_TSYNCTXCTL);
5579         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5580         regval |= tsync_tx_ctl;
5581         wr32(E1000_TSYNCTXCTL, regval);
5582
5583         /* enable/disable RX */
5584         regval = rd32(E1000_TSYNCRXCTL);
5585         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5586         regval |= tsync_rx_ctl;
5587         wr32(E1000_TSYNCRXCTL, regval);
5588
5589         /* define which PTP packets are time stamped */
5590         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5591
5592         /* define ethertype filter for timestamped packets */
5593         if (is_l2)
5594                 wr32(E1000_ETQF(3),
5595                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5596                                  E1000_ETQF_1588 | /* enable timestamping */
5597                                  ETH_P_1588));     /* 1588 eth protocol type */
5598         else
5599                 wr32(E1000_ETQF(3), 0);
5600
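/* IEEE 1588 PTP event messages (Sync, Delay_Req) use UDP destination port 319 */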
5601 #define PTP_PORT 319
5602         /* L4 Queue Filter[3]: filter by destination port and protocol */
5603         if (is_l4) {
5604                 u32 ftqf = (IPPROTO_UDP /* UDP */
5605                         | E1000_FTQF_VF_BP /* VF not compared */
5606                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5607                         | E1000_FTQF_MASK); /* mask all inputs */
5608                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5609
5610                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5611                 wr32(E1000_IMIREXT(3),
5612                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5613                 if (hw->mac.type == e1000_82576) {
5614                         /* enable source port check */
5615                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5616                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5617                 }
5618                 wr32(E1000_FTQF(3), ftqf);
5619         } else {
5620                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5621         }
5622         wrfl();
5623
5624         adapter->hwtstamp_config = config;
5625
5626         /* clear TX/RX time stamp registers, just to be sure */
5627         regval = rd32(E1000_TXSTMPH);
5628         regval = rd32(E1000_RXSTMPH);
5629
5630         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5631                 -EFAULT : 0;
5632 }
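
/*
 * Illustrative sketch, not part of the driver: user space selects the
 * configuration handled above through the SIOCSHWTSTAMP ioctl. The socket
 * descriptor sock_fd and the interface name "eth0" are assumptions made
 * only for this example.
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *	};
 *	struct ifreq ifr = { };
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (void *)&cfg;
 *	if (ioctl(sock_fd, SIOCSHWTSTAMP, &ifr) < 0)
 *		perror("SIOCSHWTSTAMP");
 */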
5633
5634 /**
5635  * igb_ioctl - handle device-specific ioctl requests
5636  * @netdev: network interface device structure
5637  * @ifr: interface request data
5638  * @cmd: ioctl command
5639  **/
5640 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5641 {
5642         switch (cmd) {
5643         case SIOCGMIIPHY:
5644         case SIOCGMIIREG:
5645         case SIOCSMIIREG:
5646                 return igb_mii_ioctl(netdev, ifr, cmd);
5647         case SIOCSHWTSTAMP:
5648                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5649         default:
5650                 return -EOPNOTSUPP;
5651         }
5652 }
5653
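/*
 * igb_read_pcie_cap_reg - read from the PCI Express capability structure
 * @hw: pointer to the HW structure
 * @reg: offset within the PCI Express capability block
 * @value: where to store the 16-bit register contents
 */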
5654 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5655 {
5656         struct igb_adapter *adapter = hw->back;
5657         u16 cap_offset;
5658
5659         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5660         if (!cap_offset)
5661                 return -E1000_ERR_CONFIG;
5662
5663         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5664
5665         return 0;
5666 }
5667
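/*
 * igb_write_pcie_cap_reg - write to the PCI Express capability structure
 * @hw: pointer to the HW structure
 * @reg: offset within the PCI Express capability block
 * @value: 16-bit value to write
 */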
5668 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5669 {
5670         struct igb_adapter *adapter = hw->back;
5671         u16 cap_offset;
5672
5673         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5674         if (!cap_offset)
5675                 return -E1000_ERR_CONFIG;
5676
5677         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5678
5679         return 0;
5680 }
5681
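/*
 * Called by the VLAN layer when a vlan_group is attached or detached.
 * A non-NULL @grp enables hardware VLAN tag insert/strip (CTRL.VME) and
 * disables the CFI check; a NULL @grp disables tag insert/strip again.
 */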
5682 static void igb_vlan_rx_register(struct net_device *netdev,
5683                                  struct vlan_group *grp)
5684 {
5685         struct igb_adapter *adapter = netdev_priv(netdev);
5686         struct e1000_hw *hw = &adapter->hw;
5687         u32 ctrl, rctl;
5688
5689         igb_irq_disable(adapter);
5690         adapter->vlgrp = grp;
5691
5692         if (grp) {
5693                 /* enable VLAN tag insert/strip */
5694                 ctrl = rd32(E1000_CTRL);
5695                 ctrl |= E1000_CTRL_VME;
5696                 wr32(E1000_CTRL, ctrl);
5697
5698                 /* Disable CFI check */
5699                 rctl = rd32(E1000_RCTL);
5700                 rctl &= ~E1000_RCTL_CFIEN;
5701                 wr32(E1000_RCTL, rctl);
5702         } else {
5703                 /* disable VLAN tag insert/strip */
5704                 ctrl = rd32(E1000_CTRL);
5705                 ctrl &= ~E1000_CTRL_VME;
5706                 wr32(E1000_CTRL, ctrl);
5707         }
5708
5709         igb_rlpml_set(adapter);
5710
5711         if (!test_bit(__IGB_DOWN, &adapter->state))
5712                 igb_irq_enable(adapter);
5713 }
5714
5715 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5716 {
5717         struct igb_adapter *adapter = netdev_priv(netdev);
5718         struct e1000_hw *hw = &adapter->hw;
5719         int pf_id = adapter->vfs_allocated_count;
5720
5721         /* attempt to add filter to vlvf array */
5722         igb_vlvf_set(adapter, vid, true, pf_id);
5723
5724         /* add the filter since PF can receive vlans w/o entry in vlvf */
5725         igb_vfta_set(hw, vid, true);
5726 }
5727
5728 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5729 {
5730         struct igb_adapter *adapter = netdev_priv(netdev);
5731         struct e1000_hw *hw = &adapter->hw;
5732         int pf_id = adapter->vfs_allocated_count;
5733         s32 err;
5734
5735         igb_irq_disable(adapter);
5736         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5737
5738         if (!test_bit(__IGB_DOWN, &adapter->state))
5739                 igb_irq_enable(adapter);
5740
5741         /* remove vlan from VLVF table array */
5742         err = igb_vlvf_set(adapter, vid, false, pf_id);
5743
5744         /* if vid was not present in VLVF just remove it from table */
5745         if (err)
5746                 igb_vfta_set(hw, vid, false);
5747 }
5748
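/*
 * Re-register the current vlan_group and re-add every VLAN ID it still
 * contains, so that the hardware VLVF/VFTA filters match the software state.
 */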
5749 static void igb_restore_vlan(struct igb_adapter *adapter)
5750 {
5751         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5752
5753         if (adapter->vlgrp) {
5754                 u16 vid;
5755                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5756                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5757                                 continue;
5758                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5759                 }
5760         }
5761 }
5762
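/*
 * igb_set_spd_dplx - force link speed and duplex from an ethtool request
 * @adapter: board private structure
 * @spddplx: sum of a SPEED_* and a DUPLEX_* value
 *
 * 10 and 100 Mbps settings are forced with autonegotiation disabled;
 * 1000 Mbps full duplex is configured by advertising only that mode via
 * autonegotiation, and 1000 Mbps half duplex is rejected as unsupported.
 */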
5763 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5764 {
5765         struct pci_dev *pdev = adapter->pdev;
5766         struct e1000_mac_info *mac = &adapter->hw.mac;
5767
5768         mac->autoneg = 0;
5769
5770         switch (spddplx) {
5771         case SPEED_10 + DUPLEX_HALF:
5772                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5773                 break;
5774         case SPEED_10 + DUPLEX_FULL:
5775                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5776                 break;
5777         case SPEED_100 + DUPLEX_HALF:
5778                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5779                 break;
5780         case SPEED_100 + DUPLEX_FULL:
5781                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5782                 break;
5783         case SPEED_1000 + DUPLEX_FULL:
5784                 mac->autoneg = 1;
5785                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5786                 break;
5787         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5788         default:
5789                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5790                 return -EINVAL;
5791         }
5792         return 0;
5793 }
5794
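/*
 * Common suspend/shutdown path: stop the interface, program the wake-up
 * filter (WUFC) bits that match the configured Wake-on-LAN options, and
 * report through @enable_wake whether the device should stay armed for PME.
 */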
5795 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5796 {
5797         struct net_device *netdev = pci_get_drvdata(pdev);
5798         struct igb_adapter *adapter = netdev_priv(netdev);
5799         struct e1000_hw *hw = &adapter->hw;
5800         u32 ctrl, rctl, status;
5801         u32 wufc = adapter->wol;
5802 #ifdef CONFIG_PM
5803         int retval = 0;
5804 #endif
5805
5806         netif_device_detach(netdev);
5807
5808         if (netif_running(netdev))
5809                 igb_close(netdev);
5810
5811         igb_clear_interrupt_scheme(adapter);
5812
5813 #ifdef CONFIG_PM
5814         retval = pci_save_state(pdev);
5815         if (retval)
5816                 return retval;
5817 #endif
5818
5819         status = rd32(E1000_STATUS);
5820         if (status & E1000_STATUS_LU)
5821                 wufc &= ~E1000_WUFC_LNKC;
5822
5823         if (wufc) {
5824                 igb_setup_rctl(adapter);
5825                 igb_set_rx_mode(netdev);
5826
5827                 /* turn on all-multi mode if wake on multicast is enabled */
5828                 if (wufc & E1000_WUFC_MC) {
5829                         rctl = rd32(E1000_RCTL);
5830                         rctl |= E1000_RCTL_MPE;
5831                         wr32(E1000_RCTL, rctl);
5832                 }
5833
5834                 ctrl = rd32(E1000_CTRL);
5835                 /* advertise wake from D3Cold */
5836                 #define E1000_CTRL_ADVD3WUC 0x00100000
5837                 /* phy power management enable */
5838                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5839                 ctrl |= E1000_CTRL_ADVD3WUC;
5840                 wr32(E1000_CTRL, ctrl);
5841
5842                 /* Allow time for pending master requests to run */
5843                 igb_disable_pcie_master(hw);
5844
5845                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5846                 wr32(E1000_WUFC, wufc);
5847         } else {
5848                 wr32(E1000_WUC, 0);
5849                 wr32(E1000_WUFC, 0);
5850         }
5851
5852         *enable_wake = wufc || adapter->en_mng_pt;
5853         if (!*enable_wake)
5854                 igb_power_down_link(adapter);
5855         else
5856                 igb_power_up_link(adapter);
5857
5858         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5859          * would have already happened in close and is redundant. */
5860         igb_release_hw_control(adapter);
5861
5862         pci_disable_device(pdev);
5863
5864         return 0;
5865 }
5866
5867 #ifdef CONFIG_PM
5868 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5869 {
5870         int retval;
5871         bool wake;
5872
5873         retval = __igb_shutdown(pdev, &wake);
5874         if (retval)
5875                 return retval;
5876
5877         if (wake) {
5878                 pci_prepare_to_sleep(pdev);
5879         } else {
5880                 pci_wake_from_d3(pdev, false);
5881                 pci_set_power_state(pdev, PCI_D3hot);
5882         }
5883
5884         return 0;
5885 }
5886
5887 static int igb_resume(struct pci_dev *pdev)
5888 {
5889         struct net_device *netdev = pci_get_drvdata(pdev);
5890         struct igb_adapter *adapter = netdev_priv(netdev);
5891         struct e1000_hw *hw = &adapter->hw;
5892         u32 err;
5893
5894         pci_set_power_state(pdev, PCI_D0);
5895         pci_restore_state(pdev);
5896         pci_save_state(pdev);
5897
5898         err = pci_enable_device_mem(pdev);
5899         if (err) {
5900                 dev_err(&pdev->dev,
5901                         "igb: Cannot enable PCI device from suspend\n");
5902                 return err;
5903         }
5904         pci_set_master(pdev);
5905
5906         pci_enable_wake(pdev, PCI_D3hot, 0);
5907         pci_enable_wake(pdev, PCI_D3cold, 0);
5908
5909         if (igb_init_interrupt_scheme(adapter)) {
5910                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5911                 return -ENOMEM;
5912         }
5913
5914         igb_reset(adapter);
5915
5916         /* let the f/w know that the h/w is now under the control of the
5917          * driver. */
5918         igb_get_hw_control(adapter);
5919
5920         wr32(E1000_WUS, ~0);
5921
5922         if (netif_running(netdev)) {
5923                 err = igb_open(netdev);
5924                 if (err)
5925                         return err;
5926         }
5927
5928         netif_device_attach(netdev);
5929
5930         return 0;
5931 }
5932 #endif
5933
5934 static void igb_shutdown(struct pci_dev *pdev)
5935 {
5936         bool wake;
5937
5938         __igb_shutdown(pdev, &wake);
5939
5940         if (system_state == SYSTEM_POWER_OFF) {
5941                 pci_wake_from_d3(pdev, wake);
5942                 pci_set_power_state(pdev, PCI_D3hot);
5943         }
5944 }
5945
5946 #ifdef CONFIG_NET_POLL_CONTROLLER
5947 /*
5948  * Polling 'interrupt' - used by things like netconsole to send skbs
5949  * without having to re-enable interrupts. It's not called while
5950  * the interrupt routine is executing.
5951  */
5952 static void igb_netpoll(struct net_device *netdev)
5953 {
5954         struct igb_adapter *adapter = netdev_priv(netdev);
5955         struct e1000_hw *hw = &adapter->hw;
5956         int i;
5957
5958         if (!adapter->msix_entries) {
5959                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5960                 igb_irq_disable(adapter);
5961                 napi_schedule(&q_vector->napi);
5962                 return;
5963         }
5964
5965         for (i = 0; i < adapter->num_q_vectors; i++) {
5966                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5967                 wr32(E1000_EIMC, q_vector->eims_value);
5968                 napi_schedule(&q_vector->napi);
5969         }
5970 }
5971 #endif /* CONFIG_NET_POLL_CONTROLLER */
5972
5973 /**
5974  * igb_io_error_detected - called when PCI error is detected
5975  * @pdev: Pointer to PCI device
5976  * @state: The current pci connection state
5977  *
5978  * This function is called after a PCI bus error affecting
5979  * this device has been detected.
5980  */
5981 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5982                                               pci_channel_state_t state)
5983 {
5984         struct net_device *netdev = pci_get_drvdata(pdev);
5985         struct igb_adapter *adapter = netdev_priv(netdev);
5986
5987         netif_device_detach(netdev);
5988
5989         if (state == pci_channel_io_perm_failure)
5990                 return PCI_ERS_RESULT_DISCONNECT;
5991
5992         if (netif_running(netdev))
5993                 igb_down(adapter);
5994         pci_disable_device(pdev);
5995
5996         /* Request a slot reset. */
5997         return PCI_ERS_RESULT_NEED_RESET;
5998 }
5999
6000 /**
6001  * igb_io_slot_reset - called after the pci bus has been reset.
6002  * @pdev: Pointer to PCI device
6003  *
6004  * Restart the card from scratch, as if from a cold-boot. Implementation
6005  * resembles the first-half of the igb_resume routine.
6006  */
6007 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6008 {
6009         struct net_device *netdev = pci_get_drvdata(pdev);
6010         struct igb_adapter *adapter = netdev_priv(netdev);
6011         struct e1000_hw *hw = &adapter->hw;
6012         pci_ers_result_t result;
6013         int err;
6014
6015         if (pci_enable_device_mem(pdev)) {
6016                 dev_err(&pdev->dev,
6017                         "Cannot re-enable PCI device after reset.\n");
6018                 result = PCI_ERS_RESULT_DISCONNECT;
6019         } else {
6020                 pci_set_master(pdev);
6021                 pci_restore_state(pdev);
6022                 pci_save_state(pdev);
6023
6024                 pci_enable_wake(pdev, PCI_D3hot, 0);
6025                 pci_enable_wake(pdev, PCI_D3cold, 0);
6026
6027                 igb_reset(adapter);
6028                 wr32(E1000_WUS, ~0);
6029                 result = PCI_ERS_RESULT_RECOVERED;
6030         }
6031
6032         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6033         if (err) {
6034                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6035                         "failed 0x%x\n", err);
6036                 /* non-fatal, continue */
6037         }
6038
6039         return result;
6040 }
6041
6042 /**
6043  * igb_io_resume - called when traffic can start flowing again.
6044  * @pdev: Pointer to PCI device
6045  *
6046  * This callback is called when the error recovery driver tells us that
6047  * it's OK to resume normal operation. Implementation resembles the
6048  * second-half of the igb_resume routine.
6049  */
6050 static void igb_io_resume(struct pci_dev *pdev)
6051 {
6052         struct net_device *netdev = pci_get_drvdata(pdev);
6053         struct igb_adapter *adapter = netdev_priv(netdev);
6054
6055         if (netif_running(netdev)) {
6056                 if (igb_up(adapter)) {
6057                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6058                         return;
6059                 }
6060         }
6061
6062         netif_device_attach(netdev);
6063
6064         /* let the f/w know that the h/w is now under the control of the
6065          * driver. */
6066         igb_get_hw_control(adapter);
6067 }
6068
6069 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6070                              u8 qsel)
6071 {
6072         u32 rar_low, rar_high;
6073         struct e1000_hw *hw = &adapter->hw;
6074
6075         /* HW expects these in little endian so we reverse the byte order
6076          * from network order (big endian) to little endian
6077          */
6078         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6079                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6080         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6081
6082         /* Indicate to hardware the Address is Valid. */
6083         rar_high |= E1000_RAH_AV;
6084
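        /*
         * Select the pool for this address: the 82575 stores the pool
         * number directly in the RAH pool field, while later parts treat
         * the field as a bitmap with one bit per pool.
         */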
6085         if (hw->mac.type == e1000_82575)
6086                 rar_high |= E1000_RAH_POOL_1 * qsel;
6087         else
6088                 rar_high |= E1000_RAH_POOL_1 << qsel;
6089
6090         wr32(E1000_RAL(index), rar_low);
6091         wrfl();
6092         wr32(E1000_RAH(index), rar_high);
6093         wrfl();
6094 }
6095
6096 static int igb_set_vf_mac(struct igb_adapter *adapter,
6097                           int vf, unsigned char *mac_addr)
6098 {
6099         struct e1000_hw *hw = &adapter->hw;
6100         /* VF MAC addresses start at the end of the receive address registers and
6101          * move towards the first; as a result a collision should not be possible */
6102         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6103
6104         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6105
6106         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6107
6108         return 0;
6109 }
6110
6111 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6112 {
6113         struct igb_adapter *adapter = netdev_priv(netdev);
6114         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6115                 return -EINVAL;
6116         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6117         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6118         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6119                                       " change effective.\n");
6120         if (test_bit(__IGB_DOWN, &adapter->state)) {
6121                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6122                          " but the PF device is not up.\n");
6123                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6124                          " attempting to use the VF device.\n");
6125         }
6126         return igb_set_vf_mac(adapter, vf, mac);
6127 }
6128
6129 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6130 {
6131         return -EOPNOTSUPP;
6132 }
6133
6134 static int igb_ndo_get_vf_config(struct net_device *netdev,
6135                                  int vf, struct ifla_vf_info *ivi)
6136 {
6137         struct igb_adapter *adapter = netdev_priv(netdev);
6138         if (vf >= adapter->vfs_allocated_count)
6139                 return -EINVAL;
6140         ivi->vf = vf;
6141         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6142         ivi->tx_rate = 0;
6143         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6144         ivi->qos = adapter->vf_data[vf].pf_qos;
6145         return 0;
6146 }
6147
6148 static void igb_vmm_control(struct igb_adapter *adapter)
6149 {
6150         struct e1000_hw *hw = &adapter->hw;
6151         u32 reg;
6152
6153         /* replication is not supported for 82575 */
6154         if (hw->mac.type == e1000_82575)
6155                 return;
6156
6157         /* enable replication vlan tag stripping */
6158         reg = rd32(E1000_RPLOLR);
6159         reg |= E1000_RPLOLR_STRVLAN;
6160         wr32(E1000_RPLOLR, reg);
6161
6162         /* notify HW that the MAC is adding vlan tags */
6163         reg = rd32(E1000_DTXCTL);
6164         reg |= E1000_DTXCTL_VLAN_ADDED;
6165         wr32(E1000_DTXCTL, reg);
6166
6167         if (adapter->vfs_allocated_count) {
6168                 igb_vmdq_set_loopback_pf(hw, true);
6169                 igb_vmdq_set_replication_pf(hw, true);
6170         } else {
6171                 igb_vmdq_set_loopback_pf(hw, false);
6172                 igb_vmdq_set_replication_pf(hw, false);
6173         }
6174 }
6175
6176 /* igb_main.c */