igb: add support for Intel I350 Gigabit Network Connection
[pandora-kernel.git] drivers/net/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
52 #define DRV_VERSION "2.1.0-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56                                 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60         [board_82575] = &e1000_82575_info,
61 };
62
63 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
64         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
84         /* required last entry */
85         {0, }
86 };
87
88 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
89
90 void igb_reset(struct igb_adapter *);
91 static int igb_setup_all_tx_resources(struct igb_adapter *);
92 static int igb_setup_all_rx_resources(struct igb_adapter *);
93 static void igb_free_all_tx_resources(struct igb_adapter *);
94 static void igb_free_all_rx_resources(struct igb_adapter *);
95 static void igb_setup_mrqc(struct igb_adapter *);
96 void igb_update_stats(struct igb_adapter *);
97 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
98 static void __devexit igb_remove(struct pci_dev *pdev);
99 static int igb_sw_init(struct igb_adapter *);
100 static int igb_open(struct net_device *);
101 static int igb_close(struct net_device *);
102 static void igb_configure_tx(struct igb_adapter *);
103 static void igb_configure_rx(struct igb_adapter *);
104 static void igb_clean_all_tx_rings(struct igb_adapter *);
105 static void igb_clean_all_rx_rings(struct igb_adapter *);
106 static void igb_clean_tx_ring(struct igb_ring *);
107 static void igb_clean_rx_ring(struct igb_ring *);
108 static void igb_set_rx_mode(struct net_device *);
109 static void igb_update_phy_info(unsigned long);
110 static void igb_watchdog(unsigned long);
111 static void igb_watchdog_task(struct work_struct *);
112 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
113 static struct net_device_stats *igb_get_stats(struct net_device *);
114 static int igb_change_mtu(struct net_device *, int);
115 static int igb_set_mac(struct net_device *, void *);
116 static void igb_set_uta(struct igb_adapter *adapter);
117 static irqreturn_t igb_intr(int irq, void *);
118 static irqreturn_t igb_intr_msi(int irq, void *);
119 static irqreturn_t igb_msix_other(int irq, void *);
120 static irqreturn_t igb_msix_ring(int irq, void *);
121 #ifdef CONFIG_IGB_DCA
122 static void igb_update_dca(struct igb_q_vector *);
123 static void igb_setup_dca(struct igb_adapter *);
124 #endif /* CONFIG_IGB_DCA */
125 static bool igb_clean_tx_irq(struct igb_q_vector *);
126 static int igb_poll(struct napi_struct *, int);
127 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
128 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
129 static void igb_tx_timeout(struct net_device *);
130 static void igb_reset_task(struct work_struct *);
131 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
132 static void igb_vlan_rx_add_vid(struct net_device *, u16);
133 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
134 static void igb_restore_vlan(struct igb_adapter *);
135 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
136 static void igb_ping_all_vfs(struct igb_adapter *);
137 static void igb_msg_task(struct igb_adapter *);
138 static void igb_vmm_control(struct igb_adapter *);
139 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
140 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
141 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
142 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
143                                int vf, u16 vlan, u8 qos);
144 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
145 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
146                                  struct ifla_vf_info *ivi);
147
148 #ifdef CONFIG_PM
149 static int igb_suspend(struct pci_dev *, pm_message_t);
150 static int igb_resume(struct pci_dev *);
151 #endif
152 static void igb_shutdown(struct pci_dev *);
153 #ifdef CONFIG_IGB_DCA
154 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
155 static struct notifier_block dca_notifier = {
156         .notifier_call  = igb_notify_dca,
157         .next           = NULL,
158         .priority       = 0
159 };
160 #endif
161 #ifdef CONFIG_NET_POLL_CONTROLLER
162 /* for netdump / net console */
163 static void igb_netpoll(struct net_device *);
164 #endif
165 #ifdef CONFIG_PCI_IOV
166 static unsigned int max_vfs = 0;
167 module_param(max_vfs, uint, 0);
168 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
169                  "per physical function");
170 #endif /* CONFIG_PCI_IOV */
171
172 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
173                      pci_channel_state_t);
174 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
175 static void igb_io_resume(struct pci_dev *);
176
177 static struct pci_error_handlers igb_err_handler = {
178         .error_detected = igb_io_error_detected,
179         .slot_reset = igb_io_slot_reset,
180         .resume = igb_io_resume,
181 };
182
183
184 static struct pci_driver igb_driver = {
185         .name     = igb_driver_name,
186         .id_table = igb_pci_tbl,
187         .probe    = igb_probe,
188         .remove   = __devexit_p(igb_remove),
189 #ifdef CONFIG_PM
190         /* Power Management Hooks */
191         .suspend  = igb_suspend,
192         .resume   = igb_resume,
193 #endif
194         .shutdown = igb_shutdown,
195         .err_handler = &igb_err_handler
196 };
197
198 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
199 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
200 MODULE_LICENSE("GPL");
201 MODULE_VERSION(DRV_VERSION);
202
203 /**
204  * igb_read_clock - read raw cycle counter (to be used by time counter)
205  */
206 static cycle_t igb_read_clock(const struct cyclecounter *tc)
207 {
208         struct igb_adapter *adapter =
209                 container_of(tc, struct igb_adapter, cycles);
210         struct e1000_hw *hw = &adapter->hw;
211         u64 stamp = 0;
212         int shift = 0;
213
214         /*
215          * The timestamp latches on the lowest register read. For the 82580
216          * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
217          * adjusted TIMINCA, so SYSTIMR will just read as all 0s and can be ignored.
218          */
219         if (hw->mac.type == e1000_82580) {
220                 stamp = rd32(E1000_SYSTIMR) >> 8;
221                 shift = IGB_82580_TSYNC_SHIFT;
222         }
223
224         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
225         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
226         return stamp;
227 }
228
229 #ifdef DEBUG
230 /**
231  * igb_get_hw_dev_name - return device name string
232  * used by hardware layer to print debugging information
233  **/
234 char *igb_get_hw_dev_name(struct e1000_hw *hw)
235 {
236         struct igb_adapter *adapter = hw->back;
237         return adapter->netdev->name;
238 }
239
240 /**
241  * igb_get_time_str - format current NIC and system time as string
242  */
243 static char *igb_get_time_str(struct igb_adapter *adapter,
244                               char buffer[160])
245 {
246         cycle_t hw = adapter->cycles.read(&adapter->cycles);
247         struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
248         struct timespec sys;
249         struct timespec delta;
250         getnstimeofday(&sys);
251
252         delta = timespec_sub(nic, sys);
253
254         sprintf(buffer,
255                 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
256                 hw,
257                 (long)nic.tv_sec, nic.tv_nsec,
258                 (long)sys.tv_sec, sys.tv_nsec,
259                 (long)delta.tv_sec, delta.tv_nsec);
260
261         return buffer;
262 }
263 #endif
264
265 /**
266  * igb_init_module - Driver Registration Routine
267  *
268  * igb_init_module is the first routine called when the driver is
269  * loaded. All it does is register with the PCI subsystem.
270  **/
271 static int __init igb_init_module(void)
272 {
273         int ret;
274         printk(KERN_INFO "%s - version %s\n",
275                igb_driver_string, igb_driver_version);
276
277         printk(KERN_INFO "%s\n", igb_copyright);
278
279 #ifdef CONFIG_IGB_DCA
280         dca_register_notify(&dca_notifier);
281 #endif
282         ret = pci_register_driver(&igb_driver);
283         return ret;
284 }
285
286 module_init(igb_init_module);
287
288 /**
289  * igb_exit_module - Driver Exit Cleanup Routine
290  *
291  * igb_exit_module is called just before the driver is removed
292  * from memory.
293  **/
294 static void __exit igb_exit_module(void)
295 {
296 #ifdef CONFIG_IGB_DCA
297         dca_unregister_notify(&dca_notifier);
298 #endif
299         pci_unregister_driver(&igb_driver);
300 }
301
302 module_exit(igb_exit_module);
303
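/* Q_IDX_82576(i) interleaves ring indexes across the two queue banks used for
 * virtualization: even i maps to queue i/2, odd i maps to queue 8 + i/2
 * (e.g. 0 -> 0, 1 -> 8, 2 -> 1, 3 -> 9). */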
304 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
305 /**
306  * igb_cache_ring_register - Descriptor ring to register mapping
307  * @adapter: board private structure to initialize
308  *
309  * Once we know the feature-set enabled for the device, we'll cache
310  * the register offset the descriptor ring is assigned to.
311  **/
312 static void igb_cache_ring_register(struct igb_adapter *adapter)
313 {
314         int i = 0, j = 0;
315         u32 rbase_offset = adapter->vfs_allocated_count;
316
317         switch (adapter->hw.mac.type) {
318         case e1000_82576:
319                 /* The queues are allocated for virtualization such that VF 0
320                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
321                  * In order to avoid collision we start at the first free queue
322                  * and continue consuming queues in the same sequence
323                  */
324                 if (adapter->vfs_allocated_count) {
325                         for (; i < adapter->rss_queues; i++)
326                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
327                                                                Q_IDX_82576(i);
328                         for (; j < adapter->rss_queues; j++)
329                                 adapter->tx_ring[j]->reg_idx = rbase_offset +
330                                                                Q_IDX_82576(j);
331                 }
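                /* Fall through */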
332         case e1000_82575:
333         case e1000_82580:
334         case e1000_i350:
335         default:
336                 for (; i < adapter->num_rx_queues; i++)
337                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
338                 for (; j < adapter->num_tx_queues; j++)
339                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
340                 break;
341         }
342 }
343
344 static void igb_free_queues(struct igb_adapter *adapter)
345 {
346         int i;
347
348         for (i = 0; i < adapter->num_tx_queues; i++) {
349                 kfree(adapter->tx_ring[i]);
350                 adapter->tx_ring[i] = NULL;
351         }
352         for (i = 0; i < adapter->num_rx_queues; i++) {
353                 kfree(adapter->rx_ring[i]);
354                 adapter->rx_ring[i] = NULL;
355         }
356         adapter->num_rx_queues = 0;
357         adapter->num_tx_queues = 0;
358 }
359
360 /**
361  * igb_alloc_queues - Allocate memory for all rings
362  * @adapter: board private structure to initialize
363  *
364  * We allocate one ring per queue at run-time since we don't know the
365  * number of queues at compile-time.
366  **/
367 static int igb_alloc_queues(struct igb_adapter *adapter)
368 {
369         struct igb_ring *ring;
370         int i;
371
372         for (i = 0; i < adapter->num_tx_queues; i++) {
373                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
374                 if (!ring)
375                         goto err;
376                 ring->count = adapter->tx_ring_count;
377                 ring->queue_index = i;
378                 ring->pdev = adapter->pdev;
379                 ring->netdev = adapter->netdev;
380                 /* For 82575, context index must be unique per ring. */
381                 if (adapter->hw.mac.type == e1000_82575)
382                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
383                 adapter->tx_ring[i] = ring;
384         }
385
386         for (i = 0; i < adapter->num_rx_queues; i++) {
387                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
388                 if (!ring)
389                         goto err;
390                 ring->count = adapter->rx_ring_count;
391                 ring->queue_index = i;
392                 ring->pdev = adapter->pdev;
393                 ring->netdev = adapter->netdev;
394                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
395                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
396                 /* set flag indicating ring supports SCTP checksum offload */
397                 if (adapter->hw.mac.type >= e1000_82576)
398                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
399                 adapter->rx_ring[i] = ring;
400         }
401
402         igb_cache_ring_register(adapter);
403
404         return 0;
405
406 err:
407         igb_free_queues(adapter);
408
409         return -ENOMEM;
410 }
411
412 #define IGB_N0_QUEUE -1
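/* IGB_N0_QUEUE marks a vector direction (Rx or Tx) with no ring assigned */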
413 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
414 {
415         u32 msixbm = 0;
416         struct igb_adapter *adapter = q_vector->adapter;
417         struct e1000_hw *hw = &adapter->hw;
418         u32 ivar, index;
419         int rx_queue = IGB_N0_QUEUE;
420         int tx_queue = IGB_N0_QUEUE;
421
422         if (q_vector->rx_ring)
423                 rx_queue = q_vector->rx_ring->reg_idx;
424         if (q_vector->tx_ring)
425                 tx_queue = q_vector->tx_ring->reg_idx;
426
427         switch (hw->mac.type) {
428         case e1000_82575:
429                 /* The 82575 assigns vectors using a bitmask, which matches the
430                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
431                    or more queues to a vector, we write the appropriate bits
432                    into the MSIXBM register for that vector. */
433                 if (rx_queue > IGB_N0_QUEUE)
434                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
435                 if (tx_queue > IGB_N0_QUEUE)
436                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
437                 if (!adapter->msix_entries && msix_vector == 0)
438                         msixbm |= E1000_EIMS_OTHER;
439                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
440                 q_vector->eims_value = msixbm;
441                 break;
442         case e1000_82576:
443                 /* 82576 uses a table-based method for assigning vectors.
444                    Each queue has a single entry in the table to which we write
445                    a vector number along with a "valid" bit.  Sadly, the layout
446                    of the table is somewhat counterintuitive. */
447                 if (rx_queue > IGB_N0_QUEUE) {
448                         index = (rx_queue & 0x7);
449                         ivar = array_rd32(E1000_IVAR0, index);
450                         if (rx_queue < 8) {
451                                 /* vector goes into low byte of register */
452                                 ivar = ivar & 0xFFFFFF00;
453                                 ivar |= msix_vector | E1000_IVAR_VALID;
454                         } else {
455                                 /* vector goes into third byte of register */
456                                 ivar = ivar & 0xFF00FFFF;
457                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
458                         }
459                         array_wr32(E1000_IVAR0, index, ivar);
460                 }
461                 if (tx_queue > IGB_N0_QUEUE) {
462                         index = (tx_queue & 0x7);
463                         ivar = array_rd32(E1000_IVAR0, index);
464                         if (tx_queue < 8) {
465                                 /* vector goes into second byte of register */
466                                 ivar = ivar & 0xFFFF00FF;
467                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
468                         } else {
469                                 /* vector goes into high byte of register */
470                                 ivar = ivar & 0x00FFFFFF;
471                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
472                         }
473                         array_wr32(E1000_IVAR0, index, ivar);
474                 }
475                 q_vector->eims_value = 1 << msix_vector;
476                 break;
477         case e1000_82580:
478         case e1000_i350:
479                 /* 82580 uses the same table-based approach as 82576 but has fewer
480                    entries; as a result, pairs of queues share each IVAR entry. */
481                 if (rx_queue > IGB_N0_QUEUE) {
482                         index = (rx_queue >> 1);
483                         ivar = array_rd32(E1000_IVAR0, index);
484                         if (rx_queue & 0x1) {
485                                 /* vector goes into third byte of register */
486                                 ivar = ivar & 0xFF00FFFF;
487                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
488                         } else {
489                                 /* vector goes into low byte of register */
490                                 ivar = ivar & 0xFFFFFF00;
491                                 ivar |= msix_vector | E1000_IVAR_VALID;
492                         }
493                         array_wr32(E1000_IVAR0, index, ivar);
494                 }
495                 if (tx_queue > IGB_N0_QUEUE) {
496                         index = (tx_queue >> 1);
497                         ivar = array_rd32(E1000_IVAR0, index);
498                         if (tx_queue & 0x1) {
499                                 /* vector goes into high byte of register */
500                                 ivar = ivar & 0x00FFFFFF;
501                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
502                         } else {
503                                 /* vector goes into second byte of register */
504                                 ivar = ivar & 0xFFFF00FF;
505                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
506                         }
507                         array_wr32(E1000_IVAR0, index, ivar);
508                 }
509                 q_vector->eims_value = 1 << msix_vector;
510                 break;
511         default:
512                 BUG();
513                 break;
514         }
515
516         /* add q_vector eims value to global eims_enable_mask */
517         adapter->eims_enable_mask |= q_vector->eims_value;
518
519         /* configure q_vector to set itr on first interrupt */
520         q_vector->set_itr = 1;
521 }
522
523 /**
524  * igb_configure_msix - Configure MSI-X hardware
525  *
526  * igb_configure_msix sets up the hardware to properly
527  * generate MSI-X interrupts.
528  **/
529 static void igb_configure_msix(struct igb_adapter *adapter)
530 {
531         u32 tmp;
532         int i, vector = 0;
533         struct e1000_hw *hw = &adapter->hw;
534
535         adapter->eims_enable_mask = 0;
536
537         /* set vector for other causes, i.e. link changes */
538         switch (hw->mac.type) {
539         case e1000_82575:
540                 tmp = rd32(E1000_CTRL_EXT);
541                 /* enable MSI-X PBA support */
542                 tmp |= E1000_CTRL_EXT_PBA_CLR;
543
544                 /* Auto-Mask interrupts upon ICR read. */
545                 tmp |= E1000_CTRL_EXT_EIAME;
546                 tmp |= E1000_CTRL_EXT_IRCA;
547
548                 wr32(E1000_CTRL_EXT, tmp);
549
550                 /* enable msix_other interrupt */
551                 array_wr32(E1000_MSIXBM(0), vector++,
552                                       E1000_EIMS_OTHER);
553                 adapter->eims_other = E1000_EIMS_OTHER;
554
555                 break;
556
557         case e1000_82576:
558         case e1000_82580:
559         case e1000_i350:
560                 /* Turn on MSI-X capability first, or our settings
561                  * won't stick.  And it will take days to debug. */
562                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
563                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
564                                 E1000_GPIE_NSICR);
565
566                 /* enable msix_other interrupt */
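                /* the other-causes vector occupies byte 1 (bits 15:8) of
                 * IVAR_MISC, hence the shift by 8 below */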
567                 adapter->eims_other = 1 << vector;
568                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
569
570                 wr32(E1000_IVAR_MISC, tmp);
571                 break;
572         default:
573                 /* do nothing, since nothing else supports MSI-X */
574                 break;
575         } /* switch (hw->mac.type) */
576
577         adapter->eims_enable_mask |= adapter->eims_other;
578
579         for (i = 0; i < adapter->num_q_vectors; i++)
580                 igb_assign_vector(adapter->q_vector[i], vector++);
581
582         wrfl();
583 }
584
585 /**
586  * igb_request_msix - Initialize MSI-X interrupts
587  *
588  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
589  * kernel.
590  **/
591 static int igb_request_msix(struct igb_adapter *adapter)
592 {
593         struct net_device *netdev = adapter->netdev;
594         struct e1000_hw *hw = &adapter->hw;
595         int i, err = 0, vector = 0;
596
597         err = request_irq(adapter->msix_entries[vector].vector,
598                           igb_msix_other, 0, netdev->name, adapter);
599         if (err)
600                 goto out;
601         vector++;
602
603         for (i = 0; i < adapter->num_q_vectors; i++) {
604                 struct igb_q_vector *q_vector = adapter->q_vector[i];
605
606                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
607
608                 if (q_vector->rx_ring && q_vector->tx_ring)
609                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
610                                 q_vector->rx_ring->queue_index);
611                 else if (q_vector->tx_ring)
612                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
613                                 q_vector->tx_ring->queue_index);
614                 else if (q_vector->rx_ring)
615                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
616                                 q_vector->rx_ring->queue_index);
617                 else
618                         sprintf(q_vector->name, "%s-unused", netdev->name);
619
620                 err = request_irq(adapter->msix_entries[vector].vector,
621                                   igb_msix_ring, 0, q_vector->name,
622                                   q_vector);
623                 if (err)
624                         goto out;
625                 vector++;
626         }
627
628         igb_configure_msix(adapter);
629         return 0;
630 out:
631         return err;
632 }
633
634 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
635 {
636         if (adapter->msix_entries) {
637                 pci_disable_msix(adapter->pdev);
638                 kfree(adapter->msix_entries);
639                 adapter->msix_entries = NULL;
640         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
641                 pci_disable_msi(adapter->pdev);
642         }
643 }
644
645 /**
646  * igb_free_q_vectors - Free memory allocated for interrupt vectors
647  * @adapter: board private structure to initialize
648  *
649  * This function frees the memory allocated to the q_vectors.  In addition if
650  * NAPI is enabled it will delete any references to the NAPI struct prior
651  * to freeing the q_vector.
652  **/
653 static void igb_free_q_vectors(struct igb_adapter *adapter)
654 {
655         int v_idx;
656
657         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
658                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
659                 adapter->q_vector[v_idx] = NULL;
660                 if (!q_vector)
661                         continue;
662                 netif_napi_del(&q_vector->napi);
663                 kfree(q_vector);
664         }
665         adapter->num_q_vectors = 0;
666 }
667
668 /**
669  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
670  *
671  * This function resets the device so that it has 0 rx queues, tx queues, and
672  * MSI-X interrupts allocated.
673  */
674 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
675 {
676         igb_free_queues(adapter);
677         igb_free_q_vectors(adapter);
678         igb_reset_interrupt_capability(adapter);
679 }
680
681 /**
682  * igb_set_interrupt_capability - set MSI or MSI-X if supported
683  *
684  * Attempt to configure interrupts using the best available
685  * capabilities of the hardware and kernel.
686  **/
687 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
688 {
689         int err;
690         int numvecs, i;
691
692         /* Number of supported queues. */
693         adapter->num_rx_queues = adapter->rss_queues;
694         adapter->num_tx_queues = adapter->rss_queues;
695
696         /* start with one vector for every rx queue */
697         numvecs = adapter->num_rx_queues;
698
699         /* if tx handler is separate add 1 for every tx queue */
700         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
701                 numvecs += adapter->num_tx_queues;
702
703         /* store the number of vectors reserved for queues */
704         adapter->num_q_vectors = numvecs;
705
706         /* add 1 vector for link status interrupts */
707         numvecs++;
708         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
709                                         GFP_KERNEL);
710         if (!adapter->msix_entries)
711                 goto msi_only;
712
713         for (i = 0; i < numvecs; i++)
714                 adapter->msix_entries[i].entry = i;
715
716         err = pci_enable_msix(adapter->pdev,
717                               adapter->msix_entries,
718                               numvecs);
719         if (err == 0)
720                 goto out;
721
722         igb_reset_interrupt_capability(adapter);
723
724         /* If we can't do MSI-X, try MSI */
725 msi_only:
726 #ifdef CONFIG_PCI_IOV
727         /* disable SR-IOV for non MSI-X configurations */
728         if (adapter->vf_data) {
729                 struct e1000_hw *hw = &adapter->hw;
730                 /* disable iov and allow time for transactions to clear */
731                 pci_disable_sriov(adapter->pdev);
732                 msleep(500);
733
734                 kfree(adapter->vf_data);
735                 adapter->vf_data = NULL;
736                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
737                 msleep(100);
738                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
739         }
740 #endif
741         adapter->vfs_allocated_count = 0;
742         adapter->rss_queues = 1;
743         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
744         adapter->num_rx_queues = 1;
745         adapter->num_tx_queues = 1;
746         adapter->num_q_vectors = 1;
747         if (!pci_enable_msi(adapter->pdev))
748                 adapter->flags |= IGB_FLAG_HAS_MSI;
749 out:
750         /* Notify the stack of the (possibly) reduced Tx Queue count. */
751         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
752         return;
753 }
754
755 /**
756  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
757  * @adapter: board private structure to initialize
758  *
759  * We allocate one q_vector per queue interrupt.  If allocation fails we
760  * return -ENOMEM.
761  **/
762 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
763 {
764         struct igb_q_vector *q_vector;
765         struct e1000_hw *hw = &adapter->hw;
766         int v_idx;
767
768         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
769                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
770                 if (!q_vector)
771                         goto err_out;
772                 q_vector->adapter = adapter;
773                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
774                 q_vector->itr_val = IGB_START_ITR;
775                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
776                 adapter->q_vector[v_idx] = q_vector;
777         }
778         return 0;
779
780 err_out:
781         igb_free_q_vectors(adapter);
782         return -ENOMEM;
783 }
784
785 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
786                                       int ring_idx, int v_idx)
787 {
788         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
789
790         q_vector->rx_ring = adapter->rx_ring[ring_idx];
791         q_vector->rx_ring->q_vector = q_vector;
792         q_vector->itr_val = adapter->rx_itr_setting;
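        /* itr settings of 1-3 select a dynamic mode rather than an interval,
         * so seed the vector with the default starting value instead */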
793         if (q_vector->itr_val && q_vector->itr_val <= 3)
794                 q_vector->itr_val = IGB_START_ITR;
795 }
796
797 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
798                                       int ring_idx, int v_idx)
799 {
800         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
801
802         q_vector->tx_ring = adapter->tx_ring[ring_idx];
803         q_vector->tx_ring->q_vector = q_vector;
804         q_vector->itr_val = adapter->tx_itr_setting;
805         if (q_vector->itr_val && q_vector->itr_val <= 3)
806                 q_vector->itr_val = IGB_START_ITR;
807 }
808
809 /**
810  * igb_map_ring_to_vector - maps allocated queues to vectors
811  *
812  * This function maps the recently allocated queues to vectors.
813  **/
814 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
815 {
816         int i;
817         int v_idx = 0;
818
819         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
820             (adapter->num_q_vectors < adapter->num_tx_queues))
821                 return -ENOMEM;
822
823         if (adapter->num_q_vectors >=
824             (adapter->num_rx_queues + adapter->num_tx_queues)) {
825                 for (i = 0; i < adapter->num_rx_queues; i++)
826                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
827                 for (i = 0; i < adapter->num_tx_queues; i++)
828                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
829         } else {
830                 for (i = 0; i < adapter->num_rx_queues; i++) {
831                         if (i < adapter->num_tx_queues)
832                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
833                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
834                 }
835                 for (; i < adapter->num_tx_queues; i++)
836                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
837         }
838         return 0;
839 }
840
841 /**
842  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
843  *
844  * This function initializes the interrupts and allocates all of the queues.
845  **/
846 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
847 {
848         struct pci_dev *pdev = adapter->pdev;
849         int err;
850
851         igb_set_interrupt_capability(adapter);
852
853         err = igb_alloc_q_vectors(adapter);
854         if (err) {
855                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
856                 goto err_alloc_q_vectors;
857         }
858
859         err = igb_alloc_queues(adapter);
860         if (err) {
861                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
862                 goto err_alloc_queues;
863         }
864
865         err = igb_map_ring_to_vector(adapter);
866         if (err) {
867                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
868                 goto err_map_queues;
869         }
870
871
872         return 0;
873 err_map_queues:
874         igb_free_queues(adapter);
875 err_alloc_queues:
876         igb_free_q_vectors(adapter);
877 err_alloc_q_vectors:
878         igb_reset_interrupt_capability(adapter);
879         return err;
880 }
881
882 /**
883  * igb_request_irq - initialize interrupts
884  *
885  * Attempts to configure interrupts using the best available
886  * capabilities of the hardware and kernel.
887  **/
888 static int igb_request_irq(struct igb_adapter *adapter)
889 {
890         struct net_device *netdev = adapter->netdev;
891         struct pci_dev *pdev = adapter->pdev;
892         int err = 0;
893
894         if (adapter->msix_entries) {
895                 err = igb_request_msix(adapter);
896                 if (!err)
897                         goto request_done;
898                 /* fall back to MSI */
899                 igb_clear_interrupt_scheme(adapter);
900                 if (!pci_enable_msi(adapter->pdev))
901                         adapter->flags |= IGB_FLAG_HAS_MSI;
902                 igb_free_all_tx_resources(adapter);
903                 igb_free_all_rx_resources(adapter);
904                 adapter->num_tx_queues = 1;
905                 adapter->num_rx_queues = 1;
906                 adapter->num_q_vectors = 1;
907                 err = igb_alloc_q_vectors(adapter);
908                 if (err) {
909                         dev_err(&pdev->dev,
910                                 "Unable to allocate memory for vectors\n");
911                         goto request_done;
912                 }
913                 err = igb_alloc_queues(adapter);
914                 if (err) {
915                         dev_err(&pdev->dev,
916                                 "Unable to allocate memory for queues\n");
917                         igb_free_q_vectors(adapter);
918                         goto request_done;
919                 }
920                 igb_setup_all_tx_resources(adapter);
921                 igb_setup_all_rx_resources(adapter);
922         } else {
923                 igb_assign_vector(adapter->q_vector[0], 0);
924         }
925
926         if (adapter->flags & IGB_FLAG_HAS_MSI) {
927                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
928                                   netdev->name, adapter);
929                 if (!err)
930                         goto request_done;
931
932                 /* fall back to legacy interrupts */
933                 igb_reset_interrupt_capability(adapter);
934                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
935         }
936
937         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
938                           netdev->name, adapter);
939
940         if (err)
941                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
942                         err);
943
944 request_done:
945         return err;
946 }
947
948 static void igb_free_irq(struct igb_adapter *adapter)
949 {
950         if (adapter->msix_entries) {
951                 int vector = 0, i;
952
953                 free_irq(adapter->msix_entries[vector++].vector, adapter);
954
955                 for (i = 0; i < adapter->num_q_vectors; i++) {
956                         struct igb_q_vector *q_vector = adapter->q_vector[i];
957                         free_irq(adapter->msix_entries[vector++].vector,
958                                  q_vector);
959                 }
960         } else {
961                 free_irq(adapter->pdev->irq, adapter);
962         }
963 }
964
965 /**
966  * igb_irq_disable - Mask off interrupt generation on the NIC
967  * @adapter: board private structure
968  **/
969 static void igb_irq_disable(struct igb_adapter *adapter)
970 {
971         struct e1000_hw *hw = &adapter->hw;
972
973         /*
974          * we need to be careful when disabling interrupts.  The VFs are also
975          * mapped into these registers and so clearing the bits can cause
976          * issues on the VF drivers so we only need to clear what we set
977          */
978         if (adapter->msix_entries) {
979                 u32 regval = rd32(E1000_EIAM);
980                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
981                 wr32(E1000_EIMC, adapter->eims_enable_mask);
982                 regval = rd32(E1000_EIAC);
983                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
984         }
985
986         wr32(E1000_IAM, 0);
987         wr32(E1000_IMC, ~0);
988         wrfl();
989         synchronize_irq(adapter->pdev->irq);
990 }
991
992 /**
993  * igb_irq_enable - Enable default interrupt generation settings
994  * @adapter: board private structure
995  **/
996 static void igb_irq_enable(struct igb_adapter *adapter)
997 {
998         struct e1000_hw *hw = &adapter->hw;
999
1000         if (adapter->msix_entries) {
1001                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1002                 u32 regval = rd32(E1000_EIAC);
1003                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1004                 regval = rd32(E1000_EIAM);
1005                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1006                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1007                 if (adapter->vfs_allocated_count) {
1008                         wr32(E1000_MBVFIMR, 0xFF);
1009                         ims |= E1000_IMS_VMMB;
1010                 }
1011                 if (adapter->hw.mac.type == e1000_82580)
1012                         ims |= E1000_IMS_DRSTA;
1013
1014                 wr32(E1000_IMS, ims);
1015         } else {
1016                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1017                                 E1000_IMS_DRSTA);
1018                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1019                                 E1000_IMS_DRSTA);
1020         }
1021 }
1022
1023 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1024 {
1025         struct e1000_hw *hw = &adapter->hw;
1026         u16 vid = adapter->hw.mng_cookie.vlan_id;
1027         u16 old_vid = adapter->mng_vlan_id;
1028
1029         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1030                 /* add VID to filter table */
1031                 igb_vfta_set(hw, vid, true);
1032                 adapter->mng_vlan_id = vid;
1033         } else {
1034                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1035         }
1036
1037         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1038             (vid != old_vid) &&
1039             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1040                 /* remove VID from filter table */
1041                 igb_vfta_set(hw, old_vid, false);
1042         }
1043 }
1044
1045 /**
1046  * igb_release_hw_control - release control of the h/w to f/w
1047  * @adapter: address of board private structure
1048  *
1049  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1050  * For ASF and Pass Through versions of f/w this means that the
1051  * driver is no longer loaded.
1052  *
1053  **/
1054 static void igb_release_hw_control(struct igb_adapter *adapter)
1055 {
1056         struct e1000_hw *hw = &adapter->hw;
1057         u32 ctrl_ext;
1058
1059         /* Let firmware take over control of h/w */
1060         ctrl_ext = rd32(E1000_CTRL_EXT);
1061         wr32(E1000_CTRL_EXT,
1062                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1063 }
1064
1065 /**
1066  * igb_get_hw_control - get control of the h/w from f/w
1067  * @adapter: address of board private structure
1068  *
1069  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1070  * For ASF and Pass Through versions of f/w this means that
1071  * the driver is loaded.
1072  *
1073  **/
1074 static void igb_get_hw_control(struct igb_adapter *adapter)
1075 {
1076         struct e1000_hw *hw = &adapter->hw;
1077         u32 ctrl_ext;
1078
1079         /* Let firmware know the driver has taken over */
1080         ctrl_ext = rd32(E1000_CTRL_EXT);
1081         wr32(E1000_CTRL_EXT,
1082                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1083 }
1084
1085 /**
1086  * igb_configure - configure the hardware for RX and TX
1087  * @adapter: private board structure
1088  **/
1089 static void igb_configure(struct igb_adapter *adapter)
1090 {
1091         struct net_device *netdev = adapter->netdev;
1092         int i;
1093
1094         igb_get_hw_control(adapter);
1095         igb_set_rx_mode(netdev);
1096
1097         igb_restore_vlan(adapter);
1098
1099         igb_setup_tctl(adapter);
1100         igb_setup_mrqc(adapter);
1101         igb_setup_rctl(adapter);
1102
1103         igb_configure_tx(adapter);
1104         igb_configure_rx(adapter);
1105
1106         igb_rx_fifo_flush_82575(&adapter->hw);
1107
1108         /* call igb_desc_unused which always leaves
1109          * at least 1 descriptor unused to make sure
1110          * next_to_use != next_to_clean */
1111         for (i = 0; i < adapter->num_rx_queues; i++) {
1112                 struct igb_ring *ring = adapter->rx_ring[i];
1113                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1114         }
1115
1116
1117         adapter->tx_queue_len = netdev->tx_queue_len;
1118 }
1119
1120 /**
1121  * igb_power_up_link - Power up the phy/serdes link
1122  * @adapter: address of board private structure
1123  **/
1124 void igb_power_up_link(struct igb_adapter *adapter)
1125 {
1126         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1127                 igb_power_up_phy_copper(&adapter->hw);
1128         else
1129                 igb_power_up_serdes_link_82575(&adapter->hw);
1130 }
1131
1132 /**
1133  * igb_power_down_link - Power down the phy/serdes link
1134  * @adapter: address of board private structure
1135  */
1136 static void igb_power_down_link(struct igb_adapter *adapter)
1137 {
1138         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1139                 igb_power_down_phy_copper_82575(&adapter->hw);
1140         else
1141                 igb_shutdown_serdes_link_82575(&adapter->hw);
1142 }
1143
1144 /**
1145  * igb_up - Open the interface and prepare it to handle traffic
1146  * @adapter: board private structure
1147  **/
1148 int igb_up(struct igb_adapter *adapter)
1149 {
1150         struct e1000_hw *hw = &adapter->hw;
1151         int i;
1152
1153         /* hardware has been reset, we need to reload some things */
1154         igb_configure(adapter);
1155
1156         clear_bit(__IGB_DOWN, &adapter->state);
1157
1158         for (i = 0; i < adapter->num_q_vectors; i++) {
1159                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1160                 napi_enable(&q_vector->napi);
1161         }
1162         if (adapter->msix_entries)
1163                 igb_configure_msix(adapter);
1164         else
1165                 igb_assign_vector(adapter->q_vector[0], 0);
1166
1167         /* Clear any pending interrupts. */
1168         rd32(E1000_ICR);
1169         igb_irq_enable(adapter);
1170
1171         /* notify VFs that reset has been completed */
1172         if (adapter->vfs_allocated_count) {
1173                 u32 reg_data = rd32(E1000_CTRL_EXT);
1174                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1175                 wr32(E1000_CTRL_EXT, reg_data);
1176         }
1177
1178         netif_tx_start_all_queues(adapter->netdev);
1179
1180         /* start the watchdog. */
1181         hw->mac.get_link_status = 1;
1182         schedule_work(&adapter->watchdog_task);
1183
1184         return 0;
1185 }
1186
1187 void igb_down(struct igb_adapter *adapter)
1188 {
1189         struct net_device *netdev = adapter->netdev;
1190         struct e1000_hw *hw = &adapter->hw;
1191         u32 tctl, rctl;
1192         int i;
1193
1194         /* signal that we're down so the interrupt handler does not
1195          * reschedule our watchdog timer */
1196         set_bit(__IGB_DOWN, &adapter->state);
1197
1198         /* disable receives in the hardware */
1199         rctl = rd32(E1000_RCTL);
1200         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1201         /* flush and sleep below */
1202
1203         netif_tx_stop_all_queues(netdev);
1204
1205         /* disable transmits in the hardware */
1206         tctl = rd32(E1000_TCTL);
1207         tctl &= ~E1000_TCTL_EN;
1208         wr32(E1000_TCTL, tctl);
1209         /* flush both disables and wait for them to finish */
1210         wrfl();
1211         msleep(10);
1212
1213         for (i = 0; i < adapter->num_q_vectors; i++) {
1214                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1215                 napi_disable(&q_vector->napi);
1216         }
1217
1218         igb_irq_disable(adapter);
1219
1220         del_timer_sync(&adapter->watchdog_timer);
1221         del_timer_sync(&adapter->phy_info_timer);
1222
1223         netdev->tx_queue_len = adapter->tx_queue_len;
1224         netif_carrier_off(netdev);
1225
1226         /* record the stats before reset */
1227         igb_update_stats(adapter);
1228
1229         adapter->link_speed = 0;
1230         adapter->link_duplex = 0;
1231
1232         if (!pci_channel_offline(adapter->pdev))
1233                 igb_reset(adapter);
1234         igb_clean_all_tx_rings(adapter);
1235         igb_clean_all_rx_rings(adapter);
1236 #ifdef CONFIG_IGB_DCA
1237
1238         /* since we reset the hardware DCA settings were cleared */
1239         igb_setup_dca(adapter);
1240 #endif
1241 }
1242
1243 void igb_reinit_locked(struct igb_adapter *adapter)
1244 {
1245         WARN_ON(in_interrupt());
1246         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1247                 msleep(1);
1248         igb_down(adapter);
1249         igb_up(adapter);
1250         clear_bit(__IGB_RESETTING, &adapter->state);
1251 }
1252
1253 void igb_reset(struct igb_adapter *adapter)
1254 {
1255         struct pci_dev *pdev = adapter->pdev;
1256         struct e1000_hw *hw = &adapter->hw;
1257         struct e1000_mac_info *mac = &hw->mac;
1258         struct e1000_fc_info *fc = &hw->fc;
1259         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1260         u16 hwm;
1261
1262         /* Repartition the PBA for MTUs greater than 9K.
1263          * For this to take effect, CTRL.RST is required.
1264          */
1265         switch (mac->type) {
1266         case e1000_i350:
1267         case e1000_82580:
1268                 pba = rd32(E1000_RXPBS);
1269                 pba = igb_rxpbs_adjust_82580(pba);
1270                 break;
1271         case e1000_82576:
1272                 pba = rd32(E1000_RXPBS);
1273                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1274                 break;
1275         case e1000_82575:
1276         default:
1277                 pba = E1000_PBA_34K;
1278                 break;
1279         }
1280
1281         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1282             (mac->type < e1000_82576)) {
1283                 /* adjust PBA for jumbo frames */
1284                 wr32(E1000_PBA, pba);
1285
1286                 /* To maintain wire speed transmits, the Tx FIFO should be
1287                  * large enough to accommodate two full transmit packets,
1288                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1289                  * the Rx FIFO should be large enough to accommodate at least
1290                  * one full receive packet and is similarly rounded up and
1291                  * expressed in KB. */
1292                 pba = rd32(E1000_PBA);
1293                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1294                 tx_space = pba >> 16;
1295                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1296                 pba &= 0xffff;
1297                 /* the Tx FIFO also stores 16 bytes of descriptor information per
1298                  * packet, but don't include the Ethernet FCS because hardware appends it */
1299                 min_tx_space = (adapter->max_frame_size +
1300                                 sizeof(union e1000_adv_tx_desc) -
1301                                 ETH_FCS_LEN) * 2;
1302                 min_tx_space = ALIGN(min_tx_space, 1024);
1303                 min_tx_space >>= 10;
1304                 /* software strips receive CRC, so leave room for it */
1305                 min_rx_space = adapter->max_frame_size;
1306                 min_rx_space = ALIGN(min_rx_space, 1024);
1307                 min_rx_space >>= 10;
1308
1309                 /* If current Tx allocation is less than the min Tx FIFO size,
1310                  * and the min Tx FIFO size is less than the current Rx FIFO
1311                  * allocation, take space away from current Rx allocation */
1312                 if (tx_space < min_tx_space &&
1313                     ((min_tx_space - tx_space) < pba)) {
1314                         pba = pba - (min_tx_space - tx_space);
1315
1316                         /* if short on rx space, rx wins and must trump tx
1317                          * adjustment */
1318                         if (pba < min_rx_space)
1319                                 pba = min_rx_space;
1320                 }
1321                 wr32(E1000_PBA, pba);
1322         }
1323
1324         /* flow control settings */
1325         /* The high water mark must be low enough to fit one full frame
1326          * (or the size used for early receive) above it in the Rx FIFO.
1327          * Set it to the lower of:
1328          * - 90% of the Rx FIFO size, or
1329          * - the full Rx FIFO size minus one full frame */
1330         hwm = min(((pba << 10) * 9 / 10),
1331                         ((pba << 10) - 2 * adapter->max_frame_size));
1332
1333         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1334         fc->low_water = fc->high_water - 16;
1335         fc->pause_time = 0xFFFF;
1336         fc->send_xon = 1;
1337         fc->current_mode = fc->requested_mode;
1338
1339         /* disable receive for all VFs and wait one second */
1340         if (adapter->vfs_allocated_count) {
1341                 int i;
1342                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1343                         adapter->vf_data[i].flags = 0;
1344
1345                 /* ping all the active vfs to let them know we are going down */
1346                 igb_ping_all_vfs(adapter);
1347
1348                 /* disable transmits and receives */
1349                 wr32(E1000_VFRE, 0);
1350                 wr32(E1000_VFTE, 0);
1351         }
1352
1353         /* Allow time for pending master requests to run */
1354         hw->mac.ops.reset_hw(hw);
1355         wr32(E1000_WUC, 0);
1356
1357         if (hw->mac.ops.init_hw(hw))
1358                 dev_err(&pdev->dev, "Hardware Error\n");
1359
1360         if (hw->mac.type == e1000_82580) {
1361                 u32 reg = rd32(E1000_PCIEMISC);
1362                 wr32(E1000_PCIEMISC,
1363                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1364         }
1365         if (!netif_running(adapter->netdev))
1366                 igb_power_down_link(adapter);
1367
1368         igb_update_mng_vlan(adapter);
1369
1370         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1371         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1372
1373         igb_get_phy_info(hw);
1374 }
1375
1376 static const struct net_device_ops igb_netdev_ops = {
1377         .ndo_open               = igb_open,
1378         .ndo_stop               = igb_close,
1379         .ndo_start_xmit         = igb_xmit_frame_adv,
1380         .ndo_get_stats          = igb_get_stats,
1381         .ndo_set_rx_mode        = igb_set_rx_mode,
1382         .ndo_set_multicast_list = igb_set_rx_mode,
1383         .ndo_set_mac_address    = igb_set_mac,
1384         .ndo_change_mtu         = igb_change_mtu,
1385         .ndo_do_ioctl           = igb_ioctl,
1386         .ndo_tx_timeout         = igb_tx_timeout,
1387         .ndo_validate_addr      = eth_validate_addr,
1388         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1389         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1390         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1391         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1392         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1393         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1394         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1395 #ifdef CONFIG_NET_POLL_CONTROLLER
1396         .ndo_poll_controller    = igb_netpoll,
1397 #endif
1398 };
1399
1400 /**
1401  * igb_probe - Device Initialization Routine
1402  * @pdev: PCI device information struct
1403  * @ent: entry in igb_pci_tbl
1404  *
1405  * Returns 0 on success, negative on failure
1406  *
1407  * igb_probe initializes an adapter identified by a pci_dev structure.
1408  * The OS initialization, configuring of the adapter private structure,
1409  * and a hardware reset occur.
1410  **/
1411 static int __devinit igb_probe(struct pci_dev *pdev,
1412                                const struct pci_device_id *ent)
1413 {
1414         struct net_device *netdev;
1415         struct igb_adapter *adapter;
1416         struct e1000_hw *hw;
1417         u16 eeprom_data = 0;
1418         static int global_quad_port_a; /* global quad port a indication */
1419         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1420         unsigned long mmio_start, mmio_len;
1421         int err, pci_using_dac;
1422         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1423         u32 part_num;
1424
1425         err = pci_enable_device_mem(pdev);
1426         if (err)
1427                 return err;
1428
1429         pci_using_dac = 0;
1430         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1431         if (!err) {
1432                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1433                 if (!err)
1434                         pci_using_dac = 1;
1435         } else {
1436                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1437                 if (err) {
1438                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1439                         if (err) {
1440                                 dev_err(&pdev->dev, "No usable DMA "
1441                                         "configuration, aborting\n");
1442                                 goto err_dma;
1443                         }
1444                 }
1445         }
1446
1447         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1448                                            IORESOURCE_MEM),
1449                                            igb_driver_name);
1450         if (err)
1451                 goto err_pci_reg;
1452
1453         pci_enable_pcie_error_reporting(pdev);
1454
1455         pci_set_master(pdev);
1456         pci_save_state(pdev);
1457
1458         err = -ENOMEM;
1459         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1460                                    IGB_ABS_MAX_TX_QUEUES);
1461         if (!netdev)
1462                 goto err_alloc_etherdev;
1463
1464         SET_NETDEV_DEV(netdev, &pdev->dev);
1465
1466         pci_set_drvdata(pdev, netdev);
1467         adapter = netdev_priv(netdev);
1468         adapter->netdev = netdev;
1469         adapter->pdev = pdev;
1470         hw = &adapter->hw;
1471         hw->back = adapter;
1472         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1473
1474         mmio_start = pci_resource_start(pdev, 0);
1475         mmio_len = pci_resource_len(pdev, 0);
1476
1477         err = -EIO;
1478         hw->hw_addr = ioremap(mmio_start, mmio_len);
1479         if (!hw->hw_addr)
1480                 goto err_ioremap;
1481
1482         netdev->netdev_ops = &igb_netdev_ops;
1483         igb_set_ethtool_ops(netdev);
1484         netdev->watchdog_timeo = 5 * HZ;
1485
1486         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1487
1488         netdev->mem_start = mmio_start;
1489         netdev->mem_end = mmio_start + mmio_len;
1490
1491         /* PCI config space info */
1492         hw->vendor_id = pdev->vendor;
1493         hw->device_id = pdev->device;
1494         hw->revision_id = pdev->revision;
1495         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1496         hw->subsystem_device_id = pdev->subsystem_device;
1497
1498         /* Copy the default MAC, PHY and NVM function pointers */
1499         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1500         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1501         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1502         /* Initialize skew-specific constants */
1503         err = ei->get_invariants(hw);
1504         if (err)
1505                 goto err_sw_init;
1506
1507         /* setup the private structure */
1508         err = igb_sw_init(adapter);
1509         if (err)
1510                 goto err_sw_init;
1511
1512         igb_get_bus_info_pcie(hw);
1513
1514         hw->phy.autoneg_wait_to_complete = false;
1515
1516         /* Copper options */
1517         if (hw->phy.media_type == e1000_media_type_copper) {
1518                 hw->phy.mdix = AUTO_ALL_MODES;
1519                 hw->phy.disable_polarity_correction = false;
1520                 hw->phy.ms_type = e1000_ms_hw_default;
1521         }
1522
1523         if (igb_check_reset_block(hw))
1524                 dev_info(&pdev->dev,
1525                         "PHY reset is blocked due to SOL/IDER session.\n");
1526
1527         netdev->features = NETIF_F_SG |
1528                            NETIF_F_IP_CSUM |
1529                            NETIF_F_HW_VLAN_TX |
1530                            NETIF_F_HW_VLAN_RX |
1531                            NETIF_F_HW_VLAN_FILTER;
1532
1533         netdev->features |= NETIF_F_IPV6_CSUM;
1534         netdev->features |= NETIF_F_TSO;
1535         netdev->features |= NETIF_F_TSO6;
1536         netdev->features |= NETIF_F_GRO;
1537
1538         netdev->vlan_features |= NETIF_F_TSO;
1539         netdev->vlan_features |= NETIF_F_TSO6;
1540         netdev->vlan_features |= NETIF_F_IP_CSUM;
1541         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1542         netdev->vlan_features |= NETIF_F_SG;
1543
1544         if (pci_using_dac)
1545                 netdev->features |= NETIF_F_HIGHDMA;
1546
1547         if (hw->mac.type >= e1000_82576)
1548                 netdev->features |= NETIF_F_SCTP_CSUM;
1549
1550         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1551
1552         /* before reading the NVM, reset the controller to put the device in a
1553          * known good starting state */
1554         hw->mac.ops.reset_hw(hw);
1555
1556         /* make sure the NVM is good */
1557         if (igb_validate_nvm_checksum(hw) < 0) {
1558                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1559                 err = -EIO;
1560                 goto err_eeprom;
1561         }
1562
1563         /* copy the MAC address out of the NVM */
1564         if (hw->mac.ops.read_mac_addr(hw))
1565                 dev_err(&pdev->dev, "NVM Read Error\n");
1566
1567         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1568         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1569
1570         if (!is_valid_ether_addr(netdev->perm_addr)) {
1571                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1572                 err = -EIO;
1573                 goto err_eeprom;
1574         }
1575
1576         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1577                     (unsigned long) adapter);
1578         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1579                     (unsigned long) adapter);
1580
1581         INIT_WORK(&adapter->reset_task, igb_reset_task);
1582         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1583
1584         /* Initialize link properties that are user-changeable */
1585         adapter->fc_autoneg = true;
1586         hw->mac.autoneg = true;
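             /* 0x2f advertises 10/100 half and full duplex plus 1000 full
              * duplex (everything except 1000 half), per the e1000
              * ADVERTISE_* bit definitions. */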
1587         hw->phy.autoneg_advertised = 0x2f;
1588
1589         hw->fc.requested_mode = e1000_fc_default;
1590         hw->fc.current_mode = e1000_fc_default;
1591
1592         igb_validate_mdi_setting(hw);
1593
1594         /* Initial Wake on LAN setting. If APM wake is enabled in the EEPROM,
1595          * enable the ACPI Magic Packet filter
1596          */
1597
1598         if (hw->bus.func == 0)
1599                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1600         else if (hw->mac.type == e1000_82580)
1601                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1602                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1603                                  &eeprom_data);
1604         else if (hw->bus.func == 1)
1605                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1606
1607         if (eeprom_data & eeprom_apme_mask)
1608                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1609
1610         /* now that we have the eeprom settings, apply the special cases where
1611          * the eeprom may be wrong or the board simply won't support wake on
1612          * lan on a particular port */
1613         switch (pdev->device) {
1614         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1615                 adapter->eeprom_wol = 0;
1616                 break;
1617         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1618         case E1000_DEV_ID_82576_FIBER:
1619         case E1000_DEV_ID_82576_SERDES:
1620                 /* Wake events only supported on port A for dual fiber
1621                  * regardless of eeprom setting */
1622                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1623                         adapter->eeprom_wol = 0;
1624                 break;
1625         case E1000_DEV_ID_82576_QUAD_COPPER:
1626                 /* if quad port adapter, disable WoL on all but port A */
1627                 if (global_quad_port_a != 0)
1628                         adapter->eeprom_wol = 0;
1629                 else
1630                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1631                 /* Reset for multiple quad port adapters */
1632                 if (++global_quad_port_a == 4)
1633                         global_quad_port_a = 0;
1634                 break;
1635         }
1636
1637         /* initialize the wol settings based on the eeprom settings */
1638         adapter->wol = adapter->eeprom_wol;
1639         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1640
1641         /* reset the hardware with the new settings */
1642         igb_reset(adapter);
1643
1644         /* let the f/w know that the h/w is now under the control of the
1645          * driver. */
1646         igb_get_hw_control(adapter);
1647
1648         strcpy(netdev->name, "eth%d");
1649         err = register_netdev(netdev);
1650         if (err)
1651                 goto err_register;
1652
1653         /* carrier off reporting is important to ethtool even BEFORE open */
1654         netif_carrier_off(netdev);
1655
1656 #ifdef CONFIG_IGB_DCA
1657         if (dca_add_requester(&pdev->dev) == 0) {
1658                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1659                 dev_info(&pdev->dev, "DCA enabled\n");
1660                 igb_setup_dca(adapter);
1661         }
1662
1663 #endif
1664         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1665         /* print bus type/speed/width info */
1666         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1667                  netdev->name,
1668                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1669                                                             "unknown"),
1670                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1671                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1672                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1673                    "unknown"),
1674                  netdev->dev_addr);
1675
1676         igb_read_part_num(hw, &part_num);
1677         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1678                 (part_num >> 8), (part_num & 0xff));
1679
1680         dev_info(&pdev->dev,
1681                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1682                 adapter->msix_entries ? "MSI-X" :
1683                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1684                 adapter->num_rx_queues, adapter->num_tx_queues);
1685
1686         return 0;
1687
1688 err_register:
1689         igb_release_hw_control(adapter);
1690 err_eeprom:
1691         if (!igb_check_reset_block(hw))
1692                 igb_reset_phy(hw);
1693
1694         if (hw->flash_address)
1695                 iounmap(hw->flash_address);
1696 err_sw_init:
1697         igb_clear_interrupt_scheme(adapter);
1698         iounmap(hw->hw_addr);
1699 err_ioremap:
1700         free_netdev(netdev);
1701 err_alloc_etherdev:
1702         pci_release_selected_regions(pdev,
1703                                      pci_select_bars(pdev, IORESOURCE_MEM));
1704 err_pci_reg:
1705 err_dma:
1706         pci_disable_device(pdev);
1707         return err;
1708 }
1709
1710 /**
1711  * igb_remove - Device Removal Routine
1712  * @pdev: PCI device information struct
1713  *
1714  * igb_remove is called by the PCI subsystem to alert the driver
1715  * that it should release a PCI device.  This could be caused by a
1716  * Hot-Plug event, or because the driver is going to be removed from
1717  * memory.
1718  **/
1719 static void __devexit igb_remove(struct pci_dev *pdev)
1720 {
1721         struct net_device *netdev = pci_get_drvdata(pdev);
1722         struct igb_adapter *adapter = netdev_priv(netdev);
1723         struct e1000_hw *hw = &adapter->hw;
1724
1725         /* flush_scheduled_work() may reschedule our watchdog task, so
1726          * explicitly disable watchdog tasks from being rescheduled */
1727         set_bit(__IGB_DOWN, &adapter->state);
1728         del_timer_sync(&adapter->watchdog_timer);
1729         del_timer_sync(&adapter->phy_info_timer);
1730
1731         flush_scheduled_work();
1732
1733 #ifdef CONFIG_IGB_DCA
1734         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1735                 dev_info(&pdev->dev, "DCA disabled\n");
1736                 dca_remove_requester(&pdev->dev);
1737                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1738                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1739         }
1740 #endif
1741
1742         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1743          * would have already happened in close and is redundant. */
1744         igb_release_hw_control(adapter);
1745
1746         unregister_netdev(netdev);
1747
1748         igb_clear_interrupt_scheme(adapter);
1749
1750 #ifdef CONFIG_PCI_IOV
1751         /* reclaim resources allocated to VFs */
1752         if (adapter->vf_data) {
1753                 /* disable iov and allow time for transactions to clear */
1754                 pci_disable_sriov(pdev);
1755                 msleep(500);
1756
1757                 kfree(adapter->vf_data);
1758                 adapter->vf_data = NULL;
1759                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1760                 msleep(100);
1761                 dev_info(&pdev->dev, "IOV Disabled\n");
1762         }
1763 #endif
1764
1765         iounmap(hw->hw_addr);
1766         if (hw->flash_address)
1767                 iounmap(hw->flash_address);
1768         pci_release_selected_regions(pdev,
1769                                      pci_select_bars(pdev, IORESOURCE_MEM));
1770
1771         free_netdev(netdev);
1772
1773         pci_disable_pcie_error_reporting(pdev);
1774
1775         pci_disable_device(pdev);
1776 }
1777
1778 /**
1779  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1780  * @adapter: board private structure to initialize
1781  *
1782  * This function initializes the vf specific data storage and then attempts to
1783  * allocate the VFs.  The reason for this ordering is that it is much
1784  * more expensive time-wise to disable SR-IOV than it is to allocate and free
1785  * the memory for the VFs.
1786  **/
1787 static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
1788 {
1789 #ifdef CONFIG_PCI_IOV
1790         struct pci_dev *pdev = adapter->pdev;
1791
1792         if (adapter->vfs_allocated_count > 7)
1793                 adapter->vfs_allocated_count = 7;
1794
1795         if (adapter->vfs_allocated_count) {
1796                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1797                                            sizeof(struct vf_data_storage),
1798                                            GFP_KERNEL);
1799                 /* if allocation failed then we do not support SR-IOV */
1800                 if (!adapter->vf_data) {
1801                         adapter->vfs_allocated_count = 0;
1802                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
1803                                 "Data Storage\n");
1804                 }
1805         }
1806
1807         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1808                 kfree(adapter->vf_data);
1809                 adapter->vf_data = NULL;
1810 #endif /* CONFIG_PCI_IOV */
1811                 adapter->vfs_allocated_count = 0;
1812 #ifdef CONFIG_PCI_IOV
1813         } else {
1814                 unsigned char mac_addr[ETH_ALEN];
1815                 int i;
1816                 dev_info(&pdev->dev, "%d vfs allocated\n",
1817                          adapter->vfs_allocated_count);
1818                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1819                         random_ether_addr(mac_addr);
1820                         igb_set_vf_mac(adapter, i, mac_addr);
1821                 }
1822         }
1823 #endif /* CONFIG_PCI_IOV */
1824 }
1825
1826
1827 /**
1828  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
1829  * @adapter: board private structure to initialize
1830  *
1831  * igb_init_hw_timer initializes the function pointer and values for the hw
1832  * timer found in hardware.
1833  **/
1834 static void igb_init_hw_timer(struct igb_adapter *adapter)
1835 {
1836         struct e1000_hw *hw = &adapter->hw;
1837
1838         switch (hw->mac.type) {
1839         case e1000_i350:
1840         case e1000_82580:
1841                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1842                 adapter->cycles.read = igb_read_clock;
1843                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1844                 adapter->cycles.mult = 1;
1845                 /*
1846                  * The 82580 timesync updates the system timer in 8 ns increments
1847                  * and the value cannot be shifted.  Instead we need to shift
1848                  * the registers to generate a 64bit timer value.  As a result
1849                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
1850                  * 24 in order to generate a larger value for synchronization.
1851                  */
1852                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
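                     /* The timecounter core converts cycle deltas to ns as
                      * (delta * mult) >> shift, so with mult == 1 this shift
                      * divides back out the 24-bit register shift described
                      * above. */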
1853                 /* disable system timer temporarily by setting bit 31 */
1854                 wr32(E1000_TSAUXC, 0x80000000);
1855                 wrfl();
1856
1857                 /* Set registers so that rollover occurs soon to test this. */
1858                 wr32(E1000_SYSTIMR, 0x00000000);
1859                 wr32(E1000_SYSTIML, 0x80000000);
1860                 wr32(E1000_SYSTIMH, 0x000000FF);
1861                 wrfl();
1862
1863                 /* enable system timer by clearing bit 31 */
1864                 wr32(E1000_TSAUXC, 0x0);
1865                 wrfl();
1866
1867                 timecounter_init(&adapter->clock,
1868                                  &adapter->cycles,
1869                                  ktime_to_ns(ktime_get_real()));
1870                 /*
1871                  * Synchronize our NIC clock against system wall clock. NIC
1872                  * time stamp reading takes ~3us per sample, and each sample
1873                  * proved stable even under load, so 10 samples per offset
1874                  * comparison are sufficient.
1875                  */
1876                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1877                 adapter->compare.source = &adapter->clock;
1878                 adapter->compare.target = ktime_get_real;
1879                 adapter->compare.num_samples = 10;
1880                 timecompare_update(&adapter->compare, 0);
1881                 break;
1882         case e1000_82576:
1883                 /*
1884                  * Initialize hardware timer: we keep it running just in case
1885                  * that some program needs it later on.
1886                  */
1887                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1888                 adapter->cycles.read = igb_read_clock;
1889                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1890                 adapter->cycles.mult = 1;
1891                 /*
1892                  * Scale the NIC clock cycle by a large factor so that
1893                  * relatively small clock corrections can be added or
1894                  * subtracted at each clock tick. The drawbacks of a large
1895                  * factor are a) that the clock register overflows more quickly
1896                  * (not such a big deal) and b) that the increment per tick has
1897                  * to fit into 24 bits.  As a result we need to use a shift of
1898                  * 19 so we can fit a value of 16 into the TIMINCA register.
1899                  */
1900                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1901                 wr32(E1000_TIMINCA,
1902                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1903                                 (16 << IGB_82576_TSYNC_SHIFT));
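                     /* i.e. each 16 ns hardware tick adds 16 << 19 to SYSTIM,
                      * and cycles.shift == 19 divides it back out when the
                      * timecounter converts the count to nanoseconds. */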
1904
1905                 /* Set registers so that rollover occurs soon to test this. */
1906                 wr32(E1000_SYSTIML, 0x00000000);
1907                 wr32(E1000_SYSTIMH, 0xFF800000);
1908                 wrfl();
1909
1910                 timecounter_init(&adapter->clock,
1911                                  &adapter->cycles,
1912                                  ktime_to_ns(ktime_get_real()));
1913                 /*
1914                  * Synchronize our NIC clock against system wall clock. NIC
1915                  * time stamp reading takes ~3us per sample, and each sample
1916                  * proved stable even under load, so 10 samples per offset
1917                  * comparison are sufficient.
1918                  */
1919                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1920                 adapter->compare.source = &adapter->clock;
1921                 adapter->compare.target = ktime_get_real;
1922                 adapter->compare.num_samples = 10;
1923                 timecompare_update(&adapter->compare, 0);
1924                 break;
1925         case e1000_82575:
1926                 /* 82575 does not support timesync */
1927         default:
1928                 break;
1929         }
1930
1931 }
1932
1933 /**
1934  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1935  * @adapter: board private structure to initialize
1936  *
1937  * igb_sw_init initializes the Adapter private data structure.
1938  * Fields are initialized based on PCI device information and
1939  * OS network device settings (MTU size).
1940  **/
1941 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1942 {
1943         struct e1000_hw *hw = &adapter->hw;
1944         struct net_device *netdev = adapter->netdev;
1945         struct pci_dev *pdev = adapter->pdev;
1946
1947         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1948
1949         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1950         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1951         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1952         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1953
1954         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
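             /* e.g. the default 1500 byte MTU gives 1500 + 14 (ETH_HLEN) +
              * 4 (ETH_FCS_LEN) = 1518 bytes */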
1955         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1956
1957 #ifdef CONFIG_PCI_IOV
1958         if (hw->mac.type == e1000_82576)
1959                 adapter->vfs_allocated_count = max_vfs;
1960
1961 #endif /* CONFIG_PCI_IOV */
1962         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
1963
1964         /*
1965          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
1966          * then we should combine the queues into a queue pair in order to
1967          * conserve interrupts due to limited supply
1968          */
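             /* (a queue pair shares one q_vector, and so one MSI-X vector,
              *  between its Tx and Rx rings, roughly halving the number of
              *  vectors requested) */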
1969         if ((adapter->rss_queues > 4) ||
1970             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
1971                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1972
1973         /* This call may decrease the number of queues */
1974         if (igb_init_interrupt_scheme(adapter)) {
1975                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1976                 return -ENOMEM;
1977         }
1978
1979         igb_init_hw_timer(adapter);
1980         igb_probe_vfs(adapter);
1981
1982         /* Explicitly disable IRQ since the NIC can be in any state. */
1983         igb_irq_disable(adapter);
1984
1985         set_bit(__IGB_DOWN, &adapter->state);
1986         return 0;
1987 }
1988
1989 /**
1990  * igb_open - Called when a network interface is made active
1991  * @netdev: network interface device structure
1992  *
1993  * Returns 0 on success, negative value on failure
1994  *
1995  * The open entry point is called when a network interface is made
1996  * active by the system (IFF_UP).  At this point all resources needed
1997  * for transmit and receive operations are allocated, the interrupt
1998  * handler is registered with the OS, the watchdog timer is started,
1999  * and the stack is notified that the interface is ready.
2000  **/
2001 static int igb_open(struct net_device *netdev)
2002 {
2003         struct igb_adapter *adapter = netdev_priv(netdev);
2004         struct e1000_hw *hw = &adapter->hw;
2005         int err;
2006         int i;
2007
2008         /* disallow open during test */
2009         if (test_bit(__IGB_TESTING, &adapter->state))
2010                 return -EBUSY;
2011
2012         netif_carrier_off(netdev);
2013
2014         /* allocate transmit descriptors */
2015         err = igb_setup_all_tx_resources(adapter);
2016         if (err)
2017                 goto err_setup_tx;
2018
2019         /* allocate receive descriptors */
2020         err = igb_setup_all_rx_resources(adapter);
2021         if (err)
2022                 goto err_setup_rx;
2023
2024         igb_power_up_link(adapter);
2025
2026         /* before we allocate an interrupt, we must be ready to handle it.
2027          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2028          * as soon as we call request_irq, so we have to set up our
2029          * clean_rx handler before we do so.  */
2030         igb_configure(adapter);
2031
2032         err = igb_request_irq(adapter);
2033         if (err)
2034                 goto err_req_irq;
2035
2036         /* From here on the code is the same as igb_up() */
2037         clear_bit(__IGB_DOWN, &adapter->state);
2038
2039         for (i = 0; i < adapter->num_q_vectors; i++) {
2040                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2041                 napi_enable(&q_vector->napi);
2042         }
2043
2044         /* Clear any pending interrupts. */
2045         rd32(E1000_ICR);
2046
2047         igb_irq_enable(adapter);
2048
2049         /* notify VFs that reset has been completed */
2050         if (adapter->vfs_allocated_count) {
2051                 u32 reg_data = rd32(E1000_CTRL_EXT);
2052                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2053                 wr32(E1000_CTRL_EXT, reg_data);
2054         }
2055
2056         netif_tx_start_all_queues(netdev);
2057
2058         /* start the watchdog. */
2059         hw->mac.get_link_status = 1;
2060         schedule_work(&adapter->watchdog_task);
2061
2062         return 0;
2063
2064 err_req_irq:
2065         igb_release_hw_control(adapter);
2066         igb_power_down_link(adapter);
2067         igb_free_all_rx_resources(adapter);
2068 err_setup_rx:
2069         igb_free_all_tx_resources(adapter);
2070 err_setup_tx:
2071         igb_reset(adapter);
2072
2073         return err;
2074 }
2075
2076 /**
2077  * igb_close - Disables a network interface
2078  * @netdev: network interface device structure
2079  *
2080  * Returns 0, this is not allowed to fail
2081  *
2082  * The close entry point is called when an interface is de-activated
2083  * by the OS.  The hardware is still under the driver's control, but
2084  * needs to be disabled.  A global MAC reset is issued to stop the
2085  * hardware, and all transmit and receive resources are freed.
2086  **/
2087 static int igb_close(struct net_device *netdev)
2088 {
2089         struct igb_adapter *adapter = netdev_priv(netdev);
2090
2091         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2092         igb_down(adapter);
2093
2094         igb_free_irq(adapter);
2095
2096         igb_free_all_tx_resources(adapter);
2097         igb_free_all_rx_resources(adapter);
2098
2099         return 0;
2100 }
2101
2102 /**
2103  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2104  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2105  *
2106  * Return 0 on success, negative on failure
2107  **/
2108 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2109 {
2110         struct pci_dev *pdev = tx_ring->pdev;
2111         int size;
2112
2113         size = sizeof(struct igb_buffer) * tx_ring->count;
2114         tx_ring->buffer_info = vmalloc(size);
2115         if (!tx_ring->buffer_info)
2116                 goto err;
2117         memset(tx_ring->buffer_info, 0, size);
2118
2119         /* round up to nearest 4K */
2120         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2121         tx_ring->size = ALIGN(tx_ring->size, 4096);
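             /* e.g. with the default IGB_DEFAULT_TXD of 256 descriptors at
              * 16 bytes per advanced Tx descriptor this is 4096 bytes, which
              * is already 4K aligned */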
2122
2123         tx_ring->desc = pci_alloc_consistent(pdev,
2124                                              tx_ring->size,
2125                                              &tx_ring->dma);
2126
2127         if (!tx_ring->desc)
2128                 goto err;
2129
2130         tx_ring->next_to_use = 0;
2131         tx_ring->next_to_clean = 0;
2132         return 0;
2133
2134 err:
2135         vfree(tx_ring->buffer_info);
2136         dev_err(&pdev->dev,
2137                 "Unable to allocate memory for the transmit descriptor ring\n");
2138         return -ENOMEM;
2139 }
2140
2141 /**
2142  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2143  *                                (Descriptors) for all queues
2144  * @adapter: board private structure
2145  *
2146  * Return 0 on success, negative on failure
2147  **/
2148 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2149 {
2150         struct pci_dev *pdev = adapter->pdev;
2151         int i, err = 0;
2152
2153         for (i = 0; i < adapter->num_tx_queues; i++) {
2154                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2155                 if (err) {
2156                         dev_err(&pdev->dev,
2157                                 "Allocation for Tx Queue %u failed\n", i);
2158                         for (i--; i >= 0; i--)
2159                                 igb_free_tx_resources(adapter->tx_ring[i]);
2160                         break;
2161                 }
2162         }
2163
2164         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2165                 int r_idx = i % adapter->num_tx_queues;
2166                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2167         }
2168         return err;
2169 }
2170
2171 /**
2172  * igb_setup_tctl - configure the transmit control registers
2173  * @adapter: Board private structure
2174  **/
2175 void igb_setup_tctl(struct igb_adapter *adapter)
2176 {
2177         struct e1000_hw *hw = &adapter->hw;
2178         u32 tctl;
2179
2180         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2181         wr32(E1000_TXDCTL(0), 0);
2182
2183         /* Program the Transmit Control Register */
2184         tctl = rd32(E1000_TCTL);
2185         tctl &= ~E1000_TCTL_CT;
2186         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2187                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2188
2189         igb_config_collision_dist(hw);
2190
2191         /* Enable transmits */
2192         tctl |= E1000_TCTL_EN;
2193
2194         wr32(E1000_TCTL, tctl);
2195 }
2196
2197 /**
2198  * igb_configure_tx_ring - Configure transmit ring after Reset
2199  * @adapter: board private structure
2200  * @ring: tx ring to configure
2201  *
2202  * Configure a transmit ring after a reset.
2203  **/
2204 void igb_configure_tx_ring(struct igb_adapter *adapter,
2205                            struct igb_ring *ring)
2206 {
2207         struct e1000_hw *hw = &adapter->hw;
2208         u32 txdctl;
2209         u64 tdba = ring->dma;
2210         int reg_idx = ring->reg_idx;
2211
2212         /* disable the queue */
2213         txdctl = rd32(E1000_TXDCTL(reg_idx));
2214         wr32(E1000_TXDCTL(reg_idx),
2215                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2216         wrfl();
2217         mdelay(10);
2218
2219         wr32(E1000_TDLEN(reg_idx),
2220                         ring->count * sizeof(union e1000_adv_tx_desc));
2221         wr32(E1000_TDBAL(reg_idx),
2222                         tdba & 0x00000000ffffffffULL);
2223         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2224
2225         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2226         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2227         writel(0, ring->head);
2228         writel(0, ring->tail);
2229
2230         txdctl |= IGB_TX_PTHRESH;
2231         txdctl |= IGB_TX_HTHRESH << 8;
2232         txdctl |= IGB_TX_WTHRESH << 16;
2233
2234         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2235         wr32(E1000_TXDCTL(reg_idx), txdctl);
2236 }
2237
2238 /**
2239  * igb_configure_tx - Configure transmit Unit after Reset
2240  * @adapter: board private structure
2241  *
2242  * Configure the Tx unit of the MAC after a reset.
2243  **/
2244 static void igb_configure_tx(struct igb_adapter *adapter)
2245 {
2246         int i;
2247
2248         for (i = 0; i < adapter->num_tx_queues; i++)
2249                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2250 }
2251
2252 /**
2253  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2254  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2255  *
2256  * Returns 0 on success, negative on failure
2257  **/
2258 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2259 {
2260         struct pci_dev *pdev = rx_ring->pdev;
2261         int size, desc_len;
2262
2263         size = sizeof(struct igb_buffer) * rx_ring->count;
2264         rx_ring->buffer_info = vmalloc(size);
2265         if (!rx_ring->buffer_info)
2266                 goto err;
2267         memset(rx_ring->buffer_info, 0, size);
2268
2269         desc_len = sizeof(union e1000_adv_rx_desc);
2270
2271         /* Round up to nearest 4K */
2272         rx_ring->size = rx_ring->count * desc_len;
2273         rx_ring->size = ALIGN(rx_ring->size, 4096);
2274
2275         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2276                                              &rx_ring->dma);
2277
2278         if (!rx_ring->desc)
2279                 goto err;
2280
2281         rx_ring->next_to_clean = 0;
2282         rx_ring->next_to_use = 0;
2283
2284         return 0;
2285
2286 err:
2287         vfree(rx_ring->buffer_info);
2288         rx_ring->buffer_info = NULL;
2289         dev_err(&pdev->dev, "Unable to allocate memory for "
2290                 "the receive descriptor ring\n");
2291         return -ENOMEM;
2292 }
2293
2294 /**
2295  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2296  *                                (Descriptors) for all queues
2297  * @adapter: board private structure
2298  *
2299  * Return 0 on success, negative on failure
2300  **/
2301 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2302 {
2303         struct pci_dev *pdev = adapter->pdev;
2304         int i, err = 0;
2305
2306         for (i = 0; i < adapter->num_rx_queues; i++) {
2307                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2308                 if (err) {
2309                         dev_err(&pdev->dev,
2310                                 "Allocation for Rx Queue %u failed\n", i);
2311                         for (i--; i >= 0; i--)
2312                                 igb_free_rx_resources(adapter->rx_ring[i]);
2313                         break;
2314                 }
2315         }
2316
2317         return err;
2318 }
2319
2320 /**
2321  * igb_setup_mrqc - configure the multiple receive queue control registers
2322  * @adapter: Board private structure
2323  **/
2324 static void igb_setup_mrqc(struct igb_adapter *adapter)
2325 {
2326         struct e1000_hw *hw = &adapter->hw;
2327         u32 mrqc, rxcsum;
2328         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2329         union e1000_reta {
2330                 u32 dword;
2331                 u8  bytes[4];
2332         } reta;
2333         static const u8 rsshash[40] = {
2334                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2335                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2336                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2337                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2338
2339         /* Fill out hash function seeds */
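             /* (the 40-byte key is written four bytes at a time, low byte
              *  first, into the ten RSSRK registers) */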
2340         for (j = 0; j < 10; j++) {
2341                 u32 rsskey = rsshash[(j * 4)];
2342                 rsskey |= rsshash[(j * 4) + 1] << 8;
2343                 rsskey |= rsshash[(j * 4) + 2] << 16;
2344                 rsskey |= rsshash[(j * 4) + 3] << 24;
2345                 array_wr32(E1000_RSSRK(0), j, rsskey);
2346         }
2347
2348         num_rx_queues = adapter->rss_queues;
2349
2350         if (adapter->vfs_allocated_count) {
2351                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2352                 switch (hw->mac.type) {
2353                 case e1000_i350:
2354                 case e1000_82580:
2355                         num_rx_queues = 1;
2356                         shift = 0;
2357                         break;
2358                 case e1000_82576:
2359                         shift = 3;
2360                         num_rx_queues = 2;
2361                         break;
2362                 case e1000_82575:
2363                         shift = 2;
2364                         shift2 = 6;
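                             /* fall through */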
2365                 default:
2366                         break;
2367                 }
2368         } else {
2369                 if (hw->mac.type == e1000_82575)
2370                         shift = 6;
2371         }
2372
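             /* e.g. with num_rx_queues == 4 and shift == 0 the 128-entry
              * redirection table repeats the pattern 0,1,2,3, spreading the
              * RSS hash evenly across the four queues */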
2373         for (j = 0; j < (32 * 4); j++) {
2374                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2375                 if (shift2)
2376                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2377                 if ((j & 3) == 3)
2378                         wr32(E1000_RETA(j >> 2), reta.dword);
2379         }
2380
2381         /*
2382          * Disable raw packet checksumming so that RSS hash is placed in
2383          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2384          * offloads as they are enabled by default
2385          */
2386         rxcsum = rd32(E1000_RXCSUM);
2387         rxcsum |= E1000_RXCSUM_PCSD;
2388
2389         if (adapter->hw.mac.type >= e1000_82576)
2390                 /* Enable Receive Checksum Offload for SCTP */
2391                 rxcsum |= E1000_RXCSUM_CRCOFL;
2392
2393         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2394         wr32(E1000_RXCSUM, rxcsum);
2395
2396         /* If VMDq is enabled then we set the appropriate mode for that, else
2397          * we default to RSS so that an RSS hash is calculated per packet even
2398          * if we are only using one queue */
2399         if (adapter->vfs_allocated_count) {
2400                 if (hw->mac.type > e1000_82575) {
2401                         /* Set the default pool for the PF's first queue */
2402                         u32 vtctl = rd32(E1000_VT_CTL);
2403                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2404                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2405                         vtctl |= adapter->vfs_allocated_count <<
2406                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2407                         wr32(E1000_VT_CTL, vtctl);
2408                 }
2409                 if (adapter->rss_queues > 1)
2410                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2411                 else
2412                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2413         } else {
2414                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2415         }
2416         igb_vmm_control(adapter);
2417
2418         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2419                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2420         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2421                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2422         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2423                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2424         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2425                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2426
2427         wr32(E1000_MRQC, mrqc);
2428 }
2429
2430 /**
2431  * igb_setup_rctl - configure the receive control registers
2432  * @adapter: Board private structure
2433  **/
2434 void igb_setup_rctl(struct igb_adapter *adapter)
2435 {
2436         struct e1000_hw *hw = &adapter->hw;
2437         u32 rctl;
2438
2439         rctl = rd32(E1000_RCTL);
2440
2441         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2442         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2443
2444         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2445                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2446
2447         /*
2448          * enable stripping of CRC. It's unlikely this will break BMC
2449          * redirection as it did with e1000. Newer features require
2450          * that the HW strips the CRC.
2451          */
2452         rctl |= E1000_RCTL_SECRC;
2453
2454         /* disable store bad packets and clear size bits. */
2455         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2456
2457         /* enable LPE to prevent packets larger than max_frame_size */
2458         rctl |= E1000_RCTL_LPE;
2459
2460         /* disable queue 0 to prevent tail write w/o re-config */
2461         wr32(E1000_RXDCTL(0), 0);
2462
2463         /* Attention!!!  For SR-IOV PF driver operations you must enable
2464          * queue drop for all VF and PF queues to prevent head of line blocking
2465          * if an un-trusted VF does not provide descriptors to hardware.
2466          */
2467         if (adapter->vfs_allocated_count) {
2468                 /* set all queue drop enable bits */
2469                 wr32(E1000_QDE, ALL_QUEUES);
2470         }
2471
2472         wr32(E1000_RCTL, rctl);
2473 }
2474
2475 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2476                                    int vfn)
2477 {
2478         struct e1000_hw *hw = &adapter->hw;
2479         u32 vmolr;
2480
2481         /* if it isn't the PF, check to see if VFs are enabled and
2482          * increase the size to support VLAN tags */
2483         if (vfn < adapter->vfs_allocated_count &&
2484             adapter->vf_data[vfn].vlans_enabled)
2485                 size += VLAN_TAG_SIZE;
2486
2487         vmolr = rd32(E1000_VMOLR(vfn));
2488         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2489         vmolr |= size | E1000_VMOLR_LPE;
2490         wr32(E1000_VMOLR(vfn), vmolr);
2491
2492         return 0;
2493 }
2494
2495 /**
2496  * igb_rlpml_set - set maximum receive packet size
2497  * @adapter: board private structure
2498  *
2499  * Configure maximum receivable packet size.
2500  **/
2501 static void igb_rlpml_set(struct igb_adapter *adapter)
2502 {
2503         u32 max_frame_size = adapter->max_frame_size;
2504         struct e1000_hw *hw = &adapter->hw;
2505         u16 pf_id = adapter->vfs_allocated_count;
2506
2507         if (adapter->vlgrp)
2508                 max_frame_size += VLAN_TAG_SIZE;
2509
2510         /* if VFs are enabled, set RLPML to the largest possible request
2511          * size and set the VMOLR RLPML to the size we actually need */
2512         if (pf_id) {
2513                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2514                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2515         }
2516
2517         wr32(E1000_RLPML, max_frame_size);
2518 }
2519
2520 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2521                                  int vfn, bool aupe)
2522 {
2523         struct e1000_hw *hw = &adapter->hw;
2524         u32 vmolr;
2525
2526         /*
2527          * This register exists only on 82576 and newer so if we are older then
2528          * we should exit and do nothing
2529          */
2530         if (hw->mac.type < e1000_82576)
2531                 return;
2532
2533         vmolr = rd32(E1000_VMOLR(vfn));
2534         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2535         if (aupe)
2536                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2537         else
2538                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2539
2540         /* clear all bits that might not be set */
2541         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2542
2543         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2544                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2545         /*
2546          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2547          * multicast packets
2548          */
2549         if (vfn <= adapter->vfs_allocated_count)
2550                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2551
2552         wr32(E1000_VMOLR(vfn), vmolr);
2553 }
2554
2555 /**
2556  * igb_configure_rx_ring - Configure a receive ring after Reset
2557  * @adapter: board private structure
2558  * @ring: receive ring to be configured
2559  *
2560  * Configure the Rx unit of the MAC after a reset.
2561  **/
2562 void igb_configure_rx_ring(struct igb_adapter *adapter,
2563                            struct igb_ring *ring)
2564 {
2565         struct e1000_hw *hw = &adapter->hw;
2566         u64 rdba = ring->dma;
2567         int reg_idx = ring->reg_idx;
2568         u32 srrctl, rxdctl;
2569
2570         /* disable the queue */
2571         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2572         wr32(E1000_RXDCTL(reg_idx),
2573                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2574
2575         /* Set DMA base address registers */
2576         wr32(E1000_RDBAL(reg_idx),
2577              rdba & 0x00000000ffffffffULL);
2578         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2579         wr32(E1000_RDLEN(reg_idx),
2580                        ring->count * sizeof(union e1000_adv_rx_desc));
2581
2582         /* initialize head and tail */
2583         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2584         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2585         writel(0, ring->head);
2586         writel(0, ring->tail);
2587
2588         /* set descriptor configuration */
2589         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2590                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2591                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2592 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2593                 srrctl |= IGB_RXBUFFER_16384 >>
2594                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2595 #else
2596                 srrctl |= (PAGE_SIZE / 2) >>
2597                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2598 #endif
2599                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2600         } else {
2601                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2602                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2603                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2604         }
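             /* In the header-split path above the packet buffer is half a
              * page (2 KB with 4 KB pages), capped at IGB_RXBUFFER_16384 for
              * larger page sizes. */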
2605         /* Only set Drop Enable if we are supporting multiple queues */
2606         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2607                 srrctl |= E1000_SRRCTL_DROP_EN;
2608
2609         wr32(E1000_SRRCTL(reg_idx), srrctl);
2610
2611         /* set filtering for VMDQ pools */
2612         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2613
2614         /* enable receive descriptor fetching */
2615         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2616         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2617         rxdctl &= 0xFFF00000;
2618         rxdctl |= IGB_RX_PTHRESH;
2619         rxdctl |= IGB_RX_HTHRESH << 8;
2620         rxdctl |= IGB_RX_WTHRESH << 16;
2621         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2622 }
2623
2624 /**
2625  * igb_configure_rx - Configure receive Unit after Reset
2626  * @adapter: board private structure
2627  *
2628  * Configure the Rx unit of the MAC after a reset.
2629  **/
2630 static void igb_configure_rx(struct igb_adapter *adapter)
2631 {
2632         int i;
2633
2634         /* set UTA to appropriate mode */
2635         igb_set_uta(adapter);
2636
2637         /* set the correct pool for the PF default MAC address in entry 0 */
2638         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2639                          adapter->vfs_allocated_count);
2640
2641         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2642          * the Base and Length of the Rx Descriptor Ring */
2643         for (i = 0; i < adapter->num_rx_queues; i++)
2644                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2645 }
2646
2647 /**
2648  * igb_free_tx_resources - Free Tx Resources per Queue
2649  * @tx_ring: Tx descriptor ring for a specific queue
2650  *
2651  * Free all transmit software resources
2652  **/
2653 void igb_free_tx_resources(struct igb_ring *tx_ring)
2654 {
2655         igb_clean_tx_ring(tx_ring);
2656
2657         vfree(tx_ring->buffer_info);
2658         tx_ring->buffer_info = NULL;
2659
2660         /* if not set, then don't free */
2661         if (!tx_ring->desc)
2662                 return;
2663
2664         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2665                             tx_ring->desc, tx_ring->dma);
2666
2667         tx_ring->desc = NULL;
2668 }
2669
2670 /**
2671  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2672  * @adapter: board private structure
2673  *
2674  * Free all transmit software resources
2675  **/
2676 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2677 {
2678         int i;
2679
2680         for (i = 0; i < adapter->num_tx_queues; i++)
2681                 igb_free_tx_resources(adapter->tx_ring[i]);
2682 }
2683
2684 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2685                                     struct igb_buffer *buffer_info)
2686 {
2687         if (buffer_info->dma) {
2688                 if (buffer_info->mapped_as_page)
2689                         pci_unmap_page(tx_ring->pdev,
2690                                         buffer_info->dma,
2691                                         buffer_info->length,
2692                                         PCI_DMA_TODEVICE);
2693                 else
2694                         pci_unmap_single(tx_ring->pdev,
2695                                         buffer_info->dma,
2696                                         buffer_info->length,
2697                                         PCI_DMA_TODEVICE);
2698                 buffer_info->dma = 0;
2699         }
2700         if (buffer_info->skb) {
2701                 dev_kfree_skb_any(buffer_info->skb);
2702                 buffer_info->skb = NULL;
2703         }
2704         buffer_info->time_stamp = 0;
2705         buffer_info->length = 0;
2706         buffer_info->next_to_watch = 0;
2707         buffer_info->mapped_as_page = false;
2708 }
2709
2710 /**
2711  * igb_clean_tx_ring - Free Tx Buffers
2712  * @tx_ring: ring to be cleaned
2713  **/
2714 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2715 {
2716         struct igb_buffer *buffer_info;
2717         unsigned long size;
2718         unsigned int i;
2719
2720         if (!tx_ring->buffer_info)
2721                 return;
2722         /* Free all the Tx ring sk_buffs */
2723
2724         for (i = 0; i < tx_ring->count; i++) {
2725                 buffer_info = &tx_ring->buffer_info[i];
2726                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2727         }
2728
2729         size = sizeof(struct igb_buffer) * tx_ring->count;
2730         memset(tx_ring->buffer_info, 0, size);
2731
2732         /* Zero out the descriptor ring */
2733         memset(tx_ring->desc, 0, tx_ring->size);
2734
2735         tx_ring->next_to_use = 0;
2736         tx_ring->next_to_clean = 0;
2737 }
2738
2739 /**
2740  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2741  * @adapter: board private structure
2742  **/
2743 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2744 {
2745         int i;
2746
2747         for (i = 0; i < adapter->num_tx_queues; i++)
2748                 igb_clean_tx_ring(adapter->tx_ring[i]);
2749 }
2750
2751 /**
2752  * igb_free_rx_resources - Free Rx Resources
2753  * @rx_ring: ring to clean the resources from
2754  *
2755  * Free all receive software resources
2756  **/
2757 void igb_free_rx_resources(struct igb_ring *rx_ring)
2758 {
2759         igb_clean_rx_ring(rx_ring);
2760
2761         vfree(rx_ring->buffer_info);
2762         rx_ring->buffer_info = NULL;
2763
2764         /* if not set, then don't free */
2765         if (!rx_ring->desc)
2766                 return;
2767
2768         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2769                             rx_ring->desc, rx_ring->dma);
2770
2771         rx_ring->desc = NULL;
2772 }
2773
2774 /**
2775  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2776  * @adapter: board private structure
2777  *
2778  * Free all receive software resources
2779  **/
2780 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2781 {
2782         int i;
2783
2784         for (i = 0; i < adapter->num_rx_queues; i++)
2785                 igb_free_rx_resources(adapter->rx_ring[i]);
2786 }
2787
2788 /**
2789  * igb_clean_rx_ring - Free Rx Buffers per Queue
2790  * @rx_ring: ring to free buffers from
2791  **/
2792 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2793 {
2794         struct igb_buffer *buffer_info;
2795         unsigned long size;
2796         unsigned int i;
2797
2798         if (!rx_ring->buffer_info)
2799                 return;
2800
2801         /* Free all the Rx ring sk_buffs */
2802         for (i = 0; i < rx_ring->count; i++) {
2803                 buffer_info = &rx_ring->buffer_info[i];
2804                 if (buffer_info->dma) {
2805                         pci_unmap_single(rx_ring->pdev,
2806                                          buffer_info->dma,
2807                                          rx_ring->rx_buffer_len,
2808                                          PCI_DMA_FROMDEVICE);
2809                         buffer_info->dma = 0;
2810                 }
2811
2812                 if (buffer_info->skb) {
2813                         dev_kfree_skb(buffer_info->skb);
2814                         buffer_info->skb = NULL;
2815                 }
2816                 if (buffer_info->page_dma) {
2817                         pci_unmap_page(rx_ring->pdev,
2818                                        buffer_info->page_dma,
2819                                        PAGE_SIZE / 2,
2820                                        PCI_DMA_FROMDEVICE);
2821                         buffer_info->page_dma = 0;
2822                 }
2823                 if (buffer_info->page) {
2824                         put_page(buffer_info->page);
2825                         buffer_info->page = NULL;
2826                         buffer_info->page_offset = 0;
2827                 }
2828         }
2829
2830         size = sizeof(struct igb_buffer) * rx_ring->count;
2831         memset(rx_ring->buffer_info, 0, size);
2832
2833         /* Zero out the descriptor ring */
2834         memset(rx_ring->desc, 0, rx_ring->size);
2835
2836         rx_ring->next_to_clean = 0;
2837         rx_ring->next_to_use = 0;
2838 }
2839
2840 /**
2841  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2842  * @adapter: board private structure
2843  **/
2844 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2845 {
2846         int i;
2847
2848         for (i = 0; i < adapter->num_rx_queues; i++)
2849                 igb_clean_rx_ring(adapter->rx_ring[i]);
2850 }
2851
2852 /**
2853  * igb_set_mac - Change the Ethernet Address of the NIC
2854  * @netdev: network interface device structure
2855  * @p: pointer to an address structure
2856  *
2857  * Returns 0 on success, negative on failure
2858  **/
2859 static int igb_set_mac(struct net_device *netdev, void *p)
2860 {
2861         struct igb_adapter *adapter = netdev_priv(netdev);
2862         struct e1000_hw *hw = &adapter->hw;
2863         struct sockaddr *addr = p;
2864
2865         if (!is_valid_ether_addr(addr->sa_data))
2866                 return -EADDRNOTAVAIL;
2867
2868         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2869         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2870
2871         /* set the correct pool for the new PF MAC address in entry 0 */
2872         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2873                          adapter->vfs_allocated_count);
2874
2875         return 0;
2876 }
2877
2878 /**
2879  * igb_write_mc_addr_list - write multicast addresses to MTA
2880  * @netdev: network interface device structure
2881  *
2882  * Writes multicast address list to the MTA hash table.
2883  * Returns: -ENOMEM on failure
2884  *                0 on no addresses written
2885  *                X on writing X addresses to MTA
2886  **/
2887 static int igb_write_mc_addr_list(struct net_device *netdev)
2888 {
2889         struct igb_adapter *adapter = netdev_priv(netdev);
2890         struct e1000_hw *hw = &adapter->hw;
2891         struct dev_mc_list *mc_ptr;
2892         u8  *mta_list;
2893         int i;
2894
2895         if (netdev_mc_empty(netdev)) {
2896                 /* nothing to program, so clear mc list */
2897                 igb_update_mc_addr_list(hw, NULL, 0);
2898                 igb_restore_vf_multicasts(adapter);
2899                 return 0;
2900         }
2901
2902         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
2903         if (!mta_list)
2904                 return -ENOMEM;
2905
2906         /* The shared function expects a packed array of only addresses. */
2907         i = 0;
2908         netdev_for_each_mc_addr(mc_ptr, netdev)
2909                 memcpy(mta_list + (i++ * ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2910
2911         igb_update_mc_addr_list(hw, mta_list, i);
2912         kfree(mta_list);
2913
2914         return netdev_mc_count(netdev);
2915 }
2916
2917 /**
2918  * igb_write_uc_addr_list - write unicast addresses to RAR table
2919  * @netdev: network interface device structure
2920  *
2921  * Writes unicast address list to the RAR table.
2922  * Returns: -ENOMEM on failure/insufficient address space
2923  *                0 on no addresses written
2924  *                X on writing X addresses to the RAR table
2925  **/
2926 static int igb_write_uc_addr_list(struct net_device *netdev)
2927 {
2928         struct igb_adapter *adapter = netdev_priv(netdev);
2929         struct e1000_hw *hw = &adapter->hw;
2930         unsigned int vfn = adapter->vfs_allocated_count;
2931         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
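             /* one RAR entry is reserved for the PF default MAC (entry 0)
              * and one for each allocated VF; only the remaining entries
              * can hold additional unicast filters */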
2932         int count = 0;
2933
2934         /* return ENOMEM indicating insufficient memory for addresses */
2935         if (netdev_uc_count(netdev) > rar_entries)
2936                 return -ENOMEM;
2937
2938         if (!netdev_uc_empty(netdev) && rar_entries) {
2939                 struct netdev_hw_addr *ha;
2940
2941                 netdev_for_each_uc_addr(ha, netdev) {
2942                         if (!rar_entries)
2943                                 break;
2944                         igb_rar_set_qsel(adapter, ha->addr,
2945                                          rar_entries--,
2946                                          vfn);
2947                         count++;
2948                 }
2949         }
2950         /* clear the unused RAR entries in reverse order to avoid write combining */
2951         for (; rar_entries > 0 ; rar_entries--) {
2952                 wr32(E1000_RAH(rar_entries), 0);
2953                 wr32(E1000_RAL(rar_entries), 0);
2954         }
2955         wrfl();
2956
2957         return count;
2958 }
2959
2960 /**
2961  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2962  * @netdev: network interface device structure
2963  *
2964  * The set_rx_mode entry point is called whenever the unicast or multicast
2965  * address lists or the network interface flags are updated.  This routine is
2966  * responsible for configuring the hardware for proper unicast, multicast,
2967  * promiscuous mode, and all-multi behavior.
2968  **/
2969 static void igb_set_rx_mode(struct net_device *netdev)
2970 {
2971         struct igb_adapter *adapter = netdev_priv(netdev);
2972         struct e1000_hw *hw = &adapter->hw;
2973         unsigned int vfn = adapter->vfs_allocated_count;
2974         u32 rctl, vmolr = 0;
2975         int count;
2976
2977         /* Check for Promiscuous and All Multicast modes */
2978         rctl = rd32(E1000_RCTL);
2979
2980         /* clear the affected bits */
2981         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2982
2983         if (netdev->flags & IFF_PROMISC) {
2984                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2985                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2986         } else {
2987                 if (netdev->flags & IFF_ALLMULTI) {
2988                         rctl |= E1000_RCTL_MPE;
2989                         vmolr |= E1000_VMOLR_MPME;
2990                 } else {
2991                         /*
2992                          * Write addresses to the MTA, if the attempt fails
2993                          * then we should just turn on promiscuous mode so
2994                          * that we can at least receive multicast traffic
2995                          */
2996                         count = igb_write_mc_addr_list(netdev);
2997                         if (count < 0) {
2998                                 rctl |= E1000_RCTL_MPE;
2999                                 vmolr |= E1000_VMOLR_MPME;
3000                         } else if (count) {
3001                                 vmolr |= E1000_VMOLR_ROMPE;
3002                         }
3003                 }
3004                 /*
3005                  * Write addresses to available RAR registers, if there is not
3006                  * sufficient space to store all the addresses then enable
3007                  * unicast promiscuous mode
3008                  */
3009                 count = igb_write_uc_addr_list(netdev);
3010                 if (count < 0) {
3011                         rctl |= E1000_RCTL_UPE;
3012                         vmolr |= E1000_VMOLR_ROPE;
3013                 }
3014                 rctl |= E1000_RCTL_VFE;
3015         }
3016         wr32(E1000_RCTL, rctl);
3017
3018         /*
3019          * In order to support SR-IOV and eventually VMDq it is necessary to set
3020          * the VMOLR to enable the appropriate modes.  Without this workaround
3021          * we will have issues with VLAN tag stripping not being done for frames
3022          * that are only arriving because we are the default pool
3023          */
3024         if (hw->mac.type < e1000_82576)
3025                 return;
3026
3027         vmolr |= rd32(E1000_VMOLR(vfn)) &
3028                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3029         wr32(E1000_VMOLR(vfn), vmolr);
3030         igb_restore_vf_multicasts(adapter);
3031 }
3032
3033 /* Need to wait a few seconds after link up to get diagnostic information from
3034  * the phy */
3035 static void igb_update_phy_info(unsigned long data)
3036 {
3037         struct igb_adapter *adapter = (struct igb_adapter *) data;
3038         igb_get_phy_info(&adapter->hw);
3039 }
3040
3041 /**
3042  * igb_has_link - check shared code for link and determine up/down
3043  * @adapter: pointer to driver private info
3044  **/
3045 bool igb_has_link(struct igb_adapter *adapter)
3046 {
3047         struct e1000_hw *hw = &adapter->hw;
3048         bool link_active = false;
3049         s32 ret_val = 0;
3050
3051         /* get_link_status is set on LSC (link status) interrupt or
3052          * rx sequence error interrupt.  It remains set until
3053          * e1000_check_for_link establishes link again, and that is
3054          * done for copper adapters ONLY
3055          */
3056         switch (hw->phy.media_type) {
3057         case e1000_media_type_copper:
3058                 if (hw->mac.get_link_status) {
3059                         ret_val = hw->mac.ops.check_for_link(hw);
3060                         link_active = !hw->mac.get_link_status;
3061                 } else {
3062                         link_active = true;
3063                 }
3064                 break;
3065         case e1000_media_type_internal_serdes:
3066                 ret_val = hw->mac.ops.check_for_link(hw);
3067                 link_active = hw->mac.serdes_has_link;
3068                 break;
3069         default:
3070         case e1000_media_type_unknown:
3071                 break;
3072         }
3073
3074         return link_active;
3075 }
3076
3077 /**
3078  * igb_watchdog - Timer Call-back
3079  * @data: pointer to adapter cast into an unsigned long
3080  **/
3081 static void igb_watchdog(unsigned long data)
3082 {
3083         struct igb_adapter *adapter = (struct igb_adapter *)data;
3084         /* Do the rest outside of interrupt context */
3085         schedule_work(&adapter->watchdog_task);
3086 }
3087
3088 static void igb_watchdog_task(struct work_struct *work)
3089 {
3090         struct igb_adapter *adapter = container_of(work,
3091                                                    struct igb_adapter,
3092                                                    watchdog_task);
3093         struct e1000_hw *hw = &adapter->hw;
3094         struct net_device *netdev = adapter->netdev;
3095         u32 link;
3096         int i;
3097
3098         link = igb_has_link(adapter);
3099         if (link) {
3100                 if (!netif_carrier_ok(netdev)) {
3101                         u32 ctrl;
3102                         hw->mac.ops.get_speed_and_duplex(hw,
3103                                                          &adapter->link_speed,
3104                                                          &adapter->link_duplex);
3105
3106                         ctrl = rd32(E1000_CTRL);
3107                         /* Link status message must follow this format */
3108                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3109                                  "Flow Control: %s\n",
3110                                netdev->name,
3111                                adapter->link_speed,
3112                                adapter->link_duplex == FULL_DUPLEX ?
3113                                  "Full Duplex" : "Half Duplex",
3114                                ((ctrl & E1000_CTRL_TFCE) &&
3115                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3116                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3117                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3118
3119                         /* tweak tx_queue_len according to speed/duplex and
3120                          * adjust the timeout factor */
3121                         netdev->tx_queue_len = adapter->tx_queue_len;
3122                         adapter->tx_timeout_factor = 1;
3123                         switch (adapter->link_speed) {
3124                         case SPEED_10:
3125                                 netdev->tx_queue_len = 10;
3126                                 adapter->tx_timeout_factor = 14;
3127                                 break;
3128                         case SPEED_100:
3129                                 netdev->tx_queue_len = 100;
3130                                 /* maybe add some timeout factor ? */
3131                                 break;
3132                         }
3133
3134                         netif_carrier_on(netdev);
3135
3136                         igb_ping_all_vfs(adapter);
3137
3138                         /* link state has changed, schedule phy info update */
3139                         if (!test_bit(__IGB_DOWN, &adapter->state))
3140                                 mod_timer(&adapter->phy_info_timer,
3141                                           round_jiffies(jiffies + 2 * HZ));
3142                 }
3143         } else {
3144                 if (netif_carrier_ok(netdev)) {
3145                         adapter->link_speed = 0;
3146                         adapter->link_duplex = 0;
3147                         /* Link status message must follow this format */
3148                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3149                                netdev->name);
3150                         netif_carrier_off(netdev);
3151
3152                         igb_ping_all_vfs(adapter);
3153
3154                         /* link state has changed, schedule phy info update */
3155                         if (!test_bit(__IGB_DOWN, &adapter->state))
3156                                 mod_timer(&adapter->phy_info_timer,
3157                                           round_jiffies(jiffies + 2 * HZ));
3158                 }
3159         }
3160
3161         igb_update_stats(adapter);
3162
3163         for (i = 0; i < adapter->num_tx_queues; i++) {
3164                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3165                 if (!netif_carrier_ok(netdev)) {
3166                         /* We've lost link, so the controller stops DMA,
3167                          * but we've got queued Tx work that's never going
3168                          * to get done, so reset controller to flush Tx.
3169                          * (Do the reset outside of interrupt context). */
3170                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3171                                 adapter->tx_timeout_count++;
3172                                 schedule_work(&adapter->reset_task);
3173                                 /* return immediately since reset is imminent */
3174                                 return;
3175                         }
3176                 }
3177
3178                 /* Force detection of hung controller every watchdog period */
3179                 tx_ring->detect_tx_hung = true;
3180         }
3181
3182         /* Cause software interrupt to ensure rx ring is cleaned */
3183         if (adapter->msix_entries) {
3184                 u32 eics = 0;
3185                 for (i = 0; i < adapter->num_q_vectors; i++) {
3186                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3187                         eics |= q_vector->eims_value;
3188                 }
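                     /* writing the accumulated EIMS bits to EICS fires a
                      * software interrupt on each active vector */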
3189                 wr32(E1000_EICS, eics);
3190         } else {
3191                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3192         }
3193
3194         /* Reset the timer */
3195         if (!test_bit(__IGB_DOWN, &adapter->state))
3196                 mod_timer(&adapter->watchdog_timer,
3197                           round_jiffies(jiffies + 2 * HZ));
3198 }
3199
3200 enum latency_range {
3201         lowest_latency = 0,
3202         low_latency = 1,
3203         bulk_latency = 2,
3204         latency_invalid = 255
3205 };
3206
3207 /**
3208  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3209  *
3210  *      Stores a new ITR value based strictly on packet size.  This
3211  *      algorithm is less sophisticated than that used in igb_update_itr,
3212  *      due to the difficulty of synchronizing statistics across multiple
3213  *      receive rings.  The divisors and thresholds used by this function
3214  *      were determined based on theoretical maximum wire speed and testing
3215  *      data, in order to minimize response time while increasing bulk
3216  *      throughput.
3217  *      This functionality is controlled by the InterruptThrottleRate module
3218  *      parameter (see igb_param.c)
3219  *      NOTE:  This function is called only when operating in a multiqueue
3220  *             receive environment.
3221  * @q_vector: pointer to q_vector
3222  **/
3223 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3224 {
3225         int new_val = q_vector->itr_val;
3226         int avg_wire_size = 0;
3227         struct igb_adapter *adapter = q_vector->adapter;
3228
3229         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3230          * ints/sec (an ITR value of 976).
3231          */
3232         if (adapter->link_speed != SPEED_1000) {
3233                 new_val = 976;
3234                 goto set_itr_val;
3235         }
3236
3237         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3238                 struct igb_ring *ring = q_vector->rx_ring;
3239                 avg_wire_size = ring->total_bytes / ring->total_packets;
3240         }
3241
3242         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3243                 struct igb_ring *ring = q_vector->tx_ring;
3244                 avg_wire_size = max_t(u32, avg_wire_size,
3245                                       (ring->total_bytes /
3246                                        ring->total_packets));
3247         }
3248
3249         /* if avg_wire_size isn't set no work was done */
3250         if (!avg_wire_size)
3251                 goto clear_counts;
3252
3253         /* Add 24 bytes to size to account for CRC, preamble, and gap */
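             /* (4 byte FCS + 8 byte preamble/SFD + 12 byte minimum IFG) */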
3254         avg_wire_size += 24;
3255
3256         /* Don't starve jumbo frames */
3257         avg_wire_size = min(avg_wire_size, 3000);
3258
3259         /* Give a little boost to mid-size frames */
3260         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3261                 new_val = avg_wire_size / 3;
3262         else
3263                 new_val = avg_wire_size / 2;
3264
3265         /* when in itr mode 3 do not exceed 20K ints/sec */
3266         if (adapter->rx_itr_setting == 3 && new_val < 196)
3267                 new_val = 196;
3268
3269 set_itr_val:
3270         if (new_val != q_vector->itr_val) {
3271                 q_vector->itr_val = new_val;
3272                 q_vector->set_itr = 1;
3273         }
3274 clear_counts:
3275         if (q_vector->rx_ring) {
3276                 q_vector->rx_ring->total_bytes = 0;
3277                 q_vector->rx_ring->total_packets = 0;
3278         }
3279         if (q_vector->tx_ring) {
3280                 q_vector->tx_ring->total_bytes = 0;
3281                 q_vector->tx_ring->total_packets = 0;
3282         }
3283 }
3284
3285 /**
3286  * igb_update_itr - update the dynamic ITR value based on statistics
3287  *      Stores a new ITR value based on packets and byte
3288  *      counts during the last interrupt.  The advantage of per interrupt
3289  *      computation is faster updates and more accurate ITR for the current
3290  *      traffic pattern.  Constants in this function were computed
3291  *      based on theoretical maximum wire speed and thresholds were set based
3292  *      on testing data as well as attempting to minimize response time
3293  *      while increasing bulk throughput.
3294  *      This functionality is controlled by the InterruptThrottleRate module
3295  *      parameter (see igb_param.c)
3296  *      NOTE:  These calculations are only valid when operating in a single-
3297  *             queue environment.
3298  * @adapter: pointer to adapter
3299  * @itr_setting: current q_vector->itr_val
3300  * @packets: the number of packets during this measurement interval
3301  * @bytes: the number of bytes during this measurement interval
3302  **/
3303 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3304                                    int packets, int bytes)
3305 {
3306         unsigned int retval = itr_setting;
3307
3308         if (packets == 0)
3309                 goto update_itr_done;
3310
3311         switch (itr_setting) {
3312         case lowest_latency:
3313                 /* handle TSO and jumbo frames */
3314                 if (bytes/packets > 8000)
3315                         retval = bulk_latency;
3316                 else if ((packets < 5) && (bytes > 512))
3317                         retval = low_latency;
3318                 break;
3319         case low_latency:  /* 50 usec aka 20000 ints/s */
3320                 if (bytes > 10000) {
3321                         /* this if handles the TSO accounting */
3322                         if (bytes/packets > 8000) {
3323                                 retval = bulk_latency;
3324                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3325                                 retval = bulk_latency;
3326                         } else if ((packets > 35)) {
3327                                 retval = lowest_latency;
3328                         }
3329                 } else if (bytes/packets > 2000) {
3330                         retval = bulk_latency;
3331                 } else if (packets <= 2 && bytes < 512) {
3332                         retval = lowest_latency;
3333                 }
3334                 break;
3335         case bulk_latency: /* 250 usec aka 4000 ints/s */
3336                 if (bytes > 25000) {
3337                         if (packets > 35)
3338                                 retval = low_latency;
3339                 } else if (bytes < 1500) {
3340                         retval = low_latency;
3341                 }
3342                 break;
3343         }
3344
3345 update_itr_done:
3346         return retval;
3347 }
3348
3349 static void igb_set_itr(struct igb_adapter *adapter)
3350 {
3351         struct igb_q_vector *q_vector = adapter->q_vector[0];
3352         u16 current_itr;
3353         u32 new_itr = q_vector->itr_val;
3354
3355         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3356         if (adapter->link_speed != SPEED_1000) {
3357                 current_itr = 0;
3358                 new_itr = 4000;
3359                 goto set_itr_now;
3360         }
3361
3362         adapter->rx_itr = igb_update_itr(adapter,
3363                                     adapter->rx_itr,
3364                                     q_vector->rx_ring->total_packets,
3365                                     q_vector->rx_ring->total_bytes);
3366
3367         adapter->tx_itr = igb_update_itr(adapter,
3368                                     adapter->tx_itr,
3369                                     q_vector->tx_ring->total_packets,
3370                                     q_vector->tx_ring->total_bytes);
3371         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3372
3373         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3374         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3375                 current_itr = low_latency;
3376
3377         switch (current_itr) {
3378         /* counts and packets in update_itr are dependent on these numbers */
3379         case lowest_latency:
3380                 new_itr = 56;  /* aka 70,000 ints/sec */
3381                 break;
3382         case low_latency:
3383                 new_itr = 196; /* aka 20,000 ints/sec */
3384                 break;
3385         case bulk_latency:
3386                 new_itr = 980; /* aka 4,000 ints/sec */
3387                 break;
3388         default:
3389                 break;
3390         }
3391
3392 set_itr_now:
3393         q_vector->rx_ring->total_bytes = 0;
3394         q_vector->rx_ring->total_packets = 0;
3395         q_vector->tx_ring->total_bytes = 0;
3396         q_vector->tx_ring->total_packets = 0;
3397
3398         if (new_itr != q_vector->itr_val) {
3399                 /* this attempts to bias the interrupt rate towards Bulk
3400                  * by adding intermediate steps when interrupt rate is
3401                  * increasing */
3402                 new_itr = new_itr > q_vector->itr_val ?
3403                              max((new_itr * q_vector->itr_val) /
3404                                  (new_itr + (q_vector->itr_val >> 2)),
3405                                  new_itr) :
3406                              new_itr;
3407                 /* Don't write the value here; it resets the adapter's
3408                  * internal timer, and causes us to delay far longer than
3409                  * we should between interrupts.  Instead, we write the ITR
3410                  * value at the beginning of the next interrupt so the timing
3411                  * ends up being correct.
3412                  */
3413                 q_vector->itr_val = new_itr;
3414                 q_vector->set_itr = 1;
3415         }
3416
3417         return;
3418 }
3419
3420 #define IGB_TX_FLAGS_CSUM               0x00000001
3421 #define IGB_TX_FLAGS_VLAN               0x00000002
3422 #define IGB_TX_FLAGS_TSO                0x00000004
3423 #define IGB_TX_FLAGS_IPV4               0x00000008
3424 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3425 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3426 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3427
3428 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3429                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3430 {
3431         struct e1000_adv_tx_context_desc *context_desc;
3432         unsigned int i;
3433         int err;
3434         struct igb_buffer *buffer_info;
3435         u32 info = 0, tu_cmd = 0;
3436         u32 mss_l4len_idx;
3437         u8 l4len;
3438
3439         if (skb_header_cloned(skb)) {
3440                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3441                 if (err)
3442                         return err;
3443         }
3444
3445         l4len = tcp_hdrlen(skb);
3446         *hdr_len += l4len;
3447
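             /* seed the TCP pseudo-header checksum; the hardware fills in
              * the real checksum for each segment it produces */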
3448         if (skb->protocol == htons(ETH_P_IP)) {
3449                 struct iphdr *iph = ip_hdr(skb);
3450                 iph->tot_len = 0;
3451                 iph->check = 0;
3452                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3453                                                          iph->daddr, 0,
3454                                                          IPPROTO_TCP,
3455                                                          0);
3456         } else if (skb_is_gso_v6(skb)) {
3457                 ipv6_hdr(skb)->payload_len = 0;
3458                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3459                                                        &ipv6_hdr(skb)->daddr,
3460                                                        0, IPPROTO_TCP, 0);
3461         }
3462
3463         i = tx_ring->next_to_use;
3464
3465         buffer_info = &tx_ring->buffer_info[i];
3466         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3467         /* VLAN MACLEN IPLEN */
3468         if (tx_flags & IGB_TX_FLAGS_VLAN)
3469                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3470         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3471         *hdr_len += skb_network_offset(skb);
3472         info |= skb_network_header_len(skb);
3473         *hdr_len += skb_network_header_len(skb);
3474         context_desc->vlan_macip_lens = cpu_to_le32(info);
3475
3476         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3477         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3478
3479         if (skb->protocol == htons(ETH_P_IP))
3480                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3481         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3482
3483         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3484
3485         /* MSS L4LEN IDX */
3486         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3487         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3488
3489         /* For 82575, context index must be unique per ring. */
3490         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3491                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3492
3493         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3494         context_desc->seqnum_seed = 0;
3495
3496         buffer_info->time_stamp = jiffies;
3497         buffer_info->next_to_watch = i;
3498         buffer_info->dma = 0;
3499         i++;
3500         if (i == tx_ring->count)
3501                 i = 0;
3502
3503         tx_ring->next_to_use = i;
3504
3505         return true;
3506 }
3507
3508 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3509                                    struct sk_buff *skb, u32 tx_flags)
3510 {
3511         struct e1000_adv_tx_context_desc *context_desc;
3512         struct pci_dev *pdev = tx_ring->pdev;
3513         struct igb_buffer *buffer_info;
3514         u32 info = 0, tu_cmd = 0;
3515         unsigned int i;
3516
3517         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3518             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3519                 i = tx_ring->next_to_use;
3520                 buffer_info = &tx_ring->buffer_info[i];
3521                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3522
3523                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3524                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3525
3526                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3527                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3528                         info |= skb_network_header_len(skb);
3529
3530                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3531
3532                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3533
3534                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3535                         __be16 protocol;
3536
3537                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3538                                 const struct vlan_ethhdr *vhdr =
3539                                           (const struct vlan_ethhdr*)skb->data;
3540
3541                                 protocol = vhdr->h_vlan_encapsulated_proto;
3542                         } else {
3543                                 protocol = skb->protocol;
3544                         }
3545
3546                         switch (protocol) {
3547                         case cpu_to_be16(ETH_P_IP):
3548                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3549                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3550                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3551                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3552                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3553                                 break;
3554                         case cpu_to_be16(ETH_P_IPV6):
3555                                 /* XXX what about other V6 headers?? */
3556                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3557                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3558                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3559                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3560                                 break;
3561                         default:
3562                                 if (unlikely(net_ratelimit()))
3563                                         dev_warn(&pdev->dev,
3564                                             "partial checksum but proto=%x!\n",
3565                                             skb->protocol);
3566                                 break;
3567                         }
3568                 }
3569
3570                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3571                 context_desc->seqnum_seed = 0;
3572                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3573                         context_desc->mss_l4len_idx =
3574                                 cpu_to_le32(tx_ring->reg_idx << 4);
3575
3576                 buffer_info->time_stamp = jiffies;
3577                 buffer_info->next_to_watch = i;
3578                 buffer_info->dma = 0;
3579
3580                 i++;
3581                 if (i == tx_ring->count)
3582                         i = 0;
3583                 tx_ring->next_to_use = i;
3584
3585                 return true;
3586         }
3587         return false;
3588 }
3589
3590 #define IGB_MAX_TXD_PWR 16
3591 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3592
3593 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3594                                  unsigned int first)
3595 {
3596         struct igb_buffer *buffer_info;
3597         struct pci_dev *pdev = tx_ring->pdev;
3598         unsigned int len = skb_headlen(skb);
3599         unsigned int count = 0, i;
3600         unsigned int f;
3601
3602         i = tx_ring->next_to_use;
3603
3604         buffer_info = &tx_ring->buffer_info[i];
3605         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3606         buffer_info->length = len;
3607         /* set time_stamp *before* dma to help avoid a possible race */
3608         buffer_info->time_stamp = jiffies;
3609         buffer_info->next_to_watch = i;
3610         buffer_info->dma = pci_map_single(pdev, skb->data, len,
3611                                           PCI_DMA_TODEVICE);
3612         if (pci_dma_mapping_error(pdev, buffer_info->dma))
3613                 goto dma_error;
3614
3615         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3616                 struct skb_frag_struct *frag;
3617
3618                 count++;
3619                 i++;
3620                 if (i == tx_ring->count)
3621                         i = 0;
3622
3623                 frag = &skb_shinfo(skb)->frags[f];
3624                 len = frag->size;
3625
3626                 buffer_info = &tx_ring->buffer_info[i];
3627                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3628                 buffer_info->length = len;
3629                 buffer_info->time_stamp = jiffies;
3630                 buffer_info->next_to_watch = i;
3631                 buffer_info->mapped_as_page = true;
3632                 buffer_info->dma = pci_map_page(pdev,
3633                                                 frag->page,
3634                                                 frag->page_offset,
3635                                                 len,
3636                                                 PCI_DMA_TODEVICE);
3637                 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3638                         goto dma_error;
3639
3640         }
3641
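             /* record the skb and its segment count on the last descriptor
              * so the cleanup path can credit the correct packet count */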
3642         tx_ring->buffer_info[i].skb = skb;
3643         tx_ring->buffer_info[i].gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3644         tx_ring->buffer_info[first].next_to_watch = i;
3645
3646         return ++count;
3647
3648 dma_error:
3649         dev_err(&pdev->dev, "TX DMA map failed\n");
3650
3651         /* clear timestamp and dma mappings for failed buffer_info mapping */
3652         buffer_info->dma = 0;
3653         buffer_info->time_stamp = 0;
3654         buffer_info->length = 0;
3655         buffer_info->next_to_watch = 0;
3656         buffer_info->mapped_as_page = false;
3657
3658         /* clear timestamp and dma mappings for remaining portion of packet */
3659         while (count--) {
3660                 if (i == 0)
3661                         i = tx_ring->count;
3662                 i--;
3663                 buffer_info = &tx_ring->buffer_info[i];
3664                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3665         }
3666
3667         return 0;
3668 }
3669
3670 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3671                                     u32 tx_flags, int count, u32 paylen,
3672                                     u8 hdr_len)
3673 {
3674         union e1000_adv_tx_desc *tx_desc;
3675         struct igb_buffer *buffer_info;
3676         u32 olinfo_status = 0, cmd_type_len;
3677         unsigned int i = tx_ring->next_to_use;
3678
3679         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3680                         E1000_ADVTXD_DCMD_DEXT);
3681
3682         if (tx_flags & IGB_TX_FLAGS_VLAN)
3683                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3684
3685         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3686                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3687
3688         if (tx_flags & IGB_TX_FLAGS_TSO) {
3689                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3690
3691                 /* insert tcp checksum */
3692                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3693
3694                 /* insert ip checksum */
3695                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3696                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3697
3698         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3699                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3700         }
3701
3702         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3703             (tx_flags & (IGB_TX_FLAGS_CSUM |
3704                          IGB_TX_FLAGS_TSO |
3705                          IGB_TX_FLAGS_VLAN)))
3706                 olinfo_status |= tx_ring->reg_idx << 4;
3707
3708         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3709
3710         do {
3711                 buffer_info = &tx_ring->buffer_info[i];
3712                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3713                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3714                 tx_desc->read.cmd_type_len =
3715                         cpu_to_le32(cmd_type_len | buffer_info->length);
3716                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3717                 count--;
3718                 i++;
3719                 if (i == tx_ring->count)
3720                         i = 0;
3721         } while (count > 0);
3722
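             /* the last descriptor also gets the EOP/RS command bits via
              * IGB_ADVTXD_DCMD so the hardware closes the packet and
              * writes back its completion status */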
3723         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3724         /* Force memory writes to complete before letting h/w
3725          * know there are new descriptors to fetch.  (Only
3726          * applicable for weak-ordered memory model archs,
3727          * such as IA-64). */
3728         wmb();
3729
3730         tx_ring->next_to_use = i;
3731         writel(i, tx_ring->tail);
3732         /* we need this if more than one processor can write to our tail
3733          * at a time; it synchronizes IO on IA64/Altix systems */
3734         mmiowb();
3735 }
3736
3737 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3738 {
3739         struct net_device *netdev = tx_ring->netdev;
3740
3741         netif_stop_subqueue(netdev, tx_ring->queue_index);
3742
3743         /* Herbert's original patch had:
3744          *  smp_mb__after_netif_stop_queue();
3745          * but since that doesn't exist yet, just open code it. */
3746         smp_mb();
3747
3748         /* We need to check again in case another CPU has just
3749          * made room available. */
3750         if (igb_desc_unused(tx_ring) < size)
3751                 return -EBUSY;
3752
3753         /* A reprieve! */
3754         netif_wake_subqueue(netdev, tx_ring->queue_index);
3755         tx_ring->tx_stats.restart_queue++;
3756         return 0;
3757 }
3758
3759 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3760 {
3761         if (igb_desc_unused(tx_ring) >= size)
3762                 return 0;
3763         return __igb_maybe_stop_tx(tx_ring, size);
3764 }
3765
3766 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3767                                     struct igb_ring *tx_ring)
3768 {
3769         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3770         int tso = 0, count;
3771         u32 tx_flags = 0;
3772         u16 first;
3773         u8 hdr_len = 0;
3774         union skb_shared_tx *shtx = skb_tx(skb);
3775
3776         /* need: 1 descriptor per page,
3777          *       + 2 desc gap to keep tail from touching head,
3778          *       + 1 desc for skb->data,
3779          *       + 1 desc for context descriptor,
3780          * otherwise try next time */
3781         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3782                 /* this is a hard error */
3783                 return NETDEV_TX_BUSY;
3784         }
3785
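             /* the stack has requested a hardware tx timestamp for this skb */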
3786         if (unlikely(shtx->hardware)) {
3787                 shtx->in_progress = 1;
3788                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3789         }
3790
3791         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3792                 tx_flags |= IGB_TX_FLAGS_VLAN;
3793                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3794         }
3795
3796         if (skb->protocol == htons(ETH_P_IP))
3797                 tx_flags |= IGB_TX_FLAGS_IPV4;
3798
3799         first = tx_ring->next_to_use;
3800         if (skb_is_gso(skb)) {
3801                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3802
3803                 if (tso < 0) {
3804                         dev_kfree_skb_any(skb);
3805                         return NETDEV_TX_OK;
3806                 }
3807         }
3808
3809         if (tso)
3810                 tx_flags |= IGB_TX_FLAGS_TSO;
3811         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3812                  (skb->ip_summed == CHECKSUM_PARTIAL))
3813                 tx_flags |= IGB_TX_FLAGS_CSUM;
3814
3815         /*
3816          * count reflects descriptors mapped; if 0 or less, then a mapping error
3817          * has occurred and we need to rewind the descriptor queue
3818          */
3819         count = igb_tx_map_adv(tx_ring, skb, first);
3820         if (!count) {
3821                 dev_kfree_skb_any(skb);
3822                 tx_ring->buffer_info[first].time_stamp = 0;
3823                 tx_ring->next_to_use = first;
3824                 return NETDEV_TX_OK;
3825         }
3826
3827         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3828
3829         /* Make sure there is space in the ring for the next send. */
3830         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3831
3832         return NETDEV_TX_OK;
3833 }
3834
3835 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3836                                       struct net_device *netdev)
3837 {
3838         struct igb_adapter *adapter = netdev_priv(netdev);
3839         struct igb_ring *tx_ring;
3840         int r_idx = 0;
3841
3842         if (test_bit(__IGB_DOWN, &adapter->state)) {
3843                 dev_kfree_skb_any(skb);
3844                 return NETDEV_TX_OK;
3845         }
3846
3847         if (skb->len <= 0) {
3848                 dev_kfree_skb_any(skb);
3849                 return NETDEV_TX_OK;
3850         }
3851
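             /* the mask assumes IGB_ABS_MAX_TX_QUEUES is a power of two, so
              * the queue mapping is reduced to a valid ring index */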
3852         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3853         tx_ring = adapter->multi_tx_table[r_idx];
3854
3855         /* This goes back to the question of how to logically map a tx queue
3856          * to a flow.  Right now, performance is impacted slightly negatively
3857          * if using multiple tx queues.  If the stack breaks away from a
3858          * single qdisc implementation, we can look at this again. */
3859         return igb_xmit_frame_ring_adv(skb, tx_ring);
3860 }
3861
3862 /**
3863  * igb_tx_timeout - Respond to a Tx Hang
3864  * @netdev: network interface device structure
3865  **/
3866 static void igb_tx_timeout(struct net_device *netdev)
3867 {
3868         struct igb_adapter *adapter = netdev_priv(netdev);
3869         struct e1000_hw *hw = &adapter->hw;
3870
3871         /* Do the reset outside of interrupt context */
3872         adapter->tx_timeout_count++;
3873
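             /* a hang on 82580 is recovered with a global device reset
              * rather than a normal software reset */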
3874         if (hw->mac.type == e1000_82580)
3875                 hw->dev_spec._82575.global_device_reset = true;
3876
3877         schedule_work(&adapter->reset_task);
3878         wr32(E1000_EICS,
3879              (adapter->eims_enable_mask & ~adapter->eims_other));
3880 }
3881
3882 static void igb_reset_task(struct work_struct *work)
3883 {
3884         struct igb_adapter *adapter;
3885         adapter = container_of(work, struct igb_adapter, reset_task);
3886
3887         igb_reinit_locked(adapter);
3888 }
3889
3890 /**
3891  * igb_get_stats - Get System Network Statistics
3892  * @netdev: network interface device structure
3893  *
3894  * Returns the address of the device statistics structure.
3895  * The statistics are actually updated from the timer callback.
3896  **/
3897 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3898 {
3899         /* only return the current stats */
3900         return &netdev->stats;
3901 }
3902
3903 /**
3904  * igb_change_mtu - Change the Maximum Transfer Unit
3905  * @netdev: network interface device structure
3906  * @new_mtu: new value for maximum frame size
3907  *
3908  * Returns 0 on success, negative on failure
3909  **/
3910 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3911 {
3912         struct igb_adapter *adapter = netdev_priv(netdev);
3913         struct pci_dev *pdev = adapter->pdev;
3914         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3915         u32 rx_buffer_len, i;
3916
3917         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3918                 dev_err(&pdev->dev, "Invalid MTU setting\n");
3919                 return -EINVAL;
3920         }
3921
3922         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3923                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3924                 return -EINVAL;
3925         }
3926
3927         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3928                 msleep(1);
3929
3930         /* igb_down has a dependency on max_frame_size */
3931         adapter->max_frame_size = max_frame;
3932
3933         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3934          * means we reserve 2 more, this pushes us to allocate from the next
3935          * means we reserve 2 more; this pushes us to allocate from the next
3936          * i.e. RXBUFFER_2048 --> size-4096 slab
3937          */
3938
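             /* frames too large for a VLAN-sized buffer use the small 128
              * byte buffer for headers, with packet data placed in the
              * half-page buffers by the receive path */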
3939         if (max_frame <= IGB_RXBUFFER_1024)
3940                 rx_buffer_len = IGB_RXBUFFER_1024;
3941         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3942                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3943         else
3944                 rx_buffer_len = IGB_RXBUFFER_128;
3945
3946         if (netif_running(netdev))
3947                 igb_down(adapter);
3948
3949         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3950                  netdev->mtu, new_mtu);
3951         netdev->mtu = new_mtu;
3952
3953         for (i = 0; i < adapter->num_rx_queues; i++)
3954                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
3955
3956         if (netif_running(netdev))
3957                 igb_up(adapter);
3958         else
3959                 igb_reset(adapter);
3960
3961         clear_bit(__IGB_RESETTING, &adapter->state);
3962
3963         return 0;
3964 }
3965
3966 /**
3967  * igb_update_stats - Update the board statistics counters
3968  * @adapter: board private structure
3969  **/
3970
3971 void igb_update_stats(struct igb_adapter *adapter)
3972 {
3973         struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3974         struct e1000_hw *hw = &adapter->hw;
3975         struct pci_dev *pdev = adapter->pdev;
3976         u32 rnbc, reg;
3977         u16 phy_tmp;
3978         int i;
3979         u64 bytes, packets;
3980
3981 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3982
3983         /*
3984          * Prevent stats update while adapter is being reset, or if the pci
3985          * connection is down.
3986          */
3987         if (adapter->link_speed == 0)
3988                 return;
3989         if (pci_channel_offline(pdev))
3990                 return;
3991
3992         bytes = 0;
3993         packets = 0;
3994         for (i = 0; i < adapter->num_rx_queues; i++) {
3995                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3996                 struct igb_ring *ring = adapter->rx_ring[i];
3997                 ring->rx_stats.drops += rqdpc_tmp;
3998                 net_stats->rx_fifo_errors += rqdpc_tmp;
3999                 bytes += ring->rx_stats.bytes;
4000                 packets += ring->rx_stats.packets;
4001         }
4002
4003         net_stats->rx_bytes = bytes;
4004         net_stats->rx_packets = packets;
4005
4006         bytes = 0;
4007         packets = 0;
4008         for (i = 0; i < adapter->num_tx_queues; i++) {
4009                 struct igb_ring *ring = adapter->tx_ring[i];
4010                 bytes += ring->tx_stats.bytes;
4011                 packets += ring->tx_stats.packets;
4012         }
4013         net_stats->tx_bytes = bytes;
4014         net_stats->tx_packets = packets;
4015
4016         /* read stats registers */
4017         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4018         adapter->stats.gprc += rd32(E1000_GPRC);
4019         adapter->stats.gorc += rd32(E1000_GORCL);
4020         rd32(E1000_GORCH); /* clear GORCL */
4021         adapter->stats.bprc += rd32(E1000_BPRC);
4022         adapter->stats.mprc += rd32(E1000_MPRC);
4023         adapter->stats.roc += rd32(E1000_ROC);
4024
4025         adapter->stats.prc64 += rd32(E1000_PRC64);
4026         adapter->stats.prc127 += rd32(E1000_PRC127);
4027         adapter->stats.prc255 += rd32(E1000_PRC255);
4028         adapter->stats.prc511 += rd32(E1000_PRC511);
4029         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4030         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4031         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4032         adapter->stats.sec += rd32(E1000_SEC);
4033
4034         adapter->stats.mpc += rd32(E1000_MPC);
4035         adapter->stats.scc += rd32(E1000_SCC);
4036         adapter->stats.ecol += rd32(E1000_ECOL);
4037         adapter->stats.mcc += rd32(E1000_MCC);
4038         adapter->stats.latecol += rd32(E1000_LATECOL);
4039         adapter->stats.dc += rd32(E1000_DC);
4040         adapter->stats.rlec += rd32(E1000_RLEC);
4041         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4042         adapter->stats.xontxc += rd32(E1000_XONTXC);
4043         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4044         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4045         adapter->stats.fcruc += rd32(E1000_FCRUC);
4046         adapter->stats.gptc += rd32(E1000_GPTC);
4047         adapter->stats.gotc += rd32(E1000_GOTCL);
4048         rd32(E1000_GOTCH); /* clear GOTCL */
4049         rnbc = rd32(E1000_RNBC);
4050         adapter->stats.rnbc += rnbc;
4051         net_stats->rx_fifo_errors += rnbc;
4052         adapter->stats.ruc += rd32(E1000_RUC);
4053         adapter->stats.rfc += rd32(E1000_RFC);
4054         adapter->stats.rjc += rd32(E1000_RJC);
4055         adapter->stats.tor += rd32(E1000_TORH);
4056         adapter->stats.tot += rd32(E1000_TOTH);
4057         adapter->stats.tpr += rd32(E1000_TPR);
4058
4059         adapter->stats.ptc64 += rd32(E1000_PTC64);
4060         adapter->stats.ptc127 += rd32(E1000_PTC127);
4061         adapter->stats.ptc255 += rd32(E1000_PTC255);
4062         adapter->stats.ptc511 += rd32(E1000_PTC511);
4063         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4064         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4065
4066         adapter->stats.mptc += rd32(E1000_MPTC);
4067         adapter->stats.bptc += rd32(E1000_BPTC);
4068
4069         adapter->stats.tpt += rd32(E1000_TPT);
4070         adapter->stats.colc += rd32(E1000_COLC);
4071
4072         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4073         /* read internal phy specific stats */
4074         reg = rd32(E1000_CTRL_EXT);
4075         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
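                     /* a link mode of zero means the internal copper PHY is
                      * in use, so these PHY related counters are valid */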
4076                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4077                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4078         }
4079
4080         adapter->stats.tsctc += rd32(E1000_TSCTC);
4081         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4082
4083         adapter->stats.iac += rd32(E1000_IAC);
4084         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4085         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4086         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4087         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4088         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4089         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4090         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4091         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4092
4093         /* Fill out the OS statistics structure */
4094         net_stats->multicast = adapter->stats.mprc;
4095         net_stats->collisions = adapter->stats.colc;
4096
4097         /* Rx Errors */
4098
4099         /* RLEC on some newer hardware can be incorrect, so build
4100          * our own version based on RUC and ROC */
4101         net_stats->rx_errors = adapter->stats.rxerrc +
4102                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4103                 adapter->stats.ruc + adapter->stats.roc +
4104                 adapter->stats.cexterr;
4105         net_stats->rx_length_errors = adapter->stats.ruc +
4106                                       adapter->stats.roc;
4107         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4108         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4109         net_stats->rx_missed_errors = adapter->stats.mpc;
4110
4111         /* Tx Errors */
4112         net_stats->tx_errors = adapter->stats.ecol +
4113                                adapter->stats.latecol;
4114         net_stats->tx_aborted_errors = adapter->stats.ecol;
4115         net_stats->tx_window_errors = adapter->stats.latecol;
4116         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4117
4118         /* Tx Dropped needs to be maintained elsewhere */
4119
4120         /* Phy Stats */
4121         if (hw->phy.media_type == e1000_media_type_copper) {
4122                 if ((adapter->link_speed == SPEED_1000) &&
4123                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4124                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4125                         adapter->phy_stats.idle_errors += phy_tmp;
4126                 }
4127         }
4128
4129         /* Management Stats */
4130         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4131         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4132         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4133 }
4134
4135 static irqreturn_t igb_msix_other(int irq, void *data)
4136 {
4137         struct igb_adapter *adapter = data;
4138         struct e1000_hw *hw = &adapter->hw;
4139         u32 icr = rd32(E1000_ICR);
4140         /* reading ICR causes bit 31 of EICR to be cleared */
4141
4142         if (icr & E1000_ICR_DRSTA)
4143                 schedule_work(&adapter->reset_task);
4144
4145         if (icr & E1000_ICR_DOUTSYNC) {
4146                 /* HW is reporting DMA is out of sync */
4147                 adapter->stats.doosync++;
4148         }
4149
4150         /* Check for a mailbox event */
4151         if (icr & E1000_ICR_VMMB)
4152                 igb_msg_task(adapter);
4153
4154         if (icr & E1000_ICR_LSC) {
4155                 hw->mac.get_link_status = 1;
4156                 /* guard against interrupt when we're going down */
4157                 if (!test_bit(__IGB_DOWN, &adapter->state))
4158                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4159         }
4160
4161         if (adapter->vfs_allocated_count)
4162                 wr32(E1000_IMS, E1000_IMS_LSC |
4163                                 E1000_IMS_VMMB |
4164                                 E1000_IMS_DOUTSYNC);
4165         else
4166                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4167         wr32(E1000_EIMS, adapter->eims_other);
4168
4169         return IRQ_HANDLED;
4170 }
4171
4172 static void igb_write_itr(struct igb_q_vector *q_vector)
4173 {
4174         struct igb_adapter *adapter = q_vector->adapter;
4175         u32 itr_val = q_vector->itr_val & 0x7FFC;
4176
4177         if (!q_vector->set_itr)
4178                 return;
4179
4180         if (!itr_val)
4181                 itr_val = 0x4;
4182
4183         if (adapter->hw.mac.type == e1000_82575)
4184                 itr_val |= itr_val << 16;
4185         else
4186                 itr_val |= 0x8000000;
4187
4188         writel(itr_val, q_vector->itr_register);
4189         q_vector->set_itr = 0;
4190 }
4191
4192 static irqreturn_t igb_msix_ring(int irq, void *data)
4193 {
4194         struct igb_q_vector *q_vector = data;
4195
4196         /* Write the ITR value calculated from the previous interrupt. */
4197         igb_write_itr(q_vector);
4198
4199         napi_schedule(&q_vector->napi);
4200
4201         return IRQ_HANDLED;
4202 }
4203
4204 #ifdef CONFIG_IGB_DCA
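/**
 * igb_update_dca - retarget DCA hints at the CPU running this vector
 * @q_vector: vector whose rings should be updated
 *
 * Programs the DCA_TXCTRL/DCA_RXCTRL registers for the vector's rings
 * with the DCA tag of the CPU currently servicing the vector, so that
 * descriptor (and, for Rx, header and payload) writes are steered
 * toward that CPU's cache.  Does nothing if the CPU has not changed.
 **/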
4205 static void igb_update_dca(struct igb_q_vector *q_vector)
4206 {
4207         struct igb_adapter *adapter = q_vector->adapter;
4208         struct e1000_hw *hw = &adapter->hw;
4209         int cpu = get_cpu();
4210
4211         if (q_vector->cpu == cpu)
4212                 goto out_no_update;
4213
4214         if (q_vector->tx_ring) {
4215                 int q = q_vector->tx_ring->reg_idx;
4216                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4217                 if (hw->mac.type == e1000_82575) {
4218                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4219                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4220                 } else {
4221                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4222                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4223                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4224                 }
4225                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4226                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4227         }
4228         if (q_vector->rx_ring) {
4229                 int q = q_vector->rx_ring->reg_idx;
4230                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4231                 if (hw->mac.type == e1000_82575) {
4232                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4233                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4234                 } else {
4235                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4236                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4237                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4238                 }
4239                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4240                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4241                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4242                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4243         }
4244         q_vector->cpu = cpu;
4245 out_no_update:
4246         put_cpu();
4247 }
4248
4249 static void igb_setup_dca(struct igb_adapter *adapter)
4250 {
4251         struct e1000_hw *hw = &adapter->hw;
4252         int i;
4253
4254         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4255                 return;
4256
4257         /* Always use CB2 mode, difference is masked in the CB driver. */
4258         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4259
4260         for (i = 0; i < adapter->num_q_vectors; i++) {
4261                 adapter->q_vector[i]->cpu = -1;
4262                 igb_update_dca(adapter->q_vector[i]);
4263         }
4264 }
4265
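/**
 * __igb_notify_dca - handle a DCA provider add/remove event for one device
 * @dev: device being walked by driver_for_each_device()
 * @data: pointer to the notifier event code
 *
 * On DCA_PROVIDER_ADD the port is registered as a DCA requester and DCA
 * is enabled on success; on DCA_PROVIDER_REMOVE (or a failed add) the
 * requester is removed and DCA is disabled in hardware.
 **/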
4266 static int __igb_notify_dca(struct device *dev, void *data)
4267 {
4268         struct net_device *netdev = dev_get_drvdata(dev);
4269         struct igb_adapter *adapter = netdev_priv(netdev);
4270         struct pci_dev *pdev = adapter->pdev;
4271         struct e1000_hw *hw = &adapter->hw;
4272         unsigned long event = *(unsigned long *)data;
4273
4274         switch (event) {
4275         case DCA_PROVIDER_ADD:
4276                 /* if already enabled, don't do it again */
4277                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4278                         break;
4279                 if (dca_add_requester(dev) == 0) {
4280                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4281                         dev_info(&pdev->dev, "DCA enabled\n");
4282                         igb_setup_dca(adapter);
4283                         break;
4284                 }
4285                 /* Fall Through since DCA is disabled. */
4286         case DCA_PROVIDER_REMOVE:
4287                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4288                         /* without this a class_device is left
4289                          * hanging around in the sysfs model */
4290                         dca_remove_requester(dev);
4291                         dev_info(&pdev->dev, "DCA disabled\n");
4292                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4293                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4294                 }
4295                 break;
4296         }
4297
4298         return 0;
4299 }
4300
4301 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4302                           void *p)
4303 {
4304         int ret_val;
4305
4306         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4307                                          __igb_notify_dca);
4308
4309         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4310 }
4311 #endif /* CONFIG_IGB_DCA */
4312
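/**
 * igb_ping_all_vfs - send a control ("ping") message to every allocated VF
 * @adapter: board private structure
 *
 * Writes a PF control message into each VF's mailbox; the clear-to-send
 * flag is included only for VFs that have completed the reset handshake.
 **/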
4313 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4314 {
4315         struct e1000_hw *hw = &adapter->hw;
4316         u32 ping;
4317         int i;
4318
4319         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4320                 ping = E1000_PF_CONTROL_MSG;
4321                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4322                         ping |= E1000_VT_MSGTYPE_CTS;
4323                 igb_write_mbx(hw, &ping, 1, i);
4324         }
4325 }
4326
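/**
 * igb_set_vf_promisc - handle a VF request to change promiscuous modes
 * @adapter: board private structure
 * @msgbuf: mailbox message from the VF
 * @vf: VF index
 *
 * Clears any previously granted promiscuous settings, then enables
 * multicast promiscuous mode if requested (unicast promiscuous is never
 * granted).  When multicast promiscuous mode is being turned off, the
 * VF's stored multicast hashes are written back to the MTA.  Returns
 * -EINVAL if unsupported flag bits remain in the message.
 **/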
4327 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4328 {
4329         struct e1000_hw *hw = &adapter->hw;
4330         u32 vmolr = rd32(E1000_VMOLR(vf));
4331         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4332
4333         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4334                             IGB_VF_FLAG_MULTI_PROMISC);
4335         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4336
4337         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4338                 vmolr |= E1000_VMOLR_MPME;
4339                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4340         } else {
4341                 /*
4342                  * if we have hashes and we are clearing a multicast promisc
4343                  * flag we need to write the hashes to the MTA as this step
4344                  * was previously skipped
4345                  */
4346                 if (vf_data->num_vf_mc_hashes > 30) {
4347                         vmolr |= E1000_VMOLR_MPME;
4348                 } else if (vf_data->num_vf_mc_hashes) {
4349                         int j;
4350                         vmolr |= E1000_VMOLR_ROMPE;
4351                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4352                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4353                 }
4354         }
4355
4356         wr32(E1000_VMOLR(vf), vmolr);
4357
4358         /* there are flags left unprocessed, likely not supported */
4359         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4360                 return -EINVAL;
4361
4362         return 0;
4364 }
4365
4366 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4367                                   u32 *msgbuf, u32 vf)
4368 {
4369         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4370         u16 *hash_list = (u16 *)&msgbuf[1];
4371         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4372         int i;
4373
4374         /* salt away the number of multicast addresses assigned
4375          * to this VF for later use to restore when the PF multicast
4376          * list changes
4377          */
4378         vf_data->num_vf_mc_hashes = n;
4379
4380         /* only up to 30 hash values supported */
4381         if (n > 30)
4382                 n = 30;
4383
4384         /* store the hashes for later use */
4385         for (i = 0; i < n; i++)
4386                 vf_data->vf_mc_hashes[i] = hash_list[i];
4387
4388         /* Flush and reset the mta with the new values */
4389         igb_set_rx_mode(adapter->netdev);
4390
4391         return 0;
4392 }
4393
4394 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4395 {
4396         struct e1000_hw *hw = &adapter->hw;
4397         struct vf_data_storage *vf_data;
4398         int i, j;
4399
4400         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4401                 u32 vmolr = rd32(E1000_VMOLR(i));
4402                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4403
4404                 vf_data = &adapter->vf_data[i];
4405
4406                 if ((vf_data->num_vf_mc_hashes > 30) ||
4407                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4408                         vmolr |= E1000_VMOLR_MPME;
4409                 } else if (vf_data->num_vf_mc_hashes) {
4410                         vmolr |= E1000_VMOLR_ROMPE;
4411                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4412                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4413                 }
4414                 wr32(E1000_VMOLR(i), vmolr);
4415         }
4416 }
4417
4418 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4419 {
4420         struct e1000_hw *hw = &adapter->hw;
4421         u32 pool_mask, reg, vid;
4422         int i;
4423
4424         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4425
4426         /* Find the vlan filter for this id */
4427         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4428                 reg = rd32(E1000_VLVF(i));
4429
4430                 /* remove the vf from the pool */
4431                 reg &= ~pool_mask;
4432
4433                 /* if pool is empty then remove entry from vfta */
4434                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4435                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4436                         vid = reg & E1000_VLVF_VLANID_MASK;
4437                         igb_vfta_set(hw, vid, false);
4438                         reg = 0;
4439                 }
4440
4441                 wr32(E1000_VLVF(i), reg);
4442         }
4443
4444         adapter->vf_data[vf].vlans_enabled = 0;
4445 }
4446
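/**
 * igb_vlvf_set - add or remove a VF pool from a shared VLAN filter entry
 * @adapter: board private structure
 * @vid: VLAN id the filter entry is for
 * @add: true to add the pool to the entry, false to remove it
 * @vf: pool/VF index being added or removed
 *
 * The VLVF table only exists on 82576 and newer parts and is only used
 * when VFs are allocated; otherwise -1 is returned.  When adding, an
 * existing entry for @vid is reused or a free slot is claimed and the
 * VID is added to the VFTA; when removal empties the pool mask the VID
 * is cleared from the VFTA.  For VF pools the RLPML (max receive packet
 * size) is grown or shrunk by 4 bytes as the VF gains its first or
 * loses its last VLAN, to account for the tag.
 **/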
4447 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4448 {
4449         struct e1000_hw *hw = &adapter->hw;
4450         u32 reg, i;
4451
4452         /* The vlvf table only exists on 82576 hardware and newer */
4453         if (hw->mac.type < e1000_82576)
4454                 return -1;
4455
4456         /* we only need to do this if VMDq is enabled */
4457         if (!adapter->vfs_allocated_count)
4458                 return -1;
4459
4460         /* Find the vlan filter for this id */
4461         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4462                 reg = rd32(E1000_VLVF(i));
4463                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4464                     vid == (reg & E1000_VLVF_VLANID_MASK))
4465                         break;
4466         }
4467
4468         if (add) {
4469                 if (i == E1000_VLVF_ARRAY_SIZE) {
4470                         /* Did not find a matching VLAN ID entry that was
4471                          * enabled.  Search for a free filter entry, i.e.
4472                          * one without the enable bit set
4473                          */
4474                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4475                                 reg = rd32(E1000_VLVF(i));
4476                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4477                                         break;
4478                         }
4479                 }
4480                 if (i < E1000_VLVF_ARRAY_SIZE) {
4481                         /* Found an enabled/available entry */
4482                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4483
4484                         /* if !enabled we need to set this up in vfta */
4485                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4486                                 /* add VID to filter table */
4487                                 igb_vfta_set(hw, vid, true);
4488                                 reg |= E1000_VLVF_VLANID_ENABLE;
4489                         }
4490                         reg &= ~E1000_VLVF_VLANID_MASK;
4491                         reg |= vid;
4492                         wr32(E1000_VLVF(i), reg);
4493
4494                         /* do not modify RLPML for PF devices */
4495                         if (vf >= adapter->vfs_allocated_count)
4496                                 return 0;
4497
4498                         if (!adapter->vf_data[vf].vlans_enabled) {
4499                                 u32 size;
4500                                 reg = rd32(E1000_VMOLR(vf));
4501                                 size = reg & E1000_VMOLR_RLPML_MASK;
4502                                 size += 4;
4503                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4504                                 reg |= size;
4505                                 wr32(E1000_VMOLR(vf), reg);
4506                         }
4507
4508                         adapter->vf_data[vf].vlans_enabled++;
4509                         return 0;
4510                 }
4511         } else {
4512                 if (i < E1000_VLVF_ARRAY_SIZE) {
4513                         /* remove vf from the pool */
4514                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4515                         /* if pool is empty then remove entry from vfta */
4516                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4517                                 reg = 0;
4518                                 igb_vfta_set(hw, vid, false);
4519                         }
4520                         wr32(E1000_VLVF(i), reg);
4521
4522                         /* do not modify RLPML for PF devices */
4523                         if (vf >= adapter->vfs_allocated_count)
4524                                 return 0;
4525
4526                         adapter->vf_data[vf].vlans_enabled--;
4527                         if (!adapter->vf_data[vf].vlans_enabled) {
4528                                 u32 size;
4529                                 reg = rd32(E1000_VMOLR(vf));
4530                                 size = reg & E1000_VMOLR_RLPML_MASK;
4531                                 size -= 4;
4532                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4533                                 reg |= size;
4534                                 wr32(E1000_VMOLR(vf), reg);
4535                         }
4536                 }
4537         }
4538         return 0;
4539 }
4540
4541 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4542 {
4543         struct e1000_hw *hw = &adapter->hw;
4544
4545         if (vid)
4546                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4547         else
4548                 wr32(E1000_VMVIR(vf), 0);
4549 }
4550
4551 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4552                                int vf, u16 vlan, u8 qos)
4553 {
4554         int err = 0;
4555         struct igb_adapter *adapter = netdev_priv(netdev);
4556
4557         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4558                 return -EINVAL;
4559         if (vlan || qos) {
4560                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4561                 if (err)
4562                         goto out;
4563                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4564                 igb_set_vmolr(adapter, vf, !vlan);
4565                 adapter->vf_data[vf].pf_vlan = vlan;
4566                 adapter->vf_data[vf].pf_qos = qos;
4567                 dev_info(&adapter->pdev->dev,
4568                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4569                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4570                         dev_warn(&adapter->pdev->dev,
4571                                  "The VF VLAN has been set,"
4572                                  " but the PF device is not up.\n");
4573                         dev_warn(&adapter->pdev->dev,
4574                                  "Bring the PF device up before"
4575                                  " attempting to use the VF device.\n");
4576                 }
4577         } else {
4578                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4579                                    false, vf);
4580                 igb_set_vmvir(adapter, vlan, vf);
4581                 igb_set_vmolr(adapter, vf, true);
4582                 adapter->vf_data[vf].pf_vlan = 0;
4583                 adapter->vf_data[vf].pf_qos = 0;
4584         }
4585 out:
4586         return err;
4587 }
4588
4589 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4590 {
4591         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4592         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4593
4594         return igb_vlvf_set(adapter, vid, add, vf);
4595 }
4596
4597 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4598 {
4599         /* clear flags, except the flag that says the PF set the MAC */
4600         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
4601         adapter->vf_data[vf].last_nack = jiffies;
4602
4603         /* reset offloads to defaults */
4604         igb_set_vmolr(adapter, vf, true);
4605
4606         /* reset vlans for device */
4607         igb_clear_vf_vfta(adapter, vf);
4608         if (adapter->vf_data[vf].pf_vlan)
4609                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4610                                     adapter->vf_data[vf].pf_vlan,
4611                                     adapter->vf_data[vf].pf_qos);
4612         else
4613                 igb_clear_vf_vfta(adapter, vf);
4614
4615         /* reset multicast table array for vf */
4616         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4617
4618         /* Flush and reset the mta with the new values */
4619         igb_set_rx_mode(adapter->netdev);
4620 }
4621
4622 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4623 {
4624         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4625
4626         /* generate a new mac address as we were hotplug removed/added */
4627         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4628                 random_ether_addr(vf_mac);
4629
4630         /* process remaining reset events */
4631         igb_vf_reset(adapter, vf);
4632 }
4633
4634 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4635 {
4636         struct e1000_hw *hw = &adapter->hw;
4637         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4638         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4639         u32 reg, msgbuf[3];
4640         u8 *addr = (u8 *)(&msgbuf[1]);
4641
4642         /* process all the same items cleared in a function level reset */
4643         igb_vf_reset(adapter, vf);
4644
4645         /* set vf mac address */
4646         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4647
4648         /* enable transmit and receive for vf */
4649         reg = rd32(E1000_VFTE);
4650         wr32(E1000_VFTE, reg | (1 << vf));
4651         reg = rd32(E1000_VFRE);
4652         wr32(E1000_VFRE, reg | (1 << vf));
4653
4654         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
4655
4656         /* reply to reset with ack and vf mac address */
4657         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4658         memcpy(addr, vf_mac, 6);
4659         igb_write_mbx(hw, msgbuf, 3, vf);
4660 }
4661
4662 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4663 {
4664         unsigned char *addr = (char *)&msg[1];
4665         int err = -1;
4666
4667         if (is_valid_ether_addr(addr))
4668                 err = igb_set_vf_mac(adapter, vf, addr);
4669
4670         return err;
4671 }
4672
4673 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4674 {
4675         struct e1000_hw *hw = &adapter->hw;
4676         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4677         u32 msg = E1000_VT_MSGTYPE_NACK;
4678
4679         /* if device isn't clear to send it shouldn't be reading either */
4680         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4681             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4682                 igb_write_mbx(hw, &msg, 1, vf);
4683                 vf_data->last_nack = jiffies;
4684         }
4685 }
4686
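/**
 * igb_rcv_msg_from_vf - read and dispatch one mailbox message from a VF
 * @adapter: board private structure
 * @vf: VF the message was posted by
 *
 * A reset request is always honoured; the remaining requests (set MAC
 * address, promiscuous mode, multicast list, max packet size, VLAN) are
 * processed only after the VF has completed a reset and been marked
 * clear-to-send.  The result is reported back with an ACK or NACK reply.
 **/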
4687 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4688 {
4689         struct pci_dev *pdev = adapter->pdev;
4690         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4691         struct e1000_hw *hw = &adapter->hw;
4692         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4693         s32 retval;
4694
4695         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4696
4697         if (retval) {
4698                 /* if receive failed revoke VF CTS status and restart init */
4699                 dev_err(&pdev->dev, "Error receiving message from VF\n");
4700                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
4701                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4702                         return;
4703                 goto out;
4704         }
4705
4706         /* this is a message we already processed, do nothing */
4707         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4708                 return;
4709
4710         /*
4711          * until the vf completes a reset it should not be
4712          * allowed to start any configuration.
4713          */
4714
4715         if (msgbuf[0] == E1000_VF_RESET) {
4716                 igb_vf_reset_msg(adapter, vf);
4717                 return;
4718         }
4719
4720         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4721                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4722                         return;
4723                 retval = -1;
4724                 goto out;
4725         }
4726
4727         switch ((msgbuf[0] & 0xFFFF)) {
4728         case E1000_VF_SET_MAC_ADDR:
4729                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4730                 break;
4731         case E1000_VF_SET_PROMISC:
4732                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4733                 break;
4734         case E1000_VF_SET_MULTICAST:
4735                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4736                 break;
4737         case E1000_VF_SET_LPE:
4738                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4739                 break;
4740         case E1000_VF_SET_VLAN:
4741                 if (adapter->vf_data[vf].pf_vlan)
4742                         retval = -1;
4743                 else
4744                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4745                 break;
4746         default:
4747                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4748                 retval = -1;
4749                 break;
4750         }
4751
4752         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4753 out:
4754         /* notify the VF of the results of what it sent us */
4755         if (retval)
4756                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4757         else
4758                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4759
4760         igb_write_mbx(hw, msgbuf, 1, vf);
4761 }
4762
4763 static void igb_msg_task(struct igb_adapter *adapter)
4764 {
4765         struct e1000_hw *hw = &adapter->hw;
4766         u32 vf;
4767
4768         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4769                 /* process any reset requests */
4770                 if (!igb_check_for_rst(hw, vf))
4771                         igb_vf_reset_event(adapter, vf);
4772
4773                 /* process any messages pending */
4774                 if (!igb_check_for_msg(hw, vf))
4775                         igb_rcv_msg_from_vf(adapter, vf);
4776
4777                 /* process any acks */
4778                 if (!igb_check_for_ack(hw, vf))
4779                         igb_rcv_ack_from_vf(adapter, vf);
4780         }
4781 }
4782
4783 /**
4784  *  igb_set_uta - Set unicast filter table address
4785  *  @adapter: board private structure
4786  *
4787  *  The unicast table address is a register array of 32-bit registers.
4788  *  The table is meant to be used in a way similar to how the MTA is used;
4789  *  however, due to certain limitations in the hardware it is necessary to
4790  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4791  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
4792  **/
4793 static void igb_set_uta(struct igb_adapter *adapter)
4794 {
4795         struct e1000_hw *hw = &adapter->hw;
4796         int i;
4797
4798         /* The UTA table only exists on 82576 hardware and newer */
4799         if (hw->mac.type < e1000_82576)
4800                 return;
4801
4802         /* we only need to do this if VMDq is enabled */
4803         if (!adapter->vfs_allocated_count)
4804                 return;
4805
4806         for (i = 0; i < hw->mac.uta_reg_count; i++)
4807                 array_wr32(E1000_UTA, i, ~0);
4808 }
4809
4810 /**
4811  * igb_intr_msi - Interrupt Handler
4812  * @irq: interrupt number
4813  * @data: pointer to a network interface device structure
4814  **/
4815 static irqreturn_t igb_intr_msi(int irq, void *data)
4816 {
4817         struct igb_adapter *adapter = data;
4818         struct igb_q_vector *q_vector = adapter->q_vector[0];
4819         struct e1000_hw *hw = &adapter->hw;
4820         /* read ICR disables interrupts using IAM */
4821         u32 icr = rd32(E1000_ICR);
4822
4823         igb_write_itr(q_vector);
4824
4825         if (icr & E1000_ICR_DRSTA)
4826                 schedule_work(&adapter->reset_task);
4827
4828         if (icr & E1000_ICR_DOUTSYNC) {
4829                 /* HW is reporting DMA is out of sync */
4830                 adapter->stats.doosync++;
4831         }
4832
4833         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4834                 hw->mac.get_link_status = 1;
4835                 if (!test_bit(__IGB_DOWN, &adapter->state))
4836                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4837         }
4838
4839         napi_schedule(&q_vector->napi);
4840
4841         return IRQ_HANDLED;
4842 }
4843
4844 /**
4845  * igb_intr - Legacy Interrupt Handler
4846  * @irq: interrupt number
4847  * @data: pointer to a network interface device structure
4848  **/
4849 static irqreturn_t igb_intr(int irq, void *data)
4850 {
4851         struct igb_adapter *adapter = data;
4852         struct igb_q_vector *q_vector = adapter->q_vector[0];
4853         struct e1000_hw *hw = &adapter->hw;
4854         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4855          * need for the IMC write */
4856         u32 icr = rd32(E1000_ICR);
4857         if (!icr)
4858                 return IRQ_NONE;  /* Not our interrupt */
4859
4860         igb_write_itr(q_vector);
4861
4862         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4863          * not set, then the adapter didn't send an interrupt */
4864         if (!(icr & E1000_ICR_INT_ASSERTED))
4865                 return IRQ_NONE;
4866
4867         if (icr & E1000_ICR_DRSTA)
4868                 schedule_work(&adapter->reset_task);
4869
4870         if (icr & E1000_ICR_DOUTSYNC) {
4871                 /* HW is reporting DMA is out of sync */
4872                 adapter->stats.doosync++;
4873         }
4874
4875         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4876                 hw->mac.get_link_status = 1;
4877                 /* guard against interrupt when we're going down */
4878                 if (!test_bit(__IGB_DOWN, &adapter->state))
4879                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4880         }
4881
4882         napi_schedule(&q_vector->napi);
4883
4884         return IRQ_HANDLED;
4885 }
4886
4887 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4888 {
4889         struct igb_adapter *adapter = q_vector->adapter;
4890         struct e1000_hw *hw = &adapter->hw;
4891
4892         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4893             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4894                 if (!adapter->msix_entries)
4895                         igb_set_itr(adapter);
4896                 else
4897                         igb_update_ring_itr(q_vector);
4898         }
4899
4900         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4901                 if (adapter->msix_entries)
4902                         wr32(E1000_EIMS, q_vector->eims_value);
4903                 else
4904                         igb_irq_enable(adapter);
4905         }
4906 }
4907
4908 /**
4909  * igb_poll - NAPI Rx polling callback
4910  * @napi: napi polling structure
4911  * @budget: count of how many packets we should handle
4912  **/
4913 static int igb_poll(struct napi_struct *napi, int budget)
4914 {
4915         struct igb_q_vector *q_vector = container_of(napi,
4916                                                      struct igb_q_vector,
4917                                                      napi);
4918         int tx_clean_complete = 1, work_done = 0;
4919
4920 #ifdef CONFIG_IGB_DCA
4921         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4922                 igb_update_dca(q_vector);
4923 #endif
4924         if (q_vector->tx_ring)
4925                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4926
4927         if (q_vector->rx_ring)
4928                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4929
4930         if (!tx_clean_complete)
4931                 work_done = budget;
4932
4933         /* If not enough Rx work done, exit the polling mode */
4934         if (work_done < budget) {
4935                 napi_complete(napi);
4936                 igb_ring_irq_enable(q_vector);
4937         }
4938
4939         return work_done;
4940 }
4941
4942 /**
4943  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4944  * @adapter: board private structure
4945  * @shhwtstamps: timestamp structure to update
4946  * @regval: unsigned 64bit system time value.
4947  *
4948  * We need to convert the system time value stored in the RX/TXSTMP registers
4949  * into a hwtstamp which can be used by the upper level timestamping functions
4950  */
4951 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4952                                    struct skb_shared_hwtstamps *shhwtstamps,
4953                                    u64 regval)
4954 {
4955         u64 ns;
4956
4957         /*
4958          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL; shift it up by
4959          * IGB_82580_TSYNC_SHIFT bits to match the clock shift we set up earlier.
4960          */
4961         if (adapter->hw.mac.type == e1000_82580)
4962                 regval <<= IGB_82580_TSYNC_SHIFT;
4963
4964         ns = timecounter_cyc2time(&adapter->clock, regval);
4965         timecompare_update(&adapter->compare, ns);
4966         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4967         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4968         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4969 }
4970
4971 /**
4972  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4973  * @q_vector: pointer to q_vector containing needed info
4974  * @skb: packet that was just sent
4975  *
4976  * If we were asked to do hardware stamping and such a time stamp is
4977  * available, then it must have been for this skb here because we
4978  * allow only one such packet into the queue.
4979  */
4980 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4981 {
4982         struct igb_adapter *adapter = q_vector->adapter;
4983         union skb_shared_tx *shtx = skb_tx(skb);
4984         struct e1000_hw *hw = &adapter->hw;
4985         struct skb_shared_hwtstamps shhwtstamps;
4986         u64 regval;
4987
4988         /* if skb does not support hw timestamp or TX stamp not valid exit */
4989         if (likely(!shtx->hardware) ||
4990             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4991                 return;
4992
4993         regval = rd32(E1000_TXSTMPL);
4994         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4995
4996         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4997         skb_tstamp_tx(skb, &shhwtstamps);
4998 }
4999
5000 /**
5001  * igb_clean_tx_irq - Reclaim resources after transmit completes
5002  * @q_vector: pointer to q_vector containing needed info
5003  * returns true if ring is completely cleaned
5004  **/
5005 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5006 {
5007         struct igb_adapter *adapter = q_vector->adapter;
5008         struct igb_ring *tx_ring = q_vector->tx_ring;
5009         struct net_device *netdev = tx_ring->netdev;
5010         struct e1000_hw *hw = &adapter->hw;
5011         struct igb_buffer *buffer_info;
5012         struct sk_buff *skb;
5013         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5014         unsigned int total_bytes = 0, total_packets = 0;
5015         unsigned int i, eop, count = 0;
5016         bool cleaned = false;
5017
5018         i = tx_ring->next_to_clean;
5019         eop = tx_ring->buffer_info[i].next_to_watch;
5020         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5021
5022         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5023                (count < tx_ring->count)) {
5024                 for (cleaned = false; !cleaned; count++) {
5025                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5026                         buffer_info = &tx_ring->buffer_info[i];
5027                         cleaned = (i == eop);
5028                         skb = buffer_info->skb;
5029
5030                         if (skb) {
5031                                 unsigned int segs, bytecount;
5032                                 /* gso_segs is currently only valid for tcp */
5033                                 segs = buffer_info->gso_segs;
5034                                 /* multiply data chunks by size of headers */
5035                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
5036                                             skb->len;
5037                                 total_packets += segs;
5038                                 total_bytes += bytecount;
5039
5040                                 igb_tx_hwtstamp(q_vector, skb);
5041                         }
5042
5043                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5044                         tx_desc->wb.status = 0;
5045
5046                         i++;
5047                         if (i == tx_ring->count)
5048                                 i = 0;
5049                 }
5050                 eop = tx_ring->buffer_info[i].next_to_watch;
5051                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5052         }
5053
5054         tx_ring->next_to_clean = i;
5055
5056         if (unlikely(count &&
5057                      netif_carrier_ok(netdev) &&
5058                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5059                 /* Make sure that anybody stopping the queue after this
5060                  * sees the new next_to_clean.
5061                  */
5062                 smp_mb();
5063                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5064                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5065                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5066                         tx_ring->tx_stats.restart_queue++;
5067                 }
5068         }
5069
5070         if (tx_ring->detect_tx_hung) {
5071                 /* Detect a transmit hang in hardware; this serializes the
5072                  * check with the clearing of time_stamp and movement of i */
5073                 tx_ring->detect_tx_hung = false;
5074                 if (tx_ring->buffer_info[i].time_stamp &&
5075                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5076                                (adapter->tx_timeout_factor * HZ)) &&
5077                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5078
5079                         /* detected Tx unit hang */
5080                         dev_err(&tx_ring->pdev->dev,
5081                                 "Detected Tx Unit Hang\n"
5082                                 "  Tx Queue             <%d>\n"
5083                                 "  TDH                  <%x>\n"
5084                                 "  TDT                  <%x>\n"
5085                                 "  next_to_use          <%x>\n"
5086                                 "  next_to_clean        <%x>\n"
5087                                 "buffer_info[next_to_clean]\n"
5088                                 "  time_stamp           <%lx>\n"
5089                                 "  next_to_watch        <%x>\n"
5090                                 "  jiffies              <%lx>\n"
5091                                 "  desc.status          <%x>\n",
5092                                 tx_ring->queue_index,
5093                                 readl(tx_ring->head),
5094                                 readl(tx_ring->tail),
5095                                 tx_ring->next_to_use,
5096                                 tx_ring->next_to_clean,
5097                                 tx_ring->buffer_info[eop].time_stamp,
5098                                 eop,
5099                                 jiffies,
5100                                 eop_desc->wb.status);
5101                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5102                 }
5103         }
5104         tx_ring->total_bytes += total_bytes;
5105         tx_ring->total_packets += total_packets;
5106         tx_ring->tx_stats.bytes += total_bytes;
5107         tx_ring->tx_stats.packets += total_packets;
5108         return (count < tx_ring->count);
5109 }
5110
5111 /**
5112  * igb_receive_skb - helper function to handle rx indications
5113  * @q_vector: structure containing interrupt and ring information
5114  * @skb: packet to send up
5115  * @vlan_tag: vlan tag for packet
5116  **/
5117 static void igb_receive_skb(struct igb_q_vector *q_vector,
5118                             struct sk_buff *skb,
5119                             u16 vlan_tag)
5120 {
5121         struct igb_adapter *adapter = q_vector->adapter;
5122
5123         if (vlan_tag)
5124                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5125                                  vlan_tag, skb);
5126         else
5127                 napi_gro_receive(&q_vector->napi, skb);
5128 }
5129
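/**
 * igb_rx_checksum_adv - set the skb checksum state from descriptor status
 * @ring: ring the packet was received on
 * @status_err: status/error bits reported in the Rx descriptor
 * @skb: packet being processed
 *
 * Marks the skb CHECKSUM_UNNECESSARY when hardware validated the TCP/UDP
 * checksum.  When checksum offload is disabled, the descriptor flags an
 * error, or the SCTP errata applies, the skb is left as CHECKSUM_NONE so
 * the stack verifies the checksum in software.
 **/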
5130 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5131                                        u32 status_err, struct sk_buff *skb)
5132 {
5133         skb->ip_summed = CHECKSUM_NONE;
5134
5135         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5136         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5137              (status_err & E1000_RXD_STAT_IXSM))
5138                 return;
5139
5140         /* TCP/UDP checksum error bit is set */
5141         if (status_err &
5142             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5143                 /*
5144                  * work around errata with sctp packets where the TCPE aka
5145                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5146                  * packets (i.e. let the stack check the crc32c)
5147                  */
5148                 if ((skb->len == 60) &&
5149                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5150                         ring->rx_stats.csum_err++;
5151
5152                 /* let the stack verify checksum errors */
5153                 return;
5154         }
5155         /* It must be a TCP or UDP packet with a valid checksum */
5156         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5157                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5158
5159         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
5160 }
5161
5162 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5163                                    struct sk_buff *skb)
5164 {
5165         struct igb_adapter *adapter = q_vector->adapter;
5166         struct e1000_hw *hw = &adapter->hw;
5167         u64 regval;
5168
5169         /*
5170          * If this bit is set, then the RX registers contain the time stamp. No
5171          * other packet will be time stamped until we read these registers, so
5172          * read the registers to make them available again. Because only one
5173          * packet can be time stamped at a time, we know that the register
5174          * values must belong to this one here and therefore we don't need to
5175          * compare any of the additional attributes stored for it.
5176          *
5177          * If nothing went wrong, then it should have a skb_shared_tx that we
5178          * can turn into a skb_shared_hwtstamps.
5179          */
5180         if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
5181                 return;
5182         if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5183                 return;
5184
5185         regval = rd32(E1000_RXSTMPL);
5186         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5187
5188         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5189 }

5190 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5191                                union e1000_adv_rx_desc *rx_desc)
5192 {
5193         /* HW will not DMA in data larger than the given buffer, even if it
5194          * parses the (NFS, of course) header to be larger.  In that case, it
5195          * fills the header buffer and spills the rest into the page.
5196          */
5197         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5198                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5199         if (hlen > rx_ring->rx_buffer_len)
5200                 hlen = rx_ring->rx_buffer_len;
5201         return hlen;
5202 }
5203
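/**
 * igb_clean_rx_irq_adv - process completed descriptors from an Rx ring
 * @q_vector: vector that owns the ring to be cleaned
 * @work_done: incremented by the number of packets processed
 * @budget: NAPI budget; cleaning stops once it has been consumed
 *
 * Unmaps the header buffer and, when used, the half-page data buffer for
 * each completed descriptor, chains multi-descriptor frames together and
 * hands finished packets to the stack via GRO after checksum, timestamp
 * and VLAN handling.  Consumed buffers are replenished in batches and
 * once more at the end of the pass.  Returns true if anything was cleaned.
 **/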
5204 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5205                                  int *work_done, int budget)
5206 {
5207         struct igb_ring *rx_ring = q_vector->rx_ring;
5208         struct net_device *netdev = rx_ring->netdev;
5209         struct pci_dev *pdev = rx_ring->pdev;
5210         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5211         struct igb_buffer *buffer_info, *next_buffer;
5212         struct sk_buff *skb;
5213         bool cleaned = false;
5214         int cleaned_count = 0;
5215         int current_node = numa_node_id();
5216         unsigned int total_bytes = 0, total_packets = 0;
5217         unsigned int i;
5218         u32 staterr;
5219         u16 length;
5220         u16 vlan_tag;
5221
5222         i = rx_ring->next_to_clean;
5223         buffer_info = &rx_ring->buffer_info[i];
5224         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5225         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5226
5227         while (staterr & E1000_RXD_STAT_DD) {
5228                 if (*work_done >= budget)
5229                         break;
5230                 (*work_done)++;
5231
5232                 skb = buffer_info->skb;
5233                 prefetch(skb->data - NET_IP_ALIGN);
5234                 buffer_info->skb = NULL;
5235
5236                 i++;
5237                 if (i == rx_ring->count)
5238                         i = 0;
5239
5240                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5241                 prefetch(next_rxd);
5242                 next_buffer = &rx_ring->buffer_info[i];
5243
5244                 length = le16_to_cpu(rx_desc->wb.upper.length);
5245                 cleaned = true;
5246                 cleaned_count++;
5247
5248                 if (buffer_info->dma) {
5249                         pci_unmap_single(pdev, buffer_info->dma,
5250                                          rx_ring->rx_buffer_len,
5251                                          PCI_DMA_FROMDEVICE);
5252                         buffer_info->dma = 0;
5253                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5254                                 skb_put(skb, length);
5255                                 goto send_up;
5256                         }
5257                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5258                 }
5259
5260                 if (length) {
5261                         pci_unmap_page(pdev, buffer_info->page_dma,
5262                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5263                         buffer_info->page_dma = 0;
5264
5265                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
5266                                                 buffer_info->page,
5267                                                 buffer_info->page_offset,
5268                                                 length);
5269
5270                         if ((page_count(buffer_info->page) != 1) ||
5271                             (page_to_nid(buffer_info->page) != current_node))
5272                                 buffer_info->page = NULL;
5273                         else
5274                                 get_page(buffer_info->page);
5275
5276                         skb->len += length;
5277                         skb->data_len += length;
5278                         skb->truesize += length;
5279                 }
5280
5281                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5282                         buffer_info->skb = next_buffer->skb;
5283                         buffer_info->dma = next_buffer->dma;
5284                         next_buffer->skb = skb;
5285                         next_buffer->dma = 0;
5286                         goto next_desc;
5287                 }
5288 send_up:
5289                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5290                         dev_kfree_skb_irq(skb);
5291                         goto next_desc;
5292                 }
5293
5294                 igb_rx_hwtstamp(q_vector, staterr, skb);
5295                 total_bytes += skb->len;
5296                 total_packets++;
5297
5298                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5299
5300                 skb->protocol = eth_type_trans(skb, netdev);
5301                 skb_record_rx_queue(skb, rx_ring->queue_index);
5302
5303                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5304                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5305
5306                 igb_receive_skb(q_vector, skb, vlan_tag);
5307
5308 next_desc:
5309                 rx_desc->wb.upper.status_error = 0;
5310
5311                 /* return some buffers to hardware, one at a time is too slow */
5312                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5313                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5314                         cleaned_count = 0;
5315                 }
5316
5317                 /* use prefetched values */
5318                 rx_desc = next_rxd;
5319                 buffer_info = next_buffer;
5320                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5321         }
5322
5323         rx_ring->next_to_clean = i;
5324         cleaned_count = igb_desc_unused(rx_ring);
5325
5326         if (cleaned_count)
5327                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5328
5329         rx_ring->total_packets += total_packets;
5330         rx_ring->total_bytes += total_bytes;
5331         rx_ring->rx_stats.packets += total_packets;
5332         rx_ring->rx_stats.bytes += total_bytes;
5333         return cleaned;
5334 }
5335
5336 /**
5337  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5338  * @rx_ring: pointer to the ring to refill
 * @cleaned_count: number of buffers to allocate
5339  **/
5340 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5341 {
5342         struct net_device *netdev = rx_ring->netdev;
5343         union e1000_adv_rx_desc *rx_desc;
5344         struct igb_buffer *buffer_info;
5345         struct sk_buff *skb;
5346         unsigned int i;
5347         int bufsz;
5348
5349         i = rx_ring->next_to_use;
5350         buffer_info = &rx_ring->buffer_info[i];
5351
5352         bufsz = rx_ring->rx_buffer_len;
5353
5354         while (cleaned_count--) {
5355                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5356
5357                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5358                         if (!buffer_info->page) {
5359                                 buffer_info->page = netdev_alloc_page(netdev);
5360                                 if (!buffer_info->page) {
5361                                         rx_ring->rx_stats.alloc_failed++;
5362                                         goto no_buffers;
5363                                 }
5364                                 buffer_info->page_offset = 0;
5365                         } else {
5366                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5367                         }
5368                         buffer_info->page_dma =
5369                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5370                                              buffer_info->page_offset,
5371                                              PAGE_SIZE / 2,
5372                                              PCI_DMA_FROMDEVICE);
5373                         if (pci_dma_mapping_error(rx_ring->pdev,
5374                                                   buffer_info->page_dma)) {
5375                                 buffer_info->page_dma = 0;
5376                                 rx_ring->rx_stats.alloc_failed++;
5377                                 goto no_buffers;
5378                         }
5379                 }
5380
5381                 skb = buffer_info->skb;
5382                 if (!skb) {
5383                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5384                         if (!skb) {
5385                                 rx_ring->rx_stats.alloc_failed++;
5386                                 goto no_buffers;
5387                         }
5388
5389                         buffer_info->skb = skb;
5390                 }
5391                 if (!buffer_info->dma) {
5392                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5393                                                           skb->data,
5394                                                           bufsz,
5395                                                           PCI_DMA_FROMDEVICE);
5396                         if (pci_dma_mapping_error(rx_ring->pdev,
5397                                                   buffer_info->dma)) {
5398                                 buffer_info->dma = 0;
5399                                 rx_ring->rx_stats.alloc_failed++;
5400                                 goto no_buffers;
5401                         }
5402                 }
5403                 /* Refresh the desc even if buffer_addrs didn't change because
5404                  * each write-back erases this info. */
5405                 if (bufsz < IGB_RXBUFFER_1024) {
5406                         rx_desc->read.pkt_addr =
5407                              cpu_to_le64(buffer_info->page_dma);
5408                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5409                 } else {
5410                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5411                         rx_desc->read.hdr_addr = 0;
5412                 }
5413
5414                 i++;
5415                 if (i == rx_ring->count)
5416                         i = 0;
5417                 buffer_info = &rx_ring->buffer_info[i];
5418         }
5419
5420 no_buffers:
5421         if (rx_ring->next_to_use != i) {
5422                 rx_ring->next_to_use = i;
5423                 if (i == 0)
5424                         i = (rx_ring->count - 1);
5425                 else
5426                         i--;
5427
5428                 /* Force memory writes to complete before letting h/w
5429                  * know there are new descriptors to fetch.  (Only
5430                  * applicable for weak-ordered memory model archs,
5431                  * such as IA-64). */
5432                 wmb();
5433                 writel(i, rx_ring->tail);
5434         }
5435 }
5436
5437 /**
5438  * igb_mii_ioctl -
5439  * igb_mii_ioctl - handle the MII PHY register ioctls
5440  * @netdev: network interface device structure
5441  * @ifr: ifreq structure carrying the mii_ioctl_data
5442  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG; SIOCSMIIREG is not supported)
5443 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5444 {
5445         struct igb_adapter *adapter = netdev_priv(netdev);
5446         struct mii_ioctl_data *data = if_mii(ifr);
5447
5448         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5449                 return -EOPNOTSUPP;
5450
5451         switch (cmd) {
5452         case SIOCGMIIPHY:
5453                 data->phy_id = adapter->hw.phy.addr;
5454                 break;
5455         case SIOCGMIIREG:
5456                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5457                                      &data->val_out))
5458                         return -EIO;
5459                 break;
5460         case SIOCSMIIREG:
5461         default:
5462                 return -EOPNOTSUPP;
5463         }
5464         return 0;
5465 }
5466
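/*
 * Illustrative userspace sketch (not part of this driver): the hardware
 * time stamping configured by igb_hwtstamp_ioctl() below is normally
 * requested through the standard SIOCSHWTSTAMP ioctl.  The interface
 * name "eth0" and the socket descriptor fd are assumptions made for the
 * example, not anything defined in this file:
 *
 *	#include <linux/net_tstamp.h>	// struct hwtstamp_config
 *	#include <linux/sockios.h>	// SIOCSHWTSTAMP
 *	#include <net/if.h>		// struct ifreq, IFNAMSIZ
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *
 *	struct hwtstamp_config cfg = { 0 };
 *	struct ifreq ifr = { 0 };
 *
 *	cfg.tx_type   = HWTSTAMP_TX_ON;
 *	cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *
 * As the comment below explains, not every filter combination is
 * supported; the driver may fall back to a broader mode (for example
 * HWTSTAMP_FILTER_ALL), so applications should re-check cfg.rx_filter
 * after the ioctl returns.
 */
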
5467 /**
5468  * igb_hwtstamp_ioctl - control hardware time stamping
5469  * @netdev: network interface device structure
5470  * @ifr: ifreq structure carrying a struct hwtstamp_config
5471  * @cmd: ioctl command (SIOCSHWTSTAMP)
5472  *
5473  * Outgoing time stamping can be enabled and disabled. Play nice and
5474  * disable it when requested, although it shouldn't cause any overhead
5475  * when no packet needs it. At most one packet in the queue may be
5476  * marked for time stamping, otherwise it would be impossible to tell
5477  * for sure to which packet the hardware time stamp belongs.
5478  *
5479  * Incoming time stamping has to be configured via the hardware
5480  * filters. Not all combinations are supported, in particular event
5481  * type has to be specified. Matching the kind of event packet is
5482  * not supported, with the exception of "all V2 events regardless of
5483  * level 2 or 4".
5484  *
5485  **/
5486 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5487                               struct ifreq *ifr, int cmd)
5488 {
5489         struct igb_adapter *adapter = netdev_priv(netdev);
5490         struct e1000_hw *hw = &adapter->hw;
5491         struct hwtstamp_config config;
5492         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5493         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5494         u32 tsync_rx_cfg = 0;
5495         bool is_l4 = false;
5496         bool is_l2 = false;
5497         u32 regval;
5498
5499         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5500                 return -EFAULT;
5501
5502         /* reserved for future extensions */
5503         if (config.flags)
5504                 return -EINVAL;
5505
5506         switch (config.tx_type) {
5507         case HWTSTAMP_TX_OFF:
5508                 tsync_tx_ctl = 0;
                /* fall through */
5509         case HWTSTAMP_TX_ON:
5510                 break;
5511         default:
5512                 return -ERANGE;
5513         }
5514
5515         switch (config.rx_filter) {
5516         case HWTSTAMP_FILTER_NONE:
5517                 tsync_rx_ctl = 0;
5518                 break;
5519         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5520         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5521         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5522         case HWTSTAMP_FILTER_ALL:
5523                 /*
5524                  * register TSYNCRXCFG must be set, therefore it is not
5525                  * possible to time stamp both Sync and Delay_Req messages
5526                  * => fall back to time stamping all packets
5527                  */
5528                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5529                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5530                 break;
5531         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5532                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5533                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5534                 is_l4 = true;
5535                 break;
5536         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5537                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5538                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5539                 is_l4 = true;
5540                 break;
5541         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5542         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5543                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5544                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5545                 is_l2 = true;
5546                 is_l4 = true;
5547                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5548                 break;
5549         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5550         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5551                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5552                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5553                 is_l2 = true;
5554                 is_l4 = true;
5555                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5556                 break;
5557         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5558         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5559         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5560                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5561                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5562                 is_l2 = true;
5563                 break;
5564         default:
5565                 return -ERANGE;
5566         }
5567
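        /* the 82575 has no per-packet time stamping support, so anything
         * other than "everything off" cannot be honoured */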
5568         if (hw->mac.type == e1000_82575) {
5569                 if (tsync_rx_ctl || tsync_tx_ctl)
5570                         return -EINVAL;
5571                 return 0;
5572         }
5573
5574         /* enable/disable TX */
5575         regval = rd32(E1000_TSYNCTXCTL);
5576         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5577         regval |= tsync_tx_ctl;
5578         wr32(E1000_TSYNCTXCTL, regval);
5579
5580         /* enable/disable RX */
5581         regval = rd32(E1000_TSYNCRXCTL);
5582         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5583         regval |= tsync_rx_ctl;
5584         wr32(E1000_TSYNCRXCTL, regval);
5585
5586         /* define which PTP packets are time stamped */
5587         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5588
5589         /* define ethertype filter for timestamped packets */
5590         if (is_l2)
5591                 wr32(E1000_ETQF(3),
5592                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5593                                  E1000_ETQF_1588 | /* enable timestamping */
5594                                  ETH_P_1588));     /* 1588 eth protocol type */
5595         else
5596                 wr32(E1000_ETQF(3), 0);
5597
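/* PTP event messages (Sync, Delay_Req) are addressed to UDP port 319 */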
5598 #define PTP_PORT 319
5599         /* L4 Queue Filter[3]: filter by destination port and protocol */
5600         if (is_l4) {
5601                 u32 ftqf = (IPPROTO_UDP /* UDP */
5602                         | E1000_FTQF_VF_BP /* VF not compared */
5603                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5604                         | E1000_FTQF_MASK); /* mask all inputs */
5605                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5606
5607                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5608                 wr32(E1000_IMIREXT(3),
5609                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5610                 if (hw->mac.type == e1000_82576) {
5611                         /* enable source port check */
5612                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5613                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5614                 }
5615                 wr32(E1000_FTQF(3), ftqf);
5616         } else {
5617                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5618         }
5619         wrfl();
5620
5621         adapter->hwtstamp_config = config;
5622
5623         /* clear TX/RX time stamp registers, just to be sure */
5624         regval = rd32(E1000_TXSTMPH);
5625         regval = rd32(E1000_RXSTMPH);
5626
5627         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5628                 -EFAULT : 0;
5629 }
5630
5631 /**
5632  * igb_ioctl - handle device-specific ioctl requests
5633  * @netdev: network interface device structure
5634  * @ifr: interface request structure
5635  * @cmd: ioctl command to execute
5636  **/
5637 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5638 {
5639         switch (cmd) {
5640         case SIOCGMIIPHY:
5641         case SIOCGMIIREG:
5642         case SIOCSMIIREG:
5643                 return igb_mii_ioctl(netdev, ifr, cmd);
5644         case SIOCSHWTSTAMP:
5645                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5646         default:
5647                 return -EOPNOTSUPP;
5648         }
5649 }
5650
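/*
 * Helpers for reading and writing words in the device's PCI Express
 * capability structure; 'reg' is an offset from the start of the capability.
 */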
5651 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5652 {
5653         struct igb_adapter *adapter = hw->back;
5654         u16 cap_offset;
5655
5656         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5657         if (!cap_offset)
5658                 return -E1000_ERR_CONFIG;
5659
5660         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5661
5662         return 0;
5663 }
5664
5665 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5666 {
5667         struct igb_adapter *adapter = hw->back;
5668         u16 cap_offset;
5669
5670         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5671         if (!cap_offset)
5672                 return -E1000_ERR_CONFIG;
5673
5674         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5675
5676         return 0;
5677 }
5678
5679 static void igb_vlan_rx_register(struct net_device *netdev,
5680                                  struct vlan_group *grp)
5681 {
5682         struct igb_adapter *adapter = netdev_priv(netdev);
5683         struct e1000_hw *hw = &adapter->hw;
5684         u32 ctrl, rctl;
5685
5686         igb_irq_disable(adapter);
5687         adapter->vlgrp = grp;
5688
5689         if (grp) {
5690                 /* enable VLAN tag insert/strip */
5691                 ctrl = rd32(E1000_CTRL);
5692                 ctrl |= E1000_CTRL_VME;
5693                 wr32(E1000_CTRL, ctrl);
5694
5695                 /* Disable CFI check */
5696                 rctl = rd32(E1000_RCTL);
5697                 rctl &= ~E1000_RCTL_CFIEN;
5698                 wr32(E1000_RCTL, rctl);
5699         } else {
5700                 /* disable VLAN tag insert/strip */
5701                 ctrl = rd32(E1000_CTRL);
5702                 ctrl &= ~E1000_CTRL_VME;
5703                 wr32(E1000_CTRL, ctrl);
5704         }
5705
5706         igb_rlpml_set(adapter);
5707
5708         if (!test_bit(__IGB_DOWN, &adapter->state))
5709                 igb_irq_enable(adapter);
5710 }
5711
5712 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5713 {
5714         struct igb_adapter *adapter = netdev_priv(netdev);
5715         struct e1000_hw *hw = &adapter->hw;
5716         int pf_id = adapter->vfs_allocated_count;
5717
5718         /* attempt to add filter to vlvf array */
5719         igb_vlvf_set(adapter, vid, true, pf_id);
5720
5721         /* add the filter since PF can receive vlans w/o entry in vlvf */
5722         igb_vfta_set(hw, vid, true);
5723 }
5724
5725 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5726 {
5727         struct igb_adapter *adapter = netdev_priv(netdev);
5728         struct e1000_hw *hw = &adapter->hw;
5729         int pf_id = adapter->vfs_allocated_count;
5730         s32 err;
5731
5732         igb_irq_disable(adapter);
5733         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5734
5735         if (!test_bit(__IGB_DOWN, &adapter->state))
5736                 igb_irq_enable(adapter);
5737
5738         /* remove vlan from VLVF table array */
5739         err = igb_vlvf_set(adapter, vid, false, pf_id);
5740
5741         /* if vid was not present in VLVF just remove it from table */
5742         if (err)
5743                 igb_vfta_set(hw, vid, false);
5744 }
5745
5746 static void igb_restore_vlan(struct igb_adapter *adapter)
5747 {
5748         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5749
5750         if (adapter->vlgrp) {
5751                 u16 vid;
5752                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5753                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5754                                 continue;
5755                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5756                 }
5757         }
5758 }
5759
5760 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5761 {
5762         struct pci_dev *pdev = adapter->pdev;
5763         struct e1000_mac_info *mac = &adapter->hw.mac;
5764
5765         mac->autoneg = 0;
5766
5767         switch (spddplx) {
5768         case SPEED_10 + DUPLEX_HALF:
5769                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5770                 break;
5771         case SPEED_10 + DUPLEX_FULL:
5772                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5773                 break;
5774         case SPEED_100 + DUPLEX_HALF:
5775                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5776                 break;
5777         case SPEED_100 + DUPLEX_FULL:
5778                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5779                 break;
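        /* 1000BASE-T can only be negotiated, not forced, so advertise
         * 1000/full and let autonegotiation do the rest */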
5780         case SPEED_1000 + DUPLEX_FULL:
5781                 mac->autoneg = 1;
5782                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5783                 break;
5784         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5785         default:
5786                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5787                 return -EINVAL;
5788         }
5789         return 0;
5790 }
5791
5792 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5793 {
5794         struct net_device *netdev = pci_get_drvdata(pdev);
5795         struct igb_adapter *adapter = netdev_priv(netdev);
5796         struct e1000_hw *hw = &adapter->hw;
5797         u32 ctrl, rctl, status;
5798         u32 wufc = adapter->wol;
5799 #ifdef CONFIG_PM
5800         int retval = 0;
5801 #endif
5802
5803         netif_device_detach(netdev);
5804
5805         if (netif_running(netdev))
5806                 igb_close(netdev);
5807
5808         igb_clear_interrupt_scheme(adapter);
5809
5810 #ifdef CONFIG_PM
5811         retval = pci_save_state(pdev);
5812         if (retval)
5813                 return retval;
5814 #endif
5815
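        /* do not arm wake on link-status change while the link is up */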
5816         status = rd32(E1000_STATUS);
5817         if (status & E1000_STATUS_LU)
5818                 wufc &= ~E1000_WUFC_LNKC;
5819
5820         if (wufc) {
5821                 igb_setup_rctl(adapter);
5822                 igb_set_rx_mode(netdev);
5823
5824                 /* turn on all-multi mode if wake on multicast is enabled */
5825                 if (wufc & E1000_WUFC_MC) {
5826                         rctl = rd32(E1000_RCTL);
5827                         rctl |= E1000_RCTL_MPE;
5828                         wr32(E1000_RCTL, rctl);
5829                 }
5830
5831                 ctrl = rd32(E1000_CTRL);
5832                 /* advertise wake from D3Cold */
5833                 #define E1000_CTRL_ADVD3WUC 0x00100000
5834                 /* phy power management enable */
5835                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5836                 ctrl |= E1000_CTRL_ADVD3WUC;
5837                 wr32(E1000_CTRL, ctrl);
5838
5839                 /* Allow time for pending master requests to run */
5840                 igb_disable_pcie_master(hw);
5841
5842                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5843                 wr32(E1000_WUFC, wufc);
5844         } else {
5845                 wr32(E1000_WUC, 0);
5846                 wr32(E1000_WUFC, 0);
5847         }
5848
5849         *enable_wake = wufc || adapter->en_mng_pt;
5850         if (!*enable_wake)
5851                 igb_power_down_link(adapter);
5852         else
5853                 igb_power_up_link(adapter);
5854
5855         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5856          * would have already happened in close and is redundant. */
5857         igb_release_hw_control(adapter);
5858
5859         pci_disable_device(pdev);
5860
5861         return 0;
5862 }
5863
5864 #ifdef CONFIG_PM
5865 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5866 {
5867         int retval;
5868         bool wake;
5869
5870         retval = __igb_shutdown(pdev, &wake);
5871         if (retval)
5872                 return retval;
5873
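        /* when wake is enabled let the PCI core arm PME and pick the
         * sleep state; otherwise disable wake and drop to D3hot */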
5874         if (wake) {
5875                 pci_prepare_to_sleep(pdev);
5876         } else {
5877                 pci_wake_from_d3(pdev, false);
5878                 pci_set_power_state(pdev, PCI_D3hot);
5879         }
5880
5881         return 0;
5882 }
5883
5884 static int igb_resume(struct pci_dev *pdev)
5885 {
5886         struct net_device *netdev = pci_get_drvdata(pdev);
5887         struct igb_adapter *adapter = netdev_priv(netdev);
5888         struct e1000_hw *hw = &adapter->hw;
5889         u32 err;
5890
5891         pci_set_power_state(pdev, PCI_D0);
5892         pci_restore_state(pdev);
5893         pci_save_state(pdev);
5894
5895         err = pci_enable_device_mem(pdev);
5896         if (err) {
5897                 dev_err(&pdev->dev,
5898                         "igb: Cannot enable PCI device from suspend\n");
5899                 return err;
5900         }
5901         pci_set_master(pdev);
5902
5903         pci_enable_wake(pdev, PCI_D3hot, 0);
5904         pci_enable_wake(pdev, PCI_D3cold, 0);
5905
5906         if (igb_init_interrupt_scheme(adapter)) {
5907                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5908                 return -ENOMEM;
5909         }
5910
5911         igb_reset(adapter);
5912
5913         /* let the f/w know that the h/w is now under the control of the
5914          * driver. */
5915         igb_get_hw_control(adapter);
5916
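        /* clear any wake-up status bits left over from the sleep state */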
5917         wr32(E1000_WUS, ~0);
5918
5919         if (netif_running(netdev)) {
5920                 err = igb_open(netdev);
5921                 if (err)
5922                         return err;
5923         }
5924
5925         netif_device_attach(netdev);
5926
5927         return 0;
5928 }
5929 #endif
5930
5931 static void igb_shutdown(struct pci_dev *pdev)
5932 {
5933         bool wake;
5934
5935         __igb_shutdown(pdev, &wake);
5936
5937         if (system_state == SYSTEM_POWER_OFF) {
5938                 pci_wake_from_d3(pdev, wake);
5939                 pci_set_power_state(pdev, PCI_D3hot);
5940         }
5941 }
5942
5943 #ifdef CONFIG_NET_POLL_CONTROLLER
5944 /*
5945  * Polling 'interrupt' - used by things like netconsole to send skbs
5946  * without having to re-enable interrupts. It's not called while
5947  * the interrupt routine is executing.
5948  */
5949 static void igb_netpoll(struct net_device *netdev)
5950 {
5951         struct igb_adapter *adapter = netdev_priv(netdev);
5952         struct e1000_hw *hw = &adapter->hw;
5953         int i;
5954
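        /* without MSI-X a single q_vector services the whole device */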
5955         if (!adapter->msix_entries) {
5956                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5957                 igb_irq_disable(adapter);
5958                 napi_schedule(&q_vector->napi);
5959                 return;
5960         }
5961
5962         for (i = 0; i < adapter->num_q_vectors; i++) {
5963                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5964                 wr32(E1000_EIMC, q_vector->eims_value);
5965                 napi_schedule(&q_vector->napi);
5966         }
5967 }
5968 #endif /* CONFIG_NET_POLL_CONTROLLER */
5969
5970 /**
5971  * igb_io_error_detected - called when PCI error is detected
5972  * @pdev: Pointer to PCI device
5973  * @state: The current pci connection state
5974  *
5975  * This function is called after a PCI bus error affecting
5976  * this device has been detected.
5977  */
5978 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5979                                               pci_channel_state_t state)
5980 {
5981         struct net_device *netdev = pci_get_drvdata(pdev);
5982         struct igb_adapter *adapter = netdev_priv(netdev);
5983
5984         netif_device_detach(netdev);
5985
5986         if (state == pci_channel_io_perm_failure)
5987                 return PCI_ERS_RESULT_DISCONNECT;
5988
5989         if (netif_running(netdev))
5990                 igb_down(adapter);
5991         pci_disable_device(pdev);
5992
5993         /* Request a slot reset. */
5994         return PCI_ERS_RESULT_NEED_RESET;
5995 }
5996
5997 /**
5998  * igb_io_slot_reset - called after the pci bus has been reset.
5999  * @pdev: Pointer to PCI device
6000  *
6001  * Restart the card from scratch, as if from a cold-boot. Implementation
6002  * resembles the first-half of the igb_resume routine.
6003  */
6004 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6005 {
6006         struct net_device *netdev = pci_get_drvdata(pdev);
6007         struct igb_adapter *adapter = netdev_priv(netdev);
6008         struct e1000_hw *hw = &adapter->hw;
6009         pci_ers_result_t result;
6010         int err;
6011
6012         if (pci_enable_device_mem(pdev)) {
6013                 dev_err(&pdev->dev,
6014                         "Cannot re-enable PCI device after reset.\n");
6015                 result = PCI_ERS_RESULT_DISCONNECT;
6016         } else {
6017                 pci_set_master(pdev);
6018                 pci_restore_state(pdev);
6019                 pci_save_state(pdev);
6020
6021                 pci_enable_wake(pdev, PCI_D3hot, 0);
6022                 pci_enable_wake(pdev, PCI_D3cold, 0);
6023
6024                 igb_reset(adapter);
6025                 wr32(E1000_WUS, ~0);
6026                 result = PCI_ERS_RESULT_RECOVERED;
6027         }
6028
6029         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6030         if (err) {
6031                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6032                         "failed 0x%0x\n", err);
6033                 /* non-fatal, continue */
6034         }
6035
6036         return result;
6037 }
6038
6039 /**
6040  * igb_io_resume - called when traffic can start flowing again.
6041  * @pdev: Pointer to PCI device
6042  *
6043  * This callback is called when the error recovery driver tells us that
6044  * it's OK to resume normal operation. Implementation resembles the
6045  * second-half of the igb_resume routine.
6046  */
6047 static void igb_io_resume(struct pci_dev *pdev)
6048 {
6049         struct net_device *netdev = pci_get_drvdata(pdev);
6050         struct igb_adapter *adapter = netdev_priv(netdev);
6051
6052         if (netif_running(netdev)) {
6053                 if (igb_up(adapter)) {
6054                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6055                         return;
6056                 }
6057         }
6058
6059         netif_device_attach(netdev);
6060
6061         /* let the f/w know that the h/w is now under the control of the
6062          * driver. */
6063         igb_get_hw_control(adapter);
6064 }
6065
6066 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6067                              u8 qsel)
6068 {
6069         u32 rar_low, rar_high;
6070         struct e1000_hw *hw = &adapter->hw;
6071
6072         /* HW expects these in little endian so we reverse the byte order
6073          * from network order (big endian) to little endian
6074          */
6075         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6076                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6077         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6078
6079         /* Indicate to hardware the Address is Valid. */
6080         rar_high |= E1000_RAH_AV;
6081
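        /* 82575 encodes the queue select as a value in the RAH pool field,
         * later MACs use the pool field as a one-bit-per-pool mask */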
6082         if (hw->mac.type == e1000_82575)
6083                 rar_high |= E1000_RAH_POOL_1 * qsel;
6084         else
6085                 rar_high |= E1000_RAH_POOL_1 << qsel;
6086
6087         wr32(E1000_RAL(index), rar_low);
6088         wrfl();
6089         wr32(E1000_RAH(index), rar_high);
6090         wrfl();
6091 }
6092
6093 static int igb_set_vf_mac(struct igb_adapter *adapter,
6094                           int vf, unsigned char *mac_addr)
6095 {
6096         struct e1000_hw *hw = &adapter->hw;
6097         /* VF MAC addresses start at the end of the receive address registers
6098          * and move towards the first; as a result a collision should not be possible */
6099         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6100
6101         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6102
6103         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6104
6105         return 0;
6106 }
6107
6108 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6109 {
6110         struct igb_adapter *adapter = netdev_priv(netdev);
6111         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6112                 return -EINVAL;
6113         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6114         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6115         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6116                                       " change effective.\n");
6117         if (test_bit(__IGB_DOWN, &adapter->state)) {
6118                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6119                          " but the PF device is not up.\n");
6120                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6121                          " attempting to use the VF device.\n");
6122         }
6123         return igb_set_vf_mac(adapter, vf, mac);
6124 }
6125
6126 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6127 {
6128         return -EOPNOTSUPP;
6129 }
6130
6131 static int igb_ndo_get_vf_config(struct net_device *netdev,
6132                                  int vf, struct ifla_vf_info *ivi)
6133 {
6134         struct igb_adapter *adapter = netdev_priv(netdev);
6135         if (vf >= adapter->vfs_allocated_count)
6136                 return -EINVAL;
6137         ivi->vf = vf;
6138         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6139         ivi->tx_rate = 0;
6140         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6141         ivi->qos = adapter->vf_data[vf].pf_qos;
6142         return 0;
6143 }
6144
6145 static void igb_vmm_control(struct igb_adapter *adapter)
6146 {
6147         struct e1000_hw *hw = &adapter->hw;
6148         u32 reg;
6149
6150         switch (hw->mac.type) {
6151         case e1000_82575:
6152         default:
6153                 /* replication is not supported for 82575 */
6154                 return;
6155         case e1000_82576:
6156                 /* notify HW that the MAC is adding vlan tags */
6157                 reg = rd32(E1000_DTXCTL);
6158                 reg |= E1000_DTXCTL_VLAN_ADDED;
6159                 wr32(E1000_DTXCTL, reg);
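                /* fall through */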
6160         case e1000_82580:
6161                 /* enable replication vlan tag stripping */
6162                 reg = rd32(E1000_RPLOLR);
6163                 reg |= E1000_RPLOLR_STRVLAN;
6164                 wr32(E1000_RPLOLR, reg);
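                /* fall through */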
6165         case e1000_i350:
6166                 /* none of the above registers are supported by i350 */
6167                 break;
6168         }
6169
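        /* enable VMDq loopback and replication only while VFs are allocated */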
6170         if (adapter->vfs_allocated_count) {
6171                 igb_vmdq_set_loopback_pf(hw, true);
6172                 igb_vmdq_set_replication_pf(hw, true);
6173         } else {
6174                 igb_vmdq_set_loopback_pf(hw, false);
6175                 igb_vmdq_set_replication_pf(hw, false);
6176         }
6177 }
6178
6179 /* igb_main.c */