1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #ifdef CONFIG_IGB_DCA
49 #include <linux/dca.h>
50 #endif
51 #include "igb.h"
52
53 #define DRV_VERSION "2.1.0-k2"
54 char igb_driver_name[] = "igb";
55 char igb_driver_version[] = DRV_VERSION;
56 static const char igb_driver_string[] =
57                                 "Intel(R) Gigabit Ethernet Network Driver";
58 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
59
60 static const struct e1000_info *igb_info_tbl[] = {
61         [board_82575] = &e1000_82575_info,
62 };
63
64 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
81         /* required last entry */
82         {0, }
83 };
84
85 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
86
87 void igb_reset(struct igb_adapter *);
88 static int igb_setup_all_tx_resources(struct igb_adapter *);
89 static int igb_setup_all_rx_resources(struct igb_adapter *);
90 static void igb_free_all_tx_resources(struct igb_adapter *);
91 static void igb_free_all_rx_resources(struct igb_adapter *);
92 static void igb_setup_mrqc(struct igb_adapter *);
93 void igb_update_stats(struct igb_adapter *);
94 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
95 static void __devexit igb_remove(struct pci_dev *pdev);
96 static int igb_sw_init(struct igb_adapter *);
97 static int igb_open(struct net_device *);
98 static int igb_close(struct net_device *);
99 static void igb_configure_tx(struct igb_adapter *);
100 static void igb_configure_rx(struct igb_adapter *);
101 static void igb_clean_all_tx_rings(struct igb_adapter *);
102 static void igb_clean_all_rx_rings(struct igb_adapter *);
103 static void igb_clean_tx_ring(struct igb_ring *);
104 static void igb_clean_rx_ring(struct igb_ring *);
105 static void igb_set_rx_mode(struct net_device *);
106 static void igb_update_phy_info(unsigned long);
107 static void igb_watchdog(unsigned long);
108 static void igb_watchdog_task(struct work_struct *);
109 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
110 static struct net_device_stats *igb_get_stats(struct net_device *);
111 static int igb_change_mtu(struct net_device *, int);
112 static int igb_set_mac(struct net_device *, void *);
113 static void igb_set_uta(struct igb_adapter *adapter);
114 static irqreturn_t igb_intr(int irq, void *);
115 static irqreturn_t igb_intr_msi(int irq, void *);
116 static irqreturn_t igb_msix_other(int irq, void *);
117 static irqreturn_t igb_msix_ring(int irq, void *);
118 #ifdef CONFIG_IGB_DCA
119 static void igb_update_dca(struct igb_q_vector *);
120 static void igb_setup_dca(struct igb_adapter *);
121 #endif /* CONFIG_IGB_DCA */
122 static bool igb_clean_tx_irq(struct igb_q_vector *);
123 static int igb_poll(struct napi_struct *, int);
124 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
125 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
126 static void igb_tx_timeout(struct net_device *);
127 static void igb_reset_task(struct work_struct *);
128 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
129 static void igb_vlan_rx_add_vid(struct net_device *, u16);
130 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
131 static void igb_restore_vlan(struct igb_adapter *);
132 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
133 static void igb_ping_all_vfs(struct igb_adapter *);
134 static void igb_msg_task(struct igb_adapter *);
135 static void igb_vmm_control(struct igb_adapter *);
136 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
137 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
138 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
139 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
140                                int vf, u16 vlan, u8 qos);
141 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
142 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
143                                  struct ifla_vf_info *ivi);
144
145 #ifdef CONFIG_PM
146 static int igb_suspend(struct pci_dev *, pm_message_t);
147 static int igb_resume(struct pci_dev *);
148 #endif
149 static void igb_shutdown(struct pci_dev *);
150 #ifdef CONFIG_IGB_DCA
151 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
152 static struct notifier_block dca_notifier = {
153         .notifier_call  = igb_notify_dca,
154         .next           = NULL,
155         .priority       = 0
156 };
157 #endif
158 #ifdef CONFIG_NET_POLL_CONTROLLER
159 /* for netdump / net console */
160 static void igb_netpoll(struct net_device *);
161 #endif
162 #ifdef CONFIG_PCI_IOV
163 static unsigned int max_vfs = 0;
164 module_param(max_vfs, uint, 0);
165 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
166                  "per physical function");
167 #endif /* CONFIG_PCI_IOV */
168
169 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
170                      pci_channel_state_t);
171 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
172 static void igb_io_resume(struct pci_dev *);
173
174 static struct pci_error_handlers igb_err_handler = {
175         .error_detected = igb_io_error_detected,
176         .slot_reset = igb_io_slot_reset,
177         .resume = igb_io_resume,
178 };
179
180
181 static struct pci_driver igb_driver = {
182         .name     = igb_driver_name,
183         .id_table = igb_pci_tbl,
184         .probe    = igb_probe,
185         .remove   = __devexit_p(igb_remove),
186 #ifdef CONFIG_PM
187         /* Power Management Hooks */
188         .suspend  = igb_suspend,
189         .resume   = igb_resume,
190 #endif
191         .shutdown = igb_shutdown,
192         .err_handler = &igb_err_handler
193 };
194
195 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
196 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
197 MODULE_LICENSE("GPL");
198 MODULE_VERSION(DRV_VERSION);
199
200 /**
201  * igb_read_clock - read raw cycle counter (to be used by time counter)
202  */
203 static cycle_t igb_read_clock(const struct cyclecounter *tc)
204 {
205         struct igb_adapter *adapter =
206                 container_of(tc, struct igb_adapter, cycles);
207         struct e1000_hw *hw = &adapter->hw;
208         u64 stamp = 0;
209         int shift = 0;
210
211         /*
212          * The timestamp latches on lowest register read. For the 82580
213          * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
214          * adjusted TIMINCA, so SYSTIMR will just read as all 0s and can be ignored.
215          */
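        /*
         * On non-82580 parts shift stays 0, so the value below is simply
         * SYSTIMH:SYSTIML; on the 82580 the upper 24 bits of SYSTIMR are
         * OR'd in beneath the SYSTIML/SYSTIMH pair, which is shifted up by
         * IGB_82580_TSYNC_SHIFT bits.
         */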
216         if (hw->mac.type == e1000_82580) {
217                 stamp = rd32(E1000_SYSTIMR) >> 8;
218                 shift = IGB_82580_TSYNC_SHIFT;
219         }
220
221         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
222         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
223         return stamp;
224 }
225
226 #ifdef DEBUG
227 /**
228  * igb_get_hw_dev_name - return device name string
229  * used by hardware layer to print debugging information
230  **/
231 char *igb_get_hw_dev_name(struct e1000_hw *hw)
232 {
233         struct igb_adapter *adapter = hw->back;
234         return adapter->netdev->name;
235 }
236
237 /**
238  * igb_get_time_str - format current NIC and system time as string
239  */
240 static char *igb_get_time_str(struct igb_adapter *adapter,
241                               char buffer[160])
242 {
243         cycle_t hw = adapter->cycles.read(&adapter->cycles);
244         struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
245         struct timespec sys;
246         struct timespec delta;
247         getnstimeofday(&sys);
248
249         delta = timespec_sub(nic, sys);
250
251         sprintf(buffer,
252                 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
253                 hw,
254                 (long)nic.tv_sec, nic.tv_nsec,
255                 (long)sys.tv_sec, sys.tv_nsec,
256                 (long)delta.tv_sec, delta.tv_nsec);
257
258         return buffer;
259 }
260 #endif
261
262 /**
263  * igb_init_module - Driver Registration Routine
264  *
265  * igb_init_module is the first routine called when the driver is
266  * loaded. All it does is register with the PCI subsystem.
267  **/
268 static int __init igb_init_module(void)
269 {
270         int ret;
271         printk(KERN_INFO "%s - version %s\n",
272                igb_driver_string, igb_driver_version);
273
274         printk(KERN_INFO "%s\n", igb_copyright);
275
276 #ifdef CONFIG_IGB_DCA
277         dca_register_notify(&dca_notifier);
278 #endif
279         ret = pci_register_driver(&igb_driver);
280         return ret;
281 }
282
283 module_init(igb_init_module);
284
285 /**
286  * igb_exit_module - Driver Exit Cleanup Routine
287  *
288  * igb_exit_module is called just before the driver is removed
289  * from memory.
290  **/
291 static void __exit igb_exit_module(void)
292 {
293 #ifdef CONFIG_IGB_DCA
294         dca_unregister_notify(&dca_notifier);
295 #endif
296         pci_unregister_driver(&igb_driver);
297 }
298
299 module_exit(igb_exit_module);
300
301 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
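/*
 * For reference, the interleaved mapping the macro above produces is:
 *   i:              0  1  2  3  4  5  6  7
 *   Q_IDX_82576(i): 0  8  1  9  2 10  3 11
 */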
302 /**
303  * igb_cache_ring_register - Descriptor ring to register mapping
304  * @adapter: board private structure to initialize
305  *
306  * Once we know the feature-set enabled for the device, we'll cache
307  * the register offset the descriptor ring is assigned to.
308  **/
309 static void igb_cache_ring_register(struct igb_adapter *adapter)
310 {
311         int i = 0, j = 0;
312         u32 rbase_offset = adapter->vfs_allocated_count;
313
314         switch (adapter->hw.mac.type) {
315         case e1000_82576:
316                 /* The queues are allocated for virtualization such that VF 0
317                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
318                  * In order to avoid collision we start at the first free queue
319                  * and continue consuming queues in the same sequence
320                  */
321                 if (adapter->vfs_allocated_count) {
322                         for (; i < adapter->rss_queues; i++)
323                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
324                                                                Q_IDX_82576(i);
325                         for (; j < adapter->rss_queues; j++)
326                                 adapter->tx_ring[j]->reg_idx = rbase_offset +
327                                                                Q_IDX_82576(j);
328                 }
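                /* fall through: any remaining queues use the sequential mapping below */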
329         case e1000_82575:
330         case e1000_82580:
331         default:
332                 for (; i < adapter->num_rx_queues; i++)
333                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
334                 for (; j < adapter->num_tx_queues; j++)
335                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
336                 break;
337         }
338 }
339
340 static void igb_free_queues(struct igb_adapter *adapter)
341 {
342         int i;
343
344         for (i = 0; i < adapter->num_tx_queues; i++) {
345                 kfree(adapter->tx_ring[i]);
346                 adapter->tx_ring[i] = NULL;
347         }
348         for (i = 0; i < adapter->num_rx_queues; i++) {
349                 kfree(adapter->rx_ring[i]);
350                 adapter->rx_ring[i] = NULL;
351         }
352         adapter->num_rx_queues = 0;
353         adapter->num_tx_queues = 0;
354 }
355
356 /**
357  * igb_alloc_queues - Allocate memory for all rings
358  * @adapter: board private structure to initialize
359  *
360  * We allocate one ring per queue at run-time since we don't know the
361  * number of queues at compile-time.
362  **/
363 static int igb_alloc_queues(struct igb_adapter *adapter)
364 {
365         struct igb_ring *ring;
366         int i;
367
368         for (i = 0; i < adapter->num_tx_queues; i++) {
369                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
370                 if (!ring)
371                         goto err;
372                 ring->count = adapter->tx_ring_count;
373                 ring->queue_index = i;
374                 ring->pdev = adapter->pdev;
375                 ring->netdev = adapter->netdev;
376                 /* For 82575, context index must be unique per ring. */
377                 if (adapter->hw.mac.type == e1000_82575)
378                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
379                 adapter->tx_ring[i] = ring;
380         }
381
382         for (i = 0; i < adapter->num_rx_queues; i++) {
383                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
384                 if (!ring)
385                         goto err;
386                 ring->count = adapter->rx_ring_count;
387                 ring->queue_index = i;
388                 ring->pdev = adapter->pdev;
389                 ring->netdev = adapter->netdev;
390                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
391                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
392                 /* set flag indicating ring supports SCTP checksum offload */
393                 if (adapter->hw.mac.type >= e1000_82576)
394                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
395                 adapter->rx_ring[i] = ring;
396         }
397
398         igb_cache_ring_register(adapter);
399
400         return 0;
401
402 err:
403         igb_free_queues(adapter);
404
405         return -ENOMEM;
406 }
407
408 #define IGB_N0_QUEUE -1
409 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
410 {
411         u32 msixbm = 0;
412         struct igb_adapter *adapter = q_vector->adapter;
413         struct e1000_hw *hw = &adapter->hw;
414         u32 ivar, index;
415         int rx_queue = IGB_N0_QUEUE;
416         int tx_queue = IGB_N0_QUEUE;
417
418         if (q_vector->rx_ring)
419                 rx_queue = q_vector->rx_ring->reg_idx;
420         if (q_vector->tx_ring)
421                 tx_queue = q_vector->tx_ring->reg_idx;
422
423         switch (hw->mac.type) {
424         case e1000_82575:
425                 /* The 82575 assigns vectors using a bitmask, which matches the
426                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
427                    or more queues to a vector, we write the appropriate bits
428                    into the MSIXBM register for that vector. */
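                /* For example, a vector serving Rx queue 2 and Tx queue 0 would
                   write (E1000_EICR_RX_QUEUE0 << 2) | E1000_EICR_TX_QUEUE0. */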
429                 if (rx_queue > IGB_N0_QUEUE)
430                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
431                 if (tx_queue > IGB_N0_QUEUE)
432                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
433                 if (!adapter->msix_entries && msix_vector == 0)
434                         msixbm |= E1000_EIMS_OTHER;
435                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
436                 q_vector->eims_value = msixbm;
437                 break;
438         case e1000_82576:
439                 /* 82576 uses a table-based method for assigning vectors.
440                    Each queue has a single entry in the table to which we write
441                    a vector number along with a "valid" bit.  Sadly, the layout
442                    of the table is somewhat counterintuitive. */
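                /* As the masks below imply, each IVAR0[n] register packs four
                   byte-wide entries: byte 0 = Rx queue n, byte 1 = Tx queue n,
                   byte 2 = Rx queue n + 8, byte 3 = Tx queue n + 8. */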
443                 if (rx_queue > IGB_N0_QUEUE) {
444                         index = (rx_queue & 0x7);
445                         ivar = array_rd32(E1000_IVAR0, index);
446                         if (rx_queue < 8) {
447                                 /* vector goes into low byte of register */
448                                 ivar = ivar & 0xFFFFFF00;
449                                 ivar |= msix_vector | E1000_IVAR_VALID;
450                         } else {
451                                 /* vector goes into third byte of register */
452                                 ivar = ivar & 0xFF00FFFF;
453                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
454                         }
455                         array_wr32(E1000_IVAR0, index, ivar);
456                 }
457                 if (tx_queue > IGB_N0_QUEUE) {
458                         index = (tx_queue & 0x7);
459                         ivar = array_rd32(E1000_IVAR0, index);
460                         if (tx_queue < 8) {
461                                 /* vector goes into second byte of register */
462                                 ivar = ivar & 0xFFFF00FF;
463                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
464                         } else {
465                                 /* vector goes into high byte of register */
466                                 ivar = ivar & 0x00FFFFFF;
467                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
468                         }
469                         array_wr32(E1000_IVAR0, index, ivar);
470                 }
471                 q_vector->eims_value = 1 << msix_vector;
472                 break;
473         case e1000_82580:
474                 /* 82580 uses the same table-based approach as 82576, but it has
475                    fewer entries; as a result we carry over for queues greater than 4. */
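                /* Concretely, each IVAR0[n] register here covers a queue pair:
                   byte 0 = Rx queue 2n, byte 1 = Tx queue 2n,
                   byte 2 = Rx queue 2n + 1, byte 3 = Tx queue 2n + 1,
                   which is what the shifts below select. */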
476                 if (rx_queue > IGB_N0_QUEUE) {
477                         index = (rx_queue >> 1);
478                         ivar = array_rd32(E1000_IVAR0, index);
479                         if (rx_queue & 0x1) {
480                                 /* vector goes into third byte of register */
481                                 ivar = ivar & 0xFF00FFFF;
482                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
483                         } else {
484                                 /* vector goes into low byte of register */
485                                 ivar = ivar & 0xFFFFFF00;
486                                 ivar |= msix_vector | E1000_IVAR_VALID;
487                         }
488                         array_wr32(E1000_IVAR0, index, ivar);
489                 }
490                 if (tx_queue > IGB_N0_QUEUE) {
491                         index = (tx_queue >> 1);
492                         ivar = array_rd32(E1000_IVAR0, index);
493                         if (tx_queue & 0x1) {
494                                 /* vector goes into high byte of register */
495                                 ivar = ivar & 0x00FFFFFF;
496                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
497                         } else {
498                                 /* vector goes into second byte of register */
499                                 ivar = ivar & 0xFFFF00FF;
500                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
501                         }
502                         array_wr32(E1000_IVAR0, index, ivar);
503                 }
504                 q_vector->eims_value = 1 << msix_vector;
505                 break;
506         default:
507                 BUG();
508                 break;
509         }
510
511         /* add q_vector eims value to global eims_enable_mask */
512         adapter->eims_enable_mask |= q_vector->eims_value;
513
514         /* configure q_vector to set itr on first interrupt */
515         q_vector->set_itr = 1;
516 }
517
518 /**
519  * igb_configure_msix - Configure MSI-X hardware
520  *
521  * igb_configure_msix sets up the hardware to properly
522  * generate MSI-X interrupts.
523  **/
524 static void igb_configure_msix(struct igb_adapter *adapter)
525 {
526         u32 tmp;
527         int i, vector = 0;
528         struct e1000_hw *hw = &adapter->hw;
529
530         adapter->eims_enable_mask = 0;
531
532         /* set vector for other causes, i.e. link changes */
533         switch (hw->mac.type) {
534         case e1000_82575:
535                 tmp = rd32(E1000_CTRL_EXT);
536                 /* enable MSI-X PBA support*/
537                 tmp |= E1000_CTRL_EXT_PBA_CLR;
538
539                 /* Auto-Mask interrupts upon ICR read. */
540                 tmp |= E1000_CTRL_EXT_EIAME;
541                 tmp |= E1000_CTRL_EXT_IRCA;
542
543                 wr32(E1000_CTRL_EXT, tmp);
544
545                 /* enable msix_other interrupt */
546                 array_wr32(E1000_MSIXBM(0), vector++,
547                                       E1000_EIMS_OTHER);
548                 adapter->eims_other = E1000_EIMS_OTHER;
549
550                 break;
551
552         case e1000_82576:
553         case e1000_82580:
554                 /* Turn on MSI-X capability first, or our settings
555                  * won't stick.  And it will take days to debug. */
556                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
557                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
558                                 E1000_GPIE_NSICR);
559
560                 /* enable msix_other interrupt */
561                 adapter->eims_other = 1 << vector;
562                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
563
564                 wr32(E1000_IVAR_MISC, tmp);
565                 break;
566         default:
567                 /* do nothing, since nothing else supports MSI-X */
568                 break;
569         } /* switch (hw->mac.type) */
570
571         adapter->eims_enable_mask |= adapter->eims_other;
572
573         for (i = 0; i < adapter->num_q_vectors; i++)
574                 igb_assign_vector(adapter->q_vector[i], vector++);
575
576         wrfl();
577 }
578
579 /**
580  * igb_request_msix - Initialize MSI-X interrupts
581  *
582  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
583  * kernel.
584  **/
585 static int igb_request_msix(struct igb_adapter *adapter)
586 {
587         struct net_device *netdev = adapter->netdev;
588         struct e1000_hw *hw = &adapter->hw;
589         int i, err = 0, vector = 0;
590
591         err = request_irq(adapter->msix_entries[vector].vector,
592                           igb_msix_other, 0, netdev->name, adapter);
593         if (err)
594                 goto out;
595         vector++;
596
597         for (i = 0; i < adapter->num_q_vectors; i++) {
598                 struct igb_q_vector *q_vector = adapter->q_vector[i];
599
600                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
601
602                 if (q_vector->rx_ring && q_vector->tx_ring)
603                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
604                                 q_vector->rx_ring->queue_index);
605                 else if (q_vector->tx_ring)
606                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
607                                 q_vector->tx_ring->queue_index);
608                 else if (q_vector->rx_ring)
609                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
610                                 q_vector->rx_ring->queue_index);
611                 else
612                         sprintf(q_vector->name, "%s-unused", netdev->name);
613
614                 err = request_irq(adapter->msix_entries[vector].vector,
615                                   igb_msix_ring, 0, q_vector->name,
616                                   q_vector);
617                 if (err)
618                         goto out;
619                 vector++;
620         }
621
622         igb_configure_msix(adapter);
623         return 0;
624 out:
625         return err;
626 }
627
628 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
629 {
630         if (adapter->msix_entries) {
631                 pci_disable_msix(adapter->pdev);
632                 kfree(adapter->msix_entries);
633                 adapter->msix_entries = NULL;
634         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
635                 pci_disable_msi(adapter->pdev);
636         }
637 }
638
639 /**
640  * igb_free_q_vectors - Free memory allocated for interrupt vectors
641  * @adapter: board private structure to initialize
642  *
643  * This function frees the memory allocated to the q_vectors.  In addition if
644  * NAPI is enabled it will delete any references to the NAPI struct prior
645  * to freeing the q_vector.
646  **/
647 static void igb_free_q_vectors(struct igb_adapter *adapter)
648 {
649         int v_idx;
650
651         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
652                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
653                 adapter->q_vector[v_idx] = NULL;
654                 if (!q_vector)
655                         continue;
656                 netif_napi_del(&q_vector->napi);
657                 kfree(q_vector);
658         }
659         adapter->num_q_vectors = 0;
660 }
661
662 /**
663  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
664  *
665  * This function resets the device so that it has 0 rx queues, tx queues, and
666  * MSI-X interrupts allocated.
667  */
668 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
669 {
670         igb_free_queues(adapter);
671         igb_free_q_vectors(adapter);
672         igb_reset_interrupt_capability(adapter);
673 }
674
675 /**
676  * igb_set_interrupt_capability - set MSI or MSI-X if supported
677  *
678  * Attempt to configure interrupts using the best available
679  * capabilities of the hardware and kernel.
680  **/
681 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
682 {
683         int err;
684         int numvecs, i;
685
686         /* Number of supported queues. */
687         adapter->num_rx_queues = adapter->rss_queues;
688         adapter->num_tx_queues = adapter->rss_queues;
689
690         /* start with one vector for every rx queue */
691         numvecs = adapter->num_rx_queues;
692
693         /* if tx handler is separate add 1 for every tx queue */
694         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
695                 numvecs += adapter->num_tx_queues;
696
697         /* store the number of vectors reserved for queues */
698         adapter->num_q_vectors = numvecs;
699
700         /* add 1 vector for link status interrupts */
701         numvecs++;
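        /*
         * For example, with 4 RSS queues and queue pairing enabled this
         * requests 5 MSI-X vectors (4 queue vectors plus the link/other
         * vector); with pairing disabled it would request 9.
         */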
702         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
703                                         GFP_KERNEL);
704         if (!adapter->msix_entries)
705                 goto msi_only;
706
707         for (i = 0; i < numvecs; i++)
708                 adapter->msix_entries[i].entry = i;
709
710         err = pci_enable_msix(adapter->pdev,
711                               adapter->msix_entries,
712                               numvecs);
713         if (err == 0)
714                 goto out;
715
716         igb_reset_interrupt_capability(adapter);
717
718         /* If we can't do MSI-X, try MSI */
719 msi_only:
720 #ifdef CONFIG_PCI_IOV
721         /* disable SR-IOV for non MSI-X configurations */
722         if (adapter->vf_data) {
723                 struct e1000_hw *hw = &adapter->hw;
724                 /* disable iov and allow time for transactions to clear */
725                 pci_disable_sriov(adapter->pdev);
726                 msleep(500);
727
728                 kfree(adapter->vf_data);
729                 adapter->vf_data = NULL;
730                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
731                 msleep(100);
732                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
733         }
734 #endif
735         adapter->vfs_allocated_count = 0;
736         adapter->rss_queues = 1;
737         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
738         adapter->num_rx_queues = 1;
739         adapter->num_tx_queues = 1;
740         adapter->num_q_vectors = 1;
741         if (!pci_enable_msi(adapter->pdev))
742                 adapter->flags |= IGB_FLAG_HAS_MSI;
743 out:
744         /* Notify the stack of the (possibly) reduced Tx Queue count. */
745         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
746         return;
747 }
748
749 /**
750  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
751  * @adapter: board private structure to initialize
752  *
753  * We allocate one q_vector per queue interrupt.  If allocation fails we
754  * return -ENOMEM.
755  **/
756 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
757 {
758         struct igb_q_vector *q_vector;
759         struct e1000_hw *hw = &adapter->hw;
760         int v_idx;
761
762         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
763                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
764                 if (!q_vector)
765                         goto err_out;
766                 q_vector->adapter = adapter;
767                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
768                 q_vector->itr_val = IGB_START_ITR;
769                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
770                 adapter->q_vector[v_idx] = q_vector;
771         }
772         return 0;
773
774 err_out:
775         igb_free_q_vectors(adapter);
776         return -ENOMEM;
777 }
778
779 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
780                                       int ring_idx, int v_idx)
781 {
782         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
783
784         q_vector->rx_ring = adapter->rx_ring[ring_idx];
785         q_vector->rx_ring->q_vector = q_vector;
786         q_vector->itr_val = adapter->rx_itr_setting;
787         if (q_vector->itr_val && q_vector->itr_val <= 3)
788                 q_vector->itr_val = IGB_START_ITR;
789 }
790
791 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
792                                       int ring_idx, int v_idx)
793 {
794         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
795
796         q_vector->tx_ring = adapter->tx_ring[ring_idx];
797         q_vector->tx_ring->q_vector = q_vector;
798         q_vector->itr_val = adapter->tx_itr_setting;
799         if (q_vector->itr_val && q_vector->itr_val <= 3)
800                 q_vector->itr_val = IGB_START_ITR;
801 }
802
803 /**
804  * igb_map_ring_to_vector - maps allocated queues to vectors
805  *
806  * This function maps the recently allocated queues to vectors.
807  **/
808 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
809 {
810         int i;
811         int v_idx = 0;
812
813         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
814             (adapter->num_q_vectors < adapter->num_tx_queues))
815                 return -ENOMEM;
816
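        /*
         * Two strategies: when there is one vector per ring (e.g. 4 Rx and
         * 4 Tx queues with 8 q_vectors) every ring gets its own vector;
         * otherwise Tx ring i shares the vector assigned to Rx ring i.
         */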
817         if (adapter->num_q_vectors >=
818             (adapter->num_rx_queues + adapter->num_tx_queues)) {
819                 for (i = 0; i < adapter->num_rx_queues; i++)
820                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
821                 for (i = 0; i < adapter->num_tx_queues; i++)
822                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
823         } else {
824                 for (i = 0; i < adapter->num_rx_queues; i++) {
825                         if (i < adapter->num_tx_queues)
826                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
827                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
828                 }
829                 for (; i < adapter->num_tx_queues; i++)
830                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
831         }
832         return 0;
833 }
834
835 /**
836  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
837  *
838  * This function initializes the interrupts and allocates all of the queues.
839  **/
840 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
841 {
842         struct pci_dev *pdev = adapter->pdev;
843         int err;
844
845         igb_set_interrupt_capability(adapter);
846
847         err = igb_alloc_q_vectors(adapter);
848         if (err) {
849                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
850                 goto err_alloc_q_vectors;
851         }
852
853         err = igb_alloc_queues(adapter);
854         if (err) {
855                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
856                 goto err_alloc_queues;
857         }
858
859         err = igb_map_ring_to_vector(adapter);
860         if (err) {
861                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
862                 goto err_map_queues;
863         }
864
865
866         return 0;
867 err_map_queues:
868         igb_free_queues(adapter);
869 err_alloc_queues:
870         igb_free_q_vectors(adapter);
871 err_alloc_q_vectors:
872         igb_reset_interrupt_capability(adapter);
873         return err;
874 }
875
876 /**
877  * igb_request_irq - initialize interrupts
878  *
879  * Attempts to configure interrupts using the best available
880  * capabilities of the hardware and kernel.
881  **/
882 static int igb_request_irq(struct igb_adapter *adapter)
883 {
884         struct net_device *netdev = adapter->netdev;
885         struct pci_dev *pdev = adapter->pdev;
886         int err = 0;
887
888         if (adapter->msix_entries) {
889                 err = igb_request_msix(adapter);
890                 if (!err)
891                         goto request_done;
892                 /* fall back to MSI */
893                 igb_clear_interrupt_scheme(adapter);
894                 if (!pci_enable_msi(adapter->pdev))
895                         adapter->flags |= IGB_FLAG_HAS_MSI;
896                 igb_free_all_tx_resources(adapter);
897                 igb_free_all_rx_resources(adapter);
898                 adapter->num_tx_queues = 1;
899                 adapter->num_rx_queues = 1;
900                 adapter->num_q_vectors = 1;
901                 err = igb_alloc_q_vectors(adapter);
902                 if (err) {
903                         dev_err(&pdev->dev,
904                                 "Unable to allocate memory for vectors\n");
905                         goto request_done;
906                 }
907                 err = igb_alloc_queues(adapter);
908                 if (err) {
909                         dev_err(&pdev->dev,
910                                 "Unable to allocate memory for queues\n");
911                         igb_free_q_vectors(adapter);
912                         goto request_done;
913                 }
914                 igb_setup_all_tx_resources(adapter);
915                 igb_setup_all_rx_resources(adapter);
916         } else {
917                 igb_assign_vector(adapter->q_vector[0], 0);
918         }
919
920         if (adapter->flags & IGB_FLAG_HAS_MSI) {
921                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
922                                   netdev->name, adapter);
923                 if (!err)
924                         goto request_done;
925
926                 /* fall back to legacy interrupts */
927                 igb_reset_interrupt_capability(adapter);
928                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
929         }
930
931         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
932                           netdev->name, adapter);
933
934         if (err)
935                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
936                         err);
937
938 request_done:
939         return err;
940 }
941
942 static void igb_free_irq(struct igb_adapter *adapter)
943 {
944         if (adapter->msix_entries) {
945                 int vector = 0, i;
946
947                 free_irq(adapter->msix_entries[vector++].vector, adapter);
948
949                 for (i = 0; i < adapter->num_q_vectors; i++) {
950                         struct igb_q_vector *q_vector = adapter->q_vector[i];
951                         free_irq(adapter->msix_entries[vector++].vector,
952                                  q_vector);
953                 }
954         } else {
955                 free_irq(adapter->pdev->irq, adapter);
956         }
957 }
958
959 /**
960  * igb_irq_disable - Mask off interrupt generation on the NIC
961  * @adapter: board private structure
962  **/
963 static void igb_irq_disable(struct igb_adapter *adapter)
964 {
965         struct e1000_hw *hw = &adapter->hw;
966
967         /*
968          * we need to be careful when disabling interrupts.  The VFs are also
969          * mapped into these registers and so clearing the bits can cause
970          * issues on the VF drivers so we only need to clear what we set
971          */
972         if (adapter->msix_entries) {
973                 u32 regval = rd32(E1000_EIAM);
974                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
975                 wr32(E1000_EIMC, adapter->eims_enable_mask);
976                 regval = rd32(E1000_EIAC);
977                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
978         }
979
980         wr32(E1000_IAM, 0);
981         wr32(E1000_IMC, ~0);
982         wrfl();
983         synchronize_irq(adapter->pdev->irq);
984 }
985
986 /**
987  * igb_irq_enable - Enable default interrupt generation settings
988  * @adapter: board private structure
989  **/
990 static void igb_irq_enable(struct igb_adapter *adapter)
991 {
992         struct e1000_hw *hw = &adapter->hw;
993
994         if (adapter->msix_entries) {
995                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
996                 u32 regval = rd32(E1000_EIAC);
997                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
998                 regval = rd32(E1000_EIAM);
999                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1000                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1001                 if (adapter->vfs_allocated_count) {
1002                         wr32(E1000_MBVFIMR, 0xFF);
1003                         ims |= E1000_IMS_VMMB;
1004                 }
1005                 if (adapter->hw.mac.type == e1000_82580)
1006                         ims |= E1000_IMS_DRSTA;
1007
1008                 wr32(E1000_IMS, ims);
1009         } else {
1010                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1011                                 E1000_IMS_DRSTA);
1012                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1013                                 E1000_IMS_DRSTA);
1014         }
1015 }
1016
1017 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1018 {
1019         struct e1000_hw *hw = &adapter->hw;
1020         u16 vid = adapter->hw.mng_cookie.vlan_id;
1021         u16 old_vid = adapter->mng_vlan_id;
1022
1023         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1024                 /* add VID to filter table */
1025                 igb_vfta_set(hw, vid, true);
1026                 adapter->mng_vlan_id = vid;
1027         } else {
1028                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1029         }
1030
1031         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1032             (vid != old_vid) &&
1033             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1034                 /* remove VID from filter table */
1035                 igb_vfta_set(hw, old_vid, false);
1036         }
1037 }
1038
1039 /**
1040  * igb_release_hw_control - release control of the h/w to f/w
1041  * @adapter: address of board private structure
1042  *
1043  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1044  * For ASF and Pass Through versions of f/w this means that the
1045  * driver is no longer loaded.
1046  *
1047  **/
1048 static void igb_release_hw_control(struct igb_adapter *adapter)
1049 {
1050         struct e1000_hw *hw = &adapter->hw;
1051         u32 ctrl_ext;
1052
1053         /* Let firmware take over control of h/w */
1054         ctrl_ext = rd32(E1000_CTRL_EXT);
1055         wr32(E1000_CTRL_EXT,
1056                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1057 }
1058
1059 /**
1060  * igb_get_hw_control - get control of the h/w from f/w
1061  * @adapter: address of board private structure
1062  *
1063  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1064  * For ASF and Pass Through versions of f/w this means that
1065  * the driver is loaded.
1066  *
1067  **/
1068 static void igb_get_hw_control(struct igb_adapter *adapter)
1069 {
1070         struct e1000_hw *hw = &adapter->hw;
1071         u32 ctrl_ext;
1072
1073         /* Let firmware know the driver has taken over */
1074         ctrl_ext = rd32(E1000_CTRL_EXT);
1075         wr32(E1000_CTRL_EXT,
1076                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1077 }
1078
1079 /**
1080  * igb_configure - configure the hardware for RX and TX
1081  * @adapter: private board structure
1082  **/
1083 static void igb_configure(struct igb_adapter *adapter)
1084 {
1085         struct net_device *netdev = adapter->netdev;
1086         int i;
1087
1088         igb_get_hw_control(adapter);
1089         igb_set_rx_mode(netdev);
1090
1091         igb_restore_vlan(adapter);
1092
1093         igb_setup_tctl(adapter);
1094         igb_setup_mrqc(adapter);
1095         igb_setup_rctl(adapter);
1096
1097         igb_configure_tx(adapter);
1098         igb_configure_rx(adapter);
1099
1100         igb_rx_fifo_flush_82575(&adapter->hw);
1101
1102         /* call igb_desc_unused which always leaves
1103          * at least 1 descriptor unused to make sure
1104          * next_to_use != next_to_clean */
1105         for (i = 0; i < adapter->num_rx_queues; i++) {
1106                 struct igb_ring *ring = adapter->rx_ring[i];
1107                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1108         }
1109 }
1110
1111 /**
1112  * igb_power_up_link - Power up the phy/serdes link
1113  * @adapter: address of board private structure
1114  **/
1115 void igb_power_up_link(struct igb_adapter *adapter)
1116 {
1117         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1118                 igb_power_up_phy_copper(&adapter->hw);
1119         else
1120                 igb_power_up_serdes_link_82575(&adapter->hw);
1121 }
1122
1123 /**
1124  * igb_power_down_link - Power down the phy/serdes link
1125  * @adapter: address of board private structure
1126  */
1127 static void igb_power_down_link(struct igb_adapter *adapter)
1128 {
1129         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1130                 igb_power_down_phy_copper_82575(&adapter->hw);
1131         else
1132                 igb_shutdown_serdes_link_82575(&adapter->hw);
1133 }
1134
1135 /**
1136  * igb_up - Open the interface and prepare it to handle traffic
1137  * @adapter: board private structure
1138  **/
1139 int igb_up(struct igb_adapter *adapter)
1140 {
1141         struct e1000_hw *hw = &adapter->hw;
1142         int i;
1143
1144         /* hardware has been reset, we need to reload some things */
1145         igb_configure(adapter);
1146
1147         clear_bit(__IGB_DOWN, &adapter->state);
1148
1149         for (i = 0; i < adapter->num_q_vectors; i++) {
1150                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1151                 napi_enable(&q_vector->napi);
1152         }
1153         if (adapter->msix_entries)
1154                 igb_configure_msix(adapter);
1155         else
1156                 igb_assign_vector(adapter->q_vector[0], 0);
1157
1158         /* Clear any pending interrupts. */
1159         rd32(E1000_ICR);
1160         igb_irq_enable(adapter);
1161
1162         /* notify VFs that reset has been completed */
1163         if (adapter->vfs_allocated_count) {
1164                 u32 reg_data = rd32(E1000_CTRL_EXT);
1165                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1166                 wr32(E1000_CTRL_EXT, reg_data);
1167         }
1168
1169         netif_tx_start_all_queues(adapter->netdev);
1170
1171         /* start the watchdog. */
1172         hw->mac.get_link_status = 1;
1173         schedule_work(&adapter->watchdog_task);
1174
1175         return 0;
1176 }
1177
1178 void igb_down(struct igb_adapter *adapter)
1179 {
1180         struct net_device *netdev = adapter->netdev;
1181         struct e1000_hw *hw = &adapter->hw;
1182         u32 tctl, rctl;
1183         int i;
1184
1185         /* signal that we're down so the interrupt handler does not
1186          * reschedule our watchdog timer */
1187         set_bit(__IGB_DOWN, &adapter->state);
1188
1189         /* disable receives in the hardware */
1190         rctl = rd32(E1000_RCTL);
1191         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1192         /* flush and sleep below */
1193
1194         netif_tx_stop_all_queues(netdev);
1195
1196         /* disable transmits in the hardware */
1197         tctl = rd32(E1000_TCTL);
1198         tctl &= ~E1000_TCTL_EN;
1199         wr32(E1000_TCTL, tctl);
1200         /* flush both disables and wait for them to finish */
1201         wrfl();
1202         msleep(10);
1203
1204         for (i = 0; i < adapter->num_q_vectors; i++) {
1205                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1206                 napi_disable(&q_vector->napi);
1207         }
1208
1209         igb_irq_disable(adapter);
1210
1211         del_timer_sync(&adapter->watchdog_timer);
1212         del_timer_sync(&adapter->phy_info_timer);
1213
1214         netif_carrier_off(netdev);
1215
1216         /* record the stats before reset */
1217         igb_update_stats(adapter);
1218
1219         adapter->link_speed = 0;
1220         adapter->link_duplex = 0;
1221
1222         if (!pci_channel_offline(adapter->pdev))
1223                 igb_reset(adapter);
1224         igb_clean_all_tx_rings(adapter);
1225         igb_clean_all_rx_rings(adapter);
1226 #ifdef CONFIG_IGB_DCA
1227
1228         /* since we reset the hardware, DCA settings were cleared */
1229         igb_setup_dca(adapter);
1230 #endif
1231 }
1232
1233 void igb_reinit_locked(struct igb_adapter *adapter)
1234 {
1235         WARN_ON(in_interrupt());
1236         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1237                 msleep(1);
1238         igb_down(adapter);
1239         igb_up(adapter);
1240         clear_bit(__IGB_RESETTING, &adapter->state);
1241 }
1242
1243 void igb_reset(struct igb_adapter *adapter)
1244 {
1245         struct pci_dev *pdev = adapter->pdev;
1246         struct e1000_hw *hw = &adapter->hw;
1247         struct e1000_mac_info *mac = &hw->mac;
1248         struct e1000_fc_info *fc = &hw->fc;
1249         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1250         u16 hwm;
1251
1252         /* Repartition PBA for greater than 9k MTU.
1253          * To take effect, CTRL.RST is required.
1254          */
1255         switch (mac->type) {
1256         case e1000_82580:
1257                 pba = rd32(E1000_RXPBS);
1258                 pba = igb_rxpbs_adjust_82580(pba);
1259                 break;
1260         case e1000_82576:
1261                 pba = rd32(E1000_RXPBS);
1262                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1263                 break;
1264         case e1000_82575:
1265         default:
1266                 pba = E1000_PBA_34K;
1267                 break;
1268         }
1269
1270         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1271             (mac->type < e1000_82576)) {
1272                 /* adjust PBA for jumbo frames */
1273                 wr32(E1000_PBA, pba);
1274
1275                 /* To maintain wire speed transmits, the Tx FIFO should be
1276                  * large enough to accommodate two full transmit packets,
1277                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1278                  * the Rx FIFO should be large enough to accommodate at least
1279                  * one full receive packet and is similarly rounded up and
1280                  * expressed in KB. */
1281                 pba = rd32(E1000_PBA);
1282                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1283                 tx_space = pba >> 16;
1284                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1285                 pba &= 0xffff;
1286                 /* the Tx FIFO also stores 16 bytes of information about the Tx packet,
1287                  * but doesn't include the Ethernet FCS because hardware appends it */
1288                 min_tx_space = (adapter->max_frame_size +
1289                                 sizeof(union e1000_adv_tx_desc) -
1290                                 ETH_FCS_LEN) * 2;
1291                 min_tx_space = ALIGN(min_tx_space, 1024);
1292                 min_tx_space >>= 10;
1293                 /* software strips receive CRC, so leave room for it */
1294                 min_rx_space = adapter->max_frame_size;
1295                 min_rx_space = ALIGN(min_rx_space, 1024);
1296                 min_rx_space >>= 10;
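                /*
                 * Rough example, assuming a 9000-byte MTU (max_frame_size of
                 * roughly 9022 bytes): min_tx_space = (9022 + 16 - 4) * 2 =
                 * 18068 bytes (frame + descriptor info - FCS, doubled),
                 * rounded up to 18 KB; min_rx_space = 9022 bytes, rounded up
                 * to 9 KB.
                 */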
1297
1298                 /* If current Tx allocation is less than the min Tx FIFO size,
1299                  * and the min Tx FIFO size is less than the current Rx FIFO
1300                  * allocation, take space away from current Rx allocation */
1301                 if (tx_space < min_tx_space &&
1302                     ((min_tx_space - tx_space) < pba)) {
1303                         pba = pba - (min_tx_space - tx_space);
1304
1305                         /* if short on rx space, rx wins and must trump tx
1306                          * adjustment */
1307                         if (pba < min_rx_space)
1308                                 pba = min_rx_space;
1309                 }
1310                 wr32(E1000_PBA, pba);
1311         }
1312
1313         /* flow control settings */
1314         /* The high water mark must be low enough to fit one full frame
1315          * (or the size used for early receive) above it in the Rx FIFO.
1316          * Set it to the lower of:
1317          * - 90% of the Rx FIFO size, or
1318          * - the full Rx FIFO size minus two full frames */
1319         hwm = min(((pba << 10) * 9 / 10),
1320                         ((pba << 10) - 2 * adapter->max_frame_size));
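        /*
         * Worked example, assuming the default 1500-byte MTU (max_frame_size
         * of 1522) and a 34 KB Rx PBA: min(34816 * 9 / 10, 34816 - 2 * 1522)
         * = min(31334, 31772) = 31334 bytes, which the masking below rounds
         * down to a 31328-byte high water mark.
         */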
1321
1322         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1323         fc->low_water = fc->high_water - 16;
1324         fc->pause_time = 0xFFFF;
1325         fc->send_xon = 1;
1326         fc->current_mode = fc->requested_mode;
1327
1328         /* disable receive for all VFs and wait one second */
1329         if (adapter->vfs_allocated_count) {
1330                 int i;
1331                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1332                         adapter->vf_data[i].flags = 0;
1333
1334                 /* ping all the active vfs to let them know we are going down */
1335                 igb_ping_all_vfs(adapter);
1336
1337                 /* disable transmits and receives */
1338                 wr32(E1000_VFRE, 0);
1339                 wr32(E1000_VFTE, 0);
1340         }
1341
1342         /* Allow time for pending master requests to run */
1343         hw->mac.ops.reset_hw(hw);
1344         wr32(E1000_WUC, 0);
1345
1346         if (hw->mac.ops.init_hw(hw))
1347                 dev_err(&pdev->dev, "Hardware Error\n");
1348
1349         if (hw->mac.type == e1000_82580) {
1350                 u32 reg = rd32(E1000_PCIEMISC);
1351                 wr32(E1000_PCIEMISC,
1352                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1353         }
1354         if (!netif_running(adapter->netdev))
1355                 igb_power_down_link(adapter);
1356
1357         igb_update_mng_vlan(adapter);
1358
1359         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1360         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1361
1362         igb_get_phy_info(hw);
1363 }
1364
1365 static const struct net_device_ops igb_netdev_ops = {
1366         .ndo_open               = igb_open,
1367         .ndo_stop               = igb_close,
1368         .ndo_start_xmit         = igb_xmit_frame_adv,
1369         .ndo_get_stats          = igb_get_stats,
1370         .ndo_set_rx_mode        = igb_set_rx_mode,
1371         .ndo_set_multicast_list = igb_set_rx_mode,
1372         .ndo_set_mac_address    = igb_set_mac,
1373         .ndo_change_mtu         = igb_change_mtu,
1374         .ndo_do_ioctl           = igb_ioctl,
1375         .ndo_tx_timeout         = igb_tx_timeout,
1376         .ndo_validate_addr      = eth_validate_addr,
1377         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1378         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1379         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1380         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1381         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1382         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1383         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1384 #ifdef CONFIG_NET_POLL_CONTROLLER
1385         .ndo_poll_controller    = igb_netpoll,
1386 #endif
1387 };
1388
1389 /**
1390  * igb_probe - Device Initialization Routine
1391  * @pdev: PCI device information struct
1392  * @ent: entry in igb_pci_tbl
1393  *
1394  * Returns 0 on success, negative on failure
1395  *
1396  * igb_probe initializes an adapter identified by a pci_dev structure.
1397  * The OS initialization, configuring of the adapter private structure,
1398  * and a hardware reset occur.
1399  **/
1400 static int __devinit igb_probe(struct pci_dev *pdev,
1401                                const struct pci_device_id *ent)
1402 {
1403         struct net_device *netdev;
1404         struct igb_adapter *adapter;
1405         struct e1000_hw *hw;
1406         u16 eeprom_data = 0;
1407         static int global_quad_port_a; /* global quad port a indication */
1408         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1409         unsigned long mmio_start, mmio_len;
1410         int err, pci_using_dac;
1411         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1412         u32 part_num;
1413
1414         err = pci_enable_device_mem(pdev);
1415         if (err)
1416                 return err;
1417
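             /* prefer 64-bit DMA, falling back to a 32-bit mask if the
              * platform cannot provide it */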
1418         pci_using_dac = 0;
1419         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1420         if (!err) {
1421                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1422                 if (!err)
1423                         pci_using_dac = 1;
1424         } else {
1425                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1426                 if (err) {
1427                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1428                         if (err) {
1429                                 dev_err(&pdev->dev, "No usable DMA "
1430                                         "configuration, aborting\n");
1431                                 goto err_dma;
1432                         }
1433                 }
1434         }
1435
1436         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1437                                            IORESOURCE_MEM),
1438                                            igb_driver_name);
1439         if (err)
1440                 goto err_pci_reg;
1441
1442         pci_enable_pcie_error_reporting(pdev);
1443
1444         pci_set_master(pdev);
1445         pci_save_state(pdev);
1446
1447         err = -ENOMEM;
1448         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1449                                    IGB_ABS_MAX_TX_QUEUES);
1450         if (!netdev)
1451                 goto err_alloc_etherdev;
1452
1453         SET_NETDEV_DEV(netdev, &pdev->dev);
1454
1455         pci_set_drvdata(pdev, netdev);
1456         adapter = netdev_priv(netdev);
1457         adapter->netdev = netdev;
1458         adapter->pdev = pdev;
1459         hw = &adapter->hw;
1460         hw->back = adapter;
1461         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1462
1463         mmio_start = pci_resource_start(pdev, 0);
1464         mmio_len = pci_resource_len(pdev, 0);
1465
1466         err = -EIO;
1467         hw->hw_addr = ioremap(mmio_start, mmio_len);
1468         if (!hw->hw_addr)
1469                 goto err_ioremap;
1470
1471         netdev->netdev_ops = &igb_netdev_ops;
1472         igb_set_ethtool_ops(netdev);
1473         netdev->watchdog_timeo = 5 * HZ;
1474
1475         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1476
1477         netdev->mem_start = mmio_start;
1478         netdev->mem_end = mmio_start + mmio_len;
1479
1480         /* PCI config space info */
1481         hw->vendor_id = pdev->vendor;
1482         hw->device_id = pdev->device;
1483         hw->revision_id = pdev->revision;
1484         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1485         hw->subsystem_device_id = pdev->subsystem_device;
1486
1487         /* Copy the default MAC, PHY and NVM function pointers */
1488         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1489         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1490         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1491         /* Initialize skew-specific constants */
1492         err = ei->get_invariants(hw);
1493         if (err)
1494                 goto err_sw_init;
1495
1496         /* setup the private structure */
1497         err = igb_sw_init(adapter);
1498         if (err)
1499                 goto err_sw_init;
1500
1501         igb_get_bus_info_pcie(hw);
1502
1503         hw->phy.autoneg_wait_to_complete = false;
1504
1505         /* Copper options */
1506         if (hw->phy.media_type == e1000_media_type_copper) {
1507                 hw->phy.mdix = AUTO_ALL_MODES;
1508                 hw->phy.disable_polarity_correction = false;
1509                 hw->phy.ms_type = e1000_ms_hw_default;
1510         }
1511
1512         if (igb_check_reset_block(hw))
1513                 dev_info(&pdev->dev,
1514                         "PHY reset is blocked due to SOL/IDER session.\n");
1515
1516         netdev->features = NETIF_F_SG |
1517                            NETIF_F_IP_CSUM |
1518                            NETIF_F_HW_VLAN_TX |
1519                            NETIF_F_HW_VLAN_RX |
1520                            NETIF_F_HW_VLAN_FILTER;
1521
1522         netdev->features |= NETIF_F_IPV6_CSUM;
1523         netdev->features |= NETIF_F_TSO;
1524         netdev->features |= NETIF_F_TSO6;
1525         netdev->features |= NETIF_F_GRO;
1526
1527         netdev->vlan_features |= NETIF_F_TSO;
1528         netdev->vlan_features |= NETIF_F_TSO6;
1529         netdev->vlan_features |= NETIF_F_IP_CSUM;
1530         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1531         netdev->vlan_features |= NETIF_F_SG;
1532
1533         if (pci_using_dac)
1534                 netdev->features |= NETIF_F_HIGHDMA;
1535
1536         if (hw->mac.type >= e1000_82576)
1537                 netdev->features |= NETIF_F_SCTP_CSUM;
1538
1539         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1540
1541         /* before reading the NVM, reset the controller to put the device in a
1542          * known good starting state */
1543         hw->mac.ops.reset_hw(hw);
1544
1545         /* make sure the NVM is good */
1546         if (igb_validate_nvm_checksum(hw) < 0) {
1547                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1548                 err = -EIO;
1549                 goto err_eeprom;
1550         }
1551
1552         /* copy the MAC address out of the NVM */
1553         if (hw->mac.ops.read_mac_addr(hw))
1554                 dev_err(&pdev->dev, "NVM Read Error\n");
1555
1556         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1557         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1558
1559         if (!is_valid_ether_addr(netdev->perm_addr)) {
1560                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1561                 err = -EIO;
1562                 goto err_eeprom;
1563         }
1564
1565         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1566                     (unsigned long) adapter);
1567         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1568                     (unsigned long) adapter);
1569
1570         INIT_WORK(&adapter->reset_task, igb_reset_task);
1571         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1572
1573         /* Initialize link properties that are user-changeable */
1574         adapter->fc_autoneg = true;
1575         hw->mac.autoneg = true;
1576         hw->phy.autoneg_advertised = 0x2f;
1577
1578         hw->fc.requested_mode = e1000_fc_default;
1579         hw->fc.current_mode = e1000_fc_default;
1580
1581         igb_validate_mdi_setting(hw);
1582
1583         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1584          * enable the ACPI Magic Packet filter
1585          */
1586
1587         if (hw->bus.func == 0)
1588                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1589         else if (hw->mac.type == e1000_82580)
1590                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1591                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1592                                  &eeprom_data);
1593         else if (hw->bus.func == 1)
1594                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1595
1596         if (eeprom_data & eeprom_apme_mask)
1597                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1598
1599         /* now that we have the eeprom settings, apply the special cases where
1600          * the eeprom may be wrong or the board simply won't support wake on
1601          * lan on a particular port */
1602         switch (pdev->device) {
1603         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1604                 adapter->eeprom_wol = 0;
1605                 break;
1606         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1607         case E1000_DEV_ID_82576_FIBER:
1608         case E1000_DEV_ID_82576_SERDES:
1609                 /* Wake events only supported on port A for dual fiber
1610                  * regardless of eeprom setting */
1611                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1612                         adapter->eeprom_wol = 0;
1613                 break;
1614         case E1000_DEV_ID_82576_QUAD_COPPER:
1615         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
1616                 /* if quad port adapter, disable WoL on all but port A */
1617                 if (global_quad_port_a != 0)
1618                         adapter->eeprom_wol = 0;
1619                 else
1620                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1621                 /* Reset for multiple quad port adapters */
1622                 if (++global_quad_port_a == 4)
1623                         global_quad_port_a = 0;
1624                 break;
1625         }
1626
1627         /* initialize the wol settings based on the eeprom settings */
1628         adapter->wol = adapter->eeprom_wol;
1629         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1630
1631         /* reset the hardware with the new settings */
1632         igb_reset(adapter);
1633
1634         /* let the f/w know that the h/w is now under the control of the
1635          * driver. */
1636         igb_get_hw_control(adapter);
1637
1638         strcpy(netdev->name, "eth%d");
1639         err = register_netdev(netdev);
1640         if (err)
1641                 goto err_register;
1642
1643         /* carrier off reporting is important to ethtool even BEFORE open */
1644         netif_carrier_off(netdev);
1645
1646 #ifdef CONFIG_IGB_DCA
1647         if (dca_add_requester(&pdev->dev) == 0) {
1648                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1649                 dev_info(&pdev->dev, "DCA enabled\n");
1650                 igb_setup_dca(adapter);
1651         }
1652
1653 #endif
1654         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1655         /* print bus type/speed/width info */
1656         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1657                  netdev->name,
1658                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1659                                                             "unknown"),
1660                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1661                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1662                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1663                    "unknown"),
1664                  netdev->dev_addr);
1665
1666         igb_read_part_num(hw, &part_num);
1667         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1668                 (part_num >> 8), (part_num & 0xff));
1669
1670         dev_info(&pdev->dev,
1671                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1672                 adapter->msix_entries ? "MSI-X" :
1673                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1674                 adapter->num_rx_queues, adapter->num_tx_queues);
1675
1676         return 0;
1677
1678 err_register:
1679         igb_release_hw_control(adapter);
1680 err_eeprom:
1681         if (!igb_check_reset_block(hw))
1682                 igb_reset_phy(hw);
1683
1684         if (hw->flash_address)
1685                 iounmap(hw->flash_address);
1686 err_sw_init:
1687         igb_clear_interrupt_scheme(adapter);
1688         iounmap(hw->hw_addr);
1689 err_ioremap:
1690         free_netdev(netdev);
1691 err_alloc_etherdev:
1692         pci_release_selected_regions(pdev,
1693                                      pci_select_bars(pdev, IORESOURCE_MEM));
1694 err_pci_reg:
1695 err_dma:
1696         pci_disable_device(pdev);
1697         return err;
1698 }
1699
1700 /**
1701  * igb_remove - Device Removal Routine
1702  * @pdev: PCI device information struct
1703  *
1704  * igb_remove is called by the PCI subsystem to alert the driver
1705  * that it should release a PCI device.  This could be caused by a
1706  * Hot-Plug event, or because the driver is going to be removed from
1707  * memory.
1708  **/
1709 static void __devexit igb_remove(struct pci_dev *pdev)
1710 {
1711         struct net_device *netdev = pci_get_drvdata(pdev);
1712         struct igb_adapter *adapter = netdev_priv(netdev);
1713         struct e1000_hw *hw = &adapter->hw;
1714
1715         /* flush_scheduled_work() may reschedule our watchdog task, so
1716          * explicitly disable watchdog tasks from being rescheduled  */
1717         set_bit(__IGB_DOWN, &adapter->state);
1718         del_timer_sync(&adapter->watchdog_timer);
1719         del_timer_sync(&adapter->phy_info_timer);
1720
1721         flush_scheduled_work();
1722
1723 #ifdef CONFIG_IGB_DCA
1724         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1725                 dev_info(&pdev->dev, "DCA disabled\n");
1726                 dca_remove_requester(&pdev->dev);
1727                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1728                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1729         }
1730 #endif
1731
1732         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1733          * would have already happened in close and is redundant. */
1734         igb_release_hw_control(adapter);
1735
1736         unregister_netdev(netdev);
1737
1738         igb_clear_interrupt_scheme(adapter);
1739
1740 #ifdef CONFIG_PCI_IOV
1741         /* reclaim resources allocated to VFs */
1742         if (adapter->vf_data) {
1743                 /* disable iov and allow time for transactions to clear */
1744                 pci_disable_sriov(pdev);
1745                 msleep(500);
1746
1747                 kfree(adapter->vf_data);
1748                 adapter->vf_data = NULL;
1749                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1750                 msleep(100);
1751                 dev_info(&pdev->dev, "IOV Disabled\n");
1752         }
1753 #endif
1754
1755         iounmap(hw->hw_addr);
1756         if (hw->flash_address)
1757                 iounmap(hw->flash_address);
1758         pci_release_selected_regions(pdev,
1759                                      pci_select_bars(pdev, IORESOURCE_MEM));
1760
1761         free_netdev(netdev);
1762
1763         pci_disable_pcie_error_reporting(pdev);
1764
1765         pci_disable_device(pdev);
1766 }
1767
1768 /**
1769  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1770  * @adapter: board private structure to initialize
1771  *
1772  * This function initializes the vf specific data storage and then attempts to
1773  * allocate the VFs.  The reason for ordering it this way is that it is much
1774  * more expensive time-wise to disable SR-IOV than it is to allocate and free
1775  * the memory for the VFs.
1776  **/
1777 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
1778 {
1779 #ifdef CONFIG_PCI_IOV
1780         struct pci_dev *pdev = adapter->pdev;
1781
1782         if (adapter->vfs_allocated_count > 7)
1783                 adapter->vfs_allocated_count = 7;
1784
1785         if (adapter->vfs_allocated_count) {
1786                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1787                                            sizeof(struct vf_data_storage),
1788                                            GFP_KERNEL);
1789                 /* if allocation failed then we do not support SR-IOV */
1790                 if (!adapter->vf_data) {
1791                         adapter->vfs_allocated_count = 0;
1792                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
1793                                 "Data Storage\n");
1794                 }
1795         }
1796
1797         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1798                 kfree(adapter->vf_data);
1799                 adapter->vf_data = NULL;
1800 #endif /* CONFIG_PCI_IOV */
1801                 adapter->vfs_allocated_count = 0;
1802 #ifdef CONFIG_PCI_IOV
1803         } else {
1804                 unsigned char mac_addr[ETH_ALEN];
1805                 int i;
1806                 dev_info(&pdev->dev, "%d vfs allocated\n",
1807                          adapter->vfs_allocated_count);
1808                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1809                         random_ether_addr(mac_addr);
1810                         igb_set_vf_mac(adapter, i, mac_addr);
1811                 }
1812         }
1813 #endif /* CONFIG_PCI_IOV */
1814 }
1815
1816
1817 /**
1818  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
1819  * @adapter: board private structure to initialize
1820  *
1821  * igb_init_hw_timer initializes the function pointers and values for the
1822  * hardware timer.
1823  **/
1824 static void igb_init_hw_timer(struct igb_adapter *adapter)
1825 {
1826         struct e1000_hw *hw = &adapter->hw;
1827
1828         switch (hw->mac.type) {
1829         case e1000_82580:
1830                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1831                 adapter->cycles.read = igb_read_clock;
1832                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1833                 adapter->cycles.mult = 1;
1834                 /*
1835                  * The 82580 timesync updates the system timer by 8ns every 8ns
1836                  * and the value cannot be shifted.  Instead we need to shift
1837                  * the registers to generate a 64bit timer value.  As a result
1838                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
1839                  * 24 in order to generate a larger value for synchronization.
1840                  */
1841                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
1842                 /* disable system timer temporarily by setting bit 31 */
1843                 wr32(E1000_TSAUXC, 0x80000000);
1844                 wrfl();
1845
1846                 /* Set registers so that rollover occurs soon to test this. */
1847                 wr32(E1000_SYSTIMR, 0x00000000);
1848                 wr32(E1000_SYSTIML, 0x80000000);
1849                 wr32(E1000_SYSTIMH, 0x000000FF);
1850                 wrfl();
1851
1852                 /* enable system timer by clearing bit 31 */
1853                 wr32(E1000_TSAUXC, 0x0);
1854                 wrfl();
1855
1856                 timecounter_init(&adapter->clock,
1857                                  &adapter->cycles,
1858                                  ktime_to_ns(ktime_get_real()));
1859                 /*
1860                  * Synchronize our NIC clock against the system wall clock. NIC
1861                  * time stamp reading requires ~3us per sample, and each sample
1862                  * was quite stable even under load, so only 10 samples are
1863                  * needed for each offset comparison.
1864                  */
1865                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1866                 adapter->compare.source = &adapter->clock;
1867                 adapter->compare.target = ktime_get_real;
1868                 adapter->compare.num_samples = 10;
1869                 timecompare_update(&adapter->compare, 0);
1870                 break;
1871         case e1000_82576:
1872                 /*
1873                  * Initialize hardware timer: we keep it running just in case
1874                  * some program needs it later on.
1875                  */
1876                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1877                 adapter->cycles.read = igb_read_clock;
1878                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1879                 adapter->cycles.mult = 1;
1880                 /*
1881                  * Scale the NIC clock cycle by a large factor so that
1882                  * relatively small clock corrections can be added or
1883                  * subtracted at each clock tick. The drawbacks of a large
1884                  * factor are a) that the clock register overflows more quickly
1885                  * (not such a big deal) and b) that the increment per tick has
1886                  * to fit into 24 bits.  As a result we need to use a shift of
1887                  * 19 so we can fit a value of 16 into the TIMINCA register.
1888                  */
1889                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1890                 wr32(E1000_TIMINCA,
1891                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1892                                 (16 << IGB_82576_TSYNC_SHIFT));
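                     /* 16 << 19 = 0x800000, so the scaled per-tick increment
                      * still fits in the 24-bit TIMINCA increment field */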
1893
1894                 /* Set registers so that rollover occurs soon to test this. */
1895                 wr32(E1000_SYSTIML, 0x00000000);
1896                 wr32(E1000_SYSTIMH, 0xFF800000);
1897                 wrfl();
1898
1899                 timecounter_init(&adapter->clock,
1900                                  &adapter->cycles,
1901                                  ktime_to_ns(ktime_get_real()));
1902                 /*
1903                  * Synchronize our NIC clock against the system wall clock. NIC
1904                  * time stamp reading requires ~3us per sample, and each sample
1905                  * was quite stable even under load, so only 10 samples are
1906                  * needed for each offset comparison.
1907                  */
1908                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1909                 adapter->compare.source = &adapter->clock;
1910                 adapter->compare.target = ktime_get_real;
1911                 adapter->compare.num_samples = 10;
1912                 timecompare_update(&adapter->compare, 0);
1913                 break;
1914         case e1000_82575:
1915                 /* 82575 does not support timesync */
1916         default:
1917                 break;
1918         }
1919
1920 }
1921
1922 /**
1923  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1924  * @adapter: board private structure to initialize
1925  *
1926  * igb_sw_init initializes the Adapter private data structure.
1927  * Fields are initialized based on PCI device information and
1928  * OS network device settings (MTU size).
1929  **/
1930 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1931 {
1932         struct e1000_hw *hw = &adapter->hw;
1933         struct net_device *netdev = adapter->netdev;
1934         struct pci_dev *pdev = adapter->pdev;
1935
1936         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1937
1938         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1939         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1940         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1941         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1942
1943         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1944         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1945
1946 #ifdef CONFIG_PCI_IOV
1947         if (hw->mac.type == e1000_82576)
1948                 adapter->vfs_allocated_count = max_vfs;
1949
1950 #endif /* CONFIG_PCI_IOV */
1951         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
1952
1953         /*
1954          * if rss_queues > 4, or if VFs are going to be allocated alongside
1955          * multiple rss_queues, then we should combine the queues into a queue
1956          * pair in order to conserve interrupts due to the limited supply
1957          */
1958         if ((adapter->rss_queues > 4) ||
1959             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
1960                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
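             /* with IGB_FLAG_QUEUE_PAIRS set a Tx and an Rx queue share a
              * single interrupt vector */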
1961
1962         /* This call may decrease the number of queues */
1963         if (igb_init_interrupt_scheme(adapter)) {
1964                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1965                 return -ENOMEM;
1966         }
1967
1968         igb_init_hw_timer(adapter);
1969         igb_probe_vfs(adapter);
1970
1971         /* Explicitly disable IRQ since the NIC can be in any state. */
1972         igb_irq_disable(adapter);
1973
1974         set_bit(__IGB_DOWN, &adapter->state);
1975         return 0;
1976 }
1977
1978 /**
1979  * igb_open - Called when a network interface is made active
1980  * @netdev: network interface device structure
1981  *
1982  * Returns 0 on success, negative value on failure
1983  *
1984  * The open entry point is called when a network interface is made
1985  * active by the system (IFF_UP).  At this point all resources needed
1986  * for transmit and receive operations are allocated, the interrupt
1987  * handler is registered with the OS, the watchdog timer is started,
1988  * and the stack is notified that the interface is ready.
1989  **/
1990 static int igb_open(struct net_device *netdev)
1991 {
1992         struct igb_adapter *adapter = netdev_priv(netdev);
1993         struct e1000_hw *hw = &adapter->hw;
1994         int err;
1995         int i;
1996
1997         /* disallow open during test */
1998         if (test_bit(__IGB_TESTING, &adapter->state))
1999                 return -EBUSY;
2000
2001         netif_carrier_off(netdev);
2002
2003         /* allocate transmit descriptors */
2004         err = igb_setup_all_tx_resources(adapter);
2005         if (err)
2006                 goto err_setup_tx;
2007
2008         /* allocate receive descriptors */
2009         err = igb_setup_all_rx_resources(adapter);
2010         if (err)
2011                 goto err_setup_rx;
2012
2013         igb_power_up_link(adapter);
2014
2015         /* before we allocate an interrupt, we must be ready to handle it.
2016          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2017          * as soon as we call request_irq, so we have to set up our
2018          * clean_rx handler before we do so.  */
2019         igb_configure(adapter);
2020
2021         err = igb_request_irq(adapter);
2022         if (err)
2023                 goto err_req_irq;
2024
2025         /* From here on the code is the same as igb_up() */
2026         clear_bit(__IGB_DOWN, &adapter->state);
2027
2028         for (i = 0; i < adapter->num_q_vectors; i++) {
2029                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2030                 napi_enable(&q_vector->napi);
2031         }
2032
2033         /* Clear any pending interrupts. */
2034         rd32(E1000_ICR);
2035
2036         igb_irq_enable(adapter);
2037
2038         /* notify VFs that reset has been completed */
2039         if (adapter->vfs_allocated_count) {
2040                 u32 reg_data = rd32(E1000_CTRL_EXT);
2041                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2042                 wr32(E1000_CTRL_EXT, reg_data);
2043         }
2044
2045         netif_tx_start_all_queues(netdev);
2046
2047         /* start the watchdog. */
2048         hw->mac.get_link_status = 1;
2049         schedule_work(&adapter->watchdog_task);
2050
2051         return 0;
2052
2053 err_req_irq:
2054         igb_release_hw_control(adapter);
2055         igb_power_down_link(adapter);
2056         igb_free_all_rx_resources(adapter);
2057 err_setup_rx:
2058         igb_free_all_tx_resources(adapter);
2059 err_setup_tx:
2060         igb_reset(adapter);
2061
2062         return err;
2063 }
2064
2065 /**
2066  * igb_close - Disables a network interface
2067  * @netdev: network interface device structure
2068  *
2069  * Returns 0, this is not allowed to fail
2070  *
2071  * The close entry point is called when an interface is de-activated
2072  * by the OS.  The hardware is still under the driver's control, but
2073  * needs to be disabled.  A global MAC reset is issued to stop the
2074  * hardware, and all transmit and receive resources are freed.
2075  **/
2076 static int igb_close(struct net_device *netdev)
2077 {
2078         struct igb_adapter *adapter = netdev_priv(netdev);
2079
2080         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2081         igb_down(adapter);
2082
2083         igb_free_irq(adapter);
2084
2085         igb_free_all_tx_resources(adapter);
2086         igb_free_all_rx_resources(adapter);
2087
2088         return 0;
2089 }
2090
2091 /**
2092  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2093  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2094  *
2095  * Return 0 on success, negative on failure
2096  **/
2097 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2098 {
2099         struct pci_dev *pdev = tx_ring->pdev;
2100         int size;
2101
2102         size = sizeof(struct igb_buffer) * tx_ring->count;
2103         tx_ring->buffer_info = vmalloc(size);
2104         if (!tx_ring->buffer_info)
2105                 goto err;
2106         memset(tx_ring->buffer_info, 0, size);
2107
2108         /* round up to nearest 4K */
2109         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2110         tx_ring->size = ALIGN(tx_ring->size, 4096);
2111
2112         tx_ring->desc = pci_alloc_consistent(pdev,
2113                                              tx_ring->size,
2114                                              &tx_ring->dma);
2115
2116         if (!tx_ring->desc)
2117                 goto err;
2118
2119         tx_ring->next_to_use = 0;
2120         tx_ring->next_to_clean = 0;
2121         return 0;
2122
2123 err:
2124         vfree(tx_ring->buffer_info);
2125         dev_err(&pdev->dev,
2126                 "Unable to allocate memory for the transmit descriptor ring\n");
2127         return -ENOMEM;
2128 }
2129
2130 /**
2131  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2132  *                                (Descriptors) for all queues
2133  * @adapter: board private structure
2134  *
2135  * Return 0 on success, negative on failure
2136  **/
2137 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2138 {
2139         struct pci_dev *pdev = adapter->pdev;
2140         int i, err = 0;
2141
2142         for (i = 0; i < adapter->num_tx_queues; i++) {
2143                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2144                 if (err) {
2145                         dev_err(&pdev->dev,
2146                                 "Allocation for Tx Queue %u failed\n", i);
2147                         for (i--; i >= 0; i--)
2148                                 igb_free_tx_resources(adapter->tx_ring[i]);
2149                         break;
2150                 }
2151         }
2152
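             /* map every absolute Tx queue index onto one of the rings that
              * were actually allocated, wrapping round-robin */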
2153         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2154                 int r_idx = i % adapter->num_tx_queues;
2155                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2156         }
2157         return err;
2158 }
2159
2160 /**
2161  * igb_setup_tctl - configure the transmit control registers
2162  * @adapter: Board private structure
2163  **/
2164 void igb_setup_tctl(struct igb_adapter *adapter)
2165 {
2166         struct e1000_hw *hw = &adapter->hw;
2167         u32 tctl;
2168
2169         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2170         wr32(E1000_TXDCTL(0), 0);
2171
2172         /* Program the Transmit Control Register */
2173         tctl = rd32(E1000_TCTL);
2174         tctl &= ~E1000_TCTL_CT;
2175         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2176                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2177
2178         igb_config_collision_dist(hw);
2179
2180         /* Enable transmits */
2181         tctl |= E1000_TCTL_EN;
2182
2183         wr32(E1000_TCTL, tctl);
2184 }
2185
2186 /**
2187  * igb_configure_tx_ring - Configure transmit ring after Reset
2188  * @adapter: board private structure
2189  * @ring: tx ring to configure
2190  *
2191  * Configure a transmit ring after a reset.
2192  **/
2193 void igb_configure_tx_ring(struct igb_adapter *adapter,
2194                            struct igb_ring *ring)
2195 {
2196         struct e1000_hw *hw = &adapter->hw;
2197         u32 txdctl;
2198         u64 tdba = ring->dma;
2199         int reg_idx = ring->reg_idx;
2200
2201         /* disable the queue */
2202         txdctl = rd32(E1000_TXDCTL(reg_idx));
2203         wr32(E1000_TXDCTL(reg_idx),
2204                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2205         wrfl();
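             /* give the disable time to take effect before the ring is
              * reprogrammed */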
2206         mdelay(10);
2207
2208         wr32(E1000_TDLEN(reg_idx),
2209                         ring->count * sizeof(union e1000_adv_tx_desc));
2210         wr32(E1000_TDBAL(reg_idx),
2211                         tdba & 0x00000000ffffffffULL);
2212         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2213
2214         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2215         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2216         writel(0, ring->head);
2217         writel(0, ring->tail);
2218
2219         txdctl |= IGB_TX_PTHRESH;
2220         txdctl |= IGB_TX_HTHRESH << 8;
2221         txdctl |= IGB_TX_WTHRESH << 16;
2222
2223         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2224         wr32(E1000_TXDCTL(reg_idx), txdctl);
2225 }
2226
2227 /**
2228  * igb_configure_tx - Configure transmit Unit after Reset
2229  * @adapter: board private structure
2230  *
2231  * Configure the Tx unit of the MAC after a reset.
2232  **/
2233 static void igb_configure_tx(struct igb_adapter *adapter)
2234 {
2235         int i;
2236
2237         for (i = 0; i < adapter->num_tx_queues; i++)
2238                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2239 }
2240
2241 /**
2242  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2243  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2244  *
2245  * Returns 0 on success, negative on failure
2246  **/
2247 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2248 {
2249         struct pci_dev *pdev = rx_ring->pdev;
2250         int size, desc_len;
2251
2252         size = sizeof(struct igb_buffer) * rx_ring->count;
2253         rx_ring->buffer_info = vmalloc(size);
2254         if (!rx_ring->buffer_info)
2255                 goto err;
2256         memset(rx_ring->buffer_info, 0, size);
2257
2258         desc_len = sizeof(union e1000_adv_rx_desc);
2259
2260         /* Round up to nearest 4K */
2261         rx_ring->size = rx_ring->count * desc_len;
2262         rx_ring->size = ALIGN(rx_ring->size, 4096);
2263
2264         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2265                                              &rx_ring->dma);
2266
2267         if (!rx_ring->desc)
2268                 goto err;
2269
2270         rx_ring->next_to_clean = 0;
2271         rx_ring->next_to_use = 0;
2272
2273         return 0;
2274
2275 err:
2276         vfree(rx_ring->buffer_info);
2277         rx_ring->buffer_info = NULL;
2278         dev_err(&pdev->dev, "Unable to allocate memory for "
2279                 "the receive descriptor ring\n");
2280         return -ENOMEM;
2281 }
2282
2283 /**
2284  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2285  *                                (Descriptors) for all queues
2286  * @adapter: board private structure
2287  *
2288  * Return 0 on success, negative on failure
2289  **/
2290 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2291 {
2292         struct pci_dev *pdev = adapter->pdev;
2293         int i, err = 0;
2294
2295         for (i = 0; i < adapter->num_rx_queues; i++) {
2296                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2297                 if (err) {
2298                         dev_err(&pdev->dev,
2299                                 "Allocation for Rx Queue %u failed\n", i);
2300                         for (i--; i >= 0; i--)
2301                                 igb_free_rx_resources(adapter->rx_ring[i]);
2302                         break;
2303                 }
2304         }
2305
2306         return err;
2307 }
2308
2309 /**
2310  * igb_setup_mrqc - configure the multiple receive queue control registers
2311  * @adapter: Board private structure
2312  **/
2313 static void igb_setup_mrqc(struct igb_adapter *adapter)
2314 {
2315         struct e1000_hw *hw = &adapter->hw;
2316         u32 mrqc, rxcsum;
2317         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2318         union e1000_reta {
2319                 u32 dword;
2320                 u8  bytes[4];
2321         } reta;
2322         static const u8 rsshash[40] = {
2323                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2324                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2325                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2326                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2327
2328         /* Fill out hash function seeds */
2329         for (j = 0; j < 10; j++) {
2330                 u32 rsskey = rsshash[(j * 4)];
2331                 rsskey |= rsshash[(j * 4) + 1] << 8;
2332                 rsskey |= rsshash[(j * 4) + 2] << 16;
2333                 rsskey |= rsshash[(j * 4) + 3] << 24;
2334                 array_wr32(E1000_RSSRK(0), j, rsskey);
2335         }
2336
2337         num_rx_queues = adapter->rss_queues;
2338
2339         if (adapter->vfs_allocated_count) {
2340                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2341                 switch (hw->mac.type) {
2342                 case e1000_82580:
2343                         num_rx_queues = 1;
2344                         shift = 0;
2345                         break;
2346                 case e1000_82576:
2347                         shift = 3;
2348                         num_rx_queues = 2;
2349                         break;
2350                 case e1000_82575:
2351                         shift = 2;
2352                         shift2 = 6;
2353                 default:
2354                         break;
2355                 }
2356         } else {
2357                 if (hw->mac.type == e1000_82575)
2358                         shift = 6;
2359         }
2360
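             /* populate the 128-entry RSS redirection table, packing four
              * entries into each 32-bit RETA register */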
2361         for (j = 0; j < (32 * 4); j++) {
2362                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2363                 if (shift2)
2364                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2365                 if ((j & 3) == 3)
2366                         wr32(E1000_RETA(j >> 2), reta.dword);
2367         }
2368
2369         /*
2370          * Disable raw packet checksumming so that RSS hash is placed in
2371          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2372          * offloads as they are enabled by default
2373          */
2374         rxcsum = rd32(E1000_RXCSUM);
2375         rxcsum |= E1000_RXCSUM_PCSD;
2376
2377         if (adapter->hw.mac.type >= e1000_82576)
2378                 /* Enable Receive Checksum Offload for SCTP */
2379                 rxcsum |= E1000_RXCSUM_CRCOFL;
2380
2381         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2382         wr32(E1000_RXCSUM, rxcsum);
2383
2384         /* If VMDq is enabled then we set the appropriate mode for that, else
2385          * we default to RSS so that an RSS hash is calculated per packet even
2386          * if we are only using one queue */
2387         if (adapter->vfs_allocated_count) {
2388                 if (hw->mac.type > e1000_82575) {
2389                         /* Set the default pool for the PF's first queue */
2390                         u32 vtctl = rd32(E1000_VT_CTL);
2391                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2392                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2393                         vtctl |= adapter->vfs_allocated_count <<
2394                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2395                         wr32(E1000_VT_CTL, vtctl);
2396                 }
2397                 if (adapter->rss_queues > 1)
2398                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2399                 else
2400                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2401         } else {
2402                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2403         }
2404         igb_vmm_control(adapter);
2405
2406         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2407                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2408         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2409                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2410         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2411                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2412         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2413                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2414
2415         wr32(E1000_MRQC, mrqc);
2416 }
2417
2418 /**
2419  * igb_setup_rctl - configure the receive control registers
2420  * @adapter: Board private structure
2421  **/
2422 void igb_setup_rctl(struct igb_adapter *adapter)
2423 {
2424         struct e1000_hw *hw = &adapter->hw;
2425         u32 rctl;
2426
2427         rctl = rd32(E1000_RCTL);
2428
2429         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2430         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2431
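             /* enable the receiver, accept broadcast frames, set the Rx
              * descriptor minimum threshold size to 1/2 the ring and program
              * the multicast offset from the stored filter type */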
2432         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2433                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2434
2435         /*
2436          * enable stripping of CRC. It's unlikely this will break BMC
2437          * redirection as it did with e1000. Newer features require
2438          * that the HW strips the CRC.
2439          */
2440         rctl |= E1000_RCTL_SECRC;
2441
2442         /* disable store bad packets and clear size bits. */
2443         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2444
2445         /* enable LPE to allow long packets; the size limit itself is set via RLPML */
2446         rctl |= E1000_RCTL_LPE;
2447
2448         /* disable queue 0 to prevent tail write w/o re-config */
2449         wr32(E1000_RXDCTL(0), 0);
2450
2451         /* Attention!!!  For SR-IOV PF driver operations you must enable
2452          * queue drop for all VF and PF queues to prevent head-of-line blocking
2453          * if an untrusted VF does not provide descriptors to hardware.
2454          */
2455         if (adapter->vfs_allocated_count) {
2456                 /* set all queue drop enable bits */
2457                 wr32(E1000_QDE, ALL_QUEUES);
2458         }
2459
2460         wr32(E1000_RCTL, rctl);
2461 }
2462
2463 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2464                                    int vfn)
2465 {
2466         struct e1000_hw *hw = &adapter->hw;
2467         u32 vmolr;
2468
2469         /* if this is a VF pool (not the PF), check whether the VF has VLANs
2470          * enabled and increase the size to allow for the vlan tag */
2471         if (vfn < adapter->vfs_allocated_count &&
2472             adapter->vf_data[vfn].vlans_enabled)
2473                 size += VLAN_TAG_SIZE;
2474
2475         vmolr = rd32(E1000_VMOLR(vfn));
2476         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2477         vmolr |= size | E1000_VMOLR_LPE;
2478         wr32(E1000_VMOLR(vfn), vmolr);
2479
2480         return 0;
2481 }
2482
2483 /**
2484  * igb_rlpml_set - set maximum receive packet size
2485  * @adapter: board private structure
2486  *
2487  * Configure maximum receivable packet size.
2488  **/
2489 static void igb_rlpml_set(struct igb_adapter *adapter)
2490 {
2491         u32 max_frame_size = adapter->max_frame_size;
2492         struct e1000_hw *hw = &adapter->hw;
2493         u16 pf_id = adapter->vfs_allocated_count;
2494
2495         if (adapter->vlgrp)
2496                 max_frame_size += VLAN_TAG_SIZE;
2497
2498         /* if vfs are enabled we set RLPML to the largest possible request
2499          * size and set the VMOLR RLPML to the size we need */
2500         if (pf_id) {
2501                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2502                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2503         }
2504
2505         wr32(E1000_RLPML, max_frame_size);
2506 }
2507
2508 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2509                                  int vfn, bool aupe)
2510 {
2511         struct e1000_hw *hw = &adapter->hw;
2512         u32 vmolr;
2513
2514         /*
2515          * This register exists only on 82576 and newer, so on older hardware
2516          * we should exit and do nothing
2517          */
2518         if (hw->mac.type < e1000_82576)
2519                 return;
2520
2521         vmolr = rd32(E1000_VMOLR(vfn));
2522         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2523         if (aupe)
2524                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2525         else
2526                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2527
2528         /* clear all bits that might not be set */
2529         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2530
2531         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2532                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2533         /*
2534          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2535          * multicast packets
2536          */
2537         if (vfn <= adapter->vfs_allocated_count)
2538                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2539
2540         wr32(E1000_VMOLR(vfn), vmolr);
2541 }
2542
2543 /**
2544  * igb_configure_rx_ring - Configure a receive ring after Reset
2545  * @adapter: board private structure
2546  * @ring: receive ring to be configured
2547  *
2548  * Configure the Rx unit of the MAC after a reset.
2549  **/
2550 void igb_configure_rx_ring(struct igb_adapter *adapter,
2551                            struct igb_ring *ring)
2552 {
2553         struct e1000_hw *hw = &adapter->hw;
2554         u64 rdba = ring->dma;
2555         int reg_idx = ring->reg_idx;
2556         u32 srrctl, rxdctl;
2557
2558         /* disable the queue */
2559         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2560         wr32(E1000_RXDCTL(reg_idx),
2561                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2562
2563         /* Set DMA base address registers */
2564         wr32(E1000_RDBAL(reg_idx),
2565              rdba & 0x00000000ffffffffULL);
2566         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2567         wr32(E1000_RDLEN(reg_idx),
2568                        ring->count * sizeof(union e1000_adv_rx_desc));
2569
2570         /* initialize head and tail */
2571         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2572         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2573         writel(0, ring->head);
2574         writel(0, ring->tail);
2575
2576         /* set descriptor configuration */
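             /* buffers smaller than IGB_RXBUFFER_1024 use header-split
              * descriptors: headers land in the small buffer and packet data
              * in half a page, capped at 16KB; larger buffers use a single
              * one-buffer descriptor */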
2577         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2578                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2579                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2580 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2581                 srrctl |= IGB_RXBUFFER_16384 >>
2582                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2583 #else
2584                 srrctl |= (PAGE_SIZE / 2) >>
2585                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2586 #endif
2587                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2588         } else {
2589                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2590                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2591                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2592         }
2593         /* Only set Drop Enable if we are supporting multiple queues */
2594         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2595                 srrctl |= E1000_SRRCTL_DROP_EN;
2596
2597         wr32(E1000_SRRCTL(reg_idx), srrctl);
2598
2599         /* set filtering for VMDQ pools */
2600         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2601
2602         /* enable receive descriptor fetching */
2603         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2604         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2605         rxdctl &= 0xFFF00000;
2606         rxdctl |= IGB_RX_PTHRESH;
2607         rxdctl |= IGB_RX_HTHRESH << 8;
2608         rxdctl |= IGB_RX_WTHRESH << 16;
2609         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2610 }
2611
2612 /**
2613  * igb_configure_rx - Configure receive Unit after Reset
2614  * @adapter: board private structure
2615  *
2616  * Configure the Rx unit of the MAC after a reset.
2617  **/
2618 static void igb_configure_rx(struct igb_adapter *adapter)
2619 {
2620         int i;
2621
2622         /* set UTA to appropriate mode */
2623         igb_set_uta(adapter);
2624
2625         /* set the correct pool for the PF default MAC address in entry 0 */
2626         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2627                          adapter->vfs_allocated_count);
2628
2629         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2630          * the Base and Length of the Rx Descriptor Ring */
2631         for (i = 0; i < adapter->num_rx_queues; i++)
2632                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2633 }
2634
2635 /**
2636  * igb_free_tx_resources - Free Tx Resources per Queue
2637  * @tx_ring: Tx descriptor ring for a specific queue
2638  *
2639  * Free all transmit software resources
2640  **/
2641 void igb_free_tx_resources(struct igb_ring *tx_ring)
2642 {
2643         igb_clean_tx_ring(tx_ring);
2644
2645         vfree(tx_ring->buffer_info);
2646         tx_ring->buffer_info = NULL;
2647
2648         /* if not set, then don't free */
2649         if (!tx_ring->desc)
2650                 return;
2651
2652         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2653                             tx_ring->desc, tx_ring->dma);
2654
2655         tx_ring->desc = NULL;
2656 }
2657
2658 /**
2659  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2660  * @adapter: board private structure
2661  *
2662  * Free all transmit software resources
2663  **/
2664 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2665 {
2666         int i;
2667
2668         for (i = 0; i < adapter->num_tx_queues; i++)
2669                 igb_free_tx_resources(adapter->tx_ring[i]);
2670 }
2671
2672 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2673                                     struct igb_buffer *buffer_info)
2674 {
2675         if (buffer_info->dma) {
2676                 if (buffer_info->mapped_as_page)
2677                         pci_unmap_page(tx_ring->pdev,
2678                                         buffer_info->dma,
2679                                         buffer_info->length,
2680                                         PCI_DMA_TODEVICE);
2681                 else
2682                         pci_unmap_single(tx_ring->pdev,
2683                                         buffer_info->dma,
2684                                         buffer_info->length,
2685                                         PCI_DMA_TODEVICE);
2686                 buffer_info->dma = 0;
2687         }
2688         if (buffer_info->skb) {
2689                 dev_kfree_skb_any(buffer_info->skb);
2690                 buffer_info->skb = NULL;
2691         }
2692         buffer_info->time_stamp = 0;
2693         buffer_info->length = 0;
2694         buffer_info->next_to_watch = 0;
2695         buffer_info->mapped_as_page = false;
2696 }
2697
2698 /**
2699  * igb_clean_tx_ring - Free Tx Buffers
2700  * @tx_ring: ring to be cleaned
2701  **/
2702 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2703 {
2704         struct igb_buffer *buffer_info;
2705         unsigned long size;
2706         unsigned int i;
2707
2708         if (!tx_ring->buffer_info)
2709                 return;
2710         /* Free all the Tx ring sk_buffs */
2711
2712         for (i = 0; i < tx_ring->count; i++) {
2713                 buffer_info = &tx_ring->buffer_info[i];
2714                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2715         }
2716
2717         size = sizeof(struct igb_buffer) * tx_ring->count;
2718         memset(tx_ring->buffer_info, 0, size);
2719
2720         /* Zero out the descriptor ring */
2721         memset(tx_ring->desc, 0, tx_ring->size);
2722
2723         tx_ring->next_to_use = 0;
2724         tx_ring->next_to_clean = 0;
2725 }
2726
2727 /**
2728  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2729  * @adapter: board private structure
2730  **/
2731 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2732 {
2733         int i;
2734
2735         for (i = 0; i < adapter->num_tx_queues; i++)
2736                 igb_clean_tx_ring(adapter->tx_ring[i]);
2737 }
2738
2739 /**
2740  * igb_free_rx_resources - Free Rx Resources
2741  * @rx_ring: ring to clean the resources from
2742  *
2743  * Free all receive software resources
2744  **/
2745 void igb_free_rx_resources(struct igb_ring *rx_ring)
2746 {
2747         igb_clean_rx_ring(rx_ring);
2748
2749         vfree(rx_ring->buffer_info);
2750         rx_ring->buffer_info = NULL;
2751
2752         /* if not set, then don't free */
2753         if (!rx_ring->desc)
2754                 return;
2755
2756         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2757                             rx_ring->desc, rx_ring->dma);
2758
2759         rx_ring->desc = NULL;
2760 }
2761
2762 /**
2763  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2764  * @adapter: board private structure
2765  *
2766  * Free all receive software resources
2767  **/
2768 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2769 {
2770         int i;
2771
2772         for (i = 0; i < adapter->num_rx_queues; i++)
2773                 igb_free_rx_resources(adapter->rx_ring[i]);
2774 }
2775
2776 /**
2777  * igb_clean_rx_ring - Free Rx Buffers per Queue
2778  * @rx_ring: ring to free buffers from
2779  **/
2780 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2781 {
2782         struct igb_buffer *buffer_info;
2783         unsigned long size;
2784         unsigned int i;
2785
2786         if (!rx_ring->buffer_info)
2787                 return;
2788
2789         /* Free all the Rx ring sk_buffs */
2790         for (i = 0; i < rx_ring->count; i++) {
2791                 buffer_info = &rx_ring->buffer_info[i];
2792                 if (buffer_info->dma) {
2793                         pci_unmap_single(rx_ring->pdev,
2794                                          buffer_info->dma,
2795                                          rx_ring->rx_buffer_len,
2796                                          PCI_DMA_FROMDEVICE);
2797                         buffer_info->dma = 0;
2798                 }
2799
2800                 if (buffer_info->skb) {
2801                         dev_kfree_skb(buffer_info->skb);
2802                         buffer_info->skb = NULL;
2803                 }
2804                 if (buffer_info->page_dma) {
2805                         pci_unmap_page(rx_ring->pdev,
2806                                        buffer_info->page_dma,
2807                                        PAGE_SIZE / 2,
2808                                        PCI_DMA_FROMDEVICE);
2809                         buffer_info->page_dma = 0;
2810                 }
2811                 if (buffer_info->page) {
2812                         put_page(buffer_info->page);
2813                         buffer_info->page = NULL;
2814                         buffer_info->page_offset = 0;
2815                 }
2816         }
2817
2818         size = sizeof(struct igb_buffer) * rx_ring->count;
2819         memset(rx_ring->buffer_info, 0, size);
2820
2821         /* Zero out the descriptor ring */
2822         memset(rx_ring->desc, 0, rx_ring->size);
2823
2824         rx_ring->next_to_clean = 0;
2825         rx_ring->next_to_use = 0;
2826 }
2827
2828 /**
2829  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2830  * @adapter: board private structure
2831  **/
2832 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2833 {
2834         int i;
2835
2836         for (i = 0; i < adapter->num_rx_queues; i++)
2837                 igb_clean_rx_ring(adapter->rx_ring[i]);
2838 }
2839
2840 /**
2841  * igb_set_mac - Change the Ethernet Address of the NIC
2842  * @netdev: network interface device structure
2843  * @p: pointer to an address structure
2844  *
2845  * Returns 0 on success, negative on failure
2846  **/
2847 static int igb_set_mac(struct net_device *netdev, void *p)
2848 {
2849         struct igb_adapter *adapter = netdev_priv(netdev);
2850         struct e1000_hw *hw = &adapter->hw;
2851         struct sockaddr *addr = p;
2852
2853         if (!is_valid_ether_addr(addr->sa_data))
2854                 return -EADDRNOTAVAIL;
2855
2856         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2857         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2858
2859         /* set the correct pool for the new PF MAC address in entry 0 */
2860         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2861                          adapter->vfs_allocated_count);
2862
2863         return 0;
2864 }
2865
2866 /**
2867  * igb_write_mc_addr_list - write multicast addresses to MTA
2868  * @netdev: network interface device structure
2869  *
2870  * Writes multicast address list to the MTA hash table.
2871  * Returns: -ENOMEM on failure
2872  *                0 on no addresses written
2873  *                X on writing X addresses to MTA
2874  **/
2875 static int igb_write_mc_addr_list(struct net_device *netdev)
2876 {
2877         struct igb_adapter *adapter = netdev_priv(netdev);
2878         struct e1000_hw *hw = &adapter->hw;
2879         struct dev_mc_list *mc_ptr;
2880         u8  *mta_list;
2881         int i;
2882
2883         if (netdev_mc_empty(netdev)) {
2884                 /* nothing to program, so clear mc list */
2885                 igb_update_mc_addr_list(hw, NULL, 0);
2886                 igb_restore_vf_multicasts(adapter);
2887                 return 0;
2888         }
2889
2890         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
2891         if (!mta_list)
2892                 return -ENOMEM;
2893
2894         /* The shared function expects a packed array of only addresses. */
2895         i = 0;
2896         netdev_for_each_mc_addr(mc_ptr, netdev)
2897                 memcpy(mta_list + (i++ * ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2898
2899         igb_update_mc_addr_list(hw, mta_list, i);
2900         kfree(mta_list);
2901
2902         return netdev_mc_count(netdev);
2903 }
2904
2905 /**
2906  * igb_write_uc_addr_list - write unicast addresses to RAR table
2907  * @netdev: network interface device structure
2908  *
2909  * Writes unicast address list to the RAR table.
2910  * Returns: -ENOMEM on failure/insufficient address space
2911  *                0 on no addresses written
2912  *                X on writing X addresses to the RAR table
2913  **/
2914 static int igb_write_uc_addr_list(struct net_device *netdev)
2915 {
2916         struct igb_adapter *adapter = netdev_priv(netdev);
2917         struct e1000_hw *hw = &adapter->hw;
2918         unsigned int vfn = adapter->vfs_allocated_count;
2919         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2920         int count = 0;
2921
2922         /* return ENOMEM indicating insufficient memory for addresses */
2923         if (netdev_uc_count(netdev) > rar_entries)
2924                 return -ENOMEM;
2925
2926         if (!netdev_uc_empty(netdev) && rar_entries) {
2927                 struct netdev_hw_addr *ha;
2928
2929                 netdev_for_each_uc_addr(ha, netdev) {
2930                         if (!rar_entries)
2931                                 break;
2932                         igb_rar_set_qsel(adapter, ha->addr,
2933                                          rar_entries--,
2934                                          vfn);
2935                         count++;
2936                 }
2937         }
2938         /* write the addresses in reverse order to avoid write combining */
2939         for (; rar_entries > 0 ; rar_entries--) {
2940                 wr32(E1000_RAH(rar_entries), 0);
2941                 wr32(E1000_RAL(rar_entries), 0);
2942         }
2943         wrfl();
2944
2945         return count;
2946 }
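/* Illustrative note (not part of the original driver logic): the RAR budget
 * above reserves entry 0 for the PF MAC plus one entry per allocated VF,
 * hence rar_entries = rar_entry_count - (vfs_allocated_count + 1).  As a
 * worked example, assuming a part with 24 RAR entries and 7 VFs in use:
 *
 *   rar_entries = 24 - (7 + 1) = 16
 *
 * so up to 16 secondary unicast addresses can be programmed before this
 * function returns -ENOMEM and the caller (igb_set_rx_mode) falls back to
 * unicast promiscuous mode.
 */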
2947
2948 /**
2949  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2950  * @netdev: network interface device structure
2951  *
2952  * The set_rx_mode entry point is called whenever the unicast or multicast
2953  * address lists or the network interface flags are updated.  This routine is
2954  * responsible for configuring the hardware for proper unicast, multicast,
2955  * promiscuous mode, and all-multi behavior.
2956  **/
2957 static void igb_set_rx_mode(struct net_device *netdev)
2958 {
2959         struct igb_adapter *adapter = netdev_priv(netdev);
2960         struct e1000_hw *hw = &adapter->hw;
2961         unsigned int vfn = adapter->vfs_allocated_count;
2962         u32 rctl, vmolr = 0;
2963         int count;
2964
2965         /* Check for Promiscuous and All Multicast modes */
2966         rctl = rd32(E1000_RCTL);
2967
2968         /* clear the affected bits */
2969         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2970
2971         if (netdev->flags & IFF_PROMISC) {
2972                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2973                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2974         } else {
2975                 if (netdev->flags & IFF_ALLMULTI) {
2976                         rctl |= E1000_RCTL_MPE;
2977                         vmolr |= E1000_VMOLR_MPME;
2978                 } else {
2979                         /*
2980                          * Write addresses to the MTA; if the attempt fails
2981                          * then we should just turn on promiscuous mode so
2982                          * that we can at least receive multicast traffic
2983                          */
2984                         count = igb_write_mc_addr_list(netdev);
2985                         if (count < 0) {
2986                                 rctl |= E1000_RCTL_MPE;
2987                                 vmolr |= E1000_VMOLR_MPME;
2988                         } else if (count) {
2989                                 vmolr |= E1000_VMOLR_ROMPE;
2990                         }
2991                 }
2992                 /*
2993                  * Write addresses to available RAR registers; if there is not
2994                  * sufficient space to store all the addresses, then enable
2995                  * unicast promiscuous mode
2996                  */
2997                 count = igb_write_uc_addr_list(netdev);
2998                 if (count < 0) {
2999                         rctl |= E1000_RCTL_UPE;
3000                         vmolr |= E1000_VMOLR_ROPE;
3001                 }
3002                 rctl |= E1000_RCTL_VFE;
3003         }
3004         wr32(E1000_RCTL, rctl);
3005
3006         /*
3007          * In order to support SR-IOV and eventually VMDq it is necessary to set
3008          * the VMOLR to enable the appropriate modes.  Without this workaround
3009          * we will have issues with VLAN tag stripping not being done for frames
3010          * that are only arriving because we are the default pool
3011          */
3012         if (hw->mac.type < e1000_82576)
3013                 return;
3014
3015         vmolr |= rd32(E1000_VMOLR(vfn)) &
3016                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3017         wr32(E1000_VMOLR(vfn), vmolr);
3018         igb_restore_vf_multicasts(adapter);
3019 }
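/* Illustrative summary of the decision above (for reference only):
 *
 *   IFF_PROMISC         -> RCTL.UPE | RCTL.MPE  (VMOLR ROPE | MPME)
 *   IFF_ALLMULTI        -> RCTL.MPE             (VMOLR MPME)
 *   MTA write fails     -> RCTL.MPE as a fallback so multicast still arrives
 *   RAR table too small -> RCTL.UPE as a fallback for secondary unicast
 *
 * In every non-promiscuous case RCTL.VFE (VLAN filter enable) is also set,
 * and on 82576 and later the default pool's VMOLR is updated to match.
 */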
3020
3021 /* Need to wait a few seconds after link up to get diagnostic information from
3022  * the phy */
3023 static void igb_update_phy_info(unsigned long data)
3024 {
3025         struct igb_adapter *adapter = (struct igb_adapter *) data;
3026         igb_get_phy_info(&adapter->hw);
3027 }
3028
3029 /**
3030  * igb_has_link - check shared code for link and determine up/down
3031  * @adapter: pointer to driver private info
3032  **/
3033 bool igb_has_link(struct igb_adapter *adapter)
3034 {
3035         struct e1000_hw *hw = &adapter->hw;
3036         bool link_active = false;
3037         s32 ret_val = 0;
3038
3039         /* get_link_status is set on LSC (link status) interrupt or
3040          * rx sequence error interrupt.  get_link_status will stay
3041          * set until e1000_check_for_link establishes link
3042          * for copper adapters ONLY
3043          */
3044         switch (hw->phy.media_type) {
3045         case e1000_media_type_copper:
3046                 if (hw->mac.get_link_status) {
3047                         ret_val = hw->mac.ops.check_for_link(hw);
3048                         link_active = !hw->mac.get_link_status;
3049                 } else {
3050                         link_active = true;
3051                 }
3052                 break;
3053         case e1000_media_type_internal_serdes:
3054                 ret_val = hw->mac.ops.check_for_link(hw);
3055                 link_active = hw->mac.serdes_has_link;
3056                 break;
3057         default:
3058         case e1000_media_type_unknown:
3059                 break;
3060         }
3061
3062         return link_active;
3063 }
3064
3065 /**
3066  * igb_watchdog - Timer Call-back
3067  * @data: pointer to adapter cast into an unsigned long
3068  **/
3069 static void igb_watchdog(unsigned long data)
3070 {
3071         struct igb_adapter *adapter = (struct igb_adapter *)data;
3072         /* Do the rest outside of interrupt context */
3073         schedule_work(&adapter->watchdog_task);
3074 }
3075
3076 static void igb_watchdog_task(struct work_struct *work)
3077 {
3078         struct igb_adapter *adapter = container_of(work,
3079                                                    struct igb_adapter,
3080                                                    watchdog_task);
3081         struct e1000_hw *hw = &adapter->hw;
3082         struct net_device *netdev = adapter->netdev;
3083         u32 link;
3084         int i;
3085
3086         link = igb_has_link(adapter);
3087         if (link) {
3088                 if (!netif_carrier_ok(netdev)) {
3089                         u32 ctrl;
3090                         hw->mac.ops.get_speed_and_duplex(hw,
3091                                                          &adapter->link_speed,
3092                                                          &adapter->link_duplex);
3093
3094                         ctrl = rd32(E1000_CTRL);
3095                         /* Link status message must follow this format */
3096                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3097                                  "Flow Control: %s\n",
3098                                netdev->name,
3099                                adapter->link_speed,
3100                                adapter->link_duplex == FULL_DUPLEX ?
3101                                  "Full Duplex" : "Half Duplex",
3102                                ((ctrl & E1000_CTRL_TFCE) &&
3103                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3104                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3105                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3106
3107                         /* adjust timeout factor according to speed/duplex */
3108                         adapter->tx_timeout_factor = 1;
3109                         switch (adapter->link_speed) {
3110                         case SPEED_10:
3111                                 adapter->tx_timeout_factor = 14;
3112                                 break;
3113                         case SPEED_100:
3114                                 /* maybe add some timeout factor ? */
3115                                 break;
3116                         }
3117
3118                         netif_carrier_on(netdev);
3119
3120                         igb_ping_all_vfs(adapter);
3121
3122                         /* link state has changed, schedule phy info update */
3123                         if (!test_bit(__IGB_DOWN, &adapter->state))
3124                                 mod_timer(&adapter->phy_info_timer,
3125                                           round_jiffies(jiffies + 2 * HZ));
3126                 }
3127         } else {
3128                 if (netif_carrier_ok(netdev)) {
3129                         adapter->link_speed = 0;
3130                         adapter->link_duplex = 0;
3131                         /* Link status message must follow this format */
3132                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3133                                netdev->name);
3134                         netif_carrier_off(netdev);
3135
3136                         igb_ping_all_vfs(adapter);
3137
3138                         /* link state has changed, schedule phy info update */
3139                         if (!test_bit(__IGB_DOWN, &adapter->state))
3140                                 mod_timer(&adapter->phy_info_timer,
3141                                           round_jiffies(jiffies + 2 * HZ));
3142                 }
3143         }
3144
3145         igb_update_stats(adapter);
3146
3147         for (i = 0; i < adapter->num_tx_queues; i++) {
3148                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3149                 if (!netif_carrier_ok(netdev)) {
3150                         /* We've lost link, so the controller stops DMA,
3151                          * but we've got queued Tx work that's never going
3152                          * to get done, so reset controller to flush Tx.
3153                          * (Do the reset outside of interrupt context). */
3154                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3155                                 adapter->tx_timeout_count++;
3156                                 schedule_work(&adapter->reset_task);
3157                                 /* return immediately since reset is imminent */
3158                                 return;
3159                         }
3160                 }
3161
3162                 /* Force detection of hung controller every watchdog period */
3163                 tx_ring->detect_tx_hung = true;
3164         }
3165
3166         /* Cause software interrupt to ensure rx ring is cleaned */
3167         if (adapter->msix_entries) {
3168                 u32 eics = 0;
3169                 for (i = 0; i < adapter->num_q_vectors; i++) {
3170                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3171                         eics |= q_vector->eims_value;
3172                 }
3173                 wr32(E1000_EICS, eics);
3174         } else {
3175                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3176         }
3177
3178         /* Reset the timer */
3179         if (!test_bit(__IGB_DOWN, &adapter->state))
3180                 mod_timer(&adapter->watchdog_timer,
3181                           round_jiffies(jiffies + 2 * HZ));
3182 }
3183
3184 enum latency_range {
3185         lowest_latency = 0,
3186         low_latency = 1,
3187         bulk_latency = 2,
3188         latency_invalid = 255
3189 };
3190
3191 /**
3192  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3193  *
3194  *      Stores a new ITR value based strictly on packet size.  This
3195  *      algorithm is less sophisticated than that used in igb_update_itr,
3196  *      due to the difficulty of synchronizing statistics across multiple
3197  *      receive rings.  The divisors and thresholds used by this function
3198  *      were determined based on theoretical maximum wire speed and testing
3199  *      data, in order to minimize response time while increasing bulk
3200  *      throughput.
3201  *      This functionality is controlled by the InterruptThrottleRate module
3202  *      parameter (see igb_param.c)
3203  *      NOTE:  This function is called only when operating in a multiqueue
3204  *             receive environment.
3205  * @q_vector: pointer to q_vector
3206  **/
3207 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3208 {
3209         int new_val = q_vector->itr_val;
3210         int avg_wire_size = 0;
3211         struct igb_adapter *adapter = q_vector->adapter;
3212
3213         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3214          * ints/sec - an ITR value of 976 (~250 usecs between interrupts).
3215          */
3216         if (adapter->link_speed != SPEED_1000) {
3217                 new_val = 976;
3218                 goto set_itr_val;
3219         }
3220
3221         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3222                 struct igb_ring *ring = q_vector->rx_ring;
3223                 avg_wire_size = ring->total_bytes / ring->total_packets;
3224         }
3225
3226         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3227                 struct igb_ring *ring = q_vector->tx_ring;
3228                 avg_wire_size = max_t(u32, avg_wire_size,
3229                                       (ring->total_bytes /
3230                                        ring->total_packets));
3231         }
3232
3233         /* if avg_wire_size isn't set no work was done */
3234         if (!avg_wire_size)
3235                 goto clear_counts;
3236
3237         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3238         avg_wire_size += 24;
3239
3240         /* Don't starve jumbo frames */
3241         avg_wire_size = min(avg_wire_size, 3000);
3242
3243         /* Give a little boost to mid-size frames */
3244         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3245                 new_val = avg_wire_size / 3;
3246         else
3247                 new_val = avg_wire_size / 2;
3248
3249         /* when in itr mode 3 do not exceed 20K ints/sec */
3250         if (adapter->rx_itr_setting == 3 && new_val < 196)
3251                 new_val = 196;
3252
3253 set_itr_val:
3254         if (new_val != q_vector->itr_val) {
3255                 q_vector->itr_val = new_val;
3256                 q_vector->set_itr = 1;
3257         }
3258 clear_counts:
3259         if (q_vector->rx_ring) {
3260                 q_vector->rx_ring->total_bytes = 0;
3261                 q_vector->rx_ring->total_packets = 0;
3262         }
3263         if (q_vector->tx_ring) {
3264                 q_vector->tx_ring->total_bytes = 0;
3265                 q_vector->tx_ring->total_packets = 0;
3266         }
3267 }
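/* Illustrative worked example (assuming the ~256 ns interval granularity
 * that the constants here imply; not authoritative):
 *
 *   itr_val 976 -> ~976 * 256 ns ~= 250 us   -> ~4000  interrupts/sec
 *   itr_val 196 -> ~196 * 256 ns ~=  50 us   -> ~20000 interrupts/sec
 *   itr_val  56 -> ~ 56 * 256 ns ~= 14.3 us  -> ~70000 interrupts/sec
 *
 * which is consistent with the lowest/low/bulk latency table in igb_set_itr()
 * below.  E.g. an average packet of 600 bytes becomes avg_wire_size = 624
 * after the CRC/preamble/gap adjustment, giving new_val = 624 / 3 = 208,
 * i.e. roughly the 20K ints/sec low-latency rate.
 */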
3268
3269 /**
3270  * igb_update_itr - update the dynamic ITR value based on statistics
3271  *      Stores a new ITR value based on packets and byte
3272  *      counts during the last interrupt.  The advantage of per interrupt
3273  *      computation is faster updates and more accurate ITR for the current
3274  *      traffic pattern.  Constants in this function were computed
3275  *      based on theoretical maximum wire speed and thresholds were set based
3276  *      on testing data as well as attempting to minimize response time
3277  *      while increasing bulk throughput.
3278  *      This functionality is controlled by the InterruptThrottleRate module
3279  *      parameter (see igb_param.c)
3280  *      NOTE:  These calculations are only valid when operating in a single-
3281  *             queue environment.
3282  * @adapter: pointer to adapter
3283  * @itr_setting: current q_vector->itr_val
3284  * @packets: the number of packets during this measurement interval
3285  * @bytes: the number of bytes during this measurement interval
3286  **/
3287 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3288                                    int packets, int bytes)
3289 {
3290         unsigned int retval = itr_setting;
3291
3292         if (packets == 0)
3293                 goto update_itr_done;
3294
3295         switch (itr_setting) {
3296         case lowest_latency:
3297                 /* handle TSO and jumbo frames */
3298                 if (bytes/packets > 8000)
3299                         retval = bulk_latency;
3300                 else if ((packets < 5) && (bytes > 512))
3301                         retval = low_latency;
3302                 break;
3303         case low_latency:  /* 50 usec aka 20000 ints/s */
3304                 if (bytes > 10000) {
3305                         /* this if handles the TSO accounting */
3306                         if (bytes/packets > 8000) {
3307                                 retval = bulk_latency;
3308                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3309                                 retval = bulk_latency;
3310                         } else if (packets > 35) {
3311                                 retval = lowest_latency;
3312                         }
3313                 } else if (bytes/packets > 2000) {
3314                         retval = bulk_latency;
3315                 } else if (packets <= 2 && bytes < 512) {
3316                         retval = lowest_latency;
3317                 }
3318                 break;
3319         case bulk_latency: /* 250 usec aka 4000 ints/s */
3320                 if (bytes > 25000) {
3321                         if (packets > 35)
3322                                 retval = low_latency;
3323                 } else if (bytes < 1500) {
3324                         retval = low_latency;
3325                 }
3326                 break;
3327         }
3328
3329 update_itr_done:
3330         return retval;
3331 }
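/* Illustrative worked example of the state machine above: starting from
 * low_latency, an interval of 40 packets / 14000 bytes has bytes > 10000,
 * bytes/packets = 350 (not TSO-sized, not > 1200) and packets > 35, so the
 * state drops to lowest_latency for the next interval.  Conversely, from
 * bulk_latency a quiet interval of 3 packets / 1200 bytes (bytes < 1500)
 * moves back up to low_latency.
 */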
3332
3333 static void igb_set_itr(struct igb_adapter *adapter)
3334 {
3335         struct igb_q_vector *q_vector = adapter->q_vector[0];
3336         u16 current_itr;
3337         u32 new_itr = q_vector->itr_val;
3338
3339         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3340         if (adapter->link_speed != SPEED_1000) {
3341                 current_itr = 0;
3342                 new_itr = 4000;
3343                 goto set_itr_now;
3344         }
3345
3346         adapter->rx_itr = igb_update_itr(adapter,
3347                                     adapter->rx_itr,
3348                                     q_vector->rx_ring->total_packets,
3349                                     q_vector->rx_ring->total_bytes);
3350
3351         adapter->tx_itr = igb_update_itr(adapter,
3352                                     adapter->tx_itr,
3353                                     q_vector->tx_ring->total_packets,
3354                                     q_vector->tx_ring->total_bytes);
3355         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3356
3357         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3358         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3359                 current_itr = low_latency;
3360
3361         switch (current_itr) {
3362         /* counts and packets in update_itr are dependent on these numbers */
3363         case lowest_latency:
3364                 new_itr = 56;  /* aka 70,000 ints/sec */
3365                 break;
3366         case low_latency:
3367                 new_itr = 196; /* aka 20,000 ints/sec */
3368                 break;
3369         case bulk_latency:
3370                 new_itr = 980; /* aka 4,000 ints/sec */
3371                 break;
3372         default:
3373                 break;
3374         }
3375
3376 set_itr_now:
3377         q_vector->rx_ring->total_bytes = 0;
3378         q_vector->rx_ring->total_packets = 0;
3379         q_vector->tx_ring->total_bytes = 0;
3380         q_vector->tx_ring->total_packets = 0;
3381
3382         if (new_itr != q_vector->itr_val) {
3383                 /* this attempts to bias the interrupt rate towards Bulk
3384                  * by adding intermediate steps when interrupt rate is
3385                  * increasing */
3386                 new_itr = new_itr > q_vector->itr_val ?
3387                              max((new_itr * q_vector->itr_val) /
3388                                  (new_itr + (q_vector->itr_val >> 2)),
3389                                  new_itr) :
3390                              new_itr;
3391                 /* Don't write the value here; it resets the adapter's
3392                  * internal timer, and causes us to delay far longer than
3393                  * we should between interrupts.  Instead, we write the ITR
3394                  * value at the beginning of the next interrupt so the timing
3395                  * ends up being correct.
3396                  */
3397                 q_vector->itr_val = new_itr;
3398                 q_vector->set_itr = 1;
3399         }
3400
3401         return;
3402 }
3403
3404 #define IGB_TX_FLAGS_CSUM               0x00000001
3405 #define IGB_TX_FLAGS_VLAN               0x00000002
3406 #define IGB_TX_FLAGS_TSO                0x00000004
3407 #define IGB_TX_FLAGS_IPV4               0x00000008
3408 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3409 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3410 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3411
3412 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3413                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3414 {
3415         struct e1000_adv_tx_context_desc *context_desc;
3416         unsigned int i;
3417         int err;
3418         struct igb_buffer *buffer_info;
3419         u32 info = 0, tu_cmd = 0;
3420         u32 mss_l4len_idx;
3421         u8 l4len;
3422
3423         if (skb_header_cloned(skb)) {
3424                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3425                 if (err)
3426                         return err;
3427         }
3428
3429         l4len = tcp_hdrlen(skb);
3430         *hdr_len += l4len;
3431
3432         if (skb->protocol == htons(ETH_P_IP)) {
3433                 struct iphdr *iph = ip_hdr(skb);
3434                 iph->tot_len = 0;
3435                 iph->check = 0;
3436                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3437                                                          iph->daddr, 0,
3438                                                          IPPROTO_TCP,
3439                                                          0);
3440         } else if (skb_is_gso_v6(skb)) {
3441                 ipv6_hdr(skb)->payload_len = 0;
3442                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3443                                                        &ipv6_hdr(skb)->daddr,
3444                                                        0, IPPROTO_TCP, 0);
3445         }
3446
3447         i = tx_ring->next_to_use;
3448
3449         buffer_info = &tx_ring->buffer_info[i];
3450         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3451         /* VLAN MACLEN IPLEN */
3452         if (tx_flags & IGB_TX_FLAGS_VLAN)
3453                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3454         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3455         *hdr_len += skb_network_offset(skb);
3456         info |= skb_network_header_len(skb);
3457         *hdr_len += skb_network_header_len(skb);
3458         context_desc->vlan_macip_lens = cpu_to_le32(info);
3459
3460         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3461         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3462
3463         if (skb->protocol == htons(ETH_P_IP))
3464                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3465         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3466
3467         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3468
3469         /* MSS L4LEN IDX */
3470         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3471         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3472
3473         /* For 82575, context index must be unique per ring. */
3474         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3475                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3476
3477         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3478         context_desc->seqnum_seed = 0;
3479
3480         buffer_info->time_stamp = jiffies;
3481         buffer_info->next_to_watch = i;
3482         buffer_info->dma = 0;
3483         i++;
3484         if (i == tx_ring->count)
3485                 i = 0;
3486
3487         tx_ring->next_to_use = i;
3488
3489         return true;
3490 }
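/* Illustrative worked example of the hdr_len accumulation above, assuming an
 * untagged Ethernet + IPv4 + TCP frame with no TCP options:
 *
 *   l4len                     = tcp_hdrlen(skb)  = 20
 *   skb_network_offset(skb)   = Ethernet header  = 14
 *   skb_network_header_len()  = IPv4 header      = 20
 *   ------------------------------------------------
 *   *hdr_len                                     = 54
 *
 * igb_tx_queue_adv() later reports skb->len - hdr_len as the TSO payload in
 * the olinfo_status PAYLEN field.
 */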
3491
3492 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3493                                    struct sk_buff *skb, u32 tx_flags)
3494 {
3495         struct e1000_adv_tx_context_desc *context_desc;
3496         struct pci_dev *pdev = tx_ring->pdev;
3497         struct igb_buffer *buffer_info;
3498         u32 info = 0, tu_cmd = 0;
3499         unsigned int i;
3500
3501         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3502             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3503                 i = tx_ring->next_to_use;
3504                 buffer_info = &tx_ring->buffer_info[i];
3505                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3506
3507                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3508                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3509
3510                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3511                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3512                         info |= skb_network_header_len(skb);
3513
3514                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3515
3516                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3517
3518                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3519                         __be16 protocol;
3520
3521                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3522                                 const struct vlan_ethhdr *vhdr =
3523                                           (const struct vlan_ethhdr*)skb->data;
3524
3525                                 protocol = vhdr->h_vlan_encapsulated_proto;
3526                         } else {
3527                                 protocol = skb->protocol;
3528                         }
3529
3530                         switch (protocol) {
3531                         case cpu_to_be16(ETH_P_IP):
3532                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3533                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3534                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3535                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3536                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3537                                 break;
3538                         case cpu_to_be16(ETH_P_IPV6):
3539                                 /* XXX what about other V6 headers?? */
3540                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3541                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3542                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3543                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3544                                 break;
3545                         default:
3546                                 if (unlikely(net_ratelimit()))
3547                                         dev_warn(&pdev->dev,
3548                                             "partial checksum but proto=%x!\n",
3549                                             skb->protocol);
3550                                 break;
3551                         }
3552                 }
3553
3554                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3555                 context_desc->seqnum_seed = 0;
3556                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3557                         context_desc->mss_l4len_idx =
3558                                 cpu_to_le32(tx_ring->reg_idx << 4);
3559
3560                 buffer_info->time_stamp = jiffies;
3561                 buffer_info->next_to_watch = i;
3562                 buffer_info->dma = 0;
3563
3564                 i++;
3565                 if (i == tx_ring->count)
3566                         i = 0;
3567                 tx_ring->next_to_use = i;
3568
3569                 return true;
3570         }
3571         return false;
3572 }
3573
3574 #define IGB_MAX_TXD_PWR 16
3575 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3576
3577 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3578                                  unsigned int first)
3579 {
3580         struct igb_buffer *buffer_info;
3581         struct pci_dev *pdev = tx_ring->pdev;
3582         unsigned int len = skb_headlen(skb);
3583         unsigned int count = 0, i;
3584         unsigned int f;
3585
3586         i = tx_ring->next_to_use;
3587
3588         buffer_info = &tx_ring->buffer_info[i];
3589         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3590         buffer_info->length = len;
3591         /* set time_stamp *before* dma to help avoid a possible race */
3592         buffer_info->time_stamp = jiffies;
3593         buffer_info->next_to_watch = i;
3594         buffer_info->dma = pci_map_single(pdev, skb->data, len,
3595                                           PCI_DMA_TODEVICE);
3596         if (pci_dma_mapping_error(pdev, buffer_info->dma))
3597                 goto dma_error;
3598
3599         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3600                 struct skb_frag_struct *frag;
3601
3602                 count++;
3603                 i++;
3604                 if (i == tx_ring->count)
3605                         i = 0;
3606
3607                 frag = &skb_shinfo(skb)->frags[f];
3608                 len = frag->size;
3609
3610                 buffer_info = &tx_ring->buffer_info[i];
3611                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3612                 buffer_info->length = len;
3613                 buffer_info->time_stamp = jiffies;
3614                 buffer_info->next_to_watch = i;
3615                 buffer_info->mapped_as_page = true;
3616                 buffer_info->dma = pci_map_page(pdev,
3617                                                 frag->page,
3618                                                 frag->page_offset,
3619                                                 len,
3620                                                 PCI_DMA_TODEVICE);
3621                 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3622                         goto dma_error;
3623
3624         }
3625
3626         tx_ring->buffer_info[i].skb = skb;
3627         tx_ring->buffer_info[i].gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3628         tx_ring->buffer_info[first].next_to_watch = i;
3629
3630         return ++count;
3631
3632 dma_error:
3633         dev_err(&pdev->dev, "TX DMA map failed\n");
3634
3635         /* clear timestamp and dma mappings for failed buffer_info mapping */
3636         buffer_info->dma = 0;
3637         buffer_info->time_stamp = 0;
3638         buffer_info->length = 0;
3639         buffer_info->next_to_watch = 0;
3640         buffer_info->mapped_as_page = false;
3641
3642         /* clear timestamp and dma mappings for remaining portion of packet */
3643         while (count--) {
3644                 if (i == 0)
3645                         i = tx_ring->count;
3646                 i--;
3647                 buffer_info = &tx_ring->buffer_info[i];
3648                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3649         }
3650
3651         return 0;
3652 }
3653
3654 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3655                                     u32 tx_flags, int count, u32 paylen,
3656                                     u8 hdr_len)
3657 {
3658         union e1000_adv_tx_desc *tx_desc;
3659         struct igb_buffer *buffer_info;
3660         u32 olinfo_status = 0, cmd_type_len;
3661         unsigned int i = tx_ring->next_to_use;
3662
3663         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3664                         E1000_ADVTXD_DCMD_DEXT);
3665
3666         if (tx_flags & IGB_TX_FLAGS_VLAN)
3667                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3668
3669         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3670                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3671
3672         if (tx_flags & IGB_TX_FLAGS_TSO) {
3673                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3674
3675                 /* insert tcp checksum */
3676                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3677
3678                 /* insert ip checksum */
3679                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3680                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3681
3682         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3683                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3684         }
3685
3686         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3687             (tx_flags & (IGB_TX_FLAGS_CSUM |
3688                          IGB_TX_FLAGS_TSO |
3689                          IGB_TX_FLAGS_VLAN)))
3690                 olinfo_status |= tx_ring->reg_idx << 4;
3691
3692         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3693
3694         do {
3695                 buffer_info = &tx_ring->buffer_info[i];
3696                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3697                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3698                 tx_desc->read.cmd_type_len =
3699                         cpu_to_le32(cmd_type_len | buffer_info->length);
3700                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3701                 count--;
3702                 i++;
3703                 if (i == tx_ring->count)
3704                         i = 0;
3705         } while (count > 0);
3706
3707         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3708         /* Force memory writes to complete before letting h/w
3709          * know there are new descriptors to fetch.  (Only
3710          * applicable for weak-ordered memory model archs,
3711          * such as IA-64). */
3712         wmb();
3713
3714         tx_ring->next_to_use = i;
3715         writel(i, tx_ring->tail);
3716         /* we need this if more than one processor can write to our tail
3717          * at a time; it synchronizes IO on IA64/Altix systems */
3718         mmiowb();
3719 }
3720
3721 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3722 {
3723         struct net_device *netdev = tx_ring->netdev;
3724
3725         netif_stop_subqueue(netdev, tx_ring->queue_index);
3726
3727         /* Herbert's original patch had:
3728          *  smp_mb__after_netif_stop_queue();
3729          * but since that doesn't exist yet, just open code it. */
3730         smp_mb();
3731
3732         /* We need to check again in case another CPU has just
3733          * made room available. */
3734         if (igb_desc_unused(tx_ring) < size)
3735                 return -EBUSY;
3736
3737         /* A reprieve! */
3738         netif_wake_subqueue(netdev, tx_ring->queue_index);
3739         tx_ring->tx_stats.restart_queue++;
3740         return 0;
3741 }
3742
3743 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3744 {
3745         if (igb_desc_unused(tx_ring) >= size)
3746                 return 0;
3747         return __igb_maybe_stop_tx(tx_ring, size);
3748 }
3749
3750 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3751                                     struct igb_ring *tx_ring)
3752 {
3753         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3754         int tso = 0, count;
3755         u32 tx_flags = 0;
3756         u16 first;
3757         u8 hdr_len = 0;
3758         union skb_shared_tx *shtx = skb_tx(skb);
3759
3760         /* need: 1 descriptor per page,
3761          *       + 2 desc gap to keep tail from touching head,
3762          *       + 1 desc for skb->data,
3763          *       + 1 desc for context descriptor,
3764          * otherwise try next time */
3765         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3766                 /* this is a hard error */
3767                 return NETDEV_TX_BUSY;
3768         }
3769
3770         if (unlikely(shtx->hardware)) {
3771                 shtx->in_progress = 1;
3772                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3773         }
3774
3775         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3776                 tx_flags |= IGB_TX_FLAGS_VLAN;
3777                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3778         }
3779
3780         if (skb->protocol == htons(ETH_P_IP))
3781                 tx_flags |= IGB_TX_FLAGS_IPV4;
3782
3783         first = tx_ring->next_to_use;
3784         if (skb_is_gso(skb)) {
3785                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3786
3787                 if (tso < 0) {
3788                         dev_kfree_skb_any(skb);
3789                         return NETDEV_TX_OK;
3790                 }
3791         }
3792
3793         if (tso)
3794                 tx_flags |= IGB_TX_FLAGS_TSO;
3795         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3796                  (skb->ip_summed == CHECKSUM_PARTIAL))
3797                 tx_flags |= IGB_TX_FLAGS_CSUM;
3798
3799         /*
3800          * count reflects descriptors mapped, if 0 or less then mapping error
3801          * has occurred and we need to rewind the descriptor queue
3802          */
3803         count = igb_tx_map_adv(tx_ring, skb, first);
3804         if (!count) {
3805                 dev_kfree_skb_any(skb);
3806                 tx_ring->buffer_info[first].time_stamp = 0;
3807                 tx_ring->next_to_use = first;
3808                 return NETDEV_TX_OK;
3809         }
3810
3811         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3812
3813         /* Make sure there is space in the ring for the next send. */
3814         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3815
3816         return NETDEV_TX_OK;
3817 }
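/* Illustrative worked example of the descriptor budget used above: for a TSO
 * skb with 3 page fragments the transmit path needs
 *
 *   1 context descriptor + 1 descriptor for skb->data + 3 for the fragments
 *   + 2 descriptors of gap to keep tail from touching head = 7
 *
 * which matches the skb_shinfo(skb)->nr_frags + 4 check passed to
 * igb_maybe_stop_tx() before any mapping is attempted.
 */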
3818
3819 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3820                                       struct net_device *netdev)
3821 {
3822         struct igb_adapter *adapter = netdev_priv(netdev);
3823         struct igb_ring *tx_ring;
3824         int r_idx = 0;
3825
3826         if (test_bit(__IGB_DOWN, &adapter->state)) {
3827                 dev_kfree_skb_any(skb);
3828                 return NETDEV_TX_OK;
3829         }
3830
3831         if (skb->len <= 0) {
3832                 dev_kfree_skb_any(skb);
3833                 return NETDEV_TX_OK;
3834         }
3835
3836         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3837         tx_ring = adapter->multi_tx_table[r_idx];
3838
3839         /* This goes back to the question of how to logically map a tx queue
3840          * to a flow.  Right now, performance is slightly degraded when
3841          * using multiple tx queues.  If the stack breaks away from a
3842          * single qdisc implementation, we can look at this again. */
3843         return igb_xmit_frame_ring_adv(skb, tx_ring);
3844 }
3845
3846 /**
3847  * igb_tx_timeout - Respond to a Tx Hang
3848  * @netdev: network interface device structure
3849  **/
3850 static void igb_tx_timeout(struct net_device *netdev)
3851 {
3852         struct igb_adapter *adapter = netdev_priv(netdev);
3853         struct e1000_hw *hw = &adapter->hw;
3854
3855         /* Do the reset outside of interrupt context */
3856         adapter->tx_timeout_count++;
3857
3858         if (hw->mac.type == e1000_82580)
3859                 hw->dev_spec._82575.global_device_reset = true;
3860
3861         schedule_work(&adapter->reset_task);
3862         wr32(E1000_EICS,
3863              (adapter->eims_enable_mask & ~adapter->eims_other));
3864 }
3865
3866 static void igb_reset_task(struct work_struct *work)
3867 {
3868         struct igb_adapter *adapter;
3869         adapter = container_of(work, struct igb_adapter, reset_task);
3870
3871         igb_reinit_locked(adapter);
3872 }
3873
3874 /**
3875  * igb_get_stats - Get System Network Statistics
3876  * @netdev: network interface device structure
3877  *
3878  * Returns the address of the device statistics structure.
3879  * The statistics are actually updated from the timer callback.
3880  **/
3881 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3882 {
3883         /* only return the current stats */
3884         return &netdev->stats;
3885 }
3886
3887 /**
3888  * igb_change_mtu - Change the Maximum Transfer Unit
3889  * @netdev: network interface device structure
3890  * @new_mtu: new value for maximum frame size
3891  *
3892  * Returns 0 on success, negative on failure
3893  **/
3894 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3895 {
3896         struct igb_adapter *adapter = netdev_priv(netdev);
3897         struct pci_dev *pdev = adapter->pdev;
3898         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3899         u32 rx_buffer_len, i;
3900
3901         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3902                 dev_err(&pdev->dev, "Invalid MTU setting\n");
3903                 return -EINVAL;
3904         }
3905
3906         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3907                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3908                 return -EINVAL;
3909         }
3910
3911         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3912                 msleep(1);
3913
3914         /* igb_down has a dependency on max_frame_size */
3915         adapter->max_frame_size = max_frame;
3916
3917         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3918          * means we reserve 2 more, this pushes us to allocate from the next
3919          * larger slab size.
3920          * i.e. RXBUFFER_2048 --> size-4096 slab
3921          */
3922
3923         if (max_frame <= IGB_RXBUFFER_1024)
3924                 rx_buffer_len = IGB_RXBUFFER_1024;
3925         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3926                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3927         else
3928                 rx_buffer_len = IGB_RXBUFFER_128;
3929
3930         if (netif_running(netdev))
3931                 igb_down(adapter);
3932
3933         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3934                  netdev->mtu, new_mtu);
3935         netdev->mtu = new_mtu;
3936
3937         for (i = 0; i < adapter->num_rx_queues; i++)
3938                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
3939
3940         if (netif_running(netdev))
3941                 igb_up(adapter);
3942         else
3943                 igb_reset(adapter);
3944
3945         clear_bit(__IGB_RESETTING, &adapter->state);
3946
3947         return 0;
3948 }
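/* Illustrative worked example of the buffer-size selection above: a standard
 * MTU of 1500 gives max_frame = 1500 + 14 + 4 = 1518, which fits within
 * MAXIMUM_ETHERNET_VLAN_SIZE, so whole frames land in a single small buffer.
 * A jumbo MTU of 9000 gives max_frame = 9018, so rx_buffer_len drops to
 * IGB_RXBUFFER_128: only the header portion goes into the small buffer and
 * the rest of the packet is expected to be split into half-page buffers (see
 * the page/page_dma handling in igb_clean_rx_ring() above).
 */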
3949
3950 /**
3951  * igb_update_stats - Update the board statistics counters
3952  * @adapter: board private structure
3953  **/
3954
3955 void igb_update_stats(struct igb_adapter *adapter)
3956 {
3957         struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3958         struct e1000_hw *hw = &adapter->hw;
3959         struct pci_dev *pdev = adapter->pdev;
3960         u32 reg, mpc;
3961         u16 phy_tmp;
3962         int i;
3963         u64 bytes, packets;
3964
3965 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3966
3967         /*
3968          * Prevent stats update while adapter is being reset, or if the pci
3969          * connection is down.
3970          */
3971         if (adapter->link_speed == 0)
3972                 return;
3973         if (pci_channel_offline(pdev))
3974                 return;
3975
3976         bytes = 0;
3977         packets = 0;
3978         for (i = 0; i < adapter->num_rx_queues; i++) {
3979                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3980                 struct igb_ring *ring = adapter->rx_ring[i];
3981                 ring->rx_stats.drops += rqdpc_tmp;
3982                 net_stats->rx_fifo_errors += rqdpc_tmp;
3983                 bytes += ring->rx_stats.bytes;
3984                 packets += ring->rx_stats.packets;
3985         }
3986
3987         net_stats->rx_bytes = bytes;
3988         net_stats->rx_packets = packets;
3989
3990         bytes = 0;
3991         packets = 0;
3992         for (i = 0; i < adapter->num_tx_queues; i++) {
3993                 struct igb_ring *ring = adapter->tx_ring[i];
3994                 bytes += ring->tx_stats.bytes;
3995                 packets += ring->tx_stats.packets;
3996         }
3997         net_stats->tx_bytes = bytes;
3998         net_stats->tx_packets = packets;
3999
4000         /* read stats registers */
4001         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4002         adapter->stats.gprc += rd32(E1000_GPRC);
4003         adapter->stats.gorc += rd32(E1000_GORCL);
4004         rd32(E1000_GORCH); /* clear GORCL */
4005         adapter->stats.bprc += rd32(E1000_BPRC);
4006         adapter->stats.mprc += rd32(E1000_MPRC);
4007         adapter->stats.roc += rd32(E1000_ROC);
4008
4009         adapter->stats.prc64 += rd32(E1000_PRC64);
4010         adapter->stats.prc127 += rd32(E1000_PRC127);
4011         adapter->stats.prc255 += rd32(E1000_PRC255);
4012         adapter->stats.prc511 += rd32(E1000_PRC511);
4013         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4014         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4015         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4016         adapter->stats.sec += rd32(E1000_SEC);
4017
4018         mpc = rd32(E1000_MPC);
4019         adapter->stats.mpc += mpc;
4020         net_stats->rx_fifo_errors += mpc;
4021         adapter->stats.scc += rd32(E1000_SCC);
4022         adapter->stats.ecol += rd32(E1000_ECOL);
4023         adapter->stats.mcc += rd32(E1000_MCC);
4024         adapter->stats.latecol += rd32(E1000_LATECOL);
4025         adapter->stats.dc += rd32(E1000_DC);
4026         adapter->stats.rlec += rd32(E1000_RLEC);
4027         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4028         adapter->stats.xontxc += rd32(E1000_XONTXC);
4029         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4030         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4031         adapter->stats.fcruc += rd32(E1000_FCRUC);
4032         adapter->stats.gptc += rd32(E1000_GPTC);
4033         adapter->stats.gotc += rd32(E1000_GOTCL);
4034         rd32(E1000_GOTCH); /* clear GOTCL */
4035         adapter->stats.rnbc += rd32(E1000_RNBC);
4036         adapter->stats.ruc += rd32(E1000_RUC);
4037         adapter->stats.rfc += rd32(E1000_RFC);
4038         adapter->stats.rjc += rd32(E1000_RJC);
4039         adapter->stats.tor += rd32(E1000_TORH);
4040         adapter->stats.tot += rd32(E1000_TOTH);
4041         adapter->stats.tpr += rd32(E1000_TPR);
4042
4043         adapter->stats.ptc64 += rd32(E1000_PTC64);
4044         adapter->stats.ptc127 += rd32(E1000_PTC127);
4045         adapter->stats.ptc255 += rd32(E1000_PTC255);
4046         adapter->stats.ptc511 += rd32(E1000_PTC511);
4047         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4048         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4049
4050         adapter->stats.mptc += rd32(E1000_MPTC);
4051         adapter->stats.bptc += rd32(E1000_BPTC);
4052
4053         adapter->stats.tpt += rd32(E1000_TPT);
4054         adapter->stats.colc += rd32(E1000_COLC);
4055
4056         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4057         /* read internal phy specific stats */
4058         reg = rd32(E1000_CTRL_EXT);
4059         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4060                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4061                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4062         }
4063
4064         adapter->stats.tsctc += rd32(E1000_TSCTC);
4065         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4066
4067         adapter->stats.iac += rd32(E1000_IAC);
4068         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4069         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4070         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4071         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4072         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4073         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4074         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4075         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4076
4077         /* Fill out the OS statistics structure */
4078         net_stats->multicast = adapter->stats.mprc;
4079         net_stats->collisions = adapter->stats.colc;
4080
4081         /* Rx Errors */
4082
4083         /* RLEC on some newer hardware can be incorrect so build
4084          * our own version based on RUC and ROC */
4085         net_stats->rx_errors = adapter->stats.rxerrc +
4086                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4087                 adapter->stats.ruc + adapter->stats.roc +
4088                 adapter->stats.cexterr;
4089         net_stats->rx_length_errors = adapter->stats.ruc +
4090                                       adapter->stats.roc;
4091         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4092         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4093         net_stats->rx_missed_errors = adapter->stats.mpc;
4094
4095         /* Tx Errors */
4096         net_stats->tx_errors = adapter->stats.ecol +
4097                                adapter->stats.latecol;
4098         net_stats->tx_aborted_errors = adapter->stats.ecol;
4099         net_stats->tx_window_errors = adapter->stats.latecol;
4100         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4101
4102         /* Tx Dropped needs to be maintained elsewhere */
4103
4104         /* Phy Stats */
4105         if (hw->phy.media_type == e1000_media_type_copper) {
4106                 if ((adapter->link_speed == SPEED_1000) &&
4107                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4108                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4109                         adapter->phy_stats.idle_errors += phy_tmp;
4110                 }
4111         }
4112
4113         /* Management Stats */
4114         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4115         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4116         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4117 }
4118
4119 static irqreturn_t igb_msix_other(int irq, void *data)
4120 {
4121         struct igb_adapter *adapter = data;
4122         struct e1000_hw *hw = &adapter->hw;
4123         u32 icr = rd32(E1000_ICR);
4124         /* reading ICR causes bit 31 of EICR to be cleared */
4125
4126         if (icr & E1000_ICR_DRSTA)
4127                 schedule_work(&adapter->reset_task);
4128
4129         if (icr & E1000_ICR_DOUTSYNC) {
4130                 /* HW is reporting DMA is out of sync */
4131                 adapter->stats.doosync++;
4132         }
4133
4134         /* Check for a mailbox event */
4135         if (icr & E1000_ICR_VMMB)
4136                 igb_msg_task(adapter);
4137
4138         if (icr & E1000_ICR_LSC) {
4139                 hw->mac.get_link_status = 1;
4140                 /* guard against interrupt when we're going down */
4141                 if (!test_bit(__IGB_DOWN, &adapter->state))
4142                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4143         }
4144
4145         if (adapter->vfs_allocated_count)
4146                 wr32(E1000_IMS, E1000_IMS_LSC |
4147                                 E1000_IMS_VMMB |
4148                                 E1000_IMS_DOUTSYNC);
4149         else
4150                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4151         wr32(E1000_EIMS, adapter->eims_other);
4152
4153         return IRQ_HANDLED;
4154 }
4155
4156 static void igb_write_itr(struct igb_q_vector *q_vector)
4157 {
4158         struct igb_adapter *adapter = q_vector->adapter;
4159         u32 itr_val = q_vector->itr_val & 0x7FFC;
4160
4161         if (!q_vector->set_itr)
4162                 return;
4163
4164         if (!itr_val)
4165                 itr_val = 0x4;
4166
4167         if (adapter->hw.mac.type == e1000_82575)
4168                 itr_val |= itr_val << 16;
4169         else
4170                 itr_val |= 0x8000000;
4171
4172         writel(itr_val, q_vector->itr_register);
4173         q_vector->set_itr = 0;
4174 }
4175
4176 static irqreturn_t igb_msix_ring(int irq, void *data)
4177 {
4178         struct igb_q_vector *q_vector = data;
4179
4180         /* Write the ITR value calculated from the previous interrupt. */
4181         igb_write_itr(q_vector);
4182
4183         napi_schedule(&q_vector->napi);
4184
4185         return IRQ_HANDLED;
4186 }
4187
4188 #ifdef CONFIG_IGB_DCA
4189 static void igb_update_dca(struct igb_q_vector *q_vector)
4190 {
4191         struct igb_adapter *adapter = q_vector->adapter;
4192         struct e1000_hw *hw = &adapter->hw;
4193         int cpu = get_cpu();
4194
4195         if (q_vector->cpu == cpu)
4196                 goto out_no_update;
4197
4198         if (q_vector->tx_ring) {
4199                 int q = q_vector->tx_ring->reg_idx;
4200                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4201                 if (hw->mac.type == e1000_82575) {
4202                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4203                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4204                 } else {
4205                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4206                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4207                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4208                 }
4209                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4210                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4211         }
4212         if (q_vector->rx_ring) {
4213                 int q = q_vector->rx_ring->reg_idx;
4214                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4215                 if (hw->mac.type == e1000_82575) {
4216                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4217                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4218                 } else {
4219                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4220                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4221                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4222                 }
4223                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4224                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4225                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4226                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4227         }
4228         q_vector->cpu = cpu;
4229 out_no_update:
4230         put_cpu();
4231 }
4232
4233 static void igb_setup_dca(struct igb_adapter *adapter)
4234 {
4235         struct e1000_hw *hw = &adapter->hw;
4236         int i;
4237
4238         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4239                 return;
4240
4241         /* Always use CB2 mode, difference is masked in the CB driver. */
4242         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4243
4244         for (i = 0; i < adapter->num_q_vectors; i++) {
4245                 adapter->q_vector[i]->cpu = -1;
4246                 igb_update_dca(adapter->q_vector[i]);
4247         }
4248 }
4249
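     /**
      * __igb_notify_dca - handle a DCA provider add/remove for one port
      * @dev: device being iterated by driver_for_each_device
      * @data: pointer to the DCA event code
      *
      * Registers or unregisters the port as a DCA requester and toggles
      * IGB_FLAG_DCA_ENABLED plus the DCA_CTRL mode accordingly.
      **/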
4250 static int __igb_notify_dca(struct device *dev, void *data)
4251 {
4252         struct net_device *netdev = dev_get_drvdata(dev);
4253         struct igb_adapter *adapter = netdev_priv(netdev);
4254         struct pci_dev *pdev = adapter->pdev;
4255         struct e1000_hw *hw = &adapter->hw;
4256         unsigned long event = *(unsigned long *)data;
4257
4258         switch (event) {
4259         case DCA_PROVIDER_ADD:
4260                 /* if already enabled, don't do it again */
4261                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4262                         break;
4263                 if (dca_add_requester(dev) == 0) {
4264                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4265                         dev_info(&pdev->dev, "DCA enabled\n");
4266                         igb_setup_dca(adapter);
4267                         break;
4268                 }
4269                 /* Fall Through since DCA is disabled. */
4270         case DCA_PROVIDER_REMOVE:
4271                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4272                         /* without this a class_device is left
4273                          * hanging around in the sysfs model */
4274                         dca_remove_requester(dev);
4275                         dev_info(&pdev->dev, "DCA disabled\n");
4276                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4277                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4278                 }
4279                 break;
4280         }
4281
4282         return 0;
4283 }
4284
4285 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4286                           void *p)
4287 {
4288         int ret_val;
4289
4290         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4291                                          __igb_notify_dca);
4292
4293         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4294 }
4295 #endif /* CONFIG_IGB_DCA */
4296
4297 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4298 {
4299         struct e1000_hw *hw = &adapter->hw;
4300         u32 ping;
4301         int i;
4302
4303         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4304                 ping = E1000_PF_CONTROL_MSG;
4305                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4306                         ping |= E1000_VT_MSGTYPE_CTS;
4307                 igb_write_mbx(hw, &ping, 1, i);
4308         }
4309 }
4310
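     /**
      * igb_set_vf_promisc - handle a VF request to change promiscuous modes
      * @adapter: board private structure
      * @msgbuf: mailbox message from the VF
      * @vf: VF number requesting the change
      *
      * Clears the VF's promiscuous state, then re-enables multicast
      * promiscuous mode or rewrites the stored multicast hashes as
      * requested.  Unsupported flag bits left in the message are rejected.
      **/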
4311 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4312 {
4313         struct e1000_hw *hw = &adapter->hw;
4314         u32 vmolr = rd32(E1000_VMOLR(vf));
4315         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4316
4317         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4318                             IGB_VF_FLAG_MULTI_PROMISC);
4319         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4320
4321         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4322                 vmolr |= E1000_VMOLR_MPME;
4323                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4324         } else {
4325                 /*
4326                  * if we have hashes and we are clearing a multicast promisc
4327                  * flag we need to write the hashes to the MTA as this step
4328                  * was previously skipped
4329                  */
4330                 if (vf_data->num_vf_mc_hashes > 30) {
4331                         vmolr |= E1000_VMOLR_MPME;
4332                 } else if (vf_data->num_vf_mc_hashes) {
4333                         int j;
4334                         vmolr |= E1000_VMOLR_ROMPE;
4335                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4336                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4337                 }
4338         }
4339
4340         wr32(E1000_VMOLR(vf), vmolr);
4341
4342         /* there are flags left unprocessed, likely not supported */
4343         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4344                 return -EINVAL;
4345
4346         return 0;
4347
4348 }
4349
4350 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4351                                   u32 *msgbuf, u32 vf)
4352 {
4353         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4354         u16 *hash_list = (u16 *)&msgbuf[1];
4355         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4356         int i;
4357
4358         /* salt away the number of multicast addresses assigned
4359          * to this VF so they can be restored when the PF multicast
4360          * list changes
4361          */
4362         vf_data->num_vf_mc_hashes = n;
4363
4364         /* only up to 30 hash values supported */
4365         if (n > 30)
4366                 n = 30;
4367
4368         /* store the hashes for later use */
4369         for (i = 0; i < n; i++)
4370                 vf_data->vf_mc_hashes[i] = hash_list[i];
4371
4372         /* Flush and reset the mta with the new values */
4373         igb_set_rx_mode(adapter->netdev);
4374
4375         return 0;
4376 }
4377
4378 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4379 {
4380         struct e1000_hw *hw = &adapter->hw;
4381         struct vf_data_storage *vf_data;
4382         int i, j;
4383
4384         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4385                 u32 vmolr = rd32(E1000_VMOLR(i));
4386                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4387
4388                 vf_data = &adapter->vf_data[i];
4389
4390                 if ((vf_data->num_vf_mc_hashes > 30) ||
4391                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4392                         vmolr |= E1000_VMOLR_MPME;
4393                 } else if (vf_data->num_vf_mc_hashes) {
4394                         vmolr |= E1000_VMOLR_ROMPE;
4395                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4396                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4397                 }
4398                 wr32(E1000_VMOLR(i), vmolr);
4399         }
4400 }
4401
4402 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4403 {
4404         struct e1000_hw *hw = &adapter->hw;
4405         u32 pool_mask, reg, vid;
4406         int i;
4407
4408         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4409
4410         /* Find the vlan filter for this id */
4411         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4412                 reg = rd32(E1000_VLVF(i));
4413
4414                 /* remove the vf from the pool */
4415                 reg &= ~pool_mask;
4416
4417                 /* if pool is empty then remove entry from vfta */
4418                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4419                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4420                         vid = reg & E1000_VLVF_VLANID_MASK;
4421                         reg = 0;
4422                         igb_vfta_set(hw, vid, false);
4423                 }
4424
4425                 wr32(E1000_VLVF(i), reg);
4426         }
4427
4428         adapter->vf_data[vf].vlans_enabled = 0;
4429 }
4430
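     /**
      * igb_vlvf_set - add or remove a pool from a shared VLVF filter entry
      * @adapter: board private structure
      * @vid: VLAN id to add or remove
      * @add: true to join the filter, false to leave it
      * @vf: pool/VF index being updated
      *
      * VLVF entries are shared between pools on 82576 and newer parts.  The
      * first and last users of a vid also update the VFTA, and VF pools grow
      * or shrink their RLPML by 4 bytes to account for the VLAN tag.
      **/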
4431 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4432 {
4433         struct e1000_hw *hw = &adapter->hw;
4434         u32 reg, i;
4435
4436         /* The vlvf table only exists on 82576 hardware and newer */
4437         if (hw->mac.type < e1000_82576)
4438                 return -1;
4439
4440         /* we only need to do this if VMDq is enabled */
4441         if (!adapter->vfs_allocated_count)
4442                 return -1;
4443
4444         /* Find the vlan filter for this id */
4445         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4446                 reg = rd32(E1000_VLVF(i));
4447                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4448                     vid == (reg & E1000_VLVF_VLANID_MASK))
4449                         break;
4450         }
4451
4452         if (add) {
4453                 if (i == E1000_VLVF_ARRAY_SIZE) {
4454                         /* Did not find a matching VLAN ID entry that was
4455                          * enabled.  Search for a free filter entry, i.e.
4456                          * one without the enable bit set
4457                          */
4458                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4459                                 reg = rd32(E1000_VLVF(i));
4460                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4461                                         break;
4462                         }
4463                 }
4464                 if (i < E1000_VLVF_ARRAY_SIZE) {
4465                         /* Found an enabled/available entry */
4466                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4467
4468                         /* if !enabled we need to set this up in vfta */
4469                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4470                                 /* add VID to filter table */
4471                                 igb_vfta_set(hw, vid, true);
4472                                 reg |= E1000_VLVF_VLANID_ENABLE;
4473                         }
4474                         reg &= ~E1000_VLVF_VLANID_MASK;
4475                         reg |= vid;
4476                         wr32(E1000_VLVF(i), reg);
4477
4478                         /* do not modify RLPML for PF devices */
4479                         if (vf >= adapter->vfs_allocated_count)
4480                                 return 0;
4481
4482                         if (!adapter->vf_data[vf].vlans_enabled) {
4483                                 u32 size;
4484                                 reg = rd32(E1000_VMOLR(vf));
4485                                 size = reg & E1000_VMOLR_RLPML_MASK;
4486                                 size += 4;
4487                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4488                                 reg |= size;
4489                                 wr32(E1000_VMOLR(vf), reg);
4490                         }
4491
4492                         adapter->vf_data[vf].vlans_enabled++;
4493                         return 0;
4494                 }
4495         } else {
4496                 if (i < E1000_VLVF_ARRAY_SIZE) {
4497                         /* remove vf from the pool */
4498                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4499                         /* if pool is empty then remove entry from vfta */
4500                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4501                                 reg = 0;
4502                                 igb_vfta_set(hw, vid, false);
4503                         }
4504                         wr32(E1000_VLVF(i), reg);
4505
4506                         /* do not modify RLPML for PF devices */
4507                         if (vf >= adapter->vfs_allocated_count)
4508                                 return 0;
4509
4510                         adapter->vf_data[vf].vlans_enabled--;
4511                         if (!adapter->vf_data[vf].vlans_enabled) {
4512                                 u32 size;
4513                                 reg = rd32(E1000_VMOLR(vf));
4514                                 size = reg & E1000_VMOLR_RLPML_MASK;
4515                                 size -= 4;
4516                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4517                                 reg |= size;
4518                                 wr32(E1000_VMOLR(vf), reg);
4519                         }
4520                 }
4521         }
4522         return 0;
4523 }
4524
4525 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4526 {
4527         struct e1000_hw *hw = &adapter->hw;
4528
4529         if (vid)
4530                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4531         else
4532                 wr32(E1000_VMVIR(vf), 0);
4533 }
4534
4535 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4536                                int vf, u16 vlan, u8 qos)
4537 {
4538         int err = 0;
4539         struct igb_adapter *adapter = netdev_priv(netdev);
4540
4541         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4542                 return -EINVAL;
4543         if (vlan || qos) {
4544                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4545                 if (err)
4546                         goto out;
4547                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4548                 igb_set_vmolr(adapter, vf, !vlan);
4549                 adapter->vf_data[vf].pf_vlan = vlan;
4550                 adapter->vf_data[vf].pf_qos = qos;
4551                 dev_info(&adapter->pdev->dev,
4552                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4553                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4554                         dev_warn(&adapter->pdev->dev,
4555                                  "The VF VLAN has been set,"
4556                                  " but the PF device is not up.\n");
4557                         dev_warn(&adapter->pdev->dev,
4558                                  "Bring the PF device up before"
4559                                  " attempting to use the VF device.\n");
4560                 }
4561         } else {
4562                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4563                                    false, vf);
4564                 igb_set_vmvir(adapter, vlan, vf);
4565                 igb_set_vmolr(adapter, vf, true);
4566                 adapter->vf_data[vf].pf_vlan = 0;
4567                 adapter->vf_data[vf].pf_qos = 0;
4568         }
4569 out:
4570         return err;
4571 }
4572
4573 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4574 {
4575         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4576         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4577
4578         return igb_vlvf_set(adapter, vid, add, vf);
4579 }
4580
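     /**
      * igb_vf_reset - return a VF's software state to its defaults
      * @adapter: board private structure
      * @vf: VF to reset
      *
      * Clears the PF-assigned-MAC flag, restores default offloads, drops the
      * VF's VLAN filters (re-adding an administratively set port VLAN, if
      * any) and flushes its multicast hash list.
      **/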
4581 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4582 {
4583         /* clear flags */
4584         adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4585         adapter->vf_data[vf].last_nack = jiffies;
4586
4587         /* reset offloads to defaults */
4588         igb_set_vmolr(adapter, vf, true);
4589
4590         /* reset vlans for device */
4591         igb_clear_vf_vfta(adapter, vf);
4592         if (adapter->vf_data[vf].pf_vlan)
4593                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4594                                     adapter->vf_data[vf].pf_vlan,
4595                                     adapter->vf_data[vf].pf_qos);
4596         else
4597                 igb_clear_vf_vfta(adapter, vf);
4598
4599         /* reset multicast table array for vf */
4600         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4601
4602         /* Flush and reset the mta with the new values */
4603         igb_set_rx_mode(adapter->netdev);
4604 }
4605
4606 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4607 {
4608         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4609
4610         /* generate a new mac address as we were hotplug removed/added */
4611         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4612                 random_ether_addr(vf_mac);
4613
4614         /* process remaining reset events */
4615         igb_vf_reset(adapter, vf);
4616 }
4617
4618 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4619 {
4620         struct e1000_hw *hw = &adapter->hw;
4621         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4622         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4623         u32 reg, msgbuf[3];
4624         u8 *addr = (u8 *)(&msgbuf[1]);
4625
4626         /* process all the same items cleared in a function level reset */
4627         igb_vf_reset(adapter, vf);
4628
4629         /* set vf mac address */
4630         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4631
4632         /* enable transmit and receive for vf */
4633         reg = rd32(E1000_VFTE);
4634         wr32(E1000_VFTE, reg | (1 << vf));
4635         reg = rd32(E1000_VFRE);
4636         wr32(E1000_VFRE, reg | (1 << vf));
4637
4638         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4639
4640         /* reply to reset with ack and vf mac address */
4641         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4642         memcpy(addr, vf_mac, 6);
4643         igb_write_mbx(hw, msgbuf, 3, vf);
4644 }
4645
4646 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4647 {
4648         unsigned char *addr = (unsigned char *)&msg[1];
4649         int err = -1;
4650
4651         if (is_valid_ether_addr(addr))
4652                 err = igb_set_vf_mac(adapter, vf, addr);
4653
4654         return err;
4655 }
4656
4657 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4658 {
4659         struct e1000_hw *hw = &adapter->hw;
4660         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4661         u32 msg = E1000_VT_MSGTYPE_NACK;
4662
4663         /* if device isn't clear to send it shouldn't be reading either */
4664         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4665             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4666                 igb_write_mbx(hw, &msg, 1, vf);
4667                 vf_data->last_nack = jiffies;
4668         }
4669 }
4670
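     /**
      * igb_rcv_msg_from_vf - read and dispatch one mailbox message from a VF
      * @adapter: board private structure
      * @vf: VF whose mailbox has a pending message
      *
      * Reset requests are always honored; all other requests are NACKed
      * until the VF has completed a reset (clear-to-send).  The result of
      * the requested operation is written back to the VF as an ACK or NACK.
      **/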
4671 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4672 {
4673         struct pci_dev *pdev = adapter->pdev;
4674         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4675         struct e1000_hw *hw = &adapter->hw;
4676         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4677         s32 retval;
4678
4679         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4680
4681         if (retval) {
4682                 /* if receive failed revoke VF CTS stats and restart init */
4683                 dev_err(&pdev->dev, "Error receiving message from VF\n");
4684                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
4685                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4686                         return;
4687                 goto out;
4688         }
4689
4690         /* this is a message we already processed, do nothing */
4691         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4692                 return;
4693
4694         /*
4695          * until the vf completes a reset it should not be
4696          * allowed to start any configuration.
4697          */
4698
4699         if (msgbuf[0] == E1000_VF_RESET) {
4700                 igb_vf_reset_msg(adapter, vf);
4701                 return;
4702         }
4703
4704         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4705                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4706                         return;
4707                 retval = -1;
4708                 goto out;
4709         }
4710
4711         switch ((msgbuf[0] & 0xFFFF)) {
4712         case E1000_VF_SET_MAC_ADDR:
4713                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4714                 break;
4715         case E1000_VF_SET_PROMISC:
4716                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4717                 break;
4718         case E1000_VF_SET_MULTICAST:
4719                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4720                 break;
4721         case E1000_VF_SET_LPE:
4722                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4723                 break;
4724         case E1000_VF_SET_VLAN:
4725                 if (adapter->vf_data[vf].pf_vlan)
4726                         retval = -1;
4727                 else
4728                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4729                 break;
4730         default:
4731                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4732                 retval = -1;
4733                 break;
4734         }
4735
4736         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4737 out:
4738         /* notify the VF of the results of what it sent us */
4739         if (retval)
4740                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4741         else
4742                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4743
4744         igb_write_mbx(hw, msgbuf, 1, vf);
4745 }
4746
4747 static void igb_msg_task(struct igb_adapter *adapter)
4748 {
4749         struct e1000_hw *hw = &adapter->hw;
4750         u32 vf;
4751
4752         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4753                 /* process any reset requests */
4754                 if (!igb_check_for_rst(hw, vf))
4755                         igb_vf_reset_event(adapter, vf);
4756
4757                 /* process any messages pending */
4758                 if (!igb_check_for_msg(hw, vf))
4759                         igb_rcv_msg_from_vf(adapter, vf);
4760
4761                 /* process any acks */
4762                 if (!igb_check_for_ack(hw, vf))
4763                         igb_rcv_ack_from_vf(adapter, vf);
4764         }
4765 }
4766
4767 /**
4768  *  igb_set_uta - Set unicast filter table address
4769  *  @adapter: board private structure
4770  *
4771  *  The unicast table address is a register array of 32-bit registers.
4772  *  The table is meant to be used in a way similar to how the MTA is used;
4773  *  however, due to certain limitations in the hardware it is necessary to
4774  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4775  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
4776  **/
4777 static void igb_set_uta(struct igb_adapter *adapter)
4778 {
4779         struct e1000_hw *hw = &adapter->hw;
4780         int i;
4781
4782         /* The UTA table only exists on 82576 hardware and newer */
4783         if (hw->mac.type < e1000_82576)
4784                 return;
4785
4786         /* we only need to do this if VMDq is enabled */
4787         if (!adapter->vfs_allocated_count)
4788                 return;
4789
4790         for (i = 0; i < hw->mac.uta_reg_count; i++)
4791                 array_wr32(E1000_UTA, i, ~0);
4792 }
4793
4794 /**
4795  * igb_intr_msi - Interrupt Handler
4796  * @irq: interrupt number
4797  * @data: pointer to a network interface device structure
4798  **/
4799 static irqreturn_t igb_intr_msi(int irq, void *data)
4800 {
4801         struct igb_adapter *adapter = data;
4802         struct igb_q_vector *q_vector = adapter->q_vector[0];
4803         struct e1000_hw *hw = &adapter->hw;
4804         /* read ICR disables interrupts using IAM */
4805         u32 icr = rd32(E1000_ICR);
4806
4807         igb_write_itr(q_vector);
4808
4809         if (icr & E1000_ICR_DRSTA)
4810                 schedule_work(&adapter->reset_task);
4811
4812         if (icr & E1000_ICR_DOUTSYNC) {
4813                 /* HW is reporting DMA is out of sync */
4814                 adapter->stats.doosync++;
4815         }
4816
4817         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4818                 hw->mac.get_link_status = 1;
4819                 if (!test_bit(__IGB_DOWN, &adapter->state))
4820                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4821         }
4822
4823         napi_schedule(&q_vector->napi);
4824
4825         return IRQ_HANDLED;
4826 }
4827
4828 /**
4829  * igb_intr - Legacy Interrupt Handler
4830  * @irq: interrupt number
4831  * @data: pointer to a network interface device structure
4832  **/
4833 static irqreturn_t igb_intr(int irq, void *data)
4834 {
4835         struct igb_adapter *adapter = data;
4836         struct igb_q_vector *q_vector = adapter->q_vector[0];
4837         struct e1000_hw *hw = &adapter->hw;
4838         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4839          * need for the IMC write */
4840         u32 icr = rd32(E1000_ICR);
4841         if (!icr)
4842                 return IRQ_NONE;  /* Not our interrupt */
4843
4844         igb_write_itr(q_vector);
4845
4846         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4847          * not set, then the adapter didn't send an interrupt */
4848         if (!(icr & E1000_ICR_INT_ASSERTED))
4849                 return IRQ_NONE;
4850
4851         if (icr & E1000_ICR_DRSTA)
4852                 schedule_work(&adapter->reset_task);
4853
4854         if (icr & E1000_ICR_DOUTSYNC) {
4855                 /* HW is reporting DMA is out of sync */
4856                 adapter->stats.doosync++;
4857         }
4858
4859         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4860                 hw->mac.get_link_status = 1;
4861                 /* guard against interrupt when we're going down */
4862                 if (!test_bit(__IGB_DOWN, &adapter->state))
4863                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4864         }
4865
4866         napi_schedule(&q_vector->napi);
4867
4868         return IRQ_HANDLED;
4869 }
4870
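     /**
      * igb_ring_irq_enable - re-enable interrupts after a NAPI poll
      * @q_vector: vector that has completed its poll
      *
      * Recalculates the adaptive ITR when it is enabled, then unmasks the
      * vector's EIMS bit (MSI-X) or the adapter's interrupts (MSI/legacy)
      * unless the interface is being brought down.
      **/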
4871 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4872 {
4873         struct igb_adapter *adapter = q_vector->adapter;
4874         struct e1000_hw *hw = &adapter->hw;
4875
4876         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4877             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4878                 if (!adapter->msix_entries)
4879                         igb_set_itr(adapter);
4880                 else
4881                         igb_update_ring_itr(q_vector);
4882         }
4883
4884         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4885                 if (adapter->msix_entries)
4886                         wr32(E1000_EIMS, q_vector->eims_value);
4887                 else
4888                         igb_irq_enable(adapter);
4889         }
4890 }
4891
4892 /**
4893  * igb_poll - NAPI Rx polling callback
4894  * @napi: napi polling structure
4895  * @budget: count of how many packets we should handle
4896  **/
4897 static int igb_poll(struct napi_struct *napi, int budget)
4898 {
4899         struct igb_q_vector *q_vector = container_of(napi,
4900                                                      struct igb_q_vector,
4901                                                      napi);
4902         int tx_clean_complete = 1, work_done = 0;
4903
4904 #ifdef CONFIG_IGB_DCA
4905         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4906                 igb_update_dca(q_vector);
4907 #endif
4908         if (q_vector->tx_ring)
4909                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4910
4911         if (q_vector->rx_ring)
4912                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4913
4914         if (!tx_clean_complete)
4915                 work_done = budget;
4916
4917         /* If not enough Rx work done, exit the polling mode */
4918         if (work_done < budget) {
4919                 napi_complete(napi);
4920                 igb_ring_irq_enable(q_vector);
4921         }
4922
4923         return work_done;
4924 }
4925
4926 /**
4927  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4928  * @adapter: board private structure
4929  * @shhwtstamps: timestamp structure to update
4930  * @regval: unsigned 64bit system time value.
4931  *
4932  * We need to convert the system time value stored in the RX/TXSTMP registers
4933  * into a hwtstamp which can be used by the upper level timestamping functions
4934  */
4935 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4936                                    struct skb_shared_hwtstamps *shhwtstamps,
4937                                    u64 regval)
4938 {
4939         u64 ns;
4940
4941         /*
4942          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
4943          * 24 to match clock shift we setup earlier.
4944          */
4945         if (adapter->hw.mac.type == e1000_82580)
4946                 regval <<= IGB_82580_TSYNC_SHIFT;
4947
4948         ns = timecounter_cyc2time(&adapter->clock, regval);
4949         timecompare_update(&adapter->compare, ns);
4950         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4951         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4952         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4953 }
4954
4955 /**
4956  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4957  * @q_vector: pointer to q_vector containing needed info
4958  * @skb: packet that was just sent
4959  *
4960  * If we were asked to do hardware stamping and such a time stamp is
4961  * available, then it must have been for this skb here because we
4962  * allow only one such packet into the queue.
4963  */
4964 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4965 {
4966         struct igb_adapter *adapter = q_vector->adapter;
4967         union skb_shared_tx *shtx = skb_tx(skb);
4968         struct e1000_hw *hw = &adapter->hw;
4969         struct skb_shared_hwtstamps shhwtstamps;
4970         u64 regval;
4971
4972         /* if skb does not support hw timestamp or TX stamp not valid exit */
4973         if (likely(!shtx->hardware) ||
4974             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4975                 return;
4976
4977         regval = rd32(E1000_TXSTMPL);
4978         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4979
4980         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4981         skb_tstamp_tx(skb, &shhwtstamps);
4982 }
4983
4984 /**
4985  * igb_clean_tx_irq - Reclaim resources after transmit completes
4986  * @q_vector: pointer to q_vector containing needed info
4987  * returns true if ring is completely cleaned
4988  **/
4989 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4990 {
4991         struct igb_adapter *adapter = q_vector->adapter;
4992         struct igb_ring *tx_ring = q_vector->tx_ring;
4993         struct net_device *netdev = tx_ring->netdev;
4994         struct e1000_hw *hw = &adapter->hw;
4995         struct igb_buffer *buffer_info;
4996         struct sk_buff *skb;
4997         union e1000_adv_tx_desc *tx_desc, *eop_desc;
4998         unsigned int total_bytes = 0, total_packets = 0;
4999         unsigned int i, eop, count = 0;
5000         bool cleaned = false;
5001
5002         i = tx_ring->next_to_clean;
5003         eop = tx_ring->buffer_info[i].next_to_watch;
5004         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5005
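             /*
              * Walk completed descriptors: each transmitted skb records the
              * index of its last descriptor in next_to_watch (eop); once that
              * descriptor reports DD, every buffer up to and including it can
              * be unmapped and freed.
              */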
5006         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5007                (count < tx_ring->count)) {
5008                 for (cleaned = false; !cleaned; count++) {
5009                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5010                         buffer_info = &tx_ring->buffer_info[i];
5011                         cleaned = (i == eop);
5012                         skb = buffer_info->skb;
5013
5014                         if (skb) {
5015                                 unsigned int segs, bytecount;
5016                                 /* gso_segs is currently only valid for tcp */
5017                                 segs = buffer_info->gso_segs;
5018                                 /* multiply data chunks by size of headers */
5019                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
5020                                             skb->len;
5021                                 total_packets += segs;
5022                                 total_bytes += bytecount;
5023
5024                                 igb_tx_hwtstamp(q_vector, skb);
5025                         }
5026
5027                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5028                         tx_desc->wb.status = 0;
5029
5030                         i++;
5031                         if (i == tx_ring->count)
5032                                 i = 0;
5033                 }
5034                 eop = tx_ring->buffer_info[i].next_to_watch;
5035                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5036         }
5037
5038         tx_ring->next_to_clean = i;
5039
5040         if (unlikely(count &&
5041                      netif_carrier_ok(netdev) &&
5042                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5043                 /* Make sure that anybody stopping the queue after this
5044                  * sees the new next_to_clean.
5045                  */
5046                 smp_mb();
5047                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5048                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5049                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5050                         tx_ring->tx_stats.restart_queue++;
5051                 }
5052         }
5053
5054         if (tx_ring->detect_tx_hung) {
5055                 /* Detect a transmit hang in hardware, this serializes the
5056                  * check with the clearing of time_stamp and movement of i */
5057                 tx_ring->detect_tx_hung = false;
5058                 if (tx_ring->buffer_info[i].time_stamp &&
5059                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5060                                (adapter->tx_timeout_factor * HZ)) &&
5061                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5062
5063                         /* detected Tx unit hang */
5064                         dev_err(&tx_ring->pdev->dev,
5065                                 "Detected Tx Unit Hang\n"
5066                                 "  Tx Queue             <%d>\n"
5067                                 "  TDH                  <%x>\n"
5068                                 "  TDT                  <%x>\n"
5069                                 "  next_to_use          <%x>\n"
5070                                 "  next_to_clean        <%x>\n"
5071                                 "buffer_info[next_to_clean]\n"
5072                                 "  time_stamp           <%lx>\n"
5073                                 "  next_to_watch        <%x>\n"
5074                                 "  jiffies              <%lx>\n"
5075                                 "  desc.status          <%x>\n",
5076                                 tx_ring->queue_index,
5077                                 readl(tx_ring->head),
5078                                 readl(tx_ring->tail),
5079                                 tx_ring->next_to_use,
5080                                 tx_ring->next_to_clean,
5081                                 tx_ring->buffer_info[eop].time_stamp,
5082                                 eop,
5083                                 jiffies,
5084                                 eop_desc->wb.status);
5085                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5086                 }
5087         }
5088         tx_ring->total_bytes += total_bytes;
5089         tx_ring->total_packets += total_packets;
5090         tx_ring->tx_stats.bytes += total_bytes;
5091         tx_ring->tx_stats.packets += total_packets;
5092         return (count < tx_ring->count);
5093 }
5094
5095 /**
5096  * igb_receive_skb - helper function to handle rx indications
5097  * @q_vector: structure containing interrupt and ring information
5098  * @skb: packet to send up
5099  * @vlan_tag: vlan tag for packet
5100  **/
5101 static void igb_receive_skb(struct igb_q_vector *q_vector,
5102                             struct sk_buff *skb,
5103                             u16 vlan_tag)
5104 {
5105         struct igb_adapter *adapter = q_vector->adapter;
5106
5107         if (vlan_tag && adapter->vlgrp)
5108                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5109                                  vlan_tag, skb);
5110         else
5111                 napi_gro_receive(&q_vector->napi, skb);
5112 }
5113
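     /**
      * igb_rx_checksum_adv - set skb->ip_summed from rx descriptor status
      * @ring: rx ring the packet arrived on
      * @status_err: status/error bits from the rx descriptor
      * @skb: packet being processed
      **/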
5114 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5115                                        u32 status_err, struct sk_buff *skb)
5116 {
5117         skb->ip_summed = CHECKSUM_NONE;
5118
5119         /* skip if the descriptor's Ignore Checksum bit is set or checksumming is disabled through ethtool */
5120         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5121              (status_err & E1000_RXD_STAT_IXSM))
5122                 return;
5123
5124         /* TCP/UDP checksum error bit is set */
5125         if (status_err &
5126             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5127                 /*
5128                  * work around errata with sctp packets where the TCPE aka
5129                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5130                  * packets, (aka let the stack check the crc32c)
5131                  */
5132                 if ((skb->len == 60) &&
5133                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5134                         ring->rx_stats.csum_err++;
5135
5136                 /* let the stack verify checksum errors */
5137                 return;
5138         }
5139         /* It must be a TCP or UDP packet with a valid checksum */
5140         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5141                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5142
5143         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
5144 }
5145
5146 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5147                                    struct sk_buff *skb)
5148 {
5149         struct igb_adapter *adapter = q_vector->adapter;
5150         struct e1000_hw *hw = &adapter->hw;
5151         u64 regval;
5152
5153         /*
5154          * If this bit is set, then the RX registers contain the time stamp. No
5155          * other packet will be time stamped until we read these registers, so
5156          * read the registers to make them available again. Because only one
5157          * packet can be time stamped at a time, we know that the register
5158          * values must belong to this one here and therefore we don't need to
5159          * compare any of the additional attributes stored for it.
5160          *
5161          * If nothing went wrong, then it should have a skb_shared_tx that we
5162          * can turn into a skb_shared_hwtstamps.
5163          */
5164         if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
5165                 return;
5166         if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5167                 return;
5168
5169         regval = rd32(E1000_RXSTMPL);
5170         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5171
5172         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5173 }
5174 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5175                                union e1000_adv_rx_desc *rx_desc)
5176 {
5177         /* HW will not DMA in data larger than the given buffer, even if it
5178          * parses the (NFS, of course) header to be larger.  In that case, it
5179          * fills the header buffer and spills the rest into the page.
5180          */
5181         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5182                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5183         if (hlen > rx_ring->rx_buffer_len)
5184                 hlen = rx_ring->rx_buffer_len;
5185         return hlen;
5186 }
5187
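     /**
      * igb_clean_rx_irq_adv - clean completed receive descriptors
      * @q_vector: vector owning the rx ring
      * @work_done: incremented for every packet processed
      * @budget: NAPI budget limiting how many packets may be cleaned
      *
      * Reassembles header-split and multi-descriptor packets, applies
      * timestamp/checksum/VLAN handling, hands the skb to the stack and
      * periodically returns used buffers to the hardware.
      **/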
5188 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5189                                  int *work_done, int budget)
5190 {
5191         struct igb_ring *rx_ring = q_vector->rx_ring;
5192         struct net_device *netdev = rx_ring->netdev;
5193         struct pci_dev *pdev = rx_ring->pdev;
5194         union e1000_adv_rx_desc *rx_desc , *next_rxd;
5195         struct igb_buffer *buffer_info , *next_buffer;
5196         struct sk_buff *skb;
5197         bool cleaned = false;
5198         int cleaned_count = 0;
5199         int current_node = numa_node_id();
5200         unsigned int total_bytes = 0, total_packets = 0;
5201         unsigned int i;
5202         u32 staterr;
5203         u16 length;
5204         u16 vlan_tag;
5205
5206         i = rx_ring->next_to_clean;
5207         buffer_info = &rx_ring->buffer_info[i];
5208         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5209         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5210
5211         while (staterr & E1000_RXD_STAT_DD) {
5212                 if (*work_done >= budget)
5213                         break;
5214                 (*work_done)++;
5215
5216                 skb = buffer_info->skb;
5217                 prefetch(skb->data - NET_IP_ALIGN);
5218                 buffer_info->skb = NULL;
5219
5220                 i++;
5221                 if (i == rx_ring->count)
5222                         i = 0;
5223
5224                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5225                 prefetch(next_rxd);
5226                 next_buffer = &rx_ring->buffer_info[i];
5227
5228                 length = le16_to_cpu(rx_desc->wb.upper.length);
5229                 cleaned = true;
5230                 cleaned_count++;
5231
5232                 if (buffer_info->dma) {
5233                         pci_unmap_single(pdev, buffer_info->dma,
5234                                          rx_ring->rx_buffer_len,
5235                                          PCI_DMA_FROMDEVICE);
5236                         buffer_info->dma = 0;
5237                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5238                                 skb_put(skb, length);
5239                                 goto send_up;
5240                         }
5241                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5242                 }
5243
5244                 if (length) {
5245                         pci_unmap_page(pdev, buffer_info->page_dma,
5246                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5247                         buffer_info->page_dma = 0;
5248
5249                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
5250                                                 buffer_info->page,
5251                                                 buffer_info->page_offset,
5252                                                 length);
5253
5254                         if ((page_count(buffer_info->page) != 1) ||
5255                             (page_to_nid(buffer_info->page) != current_node))
5256                                 buffer_info->page = NULL;
5257                         else
5258                                 get_page(buffer_info->page);
5259
5260                         skb->len += length;
5261                         skb->data_len += length;
5262                         skb->truesize += length;
5263                 }
5264
5265                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5266                         buffer_info->skb = next_buffer->skb;
5267                         buffer_info->dma = next_buffer->dma;
5268                         next_buffer->skb = skb;
5269                         next_buffer->dma = 0;
5270                         goto next_desc;
5271                 }
5272 send_up:
5273                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5274                         dev_kfree_skb_irq(skb);
5275                         goto next_desc;
5276                 }
5277
5278                 igb_rx_hwtstamp(q_vector, staterr, skb);
5279                 total_bytes += skb->len;
5280                 total_packets++;
5281
5282                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5283
5284                 skb->protocol = eth_type_trans(skb, netdev);
5285                 skb_record_rx_queue(skb, rx_ring->queue_index);
5286
5287                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5288                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5289
5290                 igb_receive_skb(q_vector, skb, vlan_tag);
5291
5292 next_desc:
5293                 rx_desc->wb.upper.status_error = 0;
5294
5295                 /* return some buffers to hardware, one at a time is too slow */
5296                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5297                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5298                         cleaned_count = 0;
5299                 }
5300
5301                 /* use prefetched values */
5302                 rx_desc = next_rxd;
5303                 buffer_info = next_buffer;
5304                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5305         }
5306
5307         rx_ring->next_to_clean = i;
5308         cleaned_count = igb_desc_unused(rx_ring);
5309
5310         if (cleaned_count)
5311                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5312
5313         rx_ring->total_packets += total_packets;
5314         rx_ring->total_bytes += total_bytes;
5315         rx_ring->rx_stats.packets += total_packets;
5316         rx_ring->rx_stats.bytes += total_bytes;
5317         return cleaned;
5318 }
5319
5320 /**
5321  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5322  * @rx_ring: rx descriptor ring to refill with cleaned_count new buffers
5323  **/
5324 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5325 {
5326         struct net_device *netdev = rx_ring->netdev;
5327         union e1000_adv_rx_desc *rx_desc;
5328         struct igb_buffer *buffer_info;
5329         struct sk_buff *skb;
5330         unsigned int i;
5331         int bufsz;
5332
5333         i = rx_ring->next_to_use;
5334         buffer_info = &rx_ring->buffer_info[i];
5335
5336         bufsz = rx_ring->rx_buffer_len;
5337
5338         while (cleaned_count--) {
5339                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5340
5341                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5342                         if (!buffer_info->page) {
5343                                 buffer_info->page = netdev_alloc_page(netdev);
5344                                 if (!buffer_info->page) {
5345                                         rx_ring->rx_stats.alloc_failed++;
5346                                         goto no_buffers;
5347                                 }
5348                                 buffer_info->page_offset = 0;
5349                         } else {
5350                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5351                         }
5352                         buffer_info->page_dma =
5353                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5354                                              buffer_info->page_offset,
5355                                              PAGE_SIZE / 2,
5356                                              PCI_DMA_FROMDEVICE);
5357                         if (pci_dma_mapping_error(rx_ring->pdev,
5358                                                   buffer_info->page_dma)) {
5359                                 buffer_info->page_dma = 0;
5360                                 rx_ring->rx_stats.alloc_failed++;
5361                                 goto no_buffers;
5362                         }
5363                 }
5364
5365                 skb = buffer_info->skb;
5366                 if (!skb) {
5367                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5368                         if (!skb) {
5369                                 rx_ring->rx_stats.alloc_failed++;
5370                                 goto no_buffers;
5371                         }
5372
5373                         buffer_info->skb = skb;
5374                 }
5375                 if (!buffer_info->dma) {
5376                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5377                                                           skb->data,
5378                                                           bufsz,
5379                                                           PCI_DMA_FROMDEVICE);
5380                         if (pci_dma_mapping_error(rx_ring->pdev,
5381                                                   buffer_info->dma)) {
5382                                 buffer_info->dma = 0;
5383                                 rx_ring->rx_stats.alloc_failed++;
5384                                 goto no_buffers;
5385                         }
5386                 }
5387                 /* Refresh the desc even if buffer_addrs didn't change because
5388                  * each write-back erases this info. */
5389                 if (bufsz < IGB_RXBUFFER_1024) {
5390                         rx_desc->read.pkt_addr =
5391                              cpu_to_le64(buffer_info->page_dma);
5392                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5393                 } else {
5394                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5395                         rx_desc->read.hdr_addr = 0;
5396                 }
5397
5398                 i++;
5399                 if (i == rx_ring->count)
5400                         i = 0;
5401                 buffer_info = &rx_ring->buffer_info[i];
5402         }
5403
5404 no_buffers:
5405         if (rx_ring->next_to_use != i) {
5406                 rx_ring->next_to_use = i;
5407                 if (i == 0)
5408                         i = (rx_ring->count - 1);
5409                 else
5410                         i--;
5411
5412                 /* Force memory writes to complete before letting h/w
5413                  * know there are new descriptors to fetch.  (Only
5414                  * applicable for weak-ordered memory model archs,
5415                  * such as IA-64). */
5416                 wmb();
5417                 writel(i, rx_ring->tail);
5418         }
5419 }
5420
5421 /**
5422  * igb_mii_ioctl -
5423  * @netdev:
5424  * @ifreq:
5425  * @cmd:
5426  **/
5427 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5428 {
5429         struct igb_adapter *adapter = netdev_priv(netdev);
5430         struct mii_ioctl_data *data = if_mii(ifr);
5431
5432         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5433                 return -EOPNOTSUPP;
5434
5435         switch (cmd) {
5436         case SIOCGMIIPHY:
5437                 data->phy_id = adapter->hw.phy.addr;
5438                 break;
5439         case SIOCGMIIREG:
5440                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5441                                      &data->val_out))
5442                         return -EIO;
5443                 break;
5444         case SIOCSMIIREG:
5445         default:
5446                 return -EOPNOTSUPP;
5447         }
5448         return 0;
5449 }
5450
5451 /**
5452  * igb_hwtstamp_ioctl - control hardware time stamping
5453  * @netdev:
5454  * @ifreq:
5455  * @cmd:
5456  *
5457  * Outgoing time stamping can be enabled and disabled. Play nice and
5458  * disable it when requested, although it shouldn't cause any overhead
5459  * when no packet needs it. At most one packet in the queue may be
5460  * marked for time stamping, otherwise it would be impossible to tell
5461  * for sure to which packet the hardware time stamp belongs.
5462  *
5463  * Incoming time stamping has to be configured via the hardware
5464  * filters. Not all combinations are supported, in particular event
5465  * type has to be specified. Matching the kind of event packet is
5466  * not supported, with the exception of "all V2 events regardless of
5467  * level 2 or 4".
5468  *
5469  **/
5470 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5471                               struct ifreq *ifr, int cmd)
5472 {
5473         struct igb_adapter *adapter = netdev_priv(netdev);
5474         struct e1000_hw *hw = &adapter->hw;
5475         struct hwtstamp_config config;
5476         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5477         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5478         u32 tsync_rx_cfg = 0;
5479         bool is_l4 = false;
5480         bool is_l2 = false;
5481         u32 regval;
5482
5483         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5484                 return -EFAULT;
5485
5486         /* reserved for future extensions */
5487         if (config.flags)
5488                 return -EINVAL;
5489
5490         switch (config.tx_type) {
5491         case HWTSTAMP_TX_OFF:
5492                 tsync_tx_ctl = 0;
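                     /* fall through */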
5493         case HWTSTAMP_TX_ON:
5494                 break;
5495         default:
5496                 return -ERANGE;
5497         }
5498
5499         switch (config.rx_filter) {
5500         case HWTSTAMP_FILTER_NONE:
5501                 tsync_rx_ctl = 0;
5502                 break;
5503         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5504         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5505         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5506         case HWTSTAMP_FILTER_ALL:
5507                 /*
5508                  * register TSYNCRXCFG must be set, therefore it is not
5509                  * possible to time stamp both Sync and Delay_Req messages
5510                  * => fall back to time stamping all packets
5511                  */
5512                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5513                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5514                 break;
5515         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5516                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5517                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5518                 is_l4 = true;
5519                 break;
5520         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5521                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5522                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5523                 is_l4 = true;
5524                 break;
5525         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5526         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5527                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5528                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5529                 is_l2 = true;
5530                 is_l4 = true;
5531                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5532                 break;
5533         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5534         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5535                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5536                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5537                 is_l2 = true;
5538                 is_l4 = true;
5539                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5540                 break;
5541         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5542         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5543         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5544                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5545                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5546                 is_l2 = true;
5547                 break;
5548         default:
5549                 return -ERANGE;
5550         }
5551
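        /*
         * 82575 has no per-packet timestamping hardware, so the only
         * configuration that can be honored here is "everything off";
         * reject any attempt to enable TX or RX timestamping.
         */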
5552         if (hw->mac.type == e1000_82575) {
5553                 if (tsync_rx_ctl | tsync_tx_ctl)
5554                         return -EINVAL;
5555                 return 0;
5556         }
5557
5558         /* enable/disable TX */
5559         regval = rd32(E1000_TSYNCTXCTL);
5560         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5561         regval |= tsync_tx_ctl;
5562         wr32(E1000_TSYNCTXCTL, regval);
5563
5564         /* enable/disable RX */
5565         regval = rd32(E1000_TSYNCRXCTL);
5566         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5567         regval |= tsync_rx_ctl;
5568         wr32(E1000_TSYNCRXCTL, regval);
5569
5570         /* define which PTP packets are time stamped */
5571         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5572
5573         /* define ethertype filter for timestamped packets */
5574         if (is_l2)
5575                 wr32(E1000_ETQF(3),
5576                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5577                                  E1000_ETQF_1588 | /* enable timestamping */
5578                                  ETH_P_1588));     /* 1588 eth protocol type */
5579         else
5580                 wr32(E1000_ETQF(3), 0);
5581
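/* 319 is the UDP destination port used by PTP event messages (IEEE 1588) */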
5582 #define PTP_PORT 319
5583         /* L4 Queue Filter[3]: filter by destination port and protocol */
5584         if (is_l4) {
5585                 u32 ftqf = (IPPROTO_UDP /* UDP */
5586                         | E1000_FTQF_VF_BP /* VF not compared */
5587                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5588                         | E1000_FTQF_MASK); /* mask all inputs */
5589                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5590
5591                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5592                 wr32(E1000_IMIREXT(3),
5593                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5594                 if (hw->mac.type == e1000_82576) {
5595                         /* enable source port check */
5596                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5597                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5598                 }
5599                 wr32(E1000_FTQF(3), ftqf);
5600         } else {
5601                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5602         }
5603         wrfl();
5604
5605         adapter->hwtstamp_config = config;
5606
5607         /* clear TX/RX time stamp registers, just to be sure */
5608         regval = rd32(E1000_TXSTMPH);
5609         regval = rd32(E1000_RXSTMPH);
5610
5611         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5612                 -EFAULT : 0;
5613 }
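/*
 * Illustrative sketch (not part of the driver): user space normally reaches
 * igb_hwtstamp_ioctl() through the SIOCSHWTSTAMP ioctl, using the types from
 * <linux/net_tstamp.h> and <linux/sockios.h>. The interface name and filter
 * chosen below are examples only, and "fd" is assumed to be any open socket
 * descriptor.
 *
 *	struct hwtstamp_config cfg = {
 *		.flags     = 0,
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *	};
 *	struct ifreq ifr;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (void *)&cfg;
 *	if (ioctl(fd, SIOCSHWTSTAMP, &ifr) < 0)
 *		perror("SIOCSHWTSTAMP");
 *
 * On return cfg.rx_filter reports the filter actually applied; as noted
 * above, the driver may widen the request to HWTSTAMP_FILTER_ALL.
 */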
5614
5615 /**
5616  * igb_ioctl - handle device-specific ioctl requests
5617  * @netdev: network interface device structure
5618  * @ifr: interface request structure containing the ioctl argument
5619  * @cmd: ioctl command number
5620  **/
5621 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5622 {
5623         switch (cmd) {
5624         case SIOCGMIIPHY:
5625         case SIOCGMIIREG:
5626         case SIOCSMIIREG:
5627                 return igb_mii_ioctl(netdev, ifr, cmd);
5628         case SIOCSHWTSTAMP:
5629                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5630         default:
5631                 return -EOPNOTSUPP;
5632         }
5633 }
5634
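/*
 * igb_read_pcie_cap_reg()/igb_write_pcie_cap_reg() access 16-bit registers
 * in the PCI Express capability structure of the device's config space;
 * 'reg' is an offset from the start of that capability. -E1000_ERR_CONFIG
 * is returned when no PCIe capability is present.
 */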
5635 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5636 {
5637         struct igb_adapter *adapter = hw->back;
5638         u16 cap_offset;
5639
5640         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5641         if (!cap_offset)
5642                 return -E1000_ERR_CONFIG;
5643
5644         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5645
5646         return 0;
5647 }
5648
5649 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5650 {
5651         struct igb_adapter *adapter = hw->back;
5652         u16 cap_offset;
5653
5654         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5655         if (!cap_offset)
5656                 return -E1000_ERR_CONFIG;
5657
5658         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5659
5660         return 0;
5661 }
5662
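/*
 * vlan_rx_register callback: remember the attached VLAN group and enable
 * hardware VLAN tag insertion/stripping (CTRL.VME) while a group is
 * registered, disabling it again when the group is removed. Interrupts are
 * masked around the update.
 */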
5663 static void igb_vlan_rx_register(struct net_device *netdev,
5664                                  struct vlan_group *grp)
5665 {
5666         struct igb_adapter *adapter = netdev_priv(netdev);
5667         struct e1000_hw *hw = &adapter->hw;
5668         u32 ctrl, rctl;
5669
5670         igb_irq_disable(adapter);
5671         adapter->vlgrp = grp;
5672
5673         if (grp) {
5674                 /* enable VLAN tag insert/strip */
5675                 ctrl = rd32(E1000_CTRL);
5676                 ctrl |= E1000_CTRL_VME;
5677                 wr32(E1000_CTRL, ctrl);
5678
5679                 /* Disable CFI check */
5680                 rctl = rd32(E1000_RCTL);
5681                 rctl &= ~E1000_RCTL_CFIEN;
5682                 wr32(E1000_RCTL, rctl);
5683         } else {
5684                 /* disable VLAN tag insert/strip */
5685                 ctrl = rd32(E1000_CTRL);
5686                 ctrl &= ~E1000_CTRL_VME;
5687                 wr32(E1000_CTRL, ctrl);
5688         }
5689
5690         igb_rlpml_set(adapter);
5691
5692         if (!test_bit(__IGB_DOWN, &adapter->state))
5693                 igb_irq_enable(adapter);
5694 }
5695
5696 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5697 {
5698         struct igb_adapter *adapter = netdev_priv(netdev);
5699         struct e1000_hw *hw = &adapter->hw;
5700         int pf_id = adapter->vfs_allocated_count;
5701
5702         /* attempt to add filter to vlvf array */
5703         igb_vlvf_set(adapter, vid, true, pf_id);
5704
5705         /* add the filter since PF can receive vlans w/o entry in vlvf */
5706         igb_vfta_set(hw, vid, true);
5707 }
5708
5709 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5710 {
5711         struct igb_adapter *adapter = netdev_priv(netdev);
5712         struct e1000_hw *hw = &adapter->hw;
5713         int pf_id = adapter->vfs_allocated_count;
5714         s32 err;
5715
5716         igb_irq_disable(adapter);
5717         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5718
5719         if (!test_bit(__IGB_DOWN, &adapter->state))
5720                 igb_irq_enable(adapter);
5721
5722         /* remove vlan from VLVF table array */
5723         err = igb_vlvf_set(adapter, vid, false, pf_id);
5724
5725         /* if vid was not present in VLVF just remove it from table */
5726         if (err)
5727                 igb_vfta_set(hw, vid, false);
5728 }
5729
5730 static void igb_restore_vlan(struct igb_adapter *adapter)
5731 {
5732         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5733
5734         if (adapter->vlgrp) {
5735                 u16 vid;
5736                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5737                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5738                                 continue;
5739                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5740                 }
5741         }
5742 }
5743
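/*
 * Translate an ethtool SPEED_x + DUPLEX_y pairing into forced MAC settings.
 * 10/100 Mb/s are forced directly; 1000 Mb/s full duplex is only reachable
 * through autonegotiation, and 1000 Mb/s half duplex is rejected.
 */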
5744 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5745 {
5746         struct pci_dev *pdev = adapter->pdev;
5747         struct e1000_mac_info *mac = &adapter->hw.mac;
5748
5749         mac->autoneg = 0;
5750
5751         switch (spddplx) {
5752         case SPEED_10 + DUPLEX_HALF:
5753                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5754                 break;
5755         case SPEED_10 + DUPLEX_FULL:
5756                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5757                 break;
5758         case SPEED_100 + DUPLEX_HALF:
5759                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5760                 break;
5761         case SPEED_100 + DUPLEX_FULL:
5762                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5763                 break;
5764         case SPEED_1000 + DUPLEX_FULL:
5765                 mac->autoneg = 1;
5766                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5767                 break;
5768         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5769         default:
5770                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5771                 return -EINVAL;
5772         }
5773         return 0;
5774 }
5775
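/*
 * Common suspend/shutdown path: stop the interface, then either program the
 * wake-up filters (WUC/WUFC) when Wake-on-LAN or manageability requires the
 * link, or power the link down entirely. *enable_wake tells the caller
 * whether PME wake-up should be armed.
 */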
5776 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5777 {
5778         struct net_device *netdev = pci_get_drvdata(pdev);
5779         struct igb_adapter *adapter = netdev_priv(netdev);
5780         struct e1000_hw *hw = &adapter->hw;
5781         u32 ctrl, rctl, status;
5782         u32 wufc = adapter->wol;
5783 #ifdef CONFIG_PM
5784         int retval = 0;
5785 #endif
5786
5787         netif_device_detach(netdev);
5788
5789         if (netif_running(netdev))
5790                 igb_close(netdev);
5791
5792         igb_clear_interrupt_scheme(adapter);
5793
5794 #ifdef CONFIG_PM
5795         retval = pci_save_state(pdev);
5796         if (retval)
5797                 return retval;
5798 #endif
5799
5800         status = rd32(E1000_STATUS);
5801         if (status & E1000_STATUS_LU)
5802                 wufc &= ~E1000_WUFC_LNKC;
5803
5804         if (wufc) {
5805                 igb_setup_rctl(adapter);
5806                 igb_set_rx_mode(netdev);
5807
5808                 /* turn on all-multi mode if wake on multicast is enabled */
5809                 if (wufc & E1000_WUFC_MC) {
5810                         rctl = rd32(E1000_RCTL);
5811                         rctl |= E1000_RCTL_MPE;
5812                         wr32(E1000_RCTL, rctl);
5813                 }
5814
5815                 ctrl = rd32(E1000_CTRL);
5816                 /* advertise wake from D3Cold */
5817                 #define E1000_CTRL_ADVD3WUC 0x00100000
5818                 /* phy power management enable */
5819                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5820                 ctrl |= E1000_CTRL_ADVD3WUC;
5821                 wr32(E1000_CTRL, ctrl);
5822
5823                 /* Allow time for pending master requests to run */
5824                 igb_disable_pcie_master(hw);
5825
5826                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5827                 wr32(E1000_WUFC, wufc);
5828         } else {
5829                 wr32(E1000_WUC, 0);
5830                 wr32(E1000_WUFC, 0);
5831         }
5832
5833         *enable_wake = wufc || adapter->en_mng_pt;
5834         if (!*enable_wake)
5835                 igb_power_down_link(adapter);
5836         else
5837                 igb_power_up_link(adapter);
5838
5839         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5840          * would have already happened in close and is redundant. */
5841         igb_release_hw_control(adapter);
5842
5843         pci_disable_device(pdev);
5844
5845         return 0;
5846 }
5847
5848 #ifdef CONFIG_PM
5849 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5850 {
5851         int retval;
5852         bool wake;
5853
5854         retval = __igb_shutdown(pdev, &wake);
5855         if (retval)
5856                 return retval;
5857
5858         if (wake) {
5859                 pci_prepare_to_sleep(pdev);
5860         } else {
5861                 pci_wake_from_d3(pdev, false);
5862                 pci_set_power_state(pdev, PCI_D3hot);
5863         }
5864
5865         return 0;
5866 }
5867
5868 static int igb_resume(struct pci_dev *pdev)
5869 {
5870         struct net_device *netdev = pci_get_drvdata(pdev);
5871         struct igb_adapter *adapter = netdev_priv(netdev);
5872         struct e1000_hw *hw = &adapter->hw;
5873         u32 err;
5874
5875         pci_set_power_state(pdev, PCI_D0);
5876         pci_restore_state(pdev);
5877         pci_save_state(pdev);
5878
5879         err = pci_enable_device_mem(pdev);
5880         if (err) {
5881                 dev_err(&pdev->dev,
5882                         "igb: Cannot enable PCI device from suspend\n");
5883                 return err;
5884         }
5885         pci_set_master(pdev);
5886
5887         pci_enable_wake(pdev, PCI_D3hot, 0);
5888         pci_enable_wake(pdev, PCI_D3cold, 0);
5889
5890         if (igb_init_interrupt_scheme(adapter)) {
5891                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5892                 return -ENOMEM;
5893         }
5894
5895         igb_reset(adapter);
5896
5897         /* let the f/w know that the h/w is now under the control of the
5898          * driver. */
5899         igb_get_hw_control(adapter);
5900
5901         wr32(E1000_WUS, ~0);
5902
5903         if (netif_running(netdev)) {
5904                 err = igb_open(netdev);
5905                 if (err)
5906                         return err;
5907         }
5908
5909         netif_device_attach(netdev);
5910
5911         return 0;
5912 }
5913 #endif
5914
5915 static void igb_shutdown(struct pci_dev *pdev)
5916 {
5917         bool wake;
5918
5919         __igb_shutdown(pdev, &wake);
5920
5921         if (system_state == SYSTEM_POWER_OFF) {
5922                 pci_wake_from_d3(pdev, wake);
5923                 pci_set_power_state(pdev, PCI_D3hot);
5924         }
5925 }
5926
5927 #ifdef CONFIG_NET_POLL_CONTROLLER
5928 /*
5929  * Polling 'interrupt' - used by things like netconsole to send skbs
5930  * without having to re-enable interrupts. It's not called while
5931  * the interrupt routine is executing.
5932  */
5933 static void igb_netpoll(struct net_device *netdev)
5934 {
5935         struct igb_adapter *adapter = netdev_priv(netdev);
5936         struct e1000_hw *hw = &adapter->hw;
5937         int i;
5938
5939         if (!adapter->msix_entries) {
5940                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5941                 igb_irq_disable(adapter);
5942                 napi_schedule(&q_vector->napi);
5943                 return;
5944         }
5945
5946         for (i = 0; i < adapter->num_q_vectors; i++) {
5947                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5948                 wr32(E1000_EIMC, q_vector->eims_value);
5949                 napi_schedule(&q_vector->napi);
5950         }
5951 }
5952 #endif /* CONFIG_NET_POLL_CONTROLLER */
5953
5954 /**
5955  * igb_io_error_detected - called when PCI error is detected
5956  * @pdev: Pointer to PCI device
5957  * @state: The current pci connection state
5958  *
5959  * This function is called after a PCI bus error affecting
5960  * this device has been detected.
5961  */
5962 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5963                                               pci_channel_state_t state)
5964 {
5965         struct net_device *netdev = pci_get_drvdata(pdev);
5966         struct igb_adapter *adapter = netdev_priv(netdev);
5967
5968         netif_device_detach(netdev);
5969
5970         if (state == pci_channel_io_perm_failure)
5971                 return PCI_ERS_RESULT_DISCONNECT;
5972
5973         if (netif_running(netdev))
5974                 igb_down(adapter);
5975         pci_disable_device(pdev);
5976
5977         /* Request a slot reset. */
5978         return PCI_ERS_RESULT_NEED_RESET;
5979 }
5980
5981 /**
5982  * igb_io_slot_reset - called after the pci bus has been reset.
5983  * @pdev: Pointer to PCI device
5984  *
5985  * Restart the card from scratch, as if from a cold-boot. Implementation
5986  * resembles the first-half of the igb_resume routine.
5987  */
5988 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
5989 {
5990         struct net_device *netdev = pci_get_drvdata(pdev);
5991         struct igb_adapter *adapter = netdev_priv(netdev);
5992         struct e1000_hw *hw = &adapter->hw;
5993         pci_ers_result_t result;
5994         int err;
5995
5996         if (pci_enable_device_mem(pdev)) {
5997                 dev_err(&pdev->dev,
5998                         "Cannot re-enable PCI device after reset.\n");
5999                 result = PCI_ERS_RESULT_DISCONNECT;
6000         } else {
6001                 pci_set_master(pdev);
6002                 pci_restore_state(pdev);
6003                 pci_save_state(pdev);
6004
6005                 pci_enable_wake(pdev, PCI_D3hot, 0);
6006                 pci_enable_wake(pdev, PCI_D3cold, 0);
6007
6008                 igb_reset(adapter);
6009                 wr32(E1000_WUS, ~0);
6010                 result = PCI_ERS_RESULT_RECOVERED;
6011         }
6012
6013         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6014         if (err) {
6015                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6016                         "failed 0x%0x\n", err);
6017                 /* non-fatal, continue */
6018         }
6019
6020         return result;
6021 }
6022
6023 /**
6024  * igb_io_resume - called when traffic can start flowing again.
6025  * @pdev: Pointer to PCI device
6026  *
6027  * This callback is called when the error recovery driver tells us that
6028  * it's OK to resume normal operation. Implementation resembles the
6029  * second-half of the igb_resume routine.
6030  */
6031 static void igb_io_resume(struct pci_dev *pdev)
6032 {
6033         struct net_device *netdev = pci_get_drvdata(pdev);
6034         struct igb_adapter *adapter = netdev_priv(netdev);
6035
6036         if (netif_running(netdev)) {
6037                 if (igb_up(adapter)) {
6038                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6039                         return;
6040                 }
6041         }
6042
6043         netif_device_attach(netdev);
6044
6045         /* let the f/w know that the h/w is now under the control of the
6046          * driver. */
6047         igb_get_hw_control(adapter);
6048 }
6049
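/*
 * Program receive address register 'index' with 'addr' and associate it with
 * pool 'qsel', so frames matching this MAC address are steered to the
 * corresponding PF/VF pool.
 */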
6050 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6051                              u8 qsel)
6052 {
6053         u32 rar_low, rar_high;
6054         struct e1000_hw *hw = &adapter->hw;
6055
6056         /* HW expects these in little endian so we reverse the byte order
6057          * from network order (big endian) to little endian
6058          */
6059         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6060                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6061         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6062
6063         /* Indicate to hardware the Address is Valid. */
6064         rar_high |= E1000_RAH_AV;
6065
6066         if (hw->mac.type == e1000_82575)
6067                 rar_high |= E1000_RAH_POOL_1 * qsel;
6068         else
6069                 rar_high |= E1000_RAH_POOL_1 << qsel;
6070
6071         wr32(E1000_RAL(index), rar_low);
6072         wrfl();
6073         wr32(E1000_RAH(index), rar_high);
6074         wrfl();
6075 }
6076
6077 static int igb_set_vf_mac(struct igb_adapter *adapter,
6078                           int vf, unsigned char *mac_addr)
6079 {
6080         struct e1000_hw *hw = &adapter->hw;
6081         /* VF MAC addresses start at the end of the receive addresses and
6082          * move towards the first, so a collision should not be possible */
6083         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6084
6085         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6086
6087         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6088
6089         return 0;
6090 }
6091
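/*
 * ndo_set_vf_mac callback, reached e.g. via "ip link set <pf> vf <n> mac
 * <addr>": record that the PF administratively assigned the MAC and program
 * it into the VF's dedicated receive address register.
 */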
6092 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6093 {
6094         struct igb_adapter *adapter = netdev_priv(netdev);
6095         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6096                 return -EINVAL;
6097         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6098         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6099         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6100                                       " change effective.\n");
6101         if (test_bit(__IGB_DOWN, &adapter->state)) {
6102                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6103                          " but the PF device is not up.\n");
6104                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6105                          " attempting to use the VF device.\n");
6106         }
6107         return igb_set_vf_mac(adapter, vf, mac);
6108 }
6109
6110 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6111 {
6112         return -EOPNOTSUPP;
6113 }
6114
6115 static int igb_ndo_get_vf_config(struct net_device *netdev,
6116                                  int vf, struct ifla_vf_info *ivi)
6117 {
6118         struct igb_adapter *adapter = netdev_priv(netdev);
6119         if (vf >= adapter->vfs_allocated_count)
6120                 return -EINVAL;
6121         ivi->vf = vf;
6122         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6123         ivi->tx_rate = 0;
6124         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6125         ivi->qos = adapter->vf_data[vf].pf_qos;
6126         return 0;
6127 }
6128
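/*
 * Configure VMDq behaviour: enable VLAN tag stripping for replicated
 * packets and, when VFs are allocated, turn on loopback and replication
 * between pools. 82575 lacks these features and is skipped entirely.
 */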
6129 static void igb_vmm_control(struct igb_adapter *adapter)
6130 {
6131         struct e1000_hw *hw = &adapter->hw;
6132         u32 reg;
6133
6134         /* replication is not supported for 82575 */
6135         if (hw->mac.type == e1000_82575)
6136                 return;
6137
6138         /* enable replication vlan tag stripping */
6139         reg = rd32(E1000_RPLOLR);
6140         reg |= E1000_RPLOLR_STRVLAN;
6141         wr32(E1000_RPLOLR, reg);
6142
6143         /* notify HW that the MAC is adding vlan tags */
6144         reg = rd32(E1000_DTXCTL);
6145         reg |= E1000_DTXCTL_VLAN_ADDED;
6146         wr32(E1000_DTXCTL, reg);
6147
6148         if (adapter->vfs_allocated_count) {
6149                 igb_vmdq_set_loopback_pf(hw, true);
6150                 igb_vmdq_set_replication_pf(hw, true);
6151         } else {
6152                 igb_vmdq_set_loopback_pf(hw, false);
6153                 igb_vmdq_set_replication_pf(hw, false);
6154         }
6155 }
6156
6157 /* igb_main.c */