1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #include <linux/prefetch.h>
49 #ifdef CONFIG_IGB_DCA
50 #include <linux/dca.h>
51 #endif
52 #include "igb.h"
53
54 #define MAJ 3
55 #define MIN 0
56 #define BUILD 6
57 #define KFIX 2
58 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
59 __stringify(BUILD) "-k" __stringify(KFIX)
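/* With the values above, DRV_VERSION expands to the string "3.0.6-k2". */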
60 char igb_driver_name[] = "igb";
61 char igb_driver_version[] = DRV_VERSION;
62 static const char igb_driver_string[] =
63                                 "Intel(R) Gigabit Ethernet Network Driver";
64 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
65
66 static const struct e1000_info *igb_info_tbl[] = {
67         [board_82575] = &e1000_82575_info,
68 };
69
70 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
87         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
88         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
89         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
90         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
91         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
92         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
93         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
94         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
95         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
96         /* required last entry */
97         {0, }
98 };
99
100 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
101
102 void igb_reset(struct igb_adapter *);
103 static int igb_setup_all_tx_resources(struct igb_adapter *);
104 static int igb_setup_all_rx_resources(struct igb_adapter *);
105 static void igb_free_all_tx_resources(struct igb_adapter *);
106 static void igb_free_all_rx_resources(struct igb_adapter *);
107 static void igb_setup_mrqc(struct igb_adapter *);
108 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
109 static void __devexit igb_remove(struct pci_dev *pdev);
110 static void igb_init_hw_timer(struct igb_adapter *adapter);
111 static int igb_sw_init(struct igb_adapter *);
112 static int igb_open(struct net_device *);
113 static int igb_close(struct net_device *);
114 static void igb_configure_tx(struct igb_adapter *);
115 static void igb_configure_rx(struct igb_adapter *);
116 static void igb_clean_all_tx_rings(struct igb_adapter *);
117 static void igb_clean_all_rx_rings(struct igb_adapter *);
118 static void igb_clean_tx_ring(struct igb_ring *);
119 static void igb_clean_rx_ring(struct igb_ring *);
120 static void igb_set_rx_mode(struct net_device *);
121 static void igb_update_phy_info(unsigned long);
122 static void igb_watchdog(unsigned long);
123 static void igb_watchdog_task(struct work_struct *);
124 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
125 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
126                                                  struct rtnl_link_stats64 *stats);
127 static int igb_change_mtu(struct net_device *, int);
128 static int igb_set_mac(struct net_device *, void *);
129 static void igb_set_uta(struct igb_adapter *adapter);
130 static irqreturn_t igb_intr(int irq, void *);
131 static irqreturn_t igb_intr_msi(int irq, void *);
132 static irqreturn_t igb_msix_other(int irq, void *);
133 static irqreturn_t igb_msix_ring(int irq, void *);
134 #ifdef CONFIG_IGB_DCA
135 static void igb_update_dca(struct igb_q_vector *);
136 static void igb_setup_dca(struct igb_adapter *);
137 #endif /* CONFIG_IGB_DCA */
138 static bool igb_clean_tx_irq(struct igb_q_vector *);
139 static int igb_poll(struct napi_struct *, int);
140 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
141 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
142 static void igb_tx_timeout(struct net_device *);
143 static void igb_reset_task(struct work_struct *);
144 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
145 static void igb_vlan_rx_add_vid(struct net_device *, u16);
146 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
147 static void igb_restore_vlan(struct igb_adapter *);
148 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
149 static void igb_ping_all_vfs(struct igb_adapter *);
150 static void igb_msg_task(struct igb_adapter *);
151 static void igb_vmm_control(struct igb_adapter *);
152 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
153 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
154 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
155 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
156                                int vf, u16 vlan, u8 qos);
157 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
158 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
159                                  struct ifla_vf_info *ivi);
160 static void igb_check_vf_rate_limit(struct igb_adapter *);
161
162 #ifdef CONFIG_PM
163 static int igb_suspend(struct pci_dev *, pm_message_t);
164 static int igb_resume(struct pci_dev *);
165 #endif
166 static void igb_shutdown(struct pci_dev *);
167 #ifdef CONFIG_IGB_DCA
168 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
169 static struct notifier_block dca_notifier = {
170         .notifier_call  = igb_notify_dca,
171         .next           = NULL,
172         .priority       = 0
173 };
174 #endif
175 #ifdef CONFIG_NET_POLL_CONTROLLER
176 /* for netdump / net console */
177 static void igb_netpoll(struct net_device *);
178 #endif
179 #ifdef CONFIG_PCI_IOV
180 static unsigned int max_vfs = 0;
181 module_param(max_vfs, uint, 0);
182 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
183                  "per physical function");
184 #endif /* CONFIG_PCI_IOV */
185
186 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
187                      pci_channel_state_t);
188 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
189 static void igb_io_resume(struct pci_dev *);
190
191 static struct pci_error_handlers igb_err_handler = {
192         .error_detected = igb_io_error_detected,
193         .slot_reset = igb_io_slot_reset,
194         .resume = igb_io_resume,
195 };
196
197
198 static struct pci_driver igb_driver = {
199         .name     = igb_driver_name,
200         .id_table = igb_pci_tbl,
201         .probe    = igb_probe,
202         .remove   = __devexit_p(igb_remove),
203 #ifdef CONFIG_PM
204         /* Power Management Hooks */
205         .suspend  = igb_suspend,
206         .resume   = igb_resume,
207 #endif
208         .shutdown = igb_shutdown,
209         .err_handler = &igb_err_handler
210 };
211
212 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
213 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
214 MODULE_LICENSE("GPL");
215 MODULE_VERSION(DRV_VERSION);
216
217 struct igb_reg_info {
218         u32 ofs;
219         char *name;
220 };
221
222 static const struct igb_reg_info igb_reg_info_tbl[] = {
223
224         /* General Registers */
225         {E1000_CTRL, "CTRL"},
226         {E1000_STATUS, "STATUS"},
227         {E1000_CTRL_EXT, "CTRL_EXT"},
228
229         /* Interrupt Registers */
230         {E1000_ICR, "ICR"},
231
232         /* RX Registers */
233         {E1000_RCTL, "RCTL"},
234         {E1000_RDLEN(0), "RDLEN"},
235         {E1000_RDH(0), "RDH"},
236         {E1000_RDT(0), "RDT"},
237         {E1000_RXDCTL(0), "RXDCTL"},
238         {E1000_RDBAL(0), "RDBAL"},
239         {E1000_RDBAH(0), "RDBAH"},
240
241         /* TX Registers */
242         {E1000_TCTL, "TCTL"},
243         {E1000_TDBAL(0), "TDBAL"},
244         {E1000_TDBAH(0), "TDBAH"},
245         {E1000_TDLEN(0), "TDLEN"},
246         {E1000_TDH(0), "TDH"},
247         {E1000_TDT(0), "TDT"},
248         {E1000_TXDCTL(0), "TXDCTL"},
249         {E1000_TDFH, "TDFH"},
250         {E1000_TDFT, "TDFT"},
251         {E1000_TDFHS, "TDFHS"},
252         {E1000_TDFPC, "TDFPC"},
253
254         /* List Terminator */
255         {}
256 };
257
258 /*
259  * igb_regdump - register printout routine
260  */
261 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
262 {
263         int n = 0;
264         char rname[16];
265         u32 regs[8];
266
267         switch (reginfo->ofs) {
268         case E1000_RDLEN(0):
269                 for (n = 0; n < 4; n++)
270                         regs[n] = rd32(E1000_RDLEN(n));
271                 break;
272         case E1000_RDH(0):
273                 for (n = 0; n < 4; n++)
274                         regs[n] = rd32(E1000_RDH(n));
275                 break;
276         case E1000_RDT(0):
277                 for (n = 0; n < 4; n++)
278                         regs[n] = rd32(E1000_RDT(n));
279                 break;
280         case E1000_RXDCTL(0):
281                 for (n = 0; n < 4; n++)
282                         regs[n] = rd32(E1000_RXDCTL(n));
283                 break;
284         case E1000_RDBAL(0):
285                 for (n = 0; n < 4; n++)
286                         regs[n] = rd32(E1000_RDBAL(n));
287                 break;
288         case E1000_RDBAH(0):
289                 for (n = 0; n < 4; n++)
290                         regs[n] = rd32(E1000_RDBAH(n));
291                 break;
292         case E1000_TDBAL(0):
293                 for (n = 0; n < 4; n++)
294                         regs[n] = rd32(E1000_TDBAL(n));
295                 break;
296         case E1000_TDBAH(0):
297                 for (n = 0; n < 4; n++)
298                         regs[n] = rd32(E1000_TDBAH(n));
299                 break;
300         case E1000_TDLEN(0):
301                 for (n = 0; n < 4; n++)
302                         regs[n] = rd32(E1000_TDLEN(n));
303                 break;
304         case E1000_TDH(0):
305                 for (n = 0; n < 4; n++)
306                         regs[n] = rd32(E1000_TDH(n));
307                 break;
308         case E1000_TDT(0):
309                 for (n = 0; n < 4; n++)
310                         regs[n] = rd32(E1000_TDT(n));
311                 break;
312         case E1000_TXDCTL(0):
313                 for (n = 0; n < 4; n++)
314                         regs[n] = rd32(E1000_TXDCTL(n));
315                 break;
316         default:
317                 printk(KERN_INFO "%-15s %08x\n",
318                         reginfo->name, rd32(reginfo->ofs));
319                 return;
320         }
321
322         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
323         printk(KERN_INFO "%-15s ", rname);
324         for (n = 0; n < 4; n++)
325                 printk(KERN_CONT "%08x ", regs[n]);
326         printk(KERN_CONT "\n");
327 }
328
329 /*
330  * igb_dump - Print registers, tx-rings and rx-rings
331  */
332 static void igb_dump(struct igb_adapter *adapter)
333 {
334         struct net_device *netdev = adapter->netdev;
335         struct e1000_hw *hw = &adapter->hw;
336         struct igb_reg_info *reginfo;
337         int n = 0;
338         struct igb_ring *tx_ring;
339         union e1000_adv_tx_desc *tx_desc;
340         struct my_u0 { u64 a; u64 b; } *u0;
341         struct igb_buffer *buffer_info;
342         struct igb_ring *rx_ring;
343         union e1000_adv_rx_desc *rx_desc;
344         u32 staterr;
345         int i = 0;
346
347         if (!netif_msg_hw(adapter))
348                 return;
349
350         /* Print netdevice Info */
351         if (netdev) {
352                 dev_info(&adapter->pdev->dev, "Net device Info\n");
353                 printk(KERN_INFO "Device Name     state            "
354                         "trans_start      last_rx\n");
355                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
356                 netdev->name,
357                 netdev->state,
358                 netdev->trans_start,
359                 netdev->last_rx);
360         }
361
362         /* Print Registers */
363         dev_info(&adapter->pdev->dev, "Register Dump\n");
364         printk(KERN_INFO " Register Name   Value\n");
365         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
366              reginfo->name; reginfo++) {
367                 igb_regdump(hw, reginfo);
368         }
369
370         /* Print TX Ring Summary */
371         if (!netdev || !netif_running(netdev))
372                 goto exit;
373
374         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
375         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
376                 " leng ntw timestamp\n");
377         for (n = 0; n < adapter->num_tx_queues; n++) {
378                 tx_ring = adapter->tx_ring[n];
379                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
380                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
381                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
382                            (u64)buffer_info->dma,
383                            buffer_info->length,
384                            buffer_info->next_to_watch,
385                            (u64)buffer_info->time_stamp);
386         }
387
388         /* Print TX Rings */
389         if (!netif_msg_tx_done(adapter))
390                 goto rx_ring_summary;
391
392         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
393
394         /* Transmit Descriptor Formats
395          *
396          * Advanced Transmit Descriptor
397          *   +--------------------------------------------------------------+
398          * 0 |         Buffer Address [63:0]                                |
399          *   +--------------------------------------------------------------+
400          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
401          *   +--------------------------------------------------------------+
402          *   63      46 45    40 39 38 36 35 32 31   24             15       0
403          */
404
405         for (n = 0; n < adapter->num_tx_queues; n++) {
406                 tx_ring = adapter->tx_ring[n];
407                 printk(KERN_INFO "------------------------------------\n");
408                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
409                 printk(KERN_INFO "------------------------------------\n");
410                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
411                         "[PlPOCIStDDM Ln] [bi->dma       ] "
412                         "leng  ntw timestamp        bi->skb\n");
413
414                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
415                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
416                         buffer_info = &tx_ring->buffer_info[i];
417                         u0 = (struct my_u0 *)tx_desc;
418                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
419                                 " %04X  %3X %016llX %p", i,
420                                 le64_to_cpu(u0->a),
421                                 le64_to_cpu(u0->b),
422                                 (u64)buffer_info->dma,
423                                 buffer_info->length,
424                                 buffer_info->next_to_watch,
425                                 (u64)buffer_info->time_stamp,
426                                 buffer_info->skb);
427                         if (i == tx_ring->next_to_use &&
428                                 i == tx_ring->next_to_clean)
429                                 printk(KERN_CONT " NTC/U\n");
430                         else if (i == tx_ring->next_to_use)
431                                 printk(KERN_CONT " NTU\n");
432                         else if (i == tx_ring->next_to_clean)
433                                 printk(KERN_CONT " NTC\n");
434                         else
435                                 printk(KERN_CONT "\n");
436
437                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
438                                 print_hex_dump(KERN_INFO, "",
439                                         DUMP_PREFIX_ADDRESS,
440                                         16, 1, phys_to_virt(buffer_info->dma),
441                                         buffer_info->length, true);
442                 }
443         }
444
445         /* Print RX Rings Summary */
446 rx_ring_summary:
447         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
448         printk(KERN_INFO "Queue [NTU] [NTC]\n");
449         for (n = 0; n < adapter->num_rx_queues; n++) {
450                 rx_ring = adapter->rx_ring[n];
451                 printk(KERN_INFO " %5d %5X %5X\n", n,
452                            rx_ring->next_to_use, rx_ring->next_to_clean);
453         }
454
455         /* Print RX Rings */
456         if (!netif_msg_rx_status(adapter))
457                 goto exit;
458
459         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
460
461         /* Advanced Receive Descriptor (Read) Format
462          *    63                                           1        0
463          *    +-----------------------------------------------------+
464          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
465          *    +----------------------------------------------+------+
466          *  8 |       Header Buffer Address [63:1]           |  DD  |
467          *    +-----------------------------------------------------+
468          *
469          *
470          * Advanced Receive Descriptor (Write-Back) Format
471          *
472          *   63       48 47    32 31  30      21 20 17 16   4 3     0
473          *   +------------------------------------------------------+
474          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
475          *   | Checksum   Ident  |   |           |    | Type | Type |
476          *   +------------------------------------------------------+
477          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
478          *   +------------------------------------------------------+
479          *   63       48 47    32 31            20 19               0
480          */
481
482         for (n = 0; n < adapter->num_rx_queues; n++) {
483                 rx_ring = adapter->rx_ring[n];
484                 printk(KERN_INFO "------------------------------------\n");
485                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
486                 printk(KERN_INFO "------------------------------------\n");
487                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
488                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
489                         "<-- Adv Rx Read format\n");
490                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
491                         "[vl er S cks ln] ---------------- [bi->skb] "
492                         "<-- Adv Rx Write-Back format\n");
493
494                 for (i = 0; i < rx_ring->count; i++) {
495                         buffer_info = &rx_ring->buffer_info[i];
496                         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
497                         u0 = (struct my_u0 *)rx_desc;
498                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
499                         if (staterr & E1000_RXD_STAT_DD) {
500                                 /* Descriptor Done */
501                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
502                                         "%016llX ---------------- %p", i,
503                                         le64_to_cpu(u0->a),
504                                         le64_to_cpu(u0->b),
505                                         buffer_info->skb);
506                         } else {
507                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
508                                         "%016llX %016llX %p", i,
509                                         le64_to_cpu(u0->a),
510                                         le64_to_cpu(u0->b),
511                                         (u64)buffer_info->dma,
512                                         buffer_info->skb);
513
514                                 if (netif_msg_pktdata(adapter)) {
515                                         print_hex_dump(KERN_INFO, "",
516                                                 DUMP_PREFIX_ADDRESS,
517                                                 16, 1,
518                                                 phys_to_virt(buffer_info->dma),
519                                                 rx_ring->rx_buffer_len, true);
520                                         if (rx_ring->rx_buffer_len
521                                                 < IGB_RXBUFFER_1024)
522                                                 print_hex_dump(KERN_INFO, "",
523                                                   DUMP_PREFIX_ADDRESS,
524                                                   16, 1,
525                                                   phys_to_virt(
526                                                     buffer_info->page_dma +
527                                                     buffer_info->page_offset),
528                                                   PAGE_SIZE/2, true);
529                                 }
530                         }
531
532                         if (i == rx_ring->next_to_use)
533                                 printk(KERN_CONT " NTU\n");
534                         else if (i == rx_ring->next_to_clean)
535                                 printk(KERN_CONT " NTC\n");
536                         else
537                                 printk(KERN_CONT "\n");
538
539                 }
540         }
541
542 exit:
543         return;
544 }
545
546
547 /**
548  * igb_read_clock - read raw cycle counter (to be used by time counter)
549  */
550 static cycle_t igb_read_clock(const struct cyclecounter *tc)
551 {
552         struct igb_adapter *adapter =
553                 container_of(tc, struct igb_adapter, cycles);
554         struct e1000_hw *hw = &adapter->hw;
555         u64 stamp = 0;
556         int shift = 0;
557
558         /*
559          * The timestamp latches on lowest register read. For the 82580
560          * the lowest register is SYSTIMR instead of SYSTIML.  However we never
561          * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
562          */
563         if (hw->mac.type == e1000_82580) {
564                 stamp = rd32(E1000_SYSTIMR) >> 8;
565                 shift = IGB_82580_TSYNC_SHIFT;
566         }
567
568         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
569         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
570         return stamp;
571 }
572
573 /**
574  * igb_get_hw_dev - return device
575  * used by hardware layer to print debugging information
576  **/
577 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
578 {
579         struct igb_adapter *adapter = hw->back;
580         return adapter->netdev;
581 }
582
583 /**
584  * igb_init_module - Driver Registration Routine
585  *
586  * igb_init_module is the first routine called when the driver is
587  * loaded. All it does is register with the PCI subsystem.
588  **/
589 static int __init igb_init_module(void)
590 {
591         int ret;
592         printk(KERN_INFO "%s - version %s\n",
593                igb_driver_string, igb_driver_version);
594
595         printk(KERN_INFO "%s\n", igb_copyright);
596
597 #ifdef CONFIG_IGB_DCA
598         dca_register_notify(&dca_notifier);
599 #endif
600         ret = pci_register_driver(&igb_driver);
601         return ret;
602 }
603
604 module_init(igb_init_module);
605
606 /**
607  * igb_exit_module - Driver Exit Cleanup Routine
608  *
609  * igb_exit_module is called just before the driver is removed
610  * from memory.
611  **/
612 static void __exit igb_exit_module(void)
613 {
614 #ifdef CONFIG_IGB_DCA
615         dca_unregister_notify(&dca_notifier);
616 #endif
617         pci_unregister_driver(&igb_driver);
618 }
619
620 module_exit(igb_exit_module);
621
622 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
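/*
 * Q_IDX_82576() interleaves the PF queues around the per-VF queue pairs,
 * e.g. i = 0, 1, 2, 3 maps to register indices 0, 8, 1, 9 (see the VF queue
 * layout described in igb_cache_ring_register() below).
 */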
623 /**
624  * igb_cache_ring_register - Descriptor ring to register mapping
625  * @adapter: board private structure to initialize
626  *
627  * Once we know the feature-set enabled for the device, we'll cache
628  * the register offset the descriptor ring is assigned to.
629  **/
630 static void igb_cache_ring_register(struct igb_adapter *adapter)
631 {
632         int i = 0, j = 0;
633         u32 rbase_offset = adapter->vfs_allocated_count;
634
635         switch (adapter->hw.mac.type) {
636         case e1000_82576:
637                 /* The queues are allocated for virtualization such that VF 0
638                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
639                  * In order to avoid collision we start at the first free queue
640                  * and continue consuming queues in the same sequence
641                  */
642                 if (adapter->vfs_allocated_count) {
643                         for (; i < adapter->rss_queues; i++)
644                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
645                                                                Q_IDX_82576(i);
646                 }
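                /* Fall through: any remaining queues get the default
                 * linear reg_idx mapping below. */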
647         case e1000_82575:
648         case e1000_82580:
649         case e1000_i350:
650         default:
651                 for (; i < adapter->num_rx_queues; i++)
652                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
653                 for (; j < adapter->num_tx_queues; j++)
654                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
655                 break;
656         }
657 }
658
659 static void igb_free_queues(struct igb_adapter *adapter)
660 {
661         int i;
662
663         for (i = 0; i < adapter->num_tx_queues; i++) {
664                 kfree(adapter->tx_ring[i]);
665                 adapter->tx_ring[i] = NULL;
666         }
667         for (i = 0; i < adapter->num_rx_queues; i++) {
668                 kfree(adapter->rx_ring[i]);
669                 adapter->rx_ring[i] = NULL;
670         }
671         adapter->num_rx_queues = 0;
672         adapter->num_tx_queues = 0;
673 }
674
675 /**
676  * igb_alloc_queues - Allocate memory for all rings
677  * @adapter: board private structure to initialize
678  *
679  * We allocate one ring per queue at run-time since we don't know the
680  * number of queues at compile-time.
681  **/
682 static int igb_alloc_queues(struct igb_adapter *adapter)
683 {
684         struct igb_ring *ring;
685         int i;
686
687         for (i = 0; i < adapter->num_tx_queues; i++) {
688                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
689                 if (!ring)
690                         goto err;
691                 ring->count = adapter->tx_ring_count;
692                 ring->queue_index = i;
693                 ring->dev = &adapter->pdev->dev;
694                 ring->netdev = adapter->netdev;
695                 /* For 82575, context index must be unique per ring. */
696                 if (adapter->hw.mac.type == e1000_82575)
697                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
698                 adapter->tx_ring[i] = ring;
699         }
700
701         for (i = 0; i < adapter->num_rx_queues; i++) {
702                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
703                 if (!ring)
704                         goto err;
705                 ring->count = adapter->rx_ring_count;
706                 ring->queue_index = i;
707                 ring->dev = &adapter->pdev->dev;
708                 ring->netdev = adapter->netdev;
709                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
710                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
711                 /* set flag indicating ring supports SCTP checksum offload */
712                 if (adapter->hw.mac.type >= e1000_82576)
713                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
714                 adapter->rx_ring[i] = ring;
715         }
716
717         igb_cache_ring_register(adapter);
718
719         return 0;
720
721 err:
722         igb_free_queues(adapter);
723
724         return -ENOMEM;
725 }
726
727 #define IGB_N0_QUEUE -1
728 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
729 {
730         u32 msixbm = 0;
731         struct igb_adapter *adapter = q_vector->adapter;
732         struct e1000_hw *hw = &adapter->hw;
733         u32 ivar, index;
734         int rx_queue = IGB_N0_QUEUE;
735         int tx_queue = IGB_N0_QUEUE;
736
737         if (q_vector->rx_ring)
738                 rx_queue = q_vector->rx_ring->reg_idx;
739         if (q_vector->tx_ring)
740                 tx_queue = q_vector->tx_ring->reg_idx;
741
742         switch (hw->mac.type) {
743         case e1000_82575:
744                 /* The 82575 assigns vectors using a bitmask, which matches the
745                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
746                    or more queues to a vector, we write the appropriate bits
747                    into the MSIXBM register for that vector. */
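                /* For example, RX queue 0 and TX queue 0 assigned to one
                 * vector yield msixbm = E1000_EICR_RX_QUEUE0 |
                 * E1000_EICR_TX_QUEUE0, per the code below. */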
748                 if (rx_queue > IGB_N0_QUEUE)
749                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
750                 if (tx_queue > IGB_N0_QUEUE)
751                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
752                 if (!adapter->msix_entries && msix_vector == 0)
753                         msixbm |= E1000_EIMS_OTHER;
754                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
755                 q_vector->eims_value = msixbm;
756                 break;
757         case e1000_82576:
758                 /* 82576 uses a table-based method for assigning vectors.
759                    Each queue has a single entry in the table to which we write
760                    a vector number along with a "valid" bit.  Sadly, the layout
761                    of the table is somewhat counterintuitive. */
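                /* Per the code below, each IVAR0 entry n (n = queue & 0x7)
                 * is laid out as:
                 *   bits  7:0  - RX queue n       bits 15:8  - TX queue n
                 *   bits 23:16 - RX queue n + 8   bits 31:24 - TX queue n + 8
                 */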
762                 if (rx_queue > IGB_N0_QUEUE) {
763                         index = (rx_queue & 0x7);
764                         ivar = array_rd32(E1000_IVAR0, index);
765                         if (rx_queue < 8) {
766                                 /* vector goes into low byte of register */
767                                 ivar = ivar & 0xFFFFFF00;
768                                 ivar |= msix_vector | E1000_IVAR_VALID;
769                         } else {
770                                 /* vector goes into third byte of register */
771                                 ivar = ivar & 0xFF00FFFF;
772                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
773                         }
774                         array_wr32(E1000_IVAR0, index, ivar);
775                 }
776                 if (tx_queue > IGB_N0_QUEUE) {
777                         index = (tx_queue & 0x7);
778                         ivar = array_rd32(E1000_IVAR0, index);
779                         if (tx_queue < 8) {
780                                 /* vector goes into second byte of register */
781                                 ivar = ivar & 0xFFFF00FF;
782                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
783                         } else {
784                                 /* vector goes into high byte of register */
785                                 ivar = ivar & 0x00FFFFFF;
786                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
787                         }
788                         array_wr32(E1000_IVAR0, index, ivar);
789                 }
790                 q_vector->eims_value = 1 << msix_vector;
791                 break;
792         case e1000_82580:
793         case e1000_i350:
794                 /* 82580 uses the same table-based approach as 82576 but has fewer
 795                    entries; as a result we carry over for queues greater than 4. */
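                /* Here each IVAR0 entry n (n = queue >> 1) is laid out as:
                 *   bits  7:0  - RX queue 2n      bits 15:8  - TX queue 2n
                 *   bits 23:16 - RX queue 2n+1    bits 31:24 - TX queue 2n+1
                 */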
796                 if (rx_queue > IGB_N0_QUEUE) {
797                         index = (rx_queue >> 1);
798                         ivar = array_rd32(E1000_IVAR0, index);
799                         if (rx_queue & 0x1) {
800                                 /* vector goes into third byte of register */
801                                 ivar = ivar & 0xFF00FFFF;
802                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
803                         } else {
804                                 /* vector goes into low byte of register */
805                                 ivar = ivar & 0xFFFFFF00;
806                                 ivar |= msix_vector | E1000_IVAR_VALID;
807                         }
808                         array_wr32(E1000_IVAR0, index, ivar);
809                 }
810                 if (tx_queue > IGB_N0_QUEUE) {
811                         index = (tx_queue >> 1);
812                         ivar = array_rd32(E1000_IVAR0, index);
813                         if (tx_queue & 0x1) {
814                                 /* vector goes into high byte of register */
815                                 ivar = ivar & 0x00FFFFFF;
816                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
817                         } else {
818                                 /* vector goes into second byte of register */
819                                 ivar = ivar & 0xFFFF00FF;
820                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
821                         }
822                         array_wr32(E1000_IVAR0, index, ivar);
823                 }
824                 q_vector->eims_value = 1 << msix_vector;
825                 break;
826         default:
827                 BUG();
828                 break;
829         }
830
831         /* add q_vector eims value to global eims_enable_mask */
832         adapter->eims_enable_mask |= q_vector->eims_value;
833
834         /* configure q_vector to set itr on first interrupt */
835         q_vector->set_itr = 1;
836 }
837
838 /**
839  * igb_configure_msix - Configure MSI-X hardware
840  *
841  * igb_configure_msix sets up the hardware to properly
842  * generate MSI-X interrupts.
843  **/
844 static void igb_configure_msix(struct igb_adapter *adapter)
845 {
846         u32 tmp;
847         int i, vector = 0;
848         struct e1000_hw *hw = &adapter->hw;
849
850         adapter->eims_enable_mask = 0;
851
852         /* set vector for other causes, i.e. link changes */
853         switch (hw->mac.type) {
854         case e1000_82575:
855                 tmp = rd32(E1000_CTRL_EXT);
856                 /* enable MSI-X PBA support*/
857                 tmp |= E1000_CTRL_EXT_PBA_CLR;
858
859                 /* Auto-Mask interrupts upon ICR read. */
860                 tmp |= E1000_CTRL_EXT_EIAME;
861                 tmp |= E1000_CTRL_EXT_IRCA;
862
863                 wr32(E1000_CTRL_EXT, tmp);
864
865                 /* enable msix_other interrupt */
866                 array_wr32(E1000_MSIXBM(0), vector++,
867                                       E1000_EIMS_OTHER);
868                 adapter->eims_other = E1000_EIMS_OTHER;
869
870                 break;
871
872         case e1000_82576:
873         case e1000_82580:
874         case e1000_i350:
875                 /* Turn on MSI-X capability first, or our settings
876                  * won't stick.  And it will take days to debug. */
877                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
878                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
879                                 E1000_GPIE_NSICR);
880
881                 /* enable msix_other interrupt */
882                 adapter->eims_other = 1 << vector;
883                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
884
885                 wr32(E1000_IVAR_MISC, tmp);
886                 break;
887         default:
888                 /* do nothing, since nothing else supports MSI-X */
889                 break;
890         } /* switch (hw->mac.type) */
891
892         adapter->eims_enable_mask |= adapter->eims_other;
893
894         for (i = 0; i < adapter->num_q_vectors; i++)
895                 igb_assign_vector(adapter->q_vector[i], vector++);
896
897         wrfl();
898 }
899
900 /**
901  * igb_request_msix - Initialize MSI-X interrupts
902  *
903  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
904  * kernel.
905  **/
906 static int igb_request_msix(struct igb_adapter *adapter)
907 {
908         struct net_device *netdev = adapter->netdev;
909         struct e1000_hw *hw = &adapter->hw;
910         int i, err = 0, vector = 0;
911
912         err = request_irq(adapter->msix_entries[vector].vector,
913                           igb_msix_other, 0, netdev->name, adapter);
914         if (err)
915                 goto out;
916         vector++;
917
918         for (i = 0; i < adapter->num_q_vectors; i++) {
919                 struct igb_q_vector *q_vector = adapter->q_vector[i];
920
921                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
922
923                 if (q_vector->rx_ring && q_vector->tx_ring)
924                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
925                                 q_vector->rx_ring->queue_index);
926                 else if (q_vector->tx_ring)
927                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
928                                 q_vector->tx_ring->queue_index);
929                 else if (q_vector->rx_ring)
930                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
931                                 q_vector->rx_ring->queue_index);
932                 else
933                         sprintf(q_vector->name, "%s-unused", netdev->name);
934
935                 err = request_irq(adapter->msix_entries[vector].vector,
936                                   igb_msix_ring, 0, q_vector->name,
937                                   q_vector);
938                 if (err)
939                         goto out;
940                 vector++;
941         }
942
943         igb_configure_msix(adapter);
944         return 0;
945 out:
946         return err;
947 }
948
949 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
950 {
951         if (adapter->msix_entries) {
952                 pci_disable_msix(adapter->pdev);
953                 kfree(adapter->msix_entries);
954                 adapter->msix_entries = NULL;
955         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
956                 pci_disable_msi(adapter->pdev);
957         }
958 }
959
960 /**
961  * igb_free_q_vectors - Free memory allocated for interrupt vectors
962  * @adapter: board private structure to initialize
963  *
964  * This function frees the memory allocated to the q_vectors.  In addition if
965  * NAPI is enabled it will delete any references to the NAPI struct prior
966  * to freeing the q_vector.
967  **/
968 static void igb_free_q_vectors(struct igb_adapter *adapter)
969 {
970         int v_idx;
971
972         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
973                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
974                 adapter->q_vector[v_idx] = NULL;
975                 if (!q_vector)
976                         continue;
977                 netif_napi_del(&q_vector->napi);
978                 kfree(q_vector);
979         }
980         adapter->num_q_vectors = 0;
981 }
982
983 /**
984  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
985  *
986  * This function resets the device so that it has 0 rx queues, tx queues, and
987  * MSI-X interrupts allocated.
988  */
989 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
990 {
991         igb_free_queues(adapter);
992         igb_free_q_vectors(adapter);
993         igb_reset_interrupt_capability(adapter);
994 }
995
996 /**
997  * igb_set_interrupt_capability - set MSI or MSI-X if supported
998  *
999  * Attempt to configure interrupts using the best available
1000  * capabilities of the hardware and kernel.
1001  **/
1002 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1003 {
1004         int err;
1005         int numvecs, i;
1006
1007         /* Number of supported queues. */
1008         adapter->num_rx_queues = adapter->rss_queues;
1009         if (adapter->vfs_allocated_count)
1010                 adapter->num_tx_queues = 1;
1011         else
1012                 adapter->num_tx_queues = adapter->rss_queues;
1013
1014         /* start with one vector for every rx queue */
1015         numvecs = adapter->num_rx_queues;
1016
1017         /* if tx handler is separate add 1 for every tx queue */
1018         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1019                 numvecs += adapter->num_tx_queues;
1020
1021         /* store the number of vectors reserved for queues */
1022         adapter->num_q_vectors = numvecs;
1023
1024         /* add 1 vector for link status interrupts */
1025         numvecs++;
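        /* For example, with 4 RSS queues and no queue pairing this gives
         * 4 RX + 4 TX + 1 other = 9 vectors; with IGB_FLAG_QUEUE_PAIRS set
         * it is 4 + 1 = 5. */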
1026         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1027                                         GFP_KERNEL);
1028         if (!adapter->msix_entries)
1029                 goto msi_only;
1030
1031         for (i = 0; i < numvecs; i++)
1032                 adapter->msix_entries[i].entry = i;
1033
1034         err = pci_enable_msix(adapter->pdev,
1035                               adapter->msix_entries,
1036                               numvecs);
1037         if (err == 0)
1038                 goto out;
1039
1040         igb_reset_interrupt_capability(adapter);
1041
1042         /* If we can't do MSI-X, try MSI */
1043 msi_only:
1044 #ifdef CONFIG_PCI_IOV
1045         /* disable SR-IOV for non MSI-X configurations */
1046         if (adapter->vf_data) {
1047                 struct e1000_hw *hw = &adapter->hw;
1048                 /* disable iov and allow time for transactions to clear */
1049                 pci_disable_sriov(adapter->pdev);
1050                 msleep(500);
1051
1052                 kfree(adapter->vf_data);
1053                 adapter->vf_data = NULL;
1054                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1055                 msleep(100);
1056                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1057         }
1058 #endif
1059         adapter->vfs_allocated_count = 0;
1060         adapter->rss_queues = 1;
1061         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1062         adapter->num_rx_queues = 1;
1063         adapter->num_tx_queues = 1;
1064         adapter->num_q_vectors = 1;
1065         if (!pci_enable_msi(adapter->pdev))
1066                 adapter->flags |= IGB_FLAG_HAS_MSI;
1067 out:
1068         /* Notify the stack of the (possibly) reduced queue counts. */
1069         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1070         return netif_set_real_num_rx_queues(adapter->netdev,
1071                                             adapter->num_rx_queues);
1072 }
1073
1074 /**
1075  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1076  * @adapter: board private structure to initialize
1077  *
1078  * We allocate one q_vector per queue interrupt.  If allocation fails we
1079  * return -ENOMEM.
1080  **/
1081 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1082 {
1083         struct igb_q_vector *q_vector;
1084         struct e1000_hw *hw = &adapter->hw;
1085         int v_idx;
1086
1087         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1088                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1089                 if (!q_vector)
1090                         goto err_out;
1091                 q_vector->adapter = adapter;
1092                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1093                 q_vector->itr_val = IGB_START_ITR;
1094                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1095                 adapter->q_vector[v_idx] = q_vector;
1096         }
1097         return 0;
1098
1099 err_out:
1100         igb_free_q_vectors(adapter);
1101         return -ENOMEM;
1102 }
1103
1104 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1105                                       int ring_idx, int v_idx)
1106 {
1107         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1108
1109         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1110         q_vector->rx_ring->q_vector = q_vector;
1111         q_vector->itr_val = adapter->rx_itr_setting;
1112         if (q_vector->itr_val && q_vector->itr_val <= 3)
1113                 q_vector->itr_val = IGB_START_ITR;
1114 }
1115
1116 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1117                                       int ring_idx, int v_idx)
1118 {
1119         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1120
1121         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1122         q_vector->tx_ring->q_vector = q_vector;
1123         q_vector->itr_val = adapter->tx_itr_setting;
1124         if (q_vector->itr_val && q_vector->itr_val <= 3)
1125                 q_vector->itr_val = IGB_START_ITR;
1126 }
1127
1128 /**
1129  * igb_map_ring_to_vector - maps allocated queues to vectors
1130  *
1131  * This function maps the recently allocated queues to vectors.
1132  **/
1133 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1134 {
1135         int i;
1136         int v_idx = 0;
1137
1138         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1139             (adapter->num_q_vectors < adapter->num_tx_queues))
1140                 return -ENOMEM;
1141
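        /* Two cases below: with enough q_vectors every ring gets its own
         * vector; otherwise RX and TX rings with the same index share one
         * (e.g. 4 RX + 4 TX rings on 4 q_vectors pair up per vector). */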
1142         if (adapter->num_q_vectors >=
1143             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1144                 for (i = 0; i < adapter->num_rx_queues; i++)
1145                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1146                 for (i = 0; i < adapter->num_tx_queues; i++)
1147                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1148         } else {
1149                 for (i = 0; i < adapter->num_rx_queues; i++) {
1150                         if (i < adapter->num_tx_queues)
1151                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1152                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1153                 }
1154                 for (; i < adapter->num_tx_queues; i++)
1155                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1156         }
1157         return 0;
1158 }
1159
1160 /**
1161  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1162  *
1163  * This function initializes the interrupts and allocates all of the queues.
1164  **/
1165 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1166 {
1167         struct pci_dev *pdev = adapter->pdev;
1168         int err;
1169
1170         err = igb_set_interrupt_capability(adapter);
1171         if (err)
1172                 return err;
1173
1174         err = igb_alloc_q_vectors(adapter);
1175         if (err) {
1176                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1177                 goto err_alloc_q_vectors;
1178         }
1179
1180         err = igb_alloc_queues(adapter);
1181         if (err) {
1182                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1183                 goto err_alloc_queues;
1184         }
1185
1186         err = igb_map_ring_to_vector(adapter);
1187         if (err) {
1188                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1189                 goto err_map_queues;
1190         }
1191
1192
1193         return 0;
1194 err_map_queues:
1195         igb_free_queues(adapter);
1196 err_alloc_queues:
1197         igb_free_q_vectors(adapter);
1198 err_alloc_q_vectors:
1199         igb_reset_interrupt_capability(adapter);
1200         return err;
1201 }
1202
1203 /**
1204  * igb_request_irq - initialize interrupts
1205  *
1206  * Attempts to configure interrupts using the best available
1207  * capabilities of the hardware and kernel.
1208  **/
1209 static int igb_request_irq(struct igb_adapter *adapter)
1210 {
1211         struct net_device *netdev = adapter->netdev;
1212         struct pci_dev *pdev = adapter->pdev;
1213         int err = 0;
1214
1215         if (adapter->msix_entries) {
1216                 err = igb_request_msix(adapter);
1217                 if (!err)
1218                         goto request_done;
1219                 /* fall back to MSI */
1220                 igb_clear_interrupt_scheme(adapter);
1221                 if (!pci_enable_msi(adapter->pdev))
1222                         adapter->flags |= IGB_FLAG_HAS_MSI;
1223                 igb_free_all_tx_resources(adapter);
1224                 igb_free_all_rx_resources(adapter);
1225                 adapter->num_tx_queues = 1;
1226                 adapter->num_rx_queues = 1;
1227                 adapter->num_q_vectors = 1;
1228                 err = igb_alloc_q_vectors(adapter);
1229                 if (err) {
1230                         dev_err(&pdev->dev,
1231                                 "Unable to allocate memory for vectors\n");
1232                         goto request_done;
1233                 }
1234                 err = igb_alloc_queues(adapter);
1235                 if (err) {
1236                         dev_err(&pdev->dev,
1237                                 "Unable to allocate memory for queues\n");
1238                         igb_free_q_vectors(adapter);
1239                         goto request_done;
1240                 }
1241                 igb_setup_all_tx_resources(adapter);
1242                 igb_setup_all_rx_resources(adapter);
1243         } else {
1244                 igb_assign_vector(adapter->q_vector[0], 0);
1245         }
1246
1247         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1248                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1249                                   netdev->name, adapter);
1250                 if (!err)
1251                         goto request_done;
1252
1253                 /* fall back to legacy interrupts */
1254                 igb_reset_interrupt_capability(adapter);
1255                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1256         }
1257
1258         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1259                           netdev->name, adapter);
1260
1261         if (err)
1262                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1263                         err);
1264
1265 request_done:
1266         return err;
1267 }
1268
1269 static void igb_free_irq(struct igb_adapter *adapter)
1270 {
1271         if (adapter->msix_entries) {
1272                 int vector = 0, i;
1273
1274                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1275
1276                 for (i = 0; i < adapter->num_q_vectors; i++) {
1277                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1278                         free_irq(adapter->msix_entries[vector++].vector,
1279                                  q_vector);
1280                 }
1281         } else {
1282                 free_irq(adapter->pdev->irq, adapter);
1283         }
1284 }
1285
1286 /**
1287  * igb_irq_disable - Mask off interrupt generation on the NIC
1288  * @adapter: board private structure
1289  **/
1290 static void igb_irq_disable(struct igb_adapter *adapter)
1291 {
1292         struct e1000_hw *hw = &adapter->hw;
1293
1294         /*
1295          * we need to be careful when disabling interrupts.  The VFs are also
1296          * mapped into these registers and so clearing the bits can cause
1297          * issues on the VF drivers so we only need to clear what we set
1298          */
1299         if (adapter->msix_entries) {
1300                 u32 regval = rd32(E1000_EIAM);
1301                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1302                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1303                 regval = rd32(E1000_EIAC);
1304                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1305         }
1306
1307         wr32(E1000_IAM, 0);
1308         wr32(E1000_IMC, ~0);
1309         wrfl();
1310         if (adapter->msix_entries) {
1311                 int i;
1312                 for (i = 0; i < adapter->num_q_vectors; i++)
1313                         synchronize_irq(adapter->msix_entries[i].vector);
1314         } else {
1315                 synchronize_irq(adapter->pdev->irq);
1316         }
1317 }
1318
1319 /**
1320  * igb_irq_enable - Enable default interrupt generation settings
1321  * @adapter: board private structure
1322  **/
1323 static void igb_irq_enable(struct igb_adapter *adapter)
1324 {
1325         struct e1000_hw *hw = &adapter->hw;
1326
1327         if (adapter->msix_entries) {
1328                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1329                 u32 regval = rd32(E1000_EIAC);
1330                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1331                 regval = rd32(E1000_EIAM);
1332                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1333                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1334                 if (adapter->vfs_allocated_count) {
1335                         wr32(E1000_MBVFIMR, 0xFF);
1336                         ims |= E1000_IMS_VMMB;
1337                 }
1338                 if (adapter->hw.mac.type == e1000_82580)
1339                         ims |= E1000_IMS_DRSTA;
1340
1341                 wr32(E1000_IMS, ims);
1342         } else {
1343                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1344                                 E1000_IMS_DRSTA);
1345                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1346                                 E1000_IMS_DRSTA);
1347         }
1348 }
1349
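/**
 * igb_update_mng_vlan - keep the manageability VLAN in the VLAN filter table
 * @adapter: board private structure
 *
 * If the firmware DHCP cookie names a VLAN, make sure that VLAN id stays in
 * the VFTA, and drop the previously tracked id once it is no longer needed
 * by either manageability or the networking stack.
 **/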
1350 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1351 {
1352         struct e1000_hw *hw = &adapter->hw;
1353         u16 vid = adapter->hw.mng_cookie.vlan_id;
1354         u16 old_vid = adapter->mng_vlan_id;
1355
1356         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1357                 /* add VID to filter table */
1358                 igb_vfta_set(hw, vid, true);
1359                 adapter->mng_vlan_id = vid;
1360         } else {
1361                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1362         }
1363
1364         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1365             (vid != old_vid) &&
1366             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1367                 /* remove VID from filter table */
1368                 igb_vfta_set(hw, old_vid, false);
1369         }
1370 }
1371
1372 /**
1373  * igb_release_hw_control - release control of the h/w to f/w
1374  * @adapter: address of board private structure
1375  *
1376  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1377  * For ASF and Pass Through versions of f/w this means that the
1378  * driver is no longer loaded.
1379  *
1380  **/
1381 static void igb_release_hw_control(struct igb_adapter *adapter)
1382 {
1383         struct e1000_hw *hw = &adapter->hw;
1384         u32 ctrl_ext;
1385
1386         /* Let firmware take over control of h/w */
1387         ctrl_ext = rd32(E1000_CTRL_EXT);
1388         wr32(E1000_CTRL_EXT,
1389                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1390 }
1391
1392 /**
1393  * igb_get_hw_control - get control of the h/w from f/w
1394  * @adapter: address of board private structure
1395  *
1396  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1397  * For ASF and Pass Through versions of f/w this means that
1398  * the driver is loaded.
1399  *
1400  **/
1401 static void igb_get_hw_control(struct igb_adapter *adapter)
1402 {
1403         struct e1000_hw *hw = &adapter->hw;
1404         u32 ctrl_ext;
1405
1406         /* Let firmware know the driver has taken over */
1407         ctrl_ext = rd32(E1000_CTRL_EXT);
1408         wr32(E1000_CTRL_EXT,
1409                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1410 }
1411
1412 /**
1413  * igb_configure - configure the hardware for RX and TX
1414  * @adapter: private board structure
1415  **/
1416 static void igb_configure(struct igb_adapter *adapter)
1417 {
1418         struct net_device *netdev = adapter->netdev;
1419         int i;
1420
1421         igb_get_hw_control(adapter);
1422         igb_set_rx_mode(netdev);
1423
1424         igb_restore_vlan(adapter);
1425
1426         igb_setup_tctl(adapter);
1427         igb_setup_mrqc(adapter);
1428         igb_setup_rctl(adapter);
1429
1430         igb_configure_tx(adapter);
1431         igb_configure_rx(adapter);
1432
1433         igb_rx_fifo_flush_82575(&adapter->hw);
1434
1435         /* call igb_desc_unused which always leaves
1436          * at least 1 descriptor unused to make sure
1437          * next_to_use != next_to_clean */
1438         for (i = 0; i < adapter->num_rx_queues; i++) {
1439                 struct igb_ring *ring = adapter->rx_ring[i];
1440                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1441         }
1442 }
1443
1444 /**
1445  * igb_power_up_link - Power up the phy/serdes link
1446  * @adapter: address of board private structure
1447  **/
1448 void igb_power_up_link(struct igb_adapter *adapter)
1449 {
1450         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1451                 igb_power_up_phy_copper(&adapter->hw);
1452         else
1453                 igb_power_up_serdes_link_82575(&adapter->hw);
1454 }
1455
1456 /**
1457  * igb_power_down_link - Power down the phy/serdes link
1458  * @adapter: address of board private structure
1459  */
1460 static void igb_power_down_link(struct igb_adapter *adapter)
1461 {
1462         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1463                 igb_power_down_phy_copper_82575(&adapter->hw);
1464         else
1465                 igb_shutdown_serdes_link_82575(&adapter->hw);
1466 }
1467
1468 /**
1469  * igb_up - Open the interface and prepare it to handle traffic
1470  * @adapter: board private structure
1471  **/
1472 int igb_up(struct igb_adapter *adapter)
1473 {
1474         struct e1000_hw *hw = &adapter->hw;
1475         int i;
1476
1477         /* hardware has been reset, we need to reload some things */
1478         igb_configure(adapter);
1479
1480         clear_bit(__IGB_DOWN, &adapter->state);
1481
1482         for (i = 0; i < adapter->num_q_vectors; i++) {
1483                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1484                 napi_enable(&q_vector->napi);
1485         }
1486         if (adapter->msix_entries)
1487                 igb_configure_msix(adapter);
1488         else
1489                 igb_assign_vector(adapter->q_vector[0], 0);
1490
1491         /* Clear any pending interrupts. */
1492         rd32(E1000_ICR);
1493         igb_irq_enable(adapter);
1494
1495         /* notify VFs that reset has been completed */
1496         if (adapter->vfs_allocated_count) {
1497                 u32 reg_data = rd32(E1000_CTRL_EXT);
1498                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1499                 wr32(E1000_CTRL_EXT, reg_data);
1500         }
1501
1502         netif_tx_start_all_queues(adapter->netdev);
1503
1504         /* start the watchdog. */
1505         hw->mac.get_link_status = 1;
1506         schedule_work(&adapter->watchdog_task);
1507
1508         return 0;
1509 }
1510
1511 void igb_down(struct igb_adapter *adapter)
1512 {
1513         struct net_device *netdev = adapter->netdev;
1514         struct e1000_hw *hw = &adapter->hw;
1515         u32 tctl, rctl;
1516         int i;
1517
1518         /* signal that we're down so the interrupt handler does not
1519          * reschedule our watchdog timer */
1520         set_bit(__IGB_DOWN, &adapter->state);
1521
1522         /* disable receives in the hardware */
1523         rctl = rd32(E1000_RCTL);
1524         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1525         /* flush and sleep below */
1526
1527         netif_tx_stop_all_queues(netdev);
1528
1529         /* disable transmits in the hardware */
1530         tctl = rd32(E1000_TCTL);
1531         tctl &= ~E1000_TCTL_EN;
1532         wr32(E1000_TCTL, tctl);
1533         /* flush both disables and wait for them to finish */
1534         wrfl();
1535         msleep(10);
1536
1537         for (i = 0; i < adapter->num_q_vectors; i++) {
1538                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1539                 napi_disable(&q_vector->napi);
1540         }
1541
1542         igb_irq_disable(adapter);
1543
1544         del_timer_sync(&adapter->watchdog_timer);
1545         del_timer_sync(&adapter->phy_info_timer);
1546
1547         netif_carrier_off(netdev);
1548
1549         /* record the stats before reset */
1550         spin_lock(&adapter->stats64_lock);
1551         igb_update_stats(adapter, &adapter->stats64);
1552         spin_unlock(&adapter->stats64_lock);
1553
1554         adapter->link_speed = 0;
1555         adapter->link_duplex = 0;
1556
1557         if (!pci_channel_offline(adapter->pdev))
1558                 igb_reset(adapter);
1559         igb_clean_all_tx_rings(adapter);
1560         igb_clean_all_rx_rings(adapter);
1561 #ifdef CONFIG_IGB_DCA
1562
1563         /* since we reset the hardware DCA settings were cleared */
1564         igb_setup_dca(adapter);
1565 #endif
1566 }
1567
1568 void igb_reinit_locked(struct igb_adapter *adapter)
1569 {
1570         WARN_ON(in_interrupt());
1571         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1572                 msleep(1);
1573         igb_down(adapter);
1574         igb_up(adapter);
1575         clear_bit(__IGB_RESETTING, &adapter->state);
1576 }
1577
1578 void igb_reset(struct igb_adapter *adapter)
1579 {
1580         struct pci_dev *pdev = adapter->pdev;
1581         struct e1000_hw *hw = &adapter->hw;
1582         struct e1000_mac_info *mac = &hw->mac;
1583         struct e1000_fc_info *fc = &hw->fc;
1584         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1585         u16 hwm;
1586
1587         /* Repartition PBA for greater than 9k MTU.
1588          * To take effect, CTRL.RST is required.
1589          */
1590         switch (mac->type) {
1591         case e1000_i350:
1592         case e1000_82580:
1593                 pba = rd32(E1000_RXPBS);
1594                 pba = igb_rxpbs_adjust_82580(pba);
1595                 break;
1596         case e1000_82576:
1597                 pba = rd32(E1000_RXPBS);
1598                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1599                 break;
1600         case e1000_82575:
1601         default:
1602                 pba = E1000_PBA_34K;
1603                 break;
1604         }
1605
1606         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1607             (mac->type < e1000_82576)) {
1608                 /* adjust PBA for jumbo frames */
1609                 wr32(E1000_PBA, pba);
1610
1611                 /* To maintain wire speed transmits, the Tx FIFO should be
1612                  * large enough to accommodate two full transmit packets,
1613                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1614                  * the Rx FIFO should be large enough to accommodate at least
1615                  * one full receive packet and is similarly rounded up and
1616                  * expressed in KB. */
1617                 pba = rd32(E1000_PBA);
1618                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1619                 tx_space = pba >> 16;
1620                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1621                 pba &= 0xffff;
1622                 /* the Tx FIFO also stores 16 bytes of information about the Tx
1623                  * packet, but don't include the Ethernet FCS because hardware appends it */
1624                 min_tx_space = (adapter->max_frame_size +
1625                                 sizeof(union e1000_adv_tx_desc) -
1626                                 ETH_FCS_LEN) * 2;
1627                 min_tx_space = ALIGN(min_tx_space, 1024);
1628                 min_tx_space >>= 10;
1629                 /* software strips receive CRC, so leave room for it */
1630                 min_rx_space = adapter->max_frame_size;
1631                 min_rx_space = ALIGN(min_rx_space, 1024);
1632                 min_rx_space >>= 10;
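                /*
                 * Illustrative figures only (not from this file): with a
                 * 9000-byte MTU the max frame is 9018 bytes, so min_tx_space
                 * becomes ALIGN((9018 + 16 - 4) * 2, 1024) >> 10 = 18 KB and
                 * min_rx_space becomes ALIGN(9018, 1024) >> 10 = 9 KB.
                 */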
1633
1634                 /* If current Tx allocation is less than the min Tx FIFO size,
1635                  * and the min Tx FIFO size is less than the current Rx FIFO
1636                  * allocation, take space away from current Rx allocation */
1637                 if (tx_space < min_tx_space &&
1638                     ((min_tx_space - tx_space) < pba)) {
1639                         pba = pba - (min_tx_space - tx_space);
1640
1641                         /* if short on rx space, rx wins and must trump tx
1642                          * adjustment */
1643                         if (pba < min_rx_space)
1644                                 pba = min_rx_space;
1645                 }
1646                 wr32(E1000_PBA, pba);
1647         }
1648
1649         /* flow control settings */
1650         /* The high water mark must be low enough to fit one full frame
1651          * (or the size used for early receive) above it in the Rx FIFO.
1652          * Set it to the lower of:
1653          * - 90% of the Rx FIFO size, or
1654          * - the full Rx FIFO size minus one full frame */
1655         hwm = min(((pba << 10) * 9 / 10),
1656                         ((pba << 10) - 2 * adapter->max_frame_size));
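        /*
         * Worked example (illustrative, assuming the default 82575 PBA of
         * 34 KB and a 1518-byte max frame): 90% of the FIFO is 31334 bytes,
         * the FIFO minus two frames is 31780 bytes, so hwm = 31334, which
         * the 16-byte alignment below rounds down to 31328.
         */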
1657
1658         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1659         fc->low_water = fc->high_water - 16;
1660         fc->pause_time = 0xFFFF;
1661         fc->send_xon = 1;
1662         fc->current_mode = fc->requested_mode;
1663
1664         /* disable receive for all VFs and wait one second */
1665         if (adapter->vfs_allocated_count) {
1666                 int i;
1667                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1668                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1669
1670                 /* ping all the active vfs to let them know we are going down */
1671                 igb_ping_all_vfs(adapter);
1672
1673                 /* disable transmits and receives */
1674                 wr32(E1000_VFRE, 0);
1675                 wr32(E1000_VFTE, 0);
1676         }
1677
1678         /* Allow time for pending master requests to run */
1679         hw->mac.ops.reset_hw(hw);
1680         wr32(E1000_WUC, 0);
1681
1682         if (hw->mac.ops.init_hw(hw))
1683                 dev_err(&pdev->dev, "Hardware Error\n");
1684         if (hw->mac.type > e1000_82580) {
1685                 if (adapter->flags & IGB_FLAG_DMAC) {
1686                         u32 reg;
1687
1688                         /*
1689                          * DMA Coalescing high water mark needs to be higher
1690                          * than the Rx threshold.  The Rx threshold is
1691                          * currently pba - 6, so we should use a high water
1692                          * mark of pba - 4. */
1693                         hwm = (pba - 4) << 10;
1694
1695                         reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1696                                & E1000_DMACR_DMACTHR_MASK);
1697
1698                         /* transition to L0s or L1 if available */
1699                         reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1700
1701                         /* watchdog timer = +/-1000 usec in 32 usec intervals (1000 >> 5 == 31) */
1702                         reg |= (1000 >> 5);
1703                         wr32(E1000_DMACR, reg);
1704
1705                         /* no lower threshold to disable coalescing (smart fifo)
1706                          * - UTRESH=0 */
1707                         wr32(E1000_DMCRTRH, 0);
1708
1709                         /* set hwm to PBA -  2 * max frame size */
1710                         wr32(E1000_FCRTC, hwm);
1711
1712                         /*
1713                          * This sets the time to wait before requesting a transition
1714                          * to the low power state to the number of usecs needed to
1715                          * receive a 512 byte frame at gigabit line rate.
1716                          */
1717                         reg = rd32(E1000_DMCTLX);
1718                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
1719
1720                         /* Delay 255 usec before entering Lx state. */
1721                         reg |= 0xFF;
1722                         wr32(E1000_DMCTLX, reg);
1723
1724                         /* free space in Tx packet buffer to wake from DMAC */
1725                         wr32(E1000_DMCTXTH,
1726                              (IGB_MIN_TXPBSIZE -
1727                              (IGB_TX_BUF_4096 + adapter->max_frame_size))
1728                              >> 6);
1729
1730                         /* make low power state decision controlled by DMAC */
1731                         reg = rd32(E1000_PCIEMISC);
1732                         reg |= E1000_PCIEMISC_LX_DECISION;
1733                         wr32(E1000_PCIEMISC, reg);
1734                 } /* end if IGB_FLAG_DMAC set */
1735         }
1736         if (hw->mac.type == e1000_82580) {
1737                 u32 reg = rd32(E1000_PCIEMISC);
1738                 wr32(E1000_PCIEMISC,
1739                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1740         }
1741         if (!netif_running(adapter->netdev))
1742                 igb_power_down_link(adapter);
1743
1744         igb_update_mng_vlan(adapter);
1745
1746         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1747         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1748
1749         igb_get_phy_info(hw);
1750 }
1751
1752 static const struct net_device_ops igb_netdev_ops = {
1753         .ndo_open               = igb_open,
1754         .ndo_stop               = igb_close,
1755         .ndo_start_xmit         = igb_xmit_frame_adv,
1756         .ndo_get_stats64        = igb_get_stats64,
1757         .ndo_set_rx_mode        = igb_set_rx_mode,
1758         .ndo_set_multicast_list = igb_set_rx_mode,
1759         .ndo_set_mac_address    = igb_set_mac,
1760         .ndo_change_mtu         = igb_change_mtu,
1761         .ndo_do_ioctl           = igb_ioctl,
1762         .ndo_tx_timeout         = igb_tx_timeout,
1763         .ndo_validate_addr      = eth_validate_addr,
1764         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1765         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1766         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1767         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1768         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1769         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1770         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1771 #ifdef CONFIG_NET_POLL_CONTROLLER
1772         .ndo_poll_controller    = igb_netpoll,
1773 #endif
1774 };
1775
1776 /**
1777  * igb_probe - Device Initialization Routine
1778  * @pdev: PCI device information struct
1779  * @ent: entry in igb_pci_tbl
1780  *
1781  * Returns 0 on success, negative on failure
1782  *
1783  * igb_probe initializes an adapter identified by a pci_dev structure.
1784  * The OS initialization, configuring of the adapter private structure,
1785  * and a hardware reset occur.
1786  **/
1787 static int __devinit igb_probe(struct pci_dev *pdev,
1788                                const struct pci_device_id *ent)
1789 {
1790         struct net_device *netdev;
1791         struct igb_adapter *adapter;
1792         struct e1000_hw *hw;
1793         u16 eeprom_data = 0;
1794         s32 ret_val;
1795         static int global_quad_port_a; /* global quad port a indication */
1796         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1797         unsigned long mmio_start, mmio_len;
1798         int err, pci_using_dac;
1799         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1800         u8 part_str[E1000_PBANUM_LENGTH];
1801
1802         /* Catch broken hardware that put the wrong VF device ID in
1803          * the PCIe SR-IOV capability.
1804          */
1805         if (pdev->is_virtfn) {
1806                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1807                      pci_name(pdev), pdev->vendor, pdev->device);
1808                 return -EINVAL;
1809         }
1810
1811         err = pci_enable_device_mem(pdev);
1812         if (err)
1813                 return err;
1814
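        /*
         * Prefer 64-bit DMA addressing; if either the streaming or the
         * coherent mask cannot be satisfied, fall back to 32-bit masks
         * before giving up on the device.
         */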
1815         pci_using_dac = 0;
1816         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1817         if (!err) {
1818                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1819                 if (!err)
1820                         pci_using_dac = 1;
1821         } else {
1822                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1823                 if (err) {
1824                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1825                         if (err) {
1826                                 dev_err(&pdev->dev, "No usable DMA "
1827                                         "configuration, aborting\n");
1828                                 goto err_dma;
1829                         }
1830                 }
1831         }
1832
1833         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1834                                            IORESOURCE_MEM),
1835                                            igb_driver_name);
1836         if (err)
1837                 goto err_pci_reg;
1838
1839         pci_enable_pcie_error_reporting(pdev);
1840
1841         pci_set_master(pdev);
1842         pci_save_state(pdev);
1843
1844         err = -ENOMEM;
1845         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1846                                    IGB_ABS_MAX_TX_QUEUES);
1847         if (!netdev)
1848                 goto err_alloc_etherdev;
1849
1850         SET_NETDEV_DEV(netdev, &pdev->dev);
1851
1852         pci_set_drvdata(pdev, netdev);
1853         adapter = netdev_priv(netdev);
1854         adapter->netdev = netdev;
1855         adapter->pdev = pdev;
1856         hw = &adapter->hw;
1857         hw->back = adapter;
1858         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1859
1860         mmio_start = pci_resource_start(pdev, 0);
1861         mmio_len = pci_resource_len(pdev, 0);
1862
1863         err = -EIO;
1864         hw->hw_addr = ioremap(mmio_start, mmio_len);
1865         if (!hw->hw_addr)
1866                 goto err_ioremap;
1867
1868         netdev->netdev_ops = &igb_netdev_ops;
1869         igb_set_ethtool_ops(netdev);
1870         netdev->watchdog_timeo = 5 * HZ;
1871
1872         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1873
1874         netdev->mem_start = mmio_start;
1875         netdev->mem_end = mmio_start + mmio_len;
1876
1877         /* PCI config space info */
1878         hw->vendor_id = pdev->vendor;
1879         hw->device_id = pdev->device;
1880         hw->revision_id = pdev->revision;
1881         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1882         hw->subsystem_device_id = pdev->subsystem_device;
1883
1884         /* Copy the default MAC, PHY and NVM function pointers */
1885         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1886         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1887         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1888         /* Initialize skew-specific constants */
1889         err = ei->get_invariants(hw);
1890         if (err)
1891                 goto err_sw_init;
1892
1893         /* setup the private structure */
1894         err = igb_sw_init(adapter);
1895         if (err)
1896                 goto err_sw_init;
1897
1898         igb_get_bus_info_pcie(hw);
1899
1900         hw->phy.autoneg_wait_to_complete = false;
1901
1902         /* Copper options */
1903         if (hw->phy.media_type == e1000_media_type_copper) {
1904                 hw->phy.mdix = AUTO_ALL_MODES;
1905                 hw->phy.disable_polarity_correction = false;
1906                 hw->phy.ms_type = e1000_ms_hw_default;
1907         }
1908
1909         if (igb_check_reset_block(hw))
1910                 dev_info(&pdev->dev,
1911                         "PHY reset is blocked due to SOL/IDER session.\n");
1912
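        /* offloads common to all supported MACs; SCTP checksum offload is
         * added below for 82576 and later */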
1913         netdev->features = NETIF_F_SG |
1914                            NETIF_F_IP_CSUM |
1915                            NETIF_F_HW_VLAN_TX |
1916                            NETIF_F_HW_VLAN_RX |
1917                            NETIF_F_HW_VLAN_FILTER;
1918
1919         netdev->features |= NETIF_F_IPV6_CSUM;
1920         netdev->features |= NETIF_F_TSO;
1921         netdev->features |= NETIF_F_TSO6;
1922         netdev->features |= NETIF_F_GRO;
1923
1924         netdev->vlan_features |= NETIF_F_TSO;
1925         netdev->vlan_features |= NETIF_F_TSO6;
1926         netdev->vlan_features |= NETIF_F_IP_CSUM;
1927         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1928         netdev->vlan_features |= NETIF_F_SG;
1929
1930         if (pci_using_dac) {
1931                 netdev->features |= NETIF_F_HIGHDMA;
1932                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1933         }
1934
1935         if (hw->mac.type >= e1000_82576)
1936                 netdev->features |= NETIF_F_SCTP_CSUM;
1937
1938         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1939
1940         /* before reading the NVM, reset the controller to put the device in a
1941          * known good starting state */
1942         hw->mac.ops.reset_hw(hw);
1943
1944         /* make sure the NVM is good */
1945         if (hw->nvm.ops.validate(hw) < 0) {
1946                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1947                 err = -EIO;
1948                 goto err_eeprom;
1949         }
1950
1951         /* copy the MAC address out of the NVM */
1952         if (hw->mac.ops.read_mac_addr(hw))
1953                 dev_err(&pdev->dev, "NVM Read Error\n");
1954
1955         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1956         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1957
1958         if (!is_valid_ether_addr(netdev->perm_addr)) {
1959                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1960                 err = -EIO;
1961                 goto err_eeprom;
1962         }
1963
1964         setup_timer(&adapter->watchdog_timer, igb_watchdog,
1965                     (unsigned long) adapter);
1966         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1967                     (unsigned long) adapter);
1968
1969         INIT_WORK(&adapter->reset_task, igb_reset_task);
1970         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1971
1972         /* Initialize link properties that are user-changeable */
1973         adapter->fc_autoneg = true;
1974         hw->mac.autoneg = true;
1975         hw->phy.autoneg_advertised = 0x2f;
1976
1977         hw->fc.requested_mode = e1000_fc_default;
1978         hw->fc.current_mode = e1000_fc_default;
1979
1980         igb_validate_mdi_setting(hw);
1981
1982         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1983          * enable the ACPI Magic Packet filter
1984          */
1985
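        /*
         * The APM enable bit lives in a different NVM word depending on the
         * LAN function: the 82580 keeps a per-function copy of INIT_CONTROL3,
         * older parts use the port A/B words.
         */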
1986         if (hw->bus.func == 0)
1987                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1988         else if (hw->mac.type == e1000_82580)
1989                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1990                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1991                                  &eeprom_data);
1992         else if (hw->bus.func == 1)
1993                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1994
1995         if (eeprom_data & eeprom_apme_mask)
1996                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1997
1998         /* now that we have the eeprom settings, apply the special cases where
1999          * the eeprom may be wrong or the board simply won't support wake on
2000          * lan on a particular port */
2001         switch (pdev->device) {
2002         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2003                 adapter->eeprom_wol = 0;
2004                 break;
2005         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2006         case E1000_DEV_ID_82576_FIBER:
2007         case E1000_DEV_ID_82576_SERDES:
2008                 /* Wake events only supported on port A for dual fiber
2009                  * regardless of eeprom setting */
2010                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2011                         adapter->eeprom_wol = 0;
2012                 break;
2013         case E1000_DEV_ID_82576_QUAD_COPPER:
2014         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2015                 /* if quad port adapter, disable WoL on all but port A */
2016                 if (global_quad_port_a != 0)
2017                         adapter->eeprom_wol = 0;
2018                 else
2019                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2020                 /* Reset for multiple quad port adapters */
2021                 if (++global_quad_port_a == 4)
2022                         global_quad_port_a = 0;
2023                 break;
2024         }
2025
2026         /* initialize the wol settings based on the eeprom settings */
2027         adapter->wol = adapter->eeprom_wol;
2028         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2029
2030         /* reset the hardware with the new settings */
2031         igb_reset(adapter);
2032
2033         /* let the f/w know that the h/w is now under the control of the
2034          * driver. */
2035         igb_get_hw_control(adapter);
2036
2037         strcpy(netdev->name, "eth%d");
2038         err = register_netdev(netdev);
2039         if (err)
2040                 goto err_register;
2041
2042         /* carrier off reporting is important to ethtool even BEFORE open */
2043         netif_carrier_off(netdev);
2044
2045 #ifdef CONFIG_IGB_DCA
2046         if (dca_add_requester(&pdev->dev) == 0) {
2047                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2048                 dev_info(&pdev->dev, "DCA enabled\n");
2049                 igb_setup_dca(adapter);
2050         }
2051
2052 #endif
2053         /* do hw tstamp init after resetting */
2054         igb_init_hw_timer(adapter);
2055
2056         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2057         /* print bus type/speed/width info */
2058         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2059                  netdev->name,
2060                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2061                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2062                                                             "unknown"),
2063                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2064                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2065                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2066                    "unknown"),
2067                  netdev->dev_addr);
2068
2069         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2070         if (ret_val)
2071                 strcpy(part_str, "Unknown");
2072         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2073         dev_info(&pdev->dev,
2074                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2075                 adapter->msix_entries ? "MSI-X" :
2076                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2077                 adapter->num_rx_queues, adapter->num_tx_queues);
2078         switch (hw->mac.type) {
2079         case e1000_i350:
2080                 igb_set_eee_i350(hw);
2081                 break;
2082         default:
2083                 break;
2084         }
2085         return 0;
2086
2087 err_register:
2088         igb_release_hw_control(adapter);
2089 err_eeprom:
2090         if (!igb_check_reset_block(hw))
2091                 igb_reset_phy(hw);
2092
2093         if (hw->flash_address)
2094                 iounmap(hw->flash_address);
2095 err_sw_init:
2096         igb_clear_interrupt_scheme(adapter);
2097         iounmap(hw->hw_addr);
2098 err_ioremap:
2099         free_netdev(netdev);
2100 err_alloc_etherdev:
2101         pci_release_selected_regions(pdev,
2102                                      pci_select_bars(pdev, IORESOURCE_MEM));
2103 err_pci_reg:
2104 err_dma:
2105         pci_disable_device(pdev);
2106         return err;
2107 }
2108
2109 /**
2110  * igb_remove - Device Removal Routine
2111  * @pdev: PCI device information struct
2112  *
2113  * igb_remove is called by the PCI subsystem to alert the driver
2114  * that it should release a PCI device.  This could be caused by a
2115  * Hot-Plug event, or because the driver is going to be removed from
2116  * memory.
2117  **/
2118 static void __devexit igb_remove(struct pci_dev *pdev)
2119 {
2120         struct net_device *netdev = pci_get_drvdata(pdev);
2121         struct igb_adapter *adapter = netdev_priv(netdev);
2122         struct e1000_hw *hw = &adapter->hw;
2123
2124         /*
2125          * The watchdog timer may be rescheduled, so explicitly
2126          * disable watchdog from being rescheduled.
2127          */
2128         set_bit(__IGB_DOWN, &adapter->state);
2129         del_timer_sync(&adapter->watchdog_timer);
2130         del_timer_sync(&adapter->phy_info_timer);
2131
2132         cancel_work_sync(&adapter->reset_task);
2133         cancel_work_sync(&adapter->watchdog_task);
2134
2135 #ifdef CONFIG_IGB_DCA
2136         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2137                 dev_info(&pdev->dev, "DCA disabled\n");
2138                 dca_remove_requester(&pdev->dev);
2139                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2140                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2141         }
2142 #endif
2143
2144         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2145          * would have already happened in close and is redundant. */
2146         igb_release_hw_control(adapter);
2147
2148         unregister_netdev(netdev);
2149
2150         igb_clear_interrupt_scheme(adapter);
2151
2152 #ifdef CONFIG_PCI_IOV
2153         /* reclaim resources allocated to VFs */
2154         if (adapter->vf_data) {
2155                 /* disable iov and allow time for transactions to clear */
2156                 pci_disable_sriov(pdev);
2157                 msleep(500);
2158
2159                 kfree(adapter->vf_data);
2160                 adapter->vf_data = NULL;
2161                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2162                 msleep(100);
2163                 dev_info(&pdev->dev, "IOV Disabled\n");
2164         }
2165 #endif
2166
2167         iounmap(hw->hw_addr);
2168         if (hw->flash_address)
2169                 iounmap(hw->flash_address);
2170         pci_release_selected_regions(pdev,
2171                                      pci_select_bars(pdev, IORESOURCE_MEM));
2172
2173         free_netdev(netdev);
2174
2175         pci_disable_pcie_error_reporting(pdev);
2176
2177         pci_disable_device(pdev);
2178 }
2179
2180 /**
2181  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2182  * @adapter: board private structure to initialize
2183  *
2184  * This function initializes the VF-specific data storage and then attempts to
2185  * allocate the VFs.  The reason for ordering it this way is that it is much
2186  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2187  * the memory for the VFs.
2188  **/
2189 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2190 {
2191 #ifdef CONFIG_PCI_IOV
2192         struct pci_dev *pdev = adapter->pdev;
2193
2194         if (adapter->vfs_allocated_count) {
2195                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2196                                            sizeof(struct vf_data_storage),
2197                                            GFP_KERNEL);
2198                 /* if allocation failed then we do not support SR-IOV */
2199                 if (!adapter->vf_data) {
2200                         adapter->vfs_allocated_count = 0;
2201                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2202                                 "Data Storage\n");
2203                 }
2204         }
2205
2206         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2207                 kfree(adapter->vf_data);
2208                 adapter->vf_data = NULL;
2209 #endif /* CONFIG_PCI_IOV */
2210                 adapter->vfs_allocated_count = 0;
2211 #ifdef CONFIG_PCI_IOV
2212         } else {
2213                 unsigned char mac_addr[ETH_ALEN];
2214                 int i;
2215                 dev_info(&pdev->dev, "%d vfs allocated\n",
2216                          adapter->vfs_allocated_count);
2217                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2218                         random_ether_addr(mac_addr);
2219                         igb_set_vf_mac(adapter, i, mac_addr);
2220                 }
2221                 /* DMA Coalescing is not supported in IOV mode. */
2222                 if (adapter->flags & IGB_FLAG_DMAC)
2223                         adapter->flags &= ~IGB_FLAG_DMAC;
2224         }
2225 #endif /* CONFIG_PCI_IOV */
2226 }
2227
2228
2229 /**
2230  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2231  * @adapter: board private structure to initialize
2232  *
2233  * igb_init_hw_timer initializes the function pointer and values for the hw
2234  * timer found in hardware.
2235  **/
2236 static void igb_init_hw_timer(struct igb_adapter *adapter)
2237 {
2238         struct e1000_hw *hw = &adapter->hw;
2239
2240         switch (hw->mac.type) {
2241         case e1000_i350:
2242         case e1000_82580:
2243                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2244                 adapter->cycles.read = igb_read_clock;
2245                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2246                 adapter->cycles.mult = 1;
2247                 /*
2248                  * The 82580 timesync updates the system timer in 8ns steps every 8ns
2249                  * and the value cannot be shifted.  Instead we need to shift
2250                  * the registers to generate a 64bit timer value.  As a result
2251                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2252                  * 24 in order to generate a larger value for synchronization.
2253                  */
2254                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2255                 /* disable system timer temporarily by setting bit 31 */
2256                 wr32(E1000_TSAUXC, 0x80000000);
2257                 wrfl();
2258
2259                 /* Set registers so that rollover occurs soon to test this. */
2260                 wr32(E1000_SYSTIMR, 0x00000000);
2261                 wr32(E1000_SYSTIML, 0x80000000);
2262                 wr32(E1000_SYSTIMH, 0x000000FF);
2263                 wrfl();
2264
2265                 /* enable system timer by clearing bit 31 */
2266                 wr32(E1000_TSAUXC, 0x0);
2267                 wrfl();
2268
2269                 timecounter_init(&adapter->clock,
2270                                  &adapter->cycles,
2271                                  ktime_to_ns(ktime_get_real()));
2272                 /*
2273                  * Synchronize our NIC clock against system wall clock. NIC
2274                  * time stamp reading requires ~3us per sample, each sample
2275                  * was pretty stable even under load => only require 10
2276                  * samples for each offset comparison.
2277                  */
2278                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2279                 adapter->compare.source = &adapter->clock;
2280                 adapter->compare.target = ktime_get_real;
2281                 adapter->compare.num_samples = 10;
2282                 timecompare_update(&adapter->compare, 0);
2283                 break;
2284         case e1000_82576:
2285                 /*
2286                  * Initialize hardware timer: we keep it running just in case
2287                  * that some program needs it later on.
2288                  */
2289                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2290                 adapter->cycles.read = igb_read_clock;
2291                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2292                 adapter->cycles.mult = 1;
2293                 /**
2294                  * Scale the NIC clock cycle by a large factor so that
2295                  * relatively small clock corrections can be added or
2296                  * subtracted at each clock tick. The drawbacks of a large
2297                  * factor are a) that the clock register overflows more quickly
2298                  * (not such a big deal) and b) that the increment per tick has
2299                  * to fit into 24 bits.  As a result we need to use a shift of
2300                  * 19 so we can fit a value of 16 into the TIMINCA register.
2301                  */
2302                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2303                 wr32(E1000_TIMINCA,
2304                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2305                                 (16 << IGB_82576_TSYNC_SHIFT));
2306
2307                 /* Set registers so that rollover occurs soon to test this. */
2308                 wr32(E1000_SYSTIML, 0x00000000);
2309                 wr32(E1000_SYSTIMH, 0xFF800000);
2310                 wrfl();
2311
2312                 timecounter_init(&adapter->clock,
2313                                  &adapter->cycles,
2314                                  ktime_to_ns(ktime_get_real()));
2315                 /*
2316                  * Synchronize our NIC clock against system wall clock. NIC
2317                  * time stamp reading requires ~3us per sample, each sample
2318                  * was pretty stable even under load => only require 10
2319                  * samples for each offset comparison.
2320                  */
2321                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2322                 adapter->compare.source = &adapter->clock;
2323                 adapter->compare.target = ktime_get_real;
2324                 adapter->compare.num_samples = 10;
2325                 timecompare_update(&adapter->compare, 0);
2326                 break;
2327         case e1000_82575:
2328                 /* 82575 does not support timesync */
2329         default:
2330                 break;
2331         }
2332
2333 }
2334
2335 /**
2336  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2337  * @adapter: board private structure to initialize
2338  *
2339  * igb_sw_init initializes the Adapter private data structure.
2340  * Fields are initialized based on PCI device information and
2341  * OS network device settings (MTU size).
2342  **/
2343 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2344 {
2345         struct e1000_hw *hw = &adapter->hw;
2346         struct net_device *netdev = adapter->netdev;
2347         struct pci_dev *pdev = adapter->pdev;
2348
2349         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2350
2351         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2352         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2353         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2354         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2355
2356         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2357         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2358
2359         spin_lock_init(&adapter->stats64_lock);
2360 #ifdef CONFIG_PCI_IOV
2361         switch (hw->mac.type) {
2362         case e1000_82576:
2363         case e1000_i350:
2364                 if (max_vfs > 7) {
2365                         dev_warn(&pdev->dev,
2366                                  "Maximum of 7 VFs per PF, using max\n");
2367                         adapter->vfs_allocated_count = 7;
2368                 } else
2369                         adapter->vfs_allocated_count = max_vfs;
2370                 break;
2371         default:
2372                 break;
2373         }
2374 #endif /* CONFIG_PCI_IOV */
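        /* default to one RSS queue per online CPU, capped at the hardware maximum */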
2375         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2376
2377         /*
2378          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2379          * then we should combine the queues into a queue pair in order to
2380          * conserve interrupts due to limited supply
2381          */
2382         if ((adapter->rss_queues > 4) ||
2383             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2384                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2385
2386         /* This call may decrease the number of queues */
2387         if (igb_init_interrupt_scheme(adapter)) {
2388                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2389                 return -ENOMEM;
2390         }
2391
2392         igb_probe_vfs(adapter);
2393
2394         /* Explicitly disable IRQ since the NIC can be in any state. */
2395         igb_irq_disable(adapter);
2396
2397         if (hw->mac.type == e1000_i350)
2398                 adapter->flags &= ~IGB_FLAG_DMAC;
2399
2400         set_bit(__IGB_DOWN, &adapter->state);
2401         return 0;
2402 }
2403
2404 /**
2405  * igb_open - Called when a network interface is made active
2406  * @netdev: network interface device structure
2407  *
2408  * Returns 0 on success, negative value on failure
2409  *
2410  * The open entry point is called when a network interface is made
2411  * active by the system (IFF_UP).  At this point all resources needed
2412  * for transmit and receive operations are allocated, the interrupt
2413  * handler is registered with the OS, the watchdog timer is started,
2414  * and the stack is notified that the interface is ready.
2415  **/
2416 static int igb_open(struct net_device *netdev)
2417 {
2418         struct igb_adapter *adapter = netdev_priv(netdev);
2419         struct e1000_hw *hw = &adapter->hw;
2420         int err;
2421         int i;
2422
2423         /* disallow open during test */
2424         if (test_bit(__IGB_TESTING, &adapter->state))
2425                 return -EBUSY;
2426
2427         netif_carrier_off(netdev);
2428
2429         /* allocate transmit descriptors */
2430         err = igb_setup_all_tx_resources(adapter);
2431         if (err)
2432                 goto err_setup_tx;
2433
2434         /* allocate receive descriptors */
2435         err = igb_setup_all_rx_resources(adapter);
2436         if (err)
2437                 goto err_setup_rx;
2438
2439         igb_power_up_link(adapter);
2440
2441         /* before we allocate an interrupt, we must be ready to handle it.
2442          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2443          * as soon as we call request_irq, so we have to set up our
2444          * clean_rx handler before we do so.  */
2445         igb_configure(adapter);
2446
2447         err = igb_request_irq(adapter);
2448         if (err)
2449                 goto err_req_irq;
2450
2451         /* From here on the code is the same as igb_up() */
2452         clear_bit(__IGB_DOWN, &adapter->state);
2453
2454         for (i = 0; i < adapter->num_q_vectors; i++) {
2455                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2456                 napi_enable(&q_vector->napi);
2457         }
2458
2459         /* Clear any pending interrupts. */
2460         rd32(E1000_ICR);
2461
2462         igb_irq_enable(adapter);
2463
2464         /* notify VFs that reset has been completed */
2465         if (adapter->vfs_allocated_count) {
2466                 u32 reg_data = rd32(E1000_CTRL_EXT);
2467                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2468                 wr32(E1000_CTRL_EXT, reg_data);
2469         }
2470
2471         netif_tx_start_all_queues(netdev);
2472
2473         /* start the watchdog. */
2474         hw->mac.get_link_status = 1;
2475         schedule_work(&adapter->watchdog_task);
2476
2477         return 0;
2478
2479 err_req_irq:
2480         igb_release_hw_control(adapter);
2481         igb_power_down_link(adapter);
2482         igb_free_all_rx_resources(adapter);
2483 err_setup_rx:
2484         igb_free_all_tx_resources(adapter);
2485 err_setup_tx:
2486         igb_reset(adapter);
2487
2488         return err;
2489 }
2490
2491 /**
2492  * igb_close - Disables a network interface
2493  * @netdev: network interface device structure
2494  *
2495  * Returns 0, this is not allowed to fail
2496  *
2497  * The close entry point is called when an interface is de-activated
2498  * by the OS.  The hardware is still under the driver's control, but
2499  * needs to be disabled.  A global MAC reset is issued to stop the
2500  * hardware, and all transmit and receive resources are freed.
2501  **/
2502 static int igb_close(struct net_device *netdev)
2503 {
2504         struct igb_adapter *adapter = netdev_priv(netdev);
2505
2506         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2507         igb_down(adapter);
2508
2509         igb_free_irq(adapter);
2510
2511         igb_free_all_tx_resources(adapter);
2512         igb_free_all_rx_resources(adapter);
2513
2514         return 0;
2515 }
2516
2517 /**
2518  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2519  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2520  *
2521  * Return 0 on success, negative on failure
2522  **/
2523 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2524 {
2525         struct device *dev = tx_ring->dev;
2526         int size;
2527
2528         size = sizeof(struct igb_buffer) * tx_ring->count;
2529         tx_ring->buffer_info = vzalloc(size);
2530         if (!tx_ring->buffer_info)
2531                 goto err;
2532
2533         /* round up to nearest 4K */
2534         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2535         tx_ring->size = ALIGN(tx_ring->size, 4096);
2536
2537         tx_ring->desc = dma_alloc_coherent(dev,
2538                                            tx_ring->size,
2539                                            &tx_ring->dma,
2540                                            GFP_KERNEL);
2541
2542         if (!tx_ring->desc)
2543                 goto err;
2544
2545         tx_ring->next_to_use = 0;
2546         tx_ring->next_to_clean = 0;
2547         return 0;
2548
2549 err:
2550         vfree(tx_ring->buffer_info);
2551         dev_err(dev,
2552                 "Unable to allocate memory for the transmit descriptor ring\n");
2553         return -ENOMEM;
2554 }
2555
2556 /**
2557  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2558  *                                (Descriptors) for all queues
2559  * @adapter: board private structure
2560  *
2561  * Return 0 on success, negative on failure
2562  **/
2563 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2564 {
2565         struct pci_dev *pdev = adapter->pdev;
2566         int i, err = 0;
2567
2568         for (i = 0; i < adapter->num_tx_queues; i++) {
2569                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2570                 if (err) {
2571                         dev_err(&pdev->dev,
2572                                 "Allocation for Tx Queue %u failed\n", i);
2573                         for (i--; i >= 0; i--)
2574                                 igb_free_tx_resources(adapter->tx_ring[i]);
2575                         break;
2576                 }
2577         }
2578
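        /*
         * Map every possible stack tx queue index onto one of the rings we
         * actually allocated, wrapping round-robin when fewer rings exist.
         */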
2579         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2580                 int r_idx = i % adapter->num_tx_queues;
2581                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2582         }
2583         return err;
2584 }
2585
2586 /**
2587  * igb_setup_tctl - configure the transmit control registers
2588  * @adapter: Board private structure
2589  **/
2590 void igb_setup_tctl(struct igb_adapter *adapter)
2591 {
2592         struct e1000_hw *hw = &adapter->hw;
2593         u32 tctl;
2594
2595         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2596         wr32(E1000_TXDCTL(0), 0);
2597
2598         /* Program the Transmit Control Register */
2599         tctl = rd32(E1000_TCTL);
2600         tctl &= ~E1000_TCTL_CT;
2601         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2602                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2603
2604         igb_config_collision_dist(hw);
2605
2606         /* Enable transmits */
2607         tctl |= E1000_TCTL_EN;
2608
2609         wr32(E1000_TCTL, tctl);
2610 }
2611
2612 /**
2613  * igb_configure_tx_ring - Configure transmit ring after Reset
2614  * @adapter: board private structure
2615  * @ring: tx ring to configure
2616  *
2617  * Configure a transmit ring after a reset.
2618  **/
2619 void igb_configure_tx_ring(struct igb_adapter *adapter,
2620                            struct igb_ring *ring)
2621 {
2622         struct e1000_hw *hw = &adapter->hw;
2623         u32 txdctl;
2624         u64 tdba = ring->dma;
2625         int reg_idx = ring->reg_idx;
2626
2627         /* disable the queue */
2628         txdctl = rd32(E1000_TXDCTL(reg_idx));
2629         wr32(E1000_TXDCTL(reg_idx),
2630                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2631         wrfl();
2632         mdelay(10);
2633
2634         wr32(E1000_TDLEN(reg_idx),
2635                         ring->count * sizeof(union e1000_adv_tx_desc));
2636         wr32(E1000_TDBAL(reg_idx),
2637                         tdba & 0x00000000ffffffffULL);
2638         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2639
2640         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2641         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2642         writel(0, ring->head);
2643         writel(0, ring->tail);
2644
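        /* prefetch, host and write-back thresholds for this queue */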
2645         txdctl |= IGB_TX_PTHRESH;
2646         txdctl |= IGB_TX_HTHRESH << 8;
2647         txdctl |= IGB_TX_WTHRESH << 16;
2648
2649         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2650         wr32(E1000_TXDCTL(reg_idx), txdctl);
2651 }
2652
2653 /**
2654  * igb_configure_tx - Configure transmit Unit after Reset
2655  * @adapter: board private structure
2656  *
2657  * Configure the Tx unit of the MAC after a reset.
2658  **/
2659 static void igb_configure_tx(struct igb_adapter *adapter)
2660 {
2661         int i;
2662
2663         for (i = 0; i < adapter->num_tx_queues; i++)
2664                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2665 }
2666
2667 /**
2668  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2669  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2670  *
2671  * Returns 0 on success, negative on failure
2672  **/
2673 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2674 {
2675         struct device *dev = rx_ring->dev;
2676         int size, desc_len;
2677
2678         size = sizeof(struct igb_buffer) * rx_ring->count;
2679         rx_ring->buffer_info = vzalloc(size);
2680         if (!rx_ring->buffer_info)
2681                 goto err;
2682
2683         desc_len = sizeof(union e1000_adv_rx_desc);
2684
2685         /* Round up to nearest 4K */
2686         rx_ring->size = rx_ring->count * desc_len;
2687         rx_ring->size = ALIGN(rx_ring->size, 4096);
2688
2689         rx_ring->desc = dma_alloc_coherent(dev,
2690                                            rx_ring->size,
2691                                            &rx_ring->dma,
2692                                            GFP_KERNEL);
2693
2694         if (!rx_ring->desc)
2695                 goto err;
2696
2697         rx_ring->next_to_clean = 0;
2698         rx_ring->next_to_use = 0;
2699
2700         return 0;
2701
2702 err:
2703         vfree(rx_ring->buffer_info);
2704         rx_ring->buffer_info = NULL;
2705         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2706                 " ring\n");
2707         return -ENOMEM;
2708 }
2709
2710 /**
2711  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2712  *                                (Descriptors) for all queues
2713  * @adapter: board private structure
2714  *
2715  * Return 0 on success, negative on failure
2716  **/
2717 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2718 {
2719         struct pci_dev *pdev = adapter->pdev;
2720         int i, err = 0;
2721
2722         for (i = 0; i < adapter->num_rx_queues; i++) {
2723                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2724                 if (err) {
2725                         dev_err(&pdev->dev,
2726                                 "Allocation for Rx Queue %u failed\n", i);
2727                         for (i--; i >= 0; i--)
2728                                 igb_free_rx_resources(adapter->rx_ring[i]);
2729                         break;
2730                 }
2731         }
2732
2733         return err;
2734 }
2735
2736 /**
2737  * igb_setup_mrqc - configure the multiple receive queue control registers
2738  * @adapter: Board private structure
2739  **/
2740 static void igb_setup_mrqc(struct igb_adapter *adapter)
2741 {
2742         struct e1000_hw *hw = &adapter->hw;
2743         u32 mrqc, rxcsum;
2744         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2745         union e1000_reta {
2746                 u32 dword;
2747                 u8  bytes[4];
2748         } reta;
2749         static const u8 rsshash[40] = {
2750                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2751                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2752                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2753                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2754
2755         /* Fill out hash function seeds */
2756         for (j = 0; j < 10; j++) {
2757                 u32 rsskey = rsshash[(j * 4)];
2758                 rsskey |= rsshash[(j * 4) + 1] << 8;
2759                 rsskey |= rsshash[(j * 4) + 2] << 16;
2760                 rsskey |= rsshash[(j * 4) + 3] << 24;
2761                 array_wr32(E1000_RSSRK(0), j, rsskey);
2762         }
2763
2764         num_rx_queues = adapter->rss_queues;
2765
2766         if (adapter->vfs_allocated_count) {
2767                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2768                 switch (hw->mac.type) {
2769                 case e1000_i350:
2770                 case e1000_82580:
2771                         num_rx_queues = 1;
2772                         shift = 0;
2773                         break;
2774                 case e1000_82576:
2775                         shift = 3;
2776                         num_rx_queues = 2;
2777                         break;
2778                 case e1000_82575:
2779                         shift = 2;
2780                         shift2 = 6;
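                             /* fall through */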
2781                 default:
2782                         break;
2783                 }
2784         } else {
2785                 if (hw->mac.type == e1000_82575)
2786                         shift = 6;
2787         }
2788
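             /*
              * Fill the 128-entry redirection table one dword (four entries)
              * at a time; each byte selects the Rx queue for that hash
              * bucket, shifted into the position this MAC type expects.
              */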
2789         for (j = 0; j < (32 * 4); j++) {
2790                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2791                 if (shift2)
2792                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2793                 if ((j & 3) == 3)
2794                         wr32(E1000_RETA(j >> 2), reta.dword);
2795         }
2796
2797         /*
2798          * Disable raw packet checksumming so that RSS hash is placed in
2799          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2800          * offloads as they are enabled by default
2801          */
2802         rxcsum = rd32(E1000_RXCSUM);
2803         rxcsum |= E1000_RXCSUM_PCSD;
2804
2805         if (adapter->hw.mac.type >= e1000_82576)
2806                 /* Enable Receive Checksum Offload for SCTP */
2807                 rxcsum |= E1000_RXCSUM_CRCOFL;
2808
2809         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2810         wr32(E1000_RXCSUM, rxcsum);
2811
2812         /* If VMDq is enabled then we set the appropriate mode for that, else
2813          * we default to RSS so that an RSS hash is calculated per packet even
2814          * if we are only using one queue */
2815         if (adapter->vfs_allocated_count) {
2816                 if (hw->mac.type > e1000_82575) {
2817                         /* Set the default pool for the PF's first queue */
2818                         u32 vtctl = rd32(E1000_VT_CTL);
2819                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2820                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2821                         vtctl |= adapter->vfs_allocated_count <<
2822                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2823                         wr32(E1000_VT_CTL, vtctl);
2824                 }
2825                 if (adapter->rss_queues > 1)
2826                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2827                 else
2828                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2829         } else {
2830                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2831         }
2832         igb_vmm_control(adapter);
2833
2834         /*
2835          * Generate RSS hash based on TCP port numbers and/or
2836          * IPv4/v6 src and dst addresses since UDP cannot be
2837          * hashed reliably due to IP fragmentation
2838          */
2839         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2840                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2841                 E1000_MRQC_RSS_FIELD_IPV6 |
2842                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2843                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2844
2845         wr32(E1000_MRQC, mrqc);
2846 }
2847
2848 /**
2849  * igb_setup_rctl - configure the receive control registers
2850  * @adapter: Board private structure
2851  **/
2852 void igb_setup_rctl(struct igb_adapter *adapter)
2853 {
2854         struct e1000_hw *hw = &adapter->hw;
2855         u32 rctl;
2856
2857         rctl = rd32(E1000_RCTL);
2858
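             /* clear the multicast offset field and any loopback mode bits */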
2859         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2860         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2861
2862         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2863                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2864
2865         /*
2866          * enable stripping of CRC. It's unlikely this will break BMC
2867          * redirection as it did with e1000. Newer features require
2868          * that the HW strips the CRC.
2869          */
2870         rctl |= E1000_RCTL_SECRC;
2871
2872         /* disable store bad packets and clear size bits. */
2873         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2874
2875         /* enable LPE to prevent packets larger than max_frame_size */
2876         rctl |= E1000_RCTL_LPE;
2877
2878         /* disable queue 0 to prevent tail write w/o re-config */
2879         wr32(E1000_RXDCTL(0), 0);
2880
2881         /* Attention!!!  For SR-IOV PF driver operations you must enable
2882          * queue drop for all VF and PF queues to prevent head of line blocking
2883          * if an un-trusted VF does not provide descriptors to hardware.
2884          */
2885         if (adapter->vfs_allocated_count) {
2886                 /* set all queue drop enable bits */
2887                 wr32(E1000_QDE, ALL_QUEUES);
2888         }
2889
2890         wr32(E1000_RCTL, rctl);
2891 }
2892
2893 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2894                                    int vfn)
2895 {
2896         struct e1000_hw *hw = &adapter->hw;
2897         u32 vmolr;
2898
2899         /* if it isn't the PF, check to see if VFs are enabled and
2900          * increase the size to support vlan tags */
2901         if (vfn < adapter->vfs_allocated_count &&
2902             adapter->vf_data[vfn].vlans_enabled)
2903                 size += VLAN_TAG_SIZE;
2904
2905         vmolr = rd32(E1000_VMOLR(vfn));
2906         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2907         vmolr |= size | E1000_VMOLR_LPE;
2908         wr32(E1000_VMOLR(vfn), vmolr);
2909
2910         return 0;
2911 }
2912
2913 /**
2914  * igb_rlpml_set - set maximum receive packet size
2915  * @adapter: board private structure
2916  *
2917  * Configure maximum receivable packet size.
2918  **/
2919 static void igb_rlpml_set(struct igb_adapter *adapter)
2920 {
2921         u32 max_frame_size = adapter->max_frame_size;
2922         struct e1000_hw *hw = &adapter->hw;
2923         u16 pf_id = adapter->vfs_allocated_count;
2924
2925         if (adapter->vlgrp)
2926                 max_frame_size += VLAN_TAG_SIZE;
2927
2928         /* if vfs are enabled we set RLPML to the largest possible request
2929          * size and set the VMOLR RLPML to the size we need */
2930         if (pf_id) {
2931                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2932                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2933         }
2934
2935         wr32(E1000_RLPML, max_frame_size);
2936 }
2937
2938 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2939                                  int vfn, bool aupe)
2940 {
2941         struct e1000_hw *hw = &adapter->hw;
2942         u32 vmolr;
2943
2944         /*
2945          * This register exists only on 82576 and newer so if we are older then
2946          * we should exit and do nothing
2947          */
2948         if (hw->mac.type < e1000_82576)
2949                 return;
2950
2951         vmolr = rd32(E1000_VMOLR(vfn));
2952         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2953         if (aupe)
2954                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2955         else
2956                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2957
2958         /* clear all bits that might not be set */
2959         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2960
2961         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2962                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2963         /*
2964          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2965          * multicast packets
2966          */
2967         if (vfn <= adapter->vfs_allocated_count)
2968                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2969
2970         wr32(E1000_VMOLR(vfn), vmolr);
2971 }
2972
2973 /**
2974  * igb_configure_rx_ring - Configure a receive ring after Reset
2975  * @adapter: board private structure
2976  * @ring: receive ring to be configured
2977  *
2978  * Configure the Rx unit of the MAC after a reset.
2979  **/
2980 void igb_configure_rx_ring(struct igb_adapter *adapter,
2981                            struct igb_ring *ring)
2982 {
2983         struct e1000_hw *hw = &adapter->hw;
2984         u64 rdba = ring->dma;
2985         int reg_idx = ring->reg_idx;
2986         u32 srrctl, rxdctl;
2987
2988         /* disable the queue */
2989         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2990         wr32(E1000_RXDCTL(reg_idx),
2991                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2992
2993         /* Set DMA base address registers */
2994         wr32(E1000_RDBAL(reg_idx),
2995              rdba & 0x00000000ffffffffULL);
2996         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2997         wr32(E1000_RDLEN(reg_idx),
2998                        ring->count * sizeof(union e1000_adv_rx_desc));
2999
3000         /* initialize head and tail */
3001         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
3002         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3003         writel(0, ring->head);
3004         writel(0, ring->tail);
3005
3006         /* set descriptor configuration */
3007         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
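                     /*
                      * Header split: round the header buffer up to a 64 byte
                      * boundary and give the packet buffer half a page,
                      * capped at 16K.
                      */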
3008                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
3009                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3010 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3011                 srrctl |= IGB_RXBUFFER_16384 >>
3012                           E1000_SRRCTL_BSIZEPKT_SHIFT;
3013 #else
3014                 srrctl |= (PAGE_SIZE / 2) >>
3015                           E1000_SRRCTL_BSIZEPKT_SHIFT;
3016 #endif
3017                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3018         } else {
3019                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
3020                          E1000_SRRCTL_BSIZEPKT_SHIFT;
3021                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3022         }
3023         if (hw->mac.type == e1000_82580)
3024                 srrctl |= E1000_SRRCTL_TIMESTAMP;
3025         /* Only set Drop Enable if we are supporting multiple queues */
3026         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3027                 srrctl |= E1000_SRRCTL_DROP_EN;
3028
3029         wr32(E1000_SRRCTL(reg_idx), srrctl);
3030
3031         /* set filtering for VMDQ pools */
3032         igb_set_vmolr(adapter, reg_idx & 0x7, true);
3033
3034         /* enable receive descriptor fetching */
3035         rxdctl = rd32(E1000_RXDCTL(reg_idx));
3036         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
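             /* clear the prefetch, host and write-back threshold fields */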
3037         rxdctl &= 0xFFF00000;
3038         rxdctl |= IGB_RX_PTHRESH;
3039         rxdctl |= IGB_RX_HTHRESH << 8;
3040         rxdctl |= IGB_RX_WTHRESH << 16;
3041         wr32(E1000_RXDCTL(reg_idx), rxdctl);
3042 }
3043
3044 /**
3045  * igb_configure_rx - Configure receive Unit after Reset
3046  * @adapter: board private structure
3047  *
3048  * Configure the Rx unit of the MAC after a reset.
3049  **/
3050 static void igb_configure_rx(struct igb_adapter *adapter)
3051 {
3052         int i;
3053
3054         /* set UTA to appropriate mode */
3055         igb_set_uta(adapter);
3056
3057         /* set the correct pool for the PF default MAC address in entry 0 */
3058         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3059                          adapter->vfs_allocated_count);
3060
3061         /* Setup the HW Rx Head and Tail Descriptor Pointers and
3062          * the Base and Length of the Rx Descriptor Ring */
3063         for (i = 0; i < adapter->num_rx_queues; i++)
3064                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3065 }
3066
3067 /**
3068  * igb_free_tx_resources - Free Tx Resources per Queue
3069  * @tx_ring: Tx descriptor ring for a specific queue
3070  *
3071  * Free all transmit software resources
3072  **/
3073 void igb_free_tx_resources(struct igb_ring *tx_ring)
3074 {
3075         igb_clean_tx_ring(tx_ring);
3076
3077         vfree(tx_ring->buffer_info);
3078         tx_ring->buffer_info = NULL;
3079
3080         /* if not set, then don't free */
3081         if (!tx_ring->desc)
3082                 return;
3083
3084         dma_free_coherent(tx_ring->dev, tx_ring->size,
3085                           tx_ring->desc, tx_ring->dma);
3086
3087         tx_ring->desc = NULL;
3088 }
3089
3090 /**
3091  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3092  * @adapter: board private structure
3093  *
3094  * Free all transmit software resources
3095  **/
3096 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3097 {
3098         int i;
3099
3100         for (i = 0; i < adapter->num_tx_queues; i++)
3101                 igb_free_tx_resources(adapter->tx_ring[i]);
3102 }
3103
3104 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3105                                     struct igb_buffer *buffer_info)
3106 {
3107         if (buffer_info->dma) {
3108                 if (buffer_info->mapped_as_page)
3109                         dma_unmap_page(tx_ring->dev,
3110                                         buffer_info->dma,
3111                                         buffer_info->length,
3112                                         DMA_TO_DEVICE);
3113                 else
3114                         dma_unmap_single(tx_ring->dev,
3115                                         buffer_info->dma,
3116                                         buffer_info->length,
3117                                         DMA_TO_DEVICE);
3118                 buffer_info->dma = 0;
3119         }
3120         if (buffer_info->skb) {
3121                 dev_kfree_skb_any(buffer_info->skb);
3122                 buffer_info->skb = NULL;
3123         }
3124         buffer_info->time_stamp = 0;
3125         buffer_info->length = 0;
3126         buffer_info->next_to_watch = 0;
3127         buffer_info->mapped_as_page = false;
3128 }
3129
3130 /**
3131  * igb_clean_tx_ring - Free Tx Buffers
3132  * @tx_ring: ring to be cleaned
3133  **/
3134 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3135 {
3136         struct igb_buffer *buffer_info;
3137         unsigned long size;
3138         unsigned int i;
3139
3140         if (!tx_ring->buffer_info)
3141                 return;
3142         /* Free all the Tx ring sk_buffs */
3143
3144         for (i = 0; i < tx_ring->count; i++) {
3145                 buffer_info = &tx_ring->buffer_info[i];
3146                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3147         }
3148
3149         size = sizeof(struct igb_buffer) * tx_ring->count;
3150         memset(tx_ring->buffer_info, 0, size);
3151
3152         /* Zero out the descriptor ring */
3153         memset(tx_ring->desc, 0, tx_ring->size);
3154
3155         tx_ring->next_to_use = 0;
3156         tx_ring->next_to_clean = 0;
3157 }
3158
3159 /**
3160  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3161  * @adapter: board private structure
3162  **/
3163 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3164 {
3165         int i;
3166
3167         for (i = 0; i < adapter->num_tx_queues; i++)
3168                 igb_clean_tx_ring(adapter->tx_ring[i]);
3169 }
3170
3171 /**
3172  * igb_free_rx_resources - Free Rx Resources
3173  * @rx_ring: ring to clean the resources from
3174  *
3175  * Free all receive software resources
3176  **/
3177 void igb_free_rx_resources(struct igb_ring *rx_ring)
3178 {
3179         igb_clean_rx_ring(rx_ring);
3180
3181         vfree(rx_ring->buffer_info);
3182         rx_ring->buffer_info = NULL;
3183
3184         /* if not set, then don't free */
3185         if (!rx_ring->desc)
3186                 return;
3187
3188         dma_free_coherent(rx_ring->dev, rx_ring->size,
3189                           rx_ring->desc, rx_ring->dma);
3190
3191         rx_ring->desc = NULL;
3192 }
3193
3194 /**
3195  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3196  * @adapter: board private structure
3197  *
3198  * Free all receive software resources
3199  **/
3200 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3201 {
3202         int i;
3203
3204         for (i = 0; i < adapter->num_rx_queues; i++)
3205                 igb_free_rx_resources(adapter->rx_ring[i]);
3206 }
3207
3208 /**
3209  * igb_clean_rx_ring - Free Rx Buffers per Queue
3210  * @rx_ring: ring to free buffers from
3211  **/
3212 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3213 {
3214         struct igb_buffer *buffer_info;
3215         unsigned long size;
3216         unsigned int i;
3217
3218         if (!rx_ring->buffer_info)
3219                 return;
3220
3221         /* Free all the Rx ring sk_buffs */
3222         for (i = 0; i < rx_ring->count; i++) {
3223                 buffer_info = &rx_ring->buffer_info[i];
3224                 if (buffer_info->dma) {
3225                         dma_unmap_single(rx_ring->dev,
3226                                          buffer_info->dma,
3227                                          rx_ring->rx_buffer_len,
3228                                          DMA_FROM_DEVICE);
3229                         buffer_info->dma = 0;
3230                 }
3231
3232                 if (buffer_info->skb) {
3233                         dev_kfree_skb(buffer_info->skb);
3234                         buffer_info->skb = NULL;
3235                 }
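                     /* unmap the half page used as the packet buffer (header split) */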
3236                 if (buffer_info->page_dma) {
3237                         dma_unmap_page(rx_ring->dev,
3238                                        buffer_info->page_dma,
3239                                        PAGE_SIZE / 2,
3240                                        DMA_FROM_DEVICE);
3241                         buffer_info->page_dma = 0;
3242                 }
3243                 if (buffer_info->page) {
3244                         put_page(buffer_info->page);
3245                         buffer_info->page = NULL;
3246                         buffer_info->page_offset = 0;
3247                 }
3248         }
3249
3250         size = sizeof(struct igb_buffer) * rx_ring->count;
3251         memset(rx_ring->buffer_info, 0, size);
3252
3253         /* Zero out the descriptor ring */
3254         memset(rx_ring->desc, 0, rx_ring->size);
3255
3256         rx_ring->next_to_clean = 0;
3257         rx_ring->next_to_use = 0;
3258 }
3259
3260 /**
3261  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3262  * @adapter: board private structure
3263  **/
3264 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3265 {
3266         int i;
3267
3268         for (i = 0; i < adapter->num_rx_queues; i++)
3269                 igb_clean_rx_ring(adapter->rx_ring[i]);
3270 }
3271
3272 /**
3273  * igb_set_mac - Change the Ethernet Address of the NIC
3274  * @netdev: network interface device structure
3275  * @p: pointer to an address structure
3276  *
3277  * Returns 0 on success, negative on failure
3278  **/
3279 static int igb_set_mac(struct net_device *netdev, void *p)
3280 {
3281         struct igb_adapter *adapter = netdev_priv(netdev);
3282         struct e1000_hw *hw = &adapter->hw;
3283         struct sockaddr *addr = p;
3284
3285         if (!is_valid_ether_addr(addr->sa_data))
3286                 return -EADDRNOTAVAIL;
3287
3288         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3289         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3290
3291         /* set the correct pool for the new PF MAC address in entry 0 */
3292         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3293                          adapter->vfs_allocated_count);
3294
3295         return 0;
3296 }
3297
3298 /**
3299  * igb_write_mc_addr_list - write multicast addresses to MTA
3300  * @netdev: network interface device structure
3301  *
3302  * Writes multicast address list to the MTA hash table.
3303  * Returns: -ENOMEM on failure
3304  *                0 on no addresses written
3305  *                X on writing X addresses to MTA
3306  **/
3307 static int igb_write_mc_addr_list(struct net_device *netdev)
3308 {
3309         struct igb_adapter *adapter = netdev_priv(netdev);
3310         struct e1000_hw *hw = &adapter->hw;
3311         struct netdev_hw_addr *ha;
3312         u8  *mta_list;
3313         int i;
3314
3315         if (netdev_mc_empty(netdev)) {
3316                 /* nothing to program, so clear mc list */
3317                 igb_update_mc_addr_list(hw, NULL, 0);
3318                 igb_restore_vf_multicasts(adapter);
3319                 return 0;
3320         }
3321
3322         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3323         if (!mta_list)
3324                 return -ENOMEM;
3325
3326         /* The shared function expects a packed array of only addresses. */
3327         i = 0;
3328         netdev_for_each_mc_addr(ha, netdev)
3329                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3330
3331         igb_update_mc_addr_list(hw, mta_list, i);
3332         kfree(mta_list);
3333
3334         return netdev_mc_count(netdev);
3335 }
3336
3337 /**
3338  * igb_write_uc_addr_list - write unicast addresses to RAR table
3339  * @netdev: network interface device structure
3340  *
3341  * Writes unicast address list to the RAR table.
3342  * Returns: -ENOMEM on failure/insufficient address space
3343  *                0 on no addresses written
3344  *                X on writing X addresses to the RAR table
3345  **/
3346 static int igb_write_uc_addr_list(struct net_device *netdev)
3347 {
3348         struct igb_adapter *adapter = netdev_priv(netdev);
3349         struct e1000_hw *hw = &adapter->hw;
3350         unsigned int vfn = adapter->vfs_allocated_count;
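             /* entry 0 holds the PF default MAC and one entry is reserved per VF */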
3351         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3352         int count = 0;
3353
3354         /* return ENOMEM indicating insufficient memory for addresses */
3355         if (netdev_uc_count(netdev) > rar_entries)
3356                 return -ENOMEM;
3357
3358         if (!netdev_uc_empty(netdev) && rar_entries) {
3359                 struct netdev_hw_addr *ha;
3360
3361                 netdev_for_each_uc_addr(ha, netdev) {
3362                         if (!rar_entries)
3363                                 break;
3364                         igb_rar_set_qsel(adapter, ha->addr,
3365                                          rar_entries--,
3366                                          vfn);
3367                         count++;
3368                 }
3369         }
3370         /* clear the unused RAR entries in reverse order to avoid write combining */
3371         for (; rar_entries > 0 ; rar_entries--) {
3372                 wr32(E1000_RAH(rar_entries), 0);
3373                 wr32(E1000_RAL(rar_entries), 0);
3374         }
3375         wrfl();
3376
3377         return count;
3378 }
3379
3380 /**
3381  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3382  * @netdev: network interface device structure
3383  *
3384  * The set_rx_mode entry point is called whenever the unicast or multicast
3385  * address lists or the network interface flags are updated.  This routine is
3386  * responsible for configuring the hardware for proper unicast, multicast,
3387  * promiscuous mode, and all-multi behavior.
3388  **/
3389 static void igb_set_rx_mode(struct net_device *netdev)
3390 {
3391         struct igb_adapter *adapter = netdev_priv(netdev);
3392         struct e1000_hw *hw = &adapter->hw;
3393         unsigned int vfn = adapter->vfs_allocated_count;
3394         u32 rctl, vmolr = 0;
3395         int count;
3396
3397         /* Check for Promiscuous and All Multicast modes */
3398         rctl = rd32(E1000_RCTL);
3399
3400         /* clear the affected bits */
3401         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3402
3403         if (netdev->flags & IFF_PROMISC) {
3404                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3405                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3406         } else {
3407                 if (netdev->flags & IFF_ALLMULTI) {
3408                         rctl |= E1000_RCTL_MPE;
3409                         vmolr |= E1000_VMOLR_MPME;
3410                 } else {
3411                         /*
3412                          * Write addresses to the MTA, if the attempt fails
3413                          * then we should just turn on promiscuous mode so
3414                          * that we can at least receive multicast traffic
3415                          */
3416                         count = igb_write_mc_addr_list(netdev);
3417                         if (count < 0) {
3418                                 rctl |= E1000_RCTL_MPE;
3419                                 vmolr |= E1000_VMOLR_MPME;
3420                         } else if (count) {
3421                                 vmolr |= E1000_VMOLR_ROMPE;
3422                         }
3423                 }
3424                 /*
3425                  * Write addresses to available RAR registers, if there is not
3426                  * sufficient space to store all the addresses then enable
3427                  * unicast promiscuous mode
3428                  */
3429                 count = igb_write_uc_addr_list(netdev);
3430                 if (count < 0) {
3431                         rctl |= E1000_RCTL_UPE;
3432                         vmolr |= E1000_VMOLR_ROPE;
3433                 }
3434                 rctl |= E1000_RCTL_VFE;
3435         }
3436         wr32(E1000_RCTL, rctl);
3437
3438         /*
3439          * In order to support SR-IOV and eventually VMDq it is necessary to set
3440          * the VMOLR to enable the appropriate modes.  Without this workaround,
3441          * VLAN tag stripping is not done for frames that only arrive because
3442          * we are the default pool.
3443          */
3444         if (hw->mac.type < e1000_82576)
3445                 return;
3446
3447         vmolr |= rd32(E1000_VMOLR(vfn)) &
3448                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3449         wr32(E1000_VMOLR(vfn), vmolr);
3450         igb_restore_vf_multicasts(adapter);
3451 }
3452
3453 static void igb_check_wvbr(struct igb_adapter *adapter)
3454 {
3455         struct e1000_hw *hw = &adapter->hw;
3456         u32 wvbr = 0;
3457
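             /*
              * WVBR latches "wrong VM behaviour" (anti-spoof) events per VF;
              * accumulate them here for igb_spoof_check() to report.
              */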
3458         switch (hw->mac.type) {
3459         case e1000_82576:
3460         case e1000_i350:
3461                 if (!(wvbr = rd32(E1000_WVBR)))
3462                         return;
3463                 break;
3464         default:
3465                 break;
3466         }
3467
3468         adapter->wvbr |= wvbr;
3469 }
3470
3471 #define IGB_STAGGERED_QUEUE_OFFSET 8
3472
3473 static void igb_spoof_check(struct igb_adapter *adapter)
3474 {
3475         int j;
3476
3477         if (!adapter->wvbr)
3478                 return;
3479
3480         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3481                 if (adapter->wvbr & (1 << j) ||
3482                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3483                         dev_warn(&adapter->pdev->dev,
3484                                 "Spoof event(s) detected on VF %d\n", j);
3485                         adapter->wvbr &=
3486                                 ~((1 << j) |
3487                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3488                 }
3489         }
3490 }
3491
3492 /* Need to wait a few seconds after link up to get diagnostic information from
3493  * the phy */
3494 static void igb_update_phy_info(unsigned long data)
3495 {
3496         struct igb_adapter *adapter = (struct igb_adapter *) data;
3497         igb_get_phy_info(&adapter->hw);
3498 }
3499
3500 /**
3501  * igb_has_link - check shared code for link and determine up/down
3502  * @adapter: pointer to driver private info
3503  **/
3504 bool igb_has_link(struct igb_adapter *adapter)
3505 {
3506         struct e1000_hw *hw = &adapter->hw;
3507         bool link_active = false;
3508         s32 ret_val = 0;
3509
3510         /* get_link_status is set on LSC (link status) interrupt or
3511          * rx sequence error interrupt.  get_link_status will stay
3512          * false until the e1000_check_for_link establishes link
3513          * for copper adapters ONLY
3514          */
3515         switch (hw->phy.media_type) {
3516         case e1000_media_type_copper:
3517                 if (hw->mac.get_link_status) {
3518                         ret_val = hw->mac.ops.check_for_link(hw);
3519                         link_active = !hw->mac.get_link_status;
3520                 } else {
3521                         link_active = true;
3522                 }
3523                 break;
3524         case e1000_media_type_internal_serdes:
3525                 ret_val = hw->mac.ops.check_for_link(hw);
3526                 link_active = hw->mac.serdes_has_link;
3527                 break;
3528         default:
3529         case e1000_media_type_unknown:
3530                 break;
3531         }
3532
3533         return link_active;
3534 }
3535
3536 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3537 {
3538         bool ret = false;
3539         u32 ctrl_ext, thstat;
3540
3541         /* check for thermal sensor event on i350, copper only */
3542         if (hw->mac.type == e1000_i350) {
3543                 thstat = rd32(E1000_THSTAT);
3544                 ctrl_ext = rd32(E1000_CTRL_EXT);
3545
3546                 if ((hw->phy.media_type == e1000_media_type_copper) &&
3547                     !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3548                         ret = !!(thstat & event);
3549                 }
3550         }
3551
3552         return ret;
3553 }
3554
3555 /**
3556  * igb_watchdog - Timer Call-back
3557  * @data: pointer to adapter cast into an unsigned long
3558  **/
3559 static void igb_watchdog(unsigned long data)
3560 {
3561         struct igb_adapter *adapter = (struct igb_adapter *)data;
3562         /* Do the rest outside of interrupt context */
3563         schedule_work(&adapter->watchdog_task);
3564 }
3565
3566 static void igb_watchdog_task(struct work_struct *work)
3567 {
3568         struct igb_adapter *adapter = container_of(work,
3569                                                    struct igb_adapter,
3570                                                    watchdog_task);
3571         struct e1000_hw *hw = &adapter->hw;
3572         struct net_device *netdev = adapter->netdev;
3573         u32 link;
3574         int i;
3575
3576         link = igb_has_link(adapter);
3577         if (link) {
3578                 if (!netif_carrier_ok(netdev)) {
3579                         u32 ctrl;
3580                         hw->mac.ops.get_speed_and_duplex(hw,
3581                                                          &adapter->link_speed,
3582                                                          &adapter->link_duplex);
3583
3584                         ctrl = rd32(E1000_CTRL);
3585                         /* Link status message must follow this format */
3586                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3587                                  "Flow Control: %s\n",
3588                                netdev->name,
3589                                adapter->link_speed,
3590                                adapter->link_duplex == FULL_DUPLEX ?
3591                                  "Full Duplex" : "Half Duplex",
3592                                ((ctrl & E1000_CTRL_TFCE) &&
3593                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3594                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3595                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3596
3597                         /* check for thermal sensor event */
3598                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3599                                 printk(KERN_INFO "igb: %s The network adapter "
3600                                                  "link speed was downshifted "
3601                                                  "because it overheated.\n",
3602                                                  netdev->name);
3603                         }
3604
3605                         /* adjust timeout factor according to speed/duplex */
3606                         adapter->tx_timeout_factor = 1;
3607                         switch (adapter->link_speed) {
3608                         case SPEED_10:
3609                                 adapter->tx_timeout_factor = 14;
3610                                 break;
3611                         case SPEED_100:
3612                                 /* maybe add some timeout factor ? */
3613                                 break;
3614                         }
3615
3616                         netif_carrier_on(netdev);
3617
3618                         igb_ping_all_vfs(adapter);
3619                         igb_check_vf_rate_limit(adapter);
3620
3621                         /* link state has changed, schedule phy info update */
3622                         if (!test_bit(__IGB_DOWN, &adapter->state))
3623                                 mod_timer(&adapter->phy_info_timer,
3624                                           round_jiffies(jiffies + 2 * HZ));
3625                 }
3626         } else {
3627                 if (netif_carrier_ok(netdev)) {
3628                         adapter->link_speed = 0;
3629                         adapter->link_duplex = 0;
3630
3631                         /* check for thermal sensor event */
3632                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3633                                 printk(KERN_ERR "igb: %s The network adapter "
3634                                                 "was stopped because it "
3635                                                 "overheated.\n",
3636                                                 netdev->name);
3637                         }
3638
3639                         /* Link status message must follow this format */
3640                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3641                                netdev->name);
3642                         netif_carrier_off(netdev);
3643
3644                         igb_ping_all_vfs(adapter);
3645
3646                         /* link state has changed, schedule phy info update */
3647                         if (!test_bit(__IGB_DOWN, &adapter->state))
3648                                 mod_timer(&adapter->phy_info_timer,
3649                                           round_jiffies(jiffies + 2 * HZ));
3650                 }
3651         }
3652
3653         spin_lock(&adapter->stats64_lock);
3654         igb_update_stats(adapter, &adapter->stats64);
3655         spin_unlock(&adapter->stats64_lock);
3656
3657         for (i = 0; i < adapter->num_tx_queues; i++) {
3658                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3659                 if (!netif_carrier_ok(netdev)) {
3660                         /* We've lost link, so the controller stops DMA,
3661                          * but we've got queued Tx work that's never going
3662                          * to get done, so reset controller to flush Tx.
3663                          * (Do the reset outside of interrupt context). */
3664                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3665                                 adapter->tx_timeout_count++;
3666                                 schedule_work(&adapter->reset_task);
3667                                 /* return immediately since reset is imminent */
3668                                 return;
3669                         }
3670                 }
3671
3672                 /* Force detection of hung controller every watchdog period */
3673                 tx_ring->detect_tx_hung = true;
3674         }
3675
3676         /* Cause software interrupt to ensure rx ring is cleaned */
3677         if (adapter->msix_entries) {
3678                 u32 eics = 0;
3679                 for (i = 0; i < adapter->num_q_vectors; i++) {
3680                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3681                         eics |= q_vector->eims_value;
3682                 }
3683                 wr32(E1000_EICS, eics);
3684         } else {
3685                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3686         }
3687
3688         igb_spoof_check(adapter);
3689
3690         /* Reset the timer */
3691         if (!test_bit(__IGB_DOWN, &adapter->state))
3692                 mod_timer(&adapter->watchdog_timer,
3693                           round_jiffies(jiffies + 2 * HZ));
3694 }
3695
3696 enum latency_range {
3697         lowest_latency = 0,
3698         low_latency = 1,
3699         bulk_latency = 2,
3700         latency_invalid = 255
3701 };
3702
3703 /**
3704  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3705  * @q_vector: pointer to q_vector
3706  *
3707  *      Stores a new ITR value based strictly on packet size.  This
3708  *      algorithm is less sophisticated than that used in igb_update_itr,
3709  *      due to the difficulty of synchronizing statistics across multiple
3710  *      receive rings.  The divisors and thresholds used by this function
3711  *      were determined based on theoretical maximum wire speed and testing
3712  *      data, in order to minimize response time while increasing bulk
3713  *      throughput.
3714  *      This functionality is controlled by the InterruptThrottleRate module
3715  *      parameter (see igb_param.c)
3716  *      NOTE:  This function is called only when operating in a multiqueue
3717  *             receive environment.
3718  **/
3719 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3720 {
3721         int new_val = q_vector->itr_val;
3722         int avg_wire_size = 0;
3723         struct igb_adapter *adapter = q_vector->adapter;
3724         struct igb_ring *ring;
3725         unsigned int packets;
3726
3727         /* For non-gigabit speeds, just fix the interrupt rate at roughly
3728          * 4000 ints/sec (an ITR value of 976).
3729          */
3730         if (adapter->link_speed != SPEED_1000) {
3731                 new_val = 976;
3732                 goto set_itr_val;
3733         }
3734
3735         ring = q_vector->rx_ring;
3736         if (ring) {
3737                 packets = ACCESS_ONCE(ring->total_packets);
3738
3739                 if (packets)
3740                         avg_wire_size = ring->total_bytes / packets;
3741         }
3742
3743         ring = q_vector->tx_ring;
3744         if (ring) {
3745                 packets = ACCESS_ONCE(ring->total_packets);
3746
3747                 if (packets)
3748                         avg_wire_size = max_t(u32, avg_wire_size,
3749                                               ring->total_bytes / packets);
3750         }
3751
3752         /* if avg_wire_size isn't set no work was done */
3753         if (!avg_wire_size)
3754                 goto clear_counts;
3755
3756         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3757         avg_wire_size += 24;
3758
3759         /* Don't starve jumbo frames */
3760         avg_wire_size = min(avg_wire_size, 3000);
3761
3762         /* Give a little boost to mid-size frames */
3763         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3764                 new_val = avg_wire_size / 3;
3765         else
3766                 new_val = avg_wire_size / 2;
3767
3768         /* when in itr mode 3 do not exceed 20K ints/sec */
3769         if (adapter->rx_itr_setting == 3 && new_val < 196)
3770                 new_val = 196;
3771
3772 set_itr_val:
3773         if (new_val != q_vector->itr_val) {
3774                 q_vector->itr_val = new_val;
3775                 q_vector->set_itr = 1;
3776         }
3777 clear_counts:
3778         if (q_vector->rx_ring) {
3779                 q_vector->rx_ring->total_bytes = 0;
3780                 q_vector->rx_ring->total_packets = 0;
3781         }
3782         if (q_vector->tx_ring) {
3783                 q_vector->tx_ring->total_bytes = 0;
3784                 q_vector->tx_ring->total_packets = 0;
3785         }
3786 }
3787
3788 /**
3789  * igb_update_itr - update the dynamic ITR value based on statistics
3790  * @adapter: pointer to adapter
3791  * @itr_setting: current q_vector->itr_val
3792  * @packets: the number of packets during this measurement interval
3793  * @bytes: the number of bytes during this measurement interval
3794  *
3795  *      Stores a new ITR value based on packets and byte counts during the
3796  *      last interrupt.  The advantage of per interrupt computation is faster
3797  *      updates and more accurate ITR for the current traffic pattern.
3798  *      Constants in this function were computed based on theoretical maximum
3799  *      wire speed and thresholds were set based on testing data as well as
3800  *      attempting to minimize response time while increasing bulk throughput.
3801  *      This functionality is controlled by the InterruptThrottleRate module
3802  *      parameter (see igb_param.c)
3803  *      NOTE:  These calculations are only valid when operating in a single-
3804  *             queue environment.
3805  **/
3806 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3807                                    int packets, int bytes)
3808 {
3809         unsigned int retval = itr_setting;
3810
3811         if (packets == 0)
3812                 goto update_itr_done;
3813
3814         switch (itr_setting) {
3815         case lowest_latency:
3816                 /* handle TSO and jumbo frames */
3817                 if (bytes/packets > 8000)
3818                         retval = bulk_latency;
3819                 else if ((packets < 5) && (bytes > 512))
3820                         retval = low_latency;
3821                 break;
3822         case low_latency:  /* 50 usec aka 20000 ints/s */
3823                 if (bytes > 10000) {
3824                         /* this if handles the TSO accounting */
3825                         if (bytes/packets > 8000) {
3826                                 retval = bulk_latency;
3827                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3828                                 retval = bulk_latency;
3829                         } else if ((packets > 35)) {
3830                                 retval = lowest_latency;
3831                         }
3832                 } else if (bytes/packets > 2000) {
3833                         retval = bulk_latency;
3834                 } else if (packets <= 2 && bytes < 512) {
3835                         retval = lowest_latency;
3836                 }
3837                 break;
3838         case bulk_latency: /* 250 usec aka 4000 ints/s */
3839                 if (bytes > 25000) {
3840                         if (packets > 35)
3841                                 retval = low_latency;
3842                 } else if (bytes < 1500) {
3843                         retval = low_latency;
3844                 }
3845                 break;
3846         }
3847
3848 update_itr_done:
3849         return retval;
3850 }
3851
3852 static void igb_set_itr(struct igb_adapter *adapter)
3853 {
3854         struct igb_q_vector *q_vector = adapter->q_vector[0];
3855         u16 current_itr;
3856         u32 new_itr = q_vector->itr_val;
3857
3858         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3859         if (adapter->link_speed != SPEED_1000) {
3860                 current_itr = 0;
3861                 new_itr = 4000;
3862                 goto set_itr_now;
3863         }
3864
3865         adapter->rx_itr = igb_update_itr(adapter,
3866                                     adapter->rx_itr,
3867                                     q_vector->rx_ring->total_packets,
3868                                     q_vector->rx_ring->total_bytes);
3869
3870         adapter->tx_itr = igb_update_itr(adapter,
3871                                     adapter->tx_itr,
3872                                     q_vector->tx_ring->total_packets,
3873                                     q_vector->tx_ring->total_bytes);
3874         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3875
3876         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3877         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3878                 current_itr = low_latency;
3879
3880         switch (current_itr) {
3881         /* counts and packets in update_itr are dependent on these numbers */
3882         case lowest_latency:
3883                 new_itr = 56;  /* aka 70,000 ints/sec */
3884                 break;
3885         case low_latency:
3886                 new_itr = 196; /* aka 20,000 ints/sec */
3887                 break;
3888         case bulk_latency:
3889                 new_itr = 980; /* aka 4,000 ints/sec */
3890                 break;
3891         default:
3892                 break;
3893         }
3894
3895 set_itr_now:
3896         q_vector->rx_ring->total_bytes = 0;
3897         q_vector->rx_ring->total_packets = 0;
3898         q_vector->tx_ring->total_bytes = 0;
3899         q_vector->tx_ring->total_packets = 0;
3900
3901         if (new_itr != q_vector->itr_val) {
3902                 /* this attempts to bias the interrupt rate towards Bulk
3903                  * by adding intermediate steps when interrupt rate is
3904                  * increasing */
3905                 new_itr = new_itr > q_vector->itr_val ?
3906                              max((new_itr * q_vector->itr_val) /
3907                                  (new_itr + (q_vector->itr_val >> 2)),
3908                                  new_itr) :
3909                              new_itr;
3910                 /* Don't write the value here; it resets the adapter's
3911                  * internal timer, and causes us to delay far longer than
3912                  * we should between interrupts.  Instead, we write the ITR
3913                  * value at the beginning of the next interrupt so the timing
3914                  * ends up being correct.
3915                  */
3916                 q_vector->itr_val = new_itr;
3917                 q_vector->set_itr = 1;
3918         }
3919 }
3920
3921 #define IGB_TX_FLAGS_CSUM               0x00000001
3922 #define IGB_TX_FLAGS_VLAN               0x00000002
3923 #define IGB_TX_FLAGS_TSO                0x00000004
3924 #define IGB_TX_FLAGS_IPV4               0x00000008
3925 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3926 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3927 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3928
3929 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3930                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3931 {
3932         struct e1000_adv_tx_context_desc *context_desc;
3933         unsigned int i;
3934         int err;
3935         struct igb_buffer *buffer_info;
3936         u32 info = 0, tu_cmd = 0;
3937         u32 mss_l4len_idx;
3938         u8 l4len;
3939
3940         if (skb_header_cloned(skb)) {
3941                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3942                 if (err)
3943                         return err;
3944         }
3945
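             /* TSO frames are always TCP, so the L4 header is the TCP header */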
3946         l4len = tcp_hdrlen(skb);
3947         *hdr_len += l4len;
3948
3949         if (skb->protocol == htons(ETH_P_IP)) {
3950                 struct iphdr *iph = ip_hdr(skb);
3951                 iph->tot_len = 0;
3952                 iph->check = 0;
3953                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3954                                                          iph->daddr, 0,
3955                                                          IPPROTO_TCP,
3956                                                          0);
3957         } else if (skb_is_gso_v6(skb)) {
3958                 ipv6_hdr(skb)->payload_len = 0;
3959                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3960                                                        &ipv6_hdr(skb)->daddr,
3961                                                        0, IPPROTO_TCP, 0);
3962         }
3963
3964         i = tx_ring->next_to_use;
3965
3966         buffer_info = &tx_ring->buffer_info[i];
3967         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3968         /* VLAN MACLEN IPLEN */
3969         if (tx_flags & IGB_TX_FLAGS_VLAN)
3970                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3971         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3972         *hdr_len += skb_network_offset(skb);
3973         info |= skb_network_header_len(skb);
3974         *hdr_len += skb_network_header_len(skb);
3975         context_desc->vlan_macip_lens = cpu_to_le32(info);
3976
3977         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3978         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3979
3980         if (skb->protocol == htons(ETH_P_IP))
3981                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3982         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3983
3984         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3985
3986         /* MSS L4LEN IDX */
3987         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3988         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3989
3990         /* For 82575, context index must be unique per ring. */
3991         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3992                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3993
3994         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3995         context_desc->seqnum_seed = 0;
3996
3997         buffer_info->time_stamp = jiffies;
3998         buffer_info->next_to_watch = i;
3999         buffer_info->dma = 0;
4000         i++;
4001         if (i == tx_ring->count)
4002                 i = 0;
4003
4004         tx_ring->next_to_use = i;
4005
4006         return true;
4007 }
4008
4009 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
4010                                    struct sk_buff *skb, u32 tx_flags)
4011 {
4012         struct e1000_adv_tx_context_desc *context_desc;
4013         struct device *dev = tx_ring->dev;
4014         struct igb_buffer *buffer_info;
4015         u32 info = 0, tu_cmd = 0;
4016         unsigned int i;
4017
4018         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4019             (tx_flags & IGB_TX_FLAGS_VLAN)) {
4020                 i = tx_ring->next_to_use;
4021                 buffer_info = &tx_ring->buffer_info[i];
4022                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4023
4024                 if (tx_flags & IGB_TX_FLAGS_VLAN)
4025                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4026
4027                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4028                 if (skb->ip_summed == CHECKSUM_PARTIAL)
4029                         info |= skb_network_header_len(skb);
4030
4031                 context_desc->vlan_macip_lens = cpu_to_le32(info);
4032
4033                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4034
4035                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
4036                         __be16 protocol;
4037
4038                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4039                                 const struct vlan_ethhdr *vhdr =
4040                                           (const struct vlan_ethhdr *)skb->data;
4041
4042                                 protocol = vhdr->h_vlan_encapsulated_proto;
4043                         } else {
4044                                 protocol = skb->protocol;
4045                         }
4046
4047                         switch (protocol) {
4048                         case cpu_to_be16(ETH_P_IP):
4049                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4050                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4051                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4052                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4053                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4054                                 break;
4055                         case cpu_to_be16(ETH_P_IPV6):
4056                                 /* XXX what about other V6 headers?? */
4057                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4058                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4059                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4060                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4061                                 break;
4062                         default:
4063                                 if (unlikely(net_ratelimit()))
4064                                         dev_warn(dev,
4065                                             "partial checksum but proto=%x!\n",
4066                                             skb->protocol);
4067                                 break;
4068                         }
4069                 }
4070
4071                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4072                 context_desc->seqnum_seed = 0;
4073                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4074                         context_desc->mss_l4len_idx =
4075                                 cpu_to_le32(tx_ring->reg_idx << 4);
4076
4077                 buffer_info->time_stamp = jiffies;
4078                 buffer_info->next_to_watch = i;
4079                 buffer_info->dma = 0;
4080
4081                 i++;
4082                 if (i == tx_ring->count)
4083                         i = 0;
4084                 tx_ring->next_to_use = i;
4085
4086                 return true;
4087         }
4088         return false;
4089 }
4090
4091 #define IGB_MAX_TXD_PWR 16
4092 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
4093
4094 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
4095                                  unsigned int first)
4096 {
4097         struct igb_buffer *buffer_info;
4098         struct device *dev = tx_ring->dev;
4099         unsigned int hlen = skb_headlen(skb);
4100         unsigned int count = 0, i;
4101         unsigned int f;
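             /* a non-TSO skb counts as a single segment for byte accounting */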
4102         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4103
4104         i = tx_ring->next_to_use;
4105
4106         buffer_info = &tx_ring->buffer_info[i];
4107         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4108         buffer_info->length = hlen;
4109         /* set time_stamp *before* dma to help avoid a possible race */
4110         buffer_info->time_stamp = jiffies;
4111         buffer_info->next_to_watch = i;
4112         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4113                                           DMA_TO_DEVICE);
4114         if (dma_mapping_error(dev, buffer_info->dma))
4115                 goto dma_error;
4116
4117         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4118                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4119                 unsigned int len = frag->size;
4120
4121                 count++;
4122                 i++;
4123                 if (i == tx_ring->count)
4124                         i = 0;
4125
4126                 buffer_info = &tx_ring->buffer_info[i];
4127                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4128                 buffer_info->length = len;
4129                 buffer_info->time_stamp = jiffies;
4130                 buffer_info->next_to_watch = i;
4131                 buffer_info->mapped_as_page = true;
4132                 buffer_info->dma = dma_map_page(dev,
4133                                                 frag->page,
4134                                                 frag->page_offset,
4135                                                 len,
4136                                                 DMA_TO_DEVICE);
4137                 if (dma_mapping_error(dev, buffer_info->dma))
4138                         goto dma_error;
4139
4140         }
4141
4142         tx_ring->buffer_info[i].skb = skb;
4143         tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4144         /* count the header bytes replicated in each segment after the first */
4145         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4146         tx_ring->buffer_info[i].gso_segs = gso_segs;
4147         tx_ring->buffer_info[first].next_to_watch = i;
4148
4149         return ++count;
4150
4151 dma_error:
4152         dev_err(dev, "TX DMA map failed\n");
4153
4154         /* clear timestamp and dma mappings for failed buffer_info mapping */
4155         buffer_info->dma = 0;
4156         buffer_info->time_stamp = 0;
4157         buffer_info->length = 0;
4158         buffer_info->next_to_watch = 0;
4159         buffer_info->mapped_as_page = false;
4160
4161         /* clear timestamp and dma mappings for remaining portion of packet */
4162         while (count--) {
4163                 if (i == 0)
4164                         i = tx_ring->count;
4165                 i--;
4166                 buffer_info = &tx_ring->buffer_info[i];
4167                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4168         }
4169
4170         return 0;
4171 }
4172
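/**
 * igb_tx_queue_adv - write the mapped buffers as advanced Tx descriptors
 * @tx_ring: ring to place the descriptors on
 * @tx_flags: IGB_TX_FLAGS_* bits selecting VLAN, timestamp, checksum and TSO
 * @count: number of buffers mapped by igb_tx_map_adv()
 * @paylen: total length of the packet
 * @hdr_len: protocol header length, subtracted from @paylen for TSO
 *
 * Builds one advanced data descriptor per mapped buffer, sets the final
 * descriptor's end-of-packet command bits, and then writes the tail
 * register so hardware starts fetching the new descriptors.
 **/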
4173 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4174                                     u32 tx_flags, int count, u32 paylen,
4175                                     u8 hdr_len)
4176 {
4177         union e1000_adv_tx_desc *tx_desc;
4178         struct igb_buffer *buffer_info;
4179         u32 olinfo_status = 0, cmd_type_len;
4180         unsigned int i = tx_ring->next_to_use;
4181
4182         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4183                         E1000_ADVTXD_DCMD_DEXT);
4184
4185         if (tx_flags & IGB_TX_FLAGS_VLAN)
4186                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4187
4188         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4189                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4190
4191         if (tx_flags & IGB_TX_FLAGS_TSO) {
4192                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4193
4194                 /* insert tcp checksum */
4195                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4196
4197                 /* insert ip checksum */
4198                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4199                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4200
4201         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4202                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4203         }
4204
4205         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4206             (tx_flags & (IGB_TX_FLAGS_CSUM |
4207                          IGB_TX_FLAGS_TSO |
4208                          IGB_TX_FLAGS_VLAN)))
4209                 olinfo_status |= tx_ring->reg_idx << 4;
4210
4211         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4212
4213         do {
4214                 buffer_info = &tx_ring->buffer_info[i];
4215                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4216                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4217                 tx_desc->read.cmd_type_len =
4218                         cpu_to_le32(cmd_type_len | buffer_info->length);
4219                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4220                 count--;
4221                 i++;
4222                 if (i == tx_ring->count)
4223                         i = 0;
4224         } while (count > 0);
4225
4226         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4227         /* Force memory writes to complete before letting h/w
4228          * know there are new descriptors to fetch.  (Only
4229          * applicable for weak-ordered memory model archs,
4230          * such as IA-64). */
4231         wmb();
4232
4233         tx_ring->next_to_use = i;
4234         writel(i, tx_ring->tail);
4235         /* we need this if more than one processor can write to our tail
4236          * at a time, it synchronizes IO on IA64/Altix systems */
4237         mmiowb();
4238 }
4239
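/**
 * __igb_maybe_stop_tx - stop the Tx queue and re-check for free descriptors
 * @tx_ring: ring that is running out of descriptors
 * @size: number of free descriptors required
 *
 * Stops the subqueue, then re-checks the free descriptor count after a
 * memory barrier in case another CPU freed descriptors in the meantime.
 * Returns -EBUSY if the ring is still full, otherwise wakes the subqueue
 * and returns 0.
 **/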
4240 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4241 {
4242         struct net_device *netdev = tx_ring->netdev;
4243
4244         netif_stop_subqueue(netdev, tx_ring->queue_index);
4245
4246         /* Herbert's original patch had:
4247          *  smp_mb__after_netif_stop_queue();
4248          * but since that doesn't exist yet, just open code it. */
4249         smp_mb();
4250
4251         /* We need to check again in case another CPU has just
4252          * made room available. */
4253         if (igb_desc_unused(tx_ring) < size)
4254                 return -EBUSY;
4255
4256         /* A reprieve! */
4257         netif_wake_subqueue(netdev, tx_ring->queue_index);
4258
4259         u64_stats_update_begin(&tx_ring->tx_syncp2);
4260         tx_ring->tx_stats.restart_queue2++;
4261         u64_stats_update_end(&tx_ring->tx_syncp2);
4262
4263         return 0;
4264 }
4265
4266 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4267 {
4268         if (igb_desc_unused(tx_ring) >= size)
4269                 return 0;
4270         return __igb_maybe_stop_tx(tx_ring, size);
4271 }
4272
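/**
 * igb_xmit_frame_ring_adv - transmit one skb on a specific ring
 * @skb: packet to transmit
 * @tx_ring: ring to transmit on
 *
 * Checks for sufficient descriptor space (returning NETDEV_TX_BUSY if the
 * ring is too full), records hardware timestamp and VLAN flags, builds a
 * TSO or checksum context descriptor when needed, then maps the packet
 * and hands it to hardware.  On a DMA mapping failure the packet is
 * dropped and the ring state is rewound.
 **/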
4273 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4274                                     struct igb_ring *tx_ring)
4275 {
4276         int tso = 0, count;
4277         u32 tx_flags = 0;
4278         u16 first;
4279         u8 hdr_len = 0;
4280
4281         /* need: 1 descriptor per page,
4282          *       + 2 desc gap to keep tail from touching head,
4283          *       + 1 desc for skb->data,
4284          *       + 1 desc for context descriptor,
4285          * otherwise try next time */
4286         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4287                 /* this is a hard error */
4288                 return NETDEV_TX_BUSY;
4289         }
4290
4291         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4292                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4293                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4294         }
4295
4296         if (vlan_tx_tag_present(skb)) {
4297                 tx_flags |= IGB_TX_FLAGS_VLAN;
4298                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4299         }
4300
4301         if (skb->protocol == htons(ETH_P_IP))
4302                 tx_flags |= IGB_TX_FLAGS_IPV4;
4303
4304         first = tx_ring->next_to_use;
4305         if (skb_is_gso(skb)) {
4306                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4307
4308                 if (tso < 0) {
4309                         dev_kfree_skb_any(skb);
4310                         return NETDEV_TX_OK;
4311                 }
4312         }
4313
4314         if (tso)
4315                 tx_flags |= IGB_TX_FLAGS_TSO;
4316         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4317                  (skb->ip_summed == CHECKSUM_PARTIAL))
4318                 tx_flags |= IGB_TX_FLAGS_CSUM;
4319
4320         /*
4321          * count reflects descriptors mapped, if 0 or less then mapping error
4322          * has occurred and we need to rewind the descriptor queue
4323          */
4324         count = igb_tx_map_adv(tx_ring, skb, first);
4325         if (!count) {
4326                 dev_kfree_skb_any(skb);
4327                 tx_ring->buffer_info[first].time_stamp = 0;
4328                 tx_ring->next_to_use = first;
4329                 return NETDEV_TX_OK;
4330         }
4331
4332         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4333
4334         /* Make sure there is space in the ring for the next send. */
4335         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4336
4337         return NETDEV_TX_OK;
4338 }
4339
4340 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4341                                       struct net_device *netdev)
4342 {
4343         struct igb_adapter *adapter = netdev_priv(netdev);
4344         struct igb_ring *tx_ring;
4345         int r_idx = 0;
4346
4347         if (test_bit(__IGB_DOWN, &adapter->state)) {
4348                 dev_kfree_skb_any(skb);
4349                 return NETDEV_TX_OK;
4350         }
4351
4352         if (skb->len <= 0) {
4353                 dev_kfree_skb_any(skb);
4354                 return NETDEV_TX_OK;
4355         }
4356
4357         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4358         tx_ring = adapter->multi_tx_table[r_idx];
4359
4360         /* This goes back to the question of how to logically map a tx queue
4361          * to a flow.  Right now, performance is impacted slightly negatively
4362          * if using multiple tx queues.  If the stack breaks away from a
4363          * single qdisc implementation, we can look at this again. */
4364         return igb_xmit_frame_ring_adv(skb, tx_ring);
4365 }
4366
4367 /**
4368  * igb_tx_timeout - Respond to a Tx Hang
4369  * @netdev: network interface device structure
4370  **/
4371 static void igb_tx_timeout(struct net_device *netdev)
4372 {
4373         struct igb_adapter *adapter = netdev_priv(netdev);
4374         struct e1000_hw *hw = &adapter->hw;
4375
4376         /* Do the reset outside of interrupt context */
4377         adapter->tx_timeout_count++;
4378
4379         if (hw->mac.type == e1000_82580)
4380                 hw->dev_spec._82575.global_device_reset = true;
4381
4382         schedule_work(&adapter->reset_task);
4383         wr32(E1000_EICS,
4384              (adapter->eims_enable_mask & ~adapter->eims_other));
4385 }
4386
4387 static void igb_reset_task(struct work_struct *work)
4388 {
4389         struct igb_adapter *adapter;
4390         adapter = container_of(work, struct igb_adapter, reset_task);
4391
4392         igb_dump(adapter);
4393         netdev_err(adapter->netdev, "Reset adapter\n");
4394         igb_reinit_locked(adapter);
4395 }
4396
4397 /**
4398  * igb_get_stats64 - Get System Network Statistics
4399  * @netdev: network interface device structure
4400  * @stats: rtnl_link_stats64 pointer
4401  *
4402  **/
4403 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4404                                                  struct rtnl_link_stats64 *stats)
4405 {
4406         struct igb_adapter *adapter = netdev_priv(netdev);
4407
4408         spin_lock(&adapter->stats64_lock);
4409         igb_update_stats(adapter, &adapter->stats64);
4410         memcpy(stats, &adapter->stats64, sizeof(*stats));
4411         spin_unlock(&adapter->stats64_lock);
4412
4413         return stats;
4414 }
4415
4416 /**
4417  * igb_change_mtu - Change the Maximum Transfer Unit
4418  * @netdev: network interface device structure
4419  * @new_mtu: new value for maximum frame size
4420  *
4421  * Returns 0 on success, negative on failure
4422  **/
4423 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4424 {
4425         struct igb_adapter *adapter = netdev_priv(netdev);
4426         struct pci_dev *pdev = adapter->pdev;
4427         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4428         u32 rx_buffer_len, i;
4429
4430         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4431                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4432                 return -EINVAL;
4433         }
4434
4435         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4436                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4437                 return -EINVAL;
4438         }
4439
4440         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4441                 msleep(1);
4442
4443         /* igb_down has a dependency on max_frame_size */
4444         adapter->max_frame_size = max_frame;
4445
4446         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4447          * means we reserve 2 more, this pushes us to allocate from the next
4448          * larger slab size.
4449          * i.e. RXBUFFER_2048 --> size-4096 slab
4450          */
4451
4452         if (adapter->hw.mac.type == e1000_82580)
4453                 max_frame += IGB_TS_HDR_LEN;
4454
4455         if (max_frame <= IGB_RXBUFFER_1024)
4456                 rx_buffer_len = IGB_RXBUFFER_1024;
4457         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4458                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4459         else
4460                 rx_buffer_len = IGB_RXBUFFER_128;
4461
4462         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4463              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4464                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4465
4466         if ((adapter->hw.mac.type == e1000_82580) &&
4467             (rx_buffer_len == IGB_RXBUFFER_128))
4468                 rx_buffer_len += IGB_RXBUFFER_64;
4469
4470         if (netif_running(netdev))
4471                 igb_down(adapter);
4472
4473         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4474                  netdev->mtu, new_mtu);
4475         netdev->mtu = new_mtu;
4476
4477         for (i = 0; i < adapter->num_rx_queues; i++)
4478                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4479
4480         if (netif_running(netdev))
4481                 igb_up(adapter);
4482         else
4483                 igb_reset(adapter);
4484
4485         clear_bit(__IGB_RESETTING, &adapter->state);
4486
4487         return 0;
4488 }
4489
4490 /**
4491  * igb_update_stats - Update the board statistics counters
4492  * @adapter: board private structure
4493  **/
4494
4495 void igb_update_stats(struct igb_adapter *adapter,
4496                       struct rtnl_link_stats64 *net_stats)
4497 {
4498         struct e1000_hw *hw = &adapter->hw;
4499         struct pci_dev *pdev = adapter->pdev;
4500         u32 reg, mpc;
4501         u16 phy_tmp;
4502         int i;
4503         u64 bytes, packets;
4504         unsigned int start;
4505         u64 _bytes, _packets;
4506
4507 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4508
4509         /*
4510          * Prevent stats update while adapter is being reset, or if the pci
4511          * connection is down.
4512          */
4513         if (adapter->link_speed == 0)
4514                 return;
4515         if (pci_channel_offline(pdev))
4516                 return;
4517
4518         bytes = 0;
4519         packets = 0;
4520         for (i = 0; i < adapter->num_rx_queues; i++) {
4521                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4522                 struct igb_ring *ring = adapter->rx_ring[i];
4523
4524                 ring->rx_stats.drops += rqdpc_tmp;
4525                 net_stats->rx_fifo_errors += rqdpc_tmp;
4526
4527                 do {
4528                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4529                         _bytes = ring->rx_stats.bytes;
4530                         _packets = ring->rx_stats.packets;
4531                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4532                 bytes += _bytes;
4533                 packets += _packets;
4534         }
4535
4536         net_stats->rx_bytes = bytes;
4537         net_stats->rx_packets = packets;
4538
4539         bytes = 0;
4540         packets = 0;
4541         for (i = 0; i < adapter->num_tx_queues; i++) {
4542                 struct igb_ring *ring = adapter->tx_ring[i];
4543                 do {
4544                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4545                         _bytes = ring->tx_stats.bytes;
4546                         _packets = ring->tx_stats.packets;
4547                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4548                 bytes += _bytes;
4549                 packets += _packets;
4550         }
4551         net_stats->tx_bytes = bytes;
4552         net_stats->tx_packets = packets;
4553
4554         /* read stats registers */
4555         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4556         adapter->stats.gprc += rd32(E1000_GPRC);
4557         adapter->stats.gorc += rd32(E1000_GORCL);
4558         rd32(E1000_GORCH); /* clear GORCL */
4559         adapter->stats.bprc += rd32(E1000_BPRC);
4560         adapter->stats.mprc += rd32(E1000_MPRC);
4561         adapter->stats.roc += rd32(E1000_ROC);
4562
4563         adapter->stats.prc64 += rd32(E1000_PRC64);
4564         adapter->stats.prc127 += rd32(E1000_PRC127);
4565         adapter->stats.prc255 += rd32(E1000_PRC255);
4566         adapter->stats.prc511 += rd32(E1000_PRC511);
4567         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4568         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4569         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4570         adapter->stats.sec += rd32(E1000_SEC);
4571
4572         mpc = rd32(E1000_MPC);
4573         adapter->stats.mpc += mpc;
4574         net_stats->rx_fifo_errors += mpc;
4575         adapter->stats.scc += rd32(E1000_SCC);
4576         adapter->stats.ecol += rd32(E1000_ECOL);
4577         adapter->stats.mcc += rd32(E1000_MCC);
4578         adapter->stats.latecol += rd32(E1000_LATECOL);
4579         adapter->stats.dc += rd32(E1000_DC);
4580         adapter->stats.rlec += rd32(E1000_RLEC);
4581         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4582         adapter->stats.xontxc += rd32(E1000_XONTXC);
4583         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4584         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4585         adapter->stats.fcruc += rd32(E1000_FCRUC);
4586         adapter->stats.gptc += rd32(E1000_GPTC);
4587         adapter->stats.gotc += rd32(E1000_GOTCL);
4588         rd32(E1000_GOTCH); /* clear GOTCL */
4589         adapter->stats.rnbc += rd32(E1000_RNBC);
4590         adapter->stats.ruc += rd32(E1000_RUC);
4591         adapter->stats.rfc += rd32(E1000_RFC);
4592         adapter->stats.rjc += rd32(E1000_RJC);
4593         adapter->stats.tor += rd32(E1000_TORH);
4594         adapter->stats.tot += rd32(E1000_TOTH);
4595         adapter->stats.tpr += rd32(E1000_TPR);
4596
4597         adapter->stats.ptc64 += rd32(E1000_PTC64);
4598         adapter->stats.ptc127 += rd32(E1000_PTC127);
4599         adapter->stats.ptc255 += rd32(E1000_PTC255);
4600         adapter->stats.ptc511 += rd32(E1000_PTC511);
4601         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4602         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4603
4604         adapter->stats.mptc += rd32(E1000_MPTC);
4605         adapter->stats.bptc += rd32(E1000_BPTC);
4606
4607         adapter->stats.tpt += rd32(E1000_TPT);
4608         adapter->stats.colc += rd32(E1000_COLC);
4609
4610         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4611         /* read internal phy specific stats */
4612         reg = rd32(E1000_CTRL_EXT);
4613         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4614                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4615                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4616         }
4617
4618         adapter->stats.tsctc += rd32(E1000_TSCTC);
4619         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4620
4621         adapter->stats.iac += rd32(E1000_IAC);
4622         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4623         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4624         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4625         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4626         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4627         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4628         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4629         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4630
4631         /* Fill out the OS statistics structure */
4632         net_stats->multicast = adapter->stats.mprc;
4633         net_stats->collisions = adapter->stats.colc;
4634
4635         /* Rx Errors */
4636
4637         /* RLEC on some newer hardware can be incorrect so build
4638          * our own version based on RUC and ROC */
4639         net_stats->rx_errors = adapter->stats.rxerrc +
4640                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4641                 adapter->stats.ruc + adapter->stats.roc +
4642                 adapter->stats.cexterr;
4643         net_stats->rx_length_errors = adapter->stats.ruc +
4644                                       adapter->stats.roc;
4645         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4646         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4647         net_stats->rx_missed_errors = adapter->stats.mpc;
4648
4649         /* Tx Errors */
4650         net_stats->tx_errors = adapter->stats.ecol +
4651                                adapter->stats.latecol;
4652         net_stats->tx_aborted_errors = adapter->stats.ecol;
4653         net_stats->tx_window_errors = adapter->stats.latecol;
4654         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4655
4656         /* Tx Dropped needs to be maintained elsewhere */
4657
4658         /* Phy Stats */
4659         if (hw->phy.media_type == e1000_media_type_copper) {
4660                 if ((adapter->link_speed == SPEED_1000) &&
4661                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4662                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4663                         adapter->phy_stats.idle_errors += phy_tmp;
4664                 }
4665         }
4666
4667         /* Management Stats */
4668         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4669         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4670         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4671
4672         /* OS2BMC Stats */
4673         reg = rd32(E1000_MANC);
4674         if (reg & E1000_MANC_EN_BMC2OS) {
4675                 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4676                 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4677                 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4678                 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4679         }
4680 }
4681
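/**
 * igb_msix_other - handle the "other causes" MSI-X vector
 * @irq: interrupt number
 * @data: pointer to our adapter structure
 *
 * Services the non-queue interrupt causes: device reset requests, DMA
 * out-of-sync (and possible VF spoof) events, VF mailbox activity and
 * link status changes, then re-arms the relevant interrupt masks.
 **/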
4682 static irqreturn_t igb_msix_other(int irq, void *data)
4683 {
4684         struct igb_adapter *adapter = data;
4685         struct e1000_hw *hw = &adapter->hw;
4686         u32 icr = rd32(E1000_ICR);
4687         /* reading ICR causes bit 31 of EICR to be cleared */
4688
4689         if (icr & E1000_ICR_DRSTA)
4690                 schedule_work(&adapter->reset_task);
4691
4692         if (icr & E1000_ICR_DOUTSYNC) {
4693                 /* HW is reporting DMA is out of sync */
4694                 adapter->stats.doosync++;
4695                 /* The DMA Out of Sync is also an indication of a spoof event
4696                  * in IOV mode. Check the Wrong VM Behavior register to
4697                  * see if it is really a spoof event. */
4698                 igb_check_wvbr(adapter);
4699         }
4700
4701         /* Check for a mailbox event */
4702         if (icr & E1000_ICR_VMMB)
4703                 igb_msg_task(adapter);
4704
4705         if (icr & E1000_ICR_LSC) {
4706                 hw->mac.get_link_status = 1;
4707                 /* guard against interrupt when we're going down */
4708                 if (!test_bit(__IGB_DOWN, &adapter->state))
4709                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4710         }
4711
4712         if (adapter->vfs_allocated_count)
4713                 wr32(E1000_IMS, E1000_IMS_LSC |
4714                                 E1000_IMS_VMMB |
4715                                 E1000_IMS_DOUTSYNC);
4716         else
4717                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4718         wr32(E1000_EIMS, adapter->eims_other);
4719
4720         return IRQ_HANDLED;
4721 }
4722
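/**
 * igb_write_itr - write a pending interrupt throttle value to hardware
 * @q_vector: vector whose ITR register should be updated
 *
 * Writes the most recently calculated ITR value to the vector's ITR
 * register, using the 82575-specific layout where required, and clears
 * the pending set_itr flag.
 **/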
4723 static void igb_write_itr(struct igb_q_vector *q_vector)
4724 {
4725         struct igb_adapter *adapter = q_vector->adapter;
4726         u32 itr_val = q_vector->itr_val & 0x7FFC;
4727
4728         if (!q_vector->set_itr)
4729                 return;
4730
4731         if (!itr_val)
4732                 itr_val = 0x4;
4733
4734         if (adapter->hw.mac.type == e1000_82575)
4735                 itr_val |= itr_val << 16;
4736         else
4737                 itr_val |= 0x8000000;
4738
4739         writel(itr_val, q_vector->itr_register);
4740         q_vector->set_itr = 0;
4741 }
4742
4743 static irqreturn_t igb_msix_ring(int irq, void *data)
4744 {
4745         struct igb_q_vector *q_vector = data;
4746
4747         /* Write the ITR value calculated from the previous interrupt. */
4748         igb_write_itr(q_vector);
4749
4750         napi_schedule(&q_vector->napi);
4751
4752         return IRQ_HANDLED;
4753 }
4754
4755 #ifdef CONFIG_IGB_DCA
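/**
 * igb_update_dca - retarget DCA tags at the CPU running this vector
 * @q_vector: queue vector whose rings should be retargeted
 *
 * If the vector has moved to a different CPU, rewrites the per-queue
 * DCA_TXCTRL/DCA_RXCTRL registers so descriptor (and, for Rx, header and
 * payload) writes are tagged for that CPU.
 **/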
4756 static void igb_update_dca(struct igb_q_vector *q_vector)
4757 {
4758         struct igb_adapter *adapter = q_vector->adapter;
4759         struct e1000_hw *hw = &adapter->hw;
4760         int cpu = get_cpu();
4761
4762         if (q_vector->cpu == cpu)
4763                 goto out_no_update;
4764
4765         if (q_vector->tx_ring) {
4766                 int q = q_vector->tx_ring->reg_idx;
4767                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4768                 if (hw->mac.type == e1000_82575) {
4769                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4770                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4771                 } else {
4772                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4773                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4774                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4775                 }
4776                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4777                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4778         }
4779         if (q_vector->rx_ring) {
4780                 int q = q_vector->rx_ring->reg_idx;
4781                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4782                 if (hw->mac.type == e1000_82575) {
4783                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4784                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4785                 } else {
4786                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4787                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4788                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4789                 }
4790                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4791                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4792                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4793                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4794         }
4795         q_vector->cpu = cpu;
4796 out_no_update:
4797         put_cpu();
4798 }
4799
4800 static void igb_setup_dca(struct igb_adapter *adapter)
4801 {
4802         struct e1000_hw *hw = &adapter->hw;
4803         int i;
4804
4805         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4806                 return;
4807
4808         /* Always use CB2 mode, difference is masked in the CB driver. */
4809         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4810
4811         for (i = 0; i < adapter->num_q_vectors; i++) {
4812                 adapter->q_vector[i]->cpu = -1;
4813                 igb_update_dca(adapter->q_vector[i]);
4814         }
4815 }
4816
4817 static int __igb_notify_dca(struct device *dev, void *data)
4818 {
4819         struct net_device *netdev = dev_get_drvdata(dev);
4820         struct igb_adapter *adapter = netdev_priv(netdev);
4821         struct pci_dev *pdev = adapter->pdev;
4822         struct e1000_hw *hw = &adapter->hw;
4823         unsigned long event = *(unsigned long *)data;
4824
4825         switch (event) {
4826         case DCA_PROVIDER_ADD:
4827                 /* if already enabled, don't do it again */
4828                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4829                         break;
4830                 if (dca_add_requester(dev) == 0) {
4831                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4832                         dev_info(&pdev->dev, "DCA enabled\n");
4833                         igb_setup_dca(adapter);
4834                         break;
4835                 }
4836                 /* Fall Through since DCA is disabled. */
4837         case DCA_PROVIDER_REMOVE:
4838                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4839                         /* without this a class_device is left
4840                          * hanging around in the sysfs model */
4841                         dca_remove_requester(dev);
4842                         dev_info(&pdev->dev, "DCA disabled\n");
4843                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4844                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4845                 }
4846                 break;
4847         }
4848
4849         return 0;
4850 }
4851
4852 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4853                           void *p)
4854 {
4855         int ret_val;
4856
4857         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4858                                          __igb_notify_dca);
4859
4860         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4861 }
4862 #endif /* CONFIG_IGB_DCA */
4863
4864 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4865 {
4866         struct e1000_hw *hw = &adapter->hw;
4867         u32 ping;
4868         int i;
4869
4870         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4871                 ping = E1000_PF_CONTROL_MSG;
4872                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4873                         ping |= E1000_VT_MSGTYPE_CTS;
4874                 igb_write_mbx(hw, &ping, 1, i);
4875         }
4876 }
4877
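/**
 * igb_set_vf_promisc - handle a VF request to change promiscuous settings
 * @adapter: board private structure
 * @msgbuf: mailbox message received from the VF
 * @vf: VF index the request came from
 *
 * Only multicast promiscuous mode is supported here; when it is cleared
 * the VF's stored multicast hashes are written back to the MTA (or MPME
 * is kept if more than 30 hashes are in use).  Returns -EINVAL if the
 * message carries flags that are not supported.
 **/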
4878 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4879 {
4880         struct e1000_hw *hw = &adapter->hw;
4881         u32 vmolr = rd32(E1000_VMOLR(vf));
4882         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4883
4884         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4885                             IGB_VF_FLAG_MULTI_PROMISC);
4886         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4887
4888         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4889                 vmolr |= E1000_VMOLR_MPME;
4890                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4891                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4892         } else {
4893                 /*
4894                  * if we have hashes and we are clearing a multicast promisc
4895                  * flag we need to write the hashes to the MTA as this step
4896                  * was previously skipped
4897                  */
4898                 if (vf_data->num_vf_mc_hashes > 30) {
4899                         vmolr |= E1000_VMOLR_MPME;
4900                 } else if (vf_data->num_vf_mc_hashes) {
4901                         int j;
4902                         vmolr |= E1000_VMOLR_ROMPE;
4903                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4904                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4905                 }
4906         }
4907
4908         wr32(E1000_VMOLR(vf), vmolr);
4909
4910         /* there are flags left unprocessed, likely not supported */
4911         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4912                 return -EINVAL;
4913
4914         return 0;
4915
4916 }
4917
4918 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4919                                   u32 *msgbuf, u32 vf)
4920 {
4921         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4922         u16 *hash_list = (u16 *)&msgbuf[1];
4923         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4924         int i;
4925
4926         /* salt away the number of multicast addresses assigned
4927          * to this VF for later use to restore when the PF multicast
4928          * list changes
4929          */
4930         vf_data->num_vf_mc_hashes = n;
4931
4932         /* only up to 30 hash values supported */
4933         if (n > 30)
4934                 n = 30;
4935
4936         /* store the hashes for later use */
4937         for (i = 0; i < n; i++)
4938                 vf_data->vf_mc_hashes[i] = hash_list[i];
4939
4940         /* Flush and reset the mta with the new values */
4941         igb_set_rx_mode(adapter->netdev);
4942
4943         return 0;
4944 }
4945
4946 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4947 {
4948         struct e1000_hw *hw = &adapter->hw;
4949         struct vf_data_storage *vf_data;
4950         int i, j;
4951
4952         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4953                 u32 vmolr = rd32(E1000_VMOLR(i));
4954                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4955
4956                 vf_data = &adapter->vf_data[i];
4957
4958                 if ((vf_data->num_vf_mc_hashes > 30) ||
4959                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4960                         vmolr |= E1000_VMOLR_MPME;
4961                 } else if (vf_data->num_vf_mc_hashes) {
4962                         vmolr |= E1000_VMOLR_ROMPE;
4963                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4964                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4965                 }
4966                 wr32(E1000_VMOLR(i), vmolr);
4967         }
4968 }
4969
4970 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4971 {
4972         struct e1000_hw *hw = &adapter->hw;
4973         u32 pool_mask, reg, vid;
4974         int i;
4975
4976         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4977
4978         /* Find the vlan filter for this id */
4979         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4980                 reg = rd32(E1000_VLVF(i));
4981
4982                 /* remove the vf from the pool */
4983                 reg &= ~pool_mask;
4984
4985                 /* if pool is empty then remove entry from vfta */
4986                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4987                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4988                         vid = reg & E1000_VLVF_VLANID_MASK;
4989                         igb_vfta_set(hw, vid, false);
4990                         reg = 0;
4991                 }
4992
4993                 wr32(E1000_VLVF(i), reg);
4994         }
4995
4996         adapter->vf_data[vf].vlans_enabled = 0;
4997 }
4998
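/**
 * igb_vlvf_set - add or remove a VF from a VLVF VLAN pool
 * @adapter: board private structure
 * @vid: VLAN id to add or remove
 * @add: true to add the VF to the pool, false to remove it
 * @vf: pool/VF index to modify
 *
 * Looks up (or allocates) the VLVF entry for @vid and updates its pool
 * select bits, keeping the VFTA and the VF's VMOLR.RLPML size in sync so
 * VLAN-tagged frames of full length can still be received.  Only valid
 * on 82576 and newer parts when VFs are allocated.
 **/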
4999 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5000 {
5001         struct e1000_hw *hw = &adapter->hw;
5002         u32 reg, i;
5003
5004         /* The vlvf table only exists on 82576 hardware and newer */
5005         if (hw->mac.type < e1000_82576)
5006                 return -1;
5007
5008         /* we only need to do this if VMDq is enabled */
5009         if (!adapter->vfs_allocated_count)
5010                 return -1;
5011
5012         /* Find the vlan filter for this id */
5013         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5014                 reg = rd32(E1000_VLVF(i));
5015                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5016                     vid == (reg & E1000_VLVF_VLANID_MASK))
5017                         break;
5018         }
5019
5020         if (add) {
5021                 if (i == E1000_VLVF_ARRAY_SIZE) {
5022                         /* Did not find a matching VLAN ID entry that was
5023                          * enabled.  Search for a free filter entry, i.e.
5024                          * one without the enable bit set
5025                          */
5026                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5027                                 reg = rd32(E1000_VLVF(i));
5028                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5029                                         break;
5030                         }
5031                 }
5032                 if (i < E1000_VLVF_ARRAY_SIZE) {
5033                         /* Found an enabled/available entry */
5034                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5035
5036                         /* if !enabled we need to set this up in vfta */
5037                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5038                                 /* add VID to filter table */
5039                                 igb_vfta_set(hw, vid, true);
5040                                 reg |= E1000_VLVF_VLANID_ENABLE;
5041                         }
5042                         reg &= ~E1000_VLVF_VLANID_MASK;
5043                         reg |= vid;
5044                         wr32(E1000_VLVF(i), reg);
5045
5046                         /* do not modify RLPML for PF devices */
5047                         if (vf >= adapter->vfs_allocated_count)
5048                                 return 0;
5049
5050                         if (!adapter->vf_data[vf].vlans_enabled) {
5051                                 u32 size;
5052                                 reg = rd32(E1000_VMOLR(vf));
5053                                 size = reg & E1000_VMOLR_RLPML_MASK;
5054                                 size += 4;
5055                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5056                                 reg |= size;
5057                                 wr32(E1000_VMOLR(vf), reg);
5058                         }
5059
5060                         adapter->vf_data[vf].vlans_enabled++;
5061                         return 0;
5062                 }
5063         } else {
5064                 if (i < E1000_VLVF_ARRAY_SIZE) {
5065                         /* remove vf from the pool */
5066                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5067                         /* if pool is empty then remove entry from vfta */
5068                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5069                                 reg = 0;
5070                                 igb_vfta_set(hw, vid, false);
5071                         }
5072                         wr32(E1000_VLVF(i), reg);
5073
5074                         /* do not modify RLPML for PF devices */
5075                         if (vf >= adapter->vfs_allocated_count)
5076                                 return 0;
5077
5078                         adapter->vf_data[vf].vlans_enabled--;
5079                         if (!adapter->vf_data[vf].vlans_enabled) {
5080                                 u32 size;
5081                                 reg = rd32(E1000_VMOLR(vf));
5082                                 size = reg & E1000_VMOLR_RLPML_MASK;
5083                                 size -= 4;
5084                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5085                                 reg |= size;
5086                                 wr32(E1000_VMOLR(vf), reg);
5087                         }
5088                 }
5089         }
5090         return 0;
5091 }
5092
5093 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5094 {
5095         struct e1000_hw *hw = &adapter->hw;
5096
5097         if (vid)
5098                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5099         else
5100                 wr32(E1000_VMVIR(vf), 0);
5101 }
5102
5103 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5104                                int vf, u16 vlan, u8 qos)
5105 {
5106         int err = 0;
5107         struct igb_adapter *adapter = netdev_priv(netdev);
5108
5109         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5110                 return -EINVAL;
5111         if (vlan || qos) {
5112                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5113                 if (err)
5114                         goto out;
5115                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5116                 igb_set_vmolr(adapter, vf, !vlan);
5117                 adapter->vf_data[vf].pf_vlan = vlan;
5118                 adapter->vf_data[vf].pf_qos = qos;
5119                 dev_info(&adapter->pdev->dev,
5120                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5121                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5122                         dev_warn(&adapter->pdev->dev,
5123                                  "The VF VLAN has been set,"
5124                                  " but the PF device is not up.\n");
5125                         dev_warn(&adapter->pdev->dev,
5126                                  "Bring the PF device up before"
5127                                  " attempting to use the VF device.\n");
5128                 }
5129         } else {
5130                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5131                                    false, vf);
5132                 igb_set_vmvir(adapter, vlan, vf);
5133                 igb_set_vmolr(adapter, vf, true);
5134                 adapter->vf_data[vf].pf_vlan = 0;
5135                 adapter->vf_data[vf].pf_qos = 0;
5136         }
5137 out:
5138         return err;
5139 }
5140
5141 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5142 {
5143         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5144         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5145
5146         return igb_vlvf_set(adapter, vid, add, vf);
5147 }
5148
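/**
 * igb_vf_reset - return a VF to its post-reset default state
 * @adapter: board private structure
 * @vf: VF index being reset
 *
 * Clears the VF flags (keeping only the PF-set-MAC indication), restores
 * default offloads, drops the VF's VLAN filters (re-adding any PF-assigned
 * VLAN) and flushes its multicast table.
 **/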
5149 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5150 {
5151         /* clear flags - except flag that indicates PF has set the MAC */
5152         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5153         adapter->vf_data[vf].last_nack = jiffies;
5154
5155         /* reset offloads to defaults */
5156         igb_set_vmolr(adapter, vf, true);
5157
5158         /* reset vlans for device */
5159         igb_clear_vf_vfta(adapter, vf);
5160         if (adapter->vf_data[vf].pf_vlan)
5161                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5162                                     adapter->vf_data[vf].pf_vlan,
5163                                     adapter->vf_data[vf].pf_qos);
5164         else
5165                 igb_clear_vf_vfta(adapter, vf);
5166
5167         /* reset multicast table array for vf */
5168         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5169
5170         /* Flush and reset the mta with the new values */
5171         igb_set_rx_mode(adapter->netdev);
5172 }
5173
5174 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5175 {
5176         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5177
5178         /* generate a new mac address as we were hotplug removed/added */
5179         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5180                 random_ether_addr(vf_mac);
5181
5182         /* process remaining reset events */
5183         igb_vf_reset(adapter, vf);
5184 }
5185
5186 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5187 {
5188         struct e1000_hw *hw = &adapter->hw;
5189         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5190         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5191         u32 reg, msgbuf[3];
5192         u8 *addr = (u8 *)(&msgbuf[1]);
5193
5194         /* process all the same items cleared in a function level reset */
5195         igb_vf_reset(adapter, vf);
5196
5197         /* set vf mac address */
5198         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5199
5200         /* enable transmit and receive for vf */
5201         reg = rd32(E1000_VFTE);
5202         wr32(E1000_VFTE, reg | (1 << vf));
5203         reg = rd32(E1000_VFRE);
5204         wr32(E1000_VFRE, reg | (1 << vf));
5205
5206         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5207
5208         /* reply to reset with ack and vf mac address */
5209         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5210         memcpy(addr, vf_mac, 6);
5211         igb_write_mbx(hw, msgbuf, 3, vf);
5212 }
5213
5214 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5215 {
5216         /*
5217          * The VF MAC Address is stored in a packed array of bytes
5218          * starting at the second 32 bit word of the msg array
5219          */
5220         unsigned char *addr = (unsigned char *)&msg[1];
5221         int err = -1;
5222
5223         if (is_valid_ether_addr(addr))
5224                 err = igb_set_vf_mac(adapter, vf, addr);
5225
5226         return err;
5227 }
5228
5229 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5230 {
5231         struct e1000_hw *hw = &adapter->hw;
5232         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5233         u32 msg = E1000_VT_MSGTYPE_NACK;
5234
5235         /* if device isn't clear to send it shouldn't be reading either */
5236         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5237             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5238                 igb_write_mbx(hw, &msg, 1, vf);
5239                 vf_data->last_nack = jiffies;
5240         }
5241 }
5242
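/**
 * igb_rcv_msg_from_vf - read and dispatch one mailbox message from a VF
 * @adapter: board private structure
 * @vf: VF index the message came from
 *
 * Handles reset requests immediately; other requests (MAC address,
 * promiscuous, multicast, jumbo size, VLAN) are only honoured once the
 * VF has completed a reset and is marked clear-to-send.  The result is
 * reported back to the VF as an ACK or NACK.
 **/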
5243 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5244 {
5245         struct pci_dev *pdev = adapter->pdev;
5246         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5247         struct e1000_hw *hw = &adapter->hw;
5248         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5249         s32 retval;
5250
5251         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5252
5253         if (retval) {
5254                 /* if receive failed revoke VF CTS status and restart init */
5255                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5256                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5257                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5258                         return;
5259                 goto out;
5260         }
5261
5262         /* this is a message we already processed, do nothing */
5263         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5264                 return;
5265
5266         /*
5267          * until the vf completes a reset it should not be
5268          * allowed to start any configuration.
5269          */
5270
5271         if (msgbuf[0] == E1000_VF_RESET) {
5272                 igb_vf_reset_msg(adapter, vf);
5273                 return;
5274         }
5275
5276         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5277                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5278                         return;
5279                 retval = -1;
5280                 goto out;
5281         }
5282
5283         switch ((msgbuf[0] & 0xFFFF)) {
5284         case E1000_VF_SET_MAC_ADDR:
5285                 retval = -EINVAL;
5286                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5287                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5288                 else
5289                         dev_warn(&pdev->dev,
5290                                  "VF %d attempted to override administratively "
5291                                  "set MAC address\nReload the VF driver to "
5292                                  "resume operations\n", vf);
5293                 break;
5294         case E1000_VF_SET_PROMISC:
5295                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5296                 break;
5297         case E1000_VF_SET_MULTICAST:
5298                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5299                 break;
5300         case E1000_VF_SET_LPE:
5301                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5302                 break;
5303         case E1000_VF_SET_VLAN:
5304                 retval = -1;
5305                 if (vf_data->pf_vlan)
5306                         dev_warn(&pdev->dev,
5307                                  "VF %d attempted to override administratively "
5308                                  "set VLAN tag\nReload the VF driver to "
5309                                  "resume operations\n", vf);
5310                 else
5311                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5312                 break;
5313         default:
5314                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5315                 retval = -1;
5316                 break;
5317         }
5318
5319         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5320 out:
5321         /* notify the VF of the results of what it sent us */
5322         if (retval)
5323                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5324         else
5325                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5326
5327         igb_write_mbx(hw, msgbuf, 1, vf);
5328 }
5329
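/**
 * igb_msg_task - poll the mailbox for every allocated VF
 * @adapter: board private structure
 *
 * Checks each VF for pending reset requests, messages and acks, and
 * dispatches them to the corresponding handlers.
 **/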
5330 static void igb_msg_task(struct igb_adapter *adapter)
5331 {
5332         struct e1000_hw *hw = &adapter->hw;
5333         u32 vf;
5334
5335         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5336                 /* process any reset requests */
5337                 if (!igb_check_for_rst(hw, vf))
5338                         igb_vf_reset_event(adapter, vf);
5339
5340                 /* process any messages pending */
5341                 if (!igb_check_for_msg(hw, vf))
5342                         igb_rcv_msg_from_vf(adapter, vf);
5343
5344                 /* process any acks */
5345                 if (!igb_check_for_ack(hw, vf))
5346                         igb_rcv_ack_from_vf(adapter, vf);
5347         }
5348 }
5349
5350 /**
5351  *  igb_set_uta - Set unicast filter table address
5352  *  @adapter: board private structure
5353  *
5354  *  The unicast table address is a register array of 32-bit registers.
5355  *  The table is meant to be used in a way similar to the MTA; however, due
5356  *  to certain limitations in the hardware it is necessary to set all the
5357  *  hash bits to 1 and use the VMOLR ROPE bit as a promiscuous enable bit
5358  *  to allow VLAN tag stripping when promiscuous mode is enabled.
5359  **/
5360 static void igb_set_uta(struct igb_adapter *adapter)
5361 {
5362         struct e1000_hw *hw = &adapter->hw;
5363         int i;
5364
5365         /* The UTA table only exists on 82576 hardware and newer */
5366         if (hw->mac.type < e1000_82576)
5367                 return;
5368
5369         /* we only need to do this if VMDq is enabled */
5370         if (!adapter->vfs_allocated_count)
5371                 return;
5372
5373         for (i = 0; i < hw->mac.uta_reg_count; i++)
5374                 array_wr32(E1000_UTA, i, ~0);
5375 }
5376
5377 /**
5378  * igb_intr_msi - Interrupt Handler
5379  * @irq: interrupt number
5380  * @data: pointer to a network interface device structure
5381  **/
5382 static irqreturn_t igb_intr_msi(int irq, void *data)
5383 {
5384         struct igb_adapter *adapter = data;
5385         struct igb_q_vector *q_vector = adapter->q_vector[0];
5386         struct e1000_hw *hw = &adapter->hw;
5387         /* read ICR disables interrupts using IAM */
5388         u32 icr = rd32(E1000_ICR);
5389
5390         igb_write_itr(q_vector);
5391
5392         if (icr & E1000_ICR_DRSTA)
5393                 schedule_work(&adapter->reset_task);
5394
5395         if (icr & E1000_ICR_DOUTSYNC) {
5396                 /* HW is reporting DMA is out of sync */
5397                 adapter->stats.doosync++;
5398         }
5399
5400         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5401                 hw->mac.get_link_status = 1;
5402                 if (!test_bit(__IGB_DOWN, &adapter->state))
5403                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5404         }
5405
5406         napi_schedule(&q_vector->napi);
5407
5408         return IRQ_HANDLED;
5409 }
5410
5411 /**
5412  * igb_intr - Legacy Interrupt Handler
5413  * @irq: interrupt number
5414  * @data: pointer to a network interface device structure
5415  **/
5416 static irqreturn_t igb_intr(int irq, void *data)
5417 {
5418         struct igb_adapter *adapter = data;
5419         struct igb_q_vector *q_vector = adapter->q_vector[0];
5420         struct e1000_hw *hw = &adapter->hw;
5421         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5422          * need for the IMC write */
5423         u32 icr = rd32(E1000_ICR);
5424         if (!icr)
5425                 return IRQ_NONE;  /* Not our interrupt */
5426
5427         igb_write_itr(q_vector);
5428
5429         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5430          * not set, then the adapter didn't send an interrupt */
5431         if (!(icr & E1000_ICR_INT_ASSERTED))
5432                 return IRQ_NONE;
5433
5434         if (icr & E1000_ICR_DRSTA)
5435                 schedule_work(&adapter->reset_task);
5436
5437         if (icr & E1000_ICR_DOUTSYNC) {
5438                 /* HW is reporting DMA is out of sync */
5439                 adapter->stats.doosync++;
5440         }
5441
5442         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5443                 hw->mac.get_link_status = 1;
5444                 /* guard against interrupt when we're going down */
5445                 if (!test_bit(__IGB_DOWN, &adapter->state))
5446                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5447         }
5448
5449         napi_schedule(&q_vector->napi);
5450
5451         return IRQ_HANDLED;
5452 }
5453
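/**
 * igb_ring_irq_enable - re-enable interrupts for a vector after polling
 * @q_vector: vector that has finished its NAPI work
 *
 * Updates the adaptive ITR value if dynamic moderation is enabled, then
 * re-arms either the vector's EIMS bit (MSI-X) or the global interrupt
 * mask, unless the adapter is going down.
 **/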
5454 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5455 {
5456         struct igb_adapter *adapter = q_vector->adapter;
5457         struct e1000_hw *hw = &adapter->hw;
5458
5459         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5460             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5461                 if (!adapter->msix_entries)
5462                         igb_set_itr(adapter);
5463                 else
5464                         igb_update_ring_itr(q_vector);
5465         }
5466
5467         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5468                 if (adapter->msix_entries)
5469                         wr32(E1000_EIMS, q_vector->eims_value);
5470                 else
5471                         igb_irq_enable(adapter);
5472         }
5473 }
5474
5475 /**
5476  * igb_poll - NAPI Rx polling callback
5477  * @napi: napi polling structure
5478  * @budget: count of how many packets we should handle
5479  **/
5480 static int igb_poll(struct napi_struct *napi, int budget)
5481 {
5482         struct igb_q_vector *q_vector = container_of(napi,
5483                                                      struct igb_q_vector,
5484                                                      napi);
5485         int tx_clean_complete = 1, work_done = 0;
5486
5487 #ifdef CONFIG_IGB_DCA
5488         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5489                 igb_update_dca(q_vector);
5490 #endif
5491         if (q_vector->tx_ring)
5492                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5493
5494         if (q_vector->rx_ring)
5495                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5496
5497         if (!tx_clean_complete)
5498                 work_done = budget;
5499
5500         /* If not enough Rx work done, exit the polling mode */
5501         if (work_done < budget) {
5502                 napi_complete(napi);
5503                 igb_ring_irq_enable(q_vector);
5504         }
5505
5506         return work_done;
5507 }
5508
5509 /**
5510  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5511  * @adapter: board private structure
5512  * @shhwtstamps: timestamp structure to update
5513  * @regval: unsigned 64bit system time value.
5514  *
5515  * We need to convert the system time value stored in the RX/TXSTMP registers
5516  * into a hwtstamp which can be used by the upper level timestamping functions
5517  */
5518 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5519                                    struct skb_shared_hwtstamps *shhwtstamps,
5520                                    u64 regval)
5521 {
5522         u64 ns;
5523
5524         /*
5525          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, so shift this
5526          * up by 24 bits to match the clock shift we set up earlier.
5527          */
5528         if (adapter->hw.mac.type == e1000_82580)
5529                 regval <<= IGB_82580_TSYNC_SHIFT;
5530
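             /* convert the raw cycle count to nanoseconds and keep the
              * system time / hardware time comparison up to date
              */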
5531         ns = timecounter_cyc2time(&adapter->clock, regval);
5532         timecompare_update(&adapter->compare, ns);
5533         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5534         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5535         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5536 }
5537
5538 /**
5539  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5540  * @q_vector: pointer to q_vector containing needed info
5541  * @buffer_info: pointer to igb_buffer structure
5542  *
5543  * If we were asked to do hardware stamping and such a time stamp is
5544  * available, then it must have been for this skb here, because we
5545  * only allow one such packet into the queue.
5546  */
5547 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5548 {
5549         struct igb_adapter *adapter = q_vector->adapter;
5550         struct e1000_hw *hw = &adapter->hw;
5551         struct skb_shared_hwtstamps shhwtstamps;
5552         u64 regval;
5553
5554         /* exit if the skb was not marked for hw time stamping or no valid TX stamp is available */
5555         if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5556             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5557                 return;
5558
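             /* latch the 64 bit timestamp from the TXSTMPL/TXSTMPH registers */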
5559         regval = rd32(E1000_TXSTMPL);
5560         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5561
5562         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5563         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5564 }
5565
5566 /**
5567  * igb_clean_tx_irq - Reclaim resources after transmit completes
5568  * @q_vector: pointer to q_vector containing needed info
5569  * returns true if ring is completely cleaned
5570  **/
5571 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5572 {
5573         struct igb_adapter *adapter = q_vector->adapter;
5574         struct igb_ring *tx_ring = q_vector->tx_ring;
5575         struct net_device *netdev = tx_ring->netdev;
5576         struct e1000_hw *hw = &adapter->hw;
5577         struct igb_buffer *buffer_info;
5578         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5579         unsigned int total_bytes = 0, total_packets = 0;
5580         unsigned int i, eop, count = 0;
5581         bool cleaned = false;
5582
5583         i = tx_ring->next_to_clean;
5584         eop = tx_ring->buffer_info[i].next_to_watch;
5585         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5586
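             /* reclaim completed buffers: walk the ring up to each descriptor
              * the hardware has marked done (DD), but never process more than
              * one full ring per invocation
              */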
5587         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5588                (count < tx_ring->count)) {
5589                 rmb();  /* read buffer_info after eop_desc status */
5590                 for (cleaned = false; !cleaned; count++) {
5591                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5592                         buffer_info = &tx_ring->buffer_info[i];
5593                         cleaned = (i == eop);
5594
5595                         if (buffer_info->skb) {
5596                                 total_bytes += buffer_info->bytecount;
5597                                 /* gso_segs is currently only valid for tcp */
5598                                 total_packets += buffer_info->gso_segs;
5599                                 igb_tx_hwtstamp(q_vector, buffer_info);
5600                         }
5601
5602                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5603                         tx_desc->wb.status = 0;
5604
5605                         i++;
5606                         if (i == tx_ring->count)
5607                                 i = 0;
5608                 }
5609                 eop = tx_ring->buffer_info[i].next_to_watch;
5610                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5611         }
5612
5613         tx_ring->next_to_clean = i;
5614
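             /* restart the queue if it was stopped and enough descriptors
              * have now been freed
              */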
5615         if (unlikely(count &&
5616                      netif_carrier_ok(netdev) &&
5617                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5618                 /* Make sure that anybody stopping the queue after this
5619                  * sees the new next_to_clean.
5620                  */
5621                 smp_mb();
5622                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5623                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5624                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5625
5626                         u64_stats_update_begin(&tx_ring->tx_syncp);
5627                         tx_ring->tx_stats.restart_queue++;
5628                         u64_stats_update_end(&tx_ring->tx_syncp);
5629                 }
5630         }
5631
5632         if (tx_ring->detect_tx_hung) {
5633                 /* Detect a transmit hang in hardware; this serializes the
5634                  * check with the clearing of time_stamp and movement of i */
5635                 tx_ring->detect_tx_hung = false;
5636                 if (tx_ring->buffer_info[i].time_stamp &&
5637                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5638                                (adapter->tx_timeout_factor * HZ)) &&
5639                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5640
5641                         /* detected Tx unit hang */
5642                         dev_err(tx_ring->dev,
5643                                 "Detected Tx Unit Hang\n"
5644                                 "  Tx Queue             <%d>\n"
5645                                 "  TDH                  <%x>\n"
5646                                 "  TDT                  <%x>\n"
5647                                 "  next_to_use          <%x>\n"
5648                                 "  next_to_clean        <%x>\n"
5649                                 "buffer_info[next_to_clean]\n"
5650                                 "  time_stamp           <%lx>\n"
5651                                 "  next_to_watch        <%x>\n"
5652                                 "  jiffies              <%lx>\n"
5653                                 "  desc.status          <%x>\n",
5654                                 tx_ring->queue_index,
5655                                 readl(tx_ring->head),
5656                                 readl(tx_ring->tail),
5657                                 tx_ring->next_to_use,
5658                                 tx_ring->next_to_clean,
5659                                 tx_ring->buffer_info[eop].time_stamp,
5660                                 eop,
5661                                 jiffies,
5662                                 eop_desc->wb.status);
5663                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5664                 }
5665         }
5666         tx_ring->total_bytes += total_bytes;
5667         tx_ring->total_packets += total_packets;
5668         u64_stats_update_begin(&tx_ring->tx_syncp);
5669         tx_ring->tx_stats.bytes += total_bytes;
5670         tx_ring->tx_stats.packets += total_packets;
5671         u64_stats_update_end(&tx_ring->tx_syncp);
5672         return count < tx_ring->count;
5673 }
5674
5675 /**
5676  * igb_receive_skb - helper function to handle rx indications
5677  * @q_vector: structure containing interrupt and ring information
5678  * @skb: packet to send up
5679  * @vlan_tag: vlan tag for packet
5680  **/
5681 static void igb_receive_skb(struct igb_q_vector *q_vector,
5682                             struct sk_buff *skb,
5683                             u16 vlan_tag)
5684 {
5685         struct igb_adapter *adapter = q_vector->adapter;
5686
5687         if (vlan_tag && adapter->vlgrp)
5688                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5689                                  vlan_tag, skb);
5690         else
5691                 napi_gro_receive(&q_vector->napi, skb);
5692 }
5693
5694 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5695                                        u32 status_err, struct sk_buff *skb)
5696 {
5697         skb_checksum_none_assert(skb);
5698
5699         /* exit if the Ignore Checksum bit is set or Rx checksumming is disabled via ethtool */
5700         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5701              (status_err & E1000_RXD_STAT_IXSM))
5702                 return;
5703
5704         /* TCP/UDP checksum error bit is set */
5705         if (status_err &
5706             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5707                 /*
5708                  * work around an errata where the TCPE (aka L4E) bit is set
5709                  * incorrectly on 64 byte (60 byte w/o CRC) SCTP packets;
5710                  * let the stack verify the crc32c instead
5711                  */
5712                 if ((skb->len == 60) &&
5713                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5714                         u64_stats_update_begin(&ring->rx_syncp);
5715                         ring->rx_stats.csum_err++;
5716                         u64_stats_update_end(&ring->rx_syncp);
5717                 }
5718                 /* let the stack verify checksum errors */
5719                 return;
5720         }
5721         /* It must be a TCP or UDP packet with a valid checksum */
5722         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5723                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5724
5725         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5726 }
5727
5728 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5729                                    struct sk_buff *skb)
5730 {
5731         struct igb_adapter *adapter = q_vector->adapter;
5732         struct e1000_hw *hw = &adapter->hw;
5733         u64 regval;
5734
5735         /*
5736          * If this bit is set, then the RX registers contain the time stamp. No
5737          * other packet will be time stamped until we read these registers, so
5738          * read the registers to make them available again. Because only one
5739          * packet can be time stamped at a time, we know that the register
5740          * values must belong to this one here and therefore we don't need to
5741          * compare any of the additional attributes stored for it.
5742          *
5743          * If nothing went wrong, then it should have a shared tx_flags that we
5744          * can turn into a skb_shared_hwtstamps.
5745          */
5746         if (staterr & E1000_RXDADV_STAT_TSIP) {
5747                 u32 *stamp = (u32 *)skb->data;
5748                 regval = le32_to_cpu(*(stamp + 2));
5749                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5750                 skb_pull(skb, IGB_TS_HDR_LEN);
5751         } else {
5752                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5753                         return;
5754
5755                 regval = rd32(E1000_RXSTMPL);
5756                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5757         }
5758
5759         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5760 }
5761 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5762                                union e1000_adv_rx_desc *rx_desc)
5763 {
5764         /* HW will not DMA in data larger than the given buffer, even if it
5765          * parses the header (NFS, for example) to be larger.  In that
5766          * case, it fills the header buffer and spills the rest into the page.
5767          */
5768         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5769                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5770         if (hlen > rx_ring->rx_buffer_len)
5771                 hlen = rx_ring->rx_buffer_len;
5772         return hlen;
5773 }
5774
5775 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5776                                  int *work_done, int budget)
5777 {
5778         struct igb_ring *rx_ring = q_vector->rx_ring;
5779         struct net_device *netdev = rx_ring->netdev;
5780         struct device *dev = rx_ring->dev;
5781         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5782         struct igb_buffer *buffer_info, *next_buffer;
5783         struct sk_buff *skb;
5784         bool cleaned = false;
5785         int cleaned_count = 0;
5786         int current_node = numa_node_id();
5787         unsigned int total_bytes = 0, total_packets = 0;
5788         unsigned int i;
5789         u32 staterr;
5790         u16 length;
5791         u16 vlan_tag;
5792
5793         i = rx_ring->next_to_clean;
5794         buffer_info = &rx_ring->buffer_info[i];
5795         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5796         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5797
5798         while (staterr & E1000_RXD_STAT_DD) {
5799                 if (*work_done >= budget)
5800                         break;
5801                 (*work_done)++;
5802                 rmb(); /* read descriptor and rx_buffer_info after status DD */
5803
5804                 skb = buffer_info->skb;
5805                 prefetch(skb->data - NET_IP_ALIGN);
5806                 buffer_info->skb = NULL;
5807
5808                 i++;
5809                 if (i == rx_ring->count)
5810                         i = 0;
5811
5812                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5813                 prefetch(next_rxd);
5814                 next_buffer = &rx_ring->buffer_info[i];
5815
5816                 length = le16_to_cpu(rx_desc->wb.upper.length);
5817                 cleaned = true;
5818                 cleaned_count++;
5819
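                     /* unmap the header buffer; with large (>= 1K) receive
                      * buffers the entire frame is in this buffer, otherwise
                      * only the header is here and the payload follows in the
                      * half-page fragment handled below
                      */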
5820                 if (buffer_info->dma) {
5821                         dma_unmap_single(dev, buffer_info->dma,
5822                                          rx_ring->rx_buffer_len,
5823                                          DMA_FROM_DEVICE);
5824                         buffer_info->dma = 0;
5825                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5826                                 skb_put(skb, length);
5827                                 goto send_up;
5828                         }
5829                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5830                 }
5831
5832                 if (length) {
5833                         dma_unmap_page(dev, buffer_info->page_dma,
5834                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5835                         buffer_info->page_dma = 0;
5836
5837                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5838                                                 buffer_info->page,
5839                                                 buffer_info->page_offset,
5840                                                 length);
5841
5842                         if ((page_count(buffer_info->page) != 1) ||
5843                             (page_to_nid(buffer_info->page) != current_node))
5844                                 buffer_info->page = NULL;
5845                         else
5846                                 get_page(buffer_info->page);
5847
5848                         skb->len += length;
5849                         skb->data_len += length;
5850                         skb->truesize += length;
5851                 }
5852
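                     /* not the end of the frame: stash the in-progress skb in
                      * the next buffer so the remaining fragments are chained
                      * onto it on the next pass
                      */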
5853                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5854                         buffer_info->skb = next_buffer->skb;
5855                         buffer_info->dma = next_buffer->dma;
5856                         next_buffer->skb = skb;
5857                         next_buffer->dma = 0;
5858                         goto next_desc;
5859                 }
5860 send_up:
5861                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5862                         dev_kfree_skb_irq(skb);
5863                         goto next_desc;
5864                 }
5865
5866                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5867                         igb_rx_hwtstamp(q_vector, staterr, skb);
5868                 total_bytes += skb->len;
5869                 total_packets++;
5870
5871                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5872
5873                 skb->protocol = eth_type_trans(skb, netdev);
5874                 skb_record_rx_queue(skb, rx_ring->queue_index);
5875
5876                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5877                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5878
5879                 igb_receive_skb(q_vector, skb, vlan_tag);
5880
5881 next_desc:
5882                 rx_desc->wb.upper.status_error = 0;
5883
5884                 /* return some buffers to hardware, one at a time is too slow */
5885                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5886                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5887                         cleaned_count = 0;
5888                 }
5889
5890                 /* use prefetched values */
5891                 rx_desc = next_rxd;
5892                 buffer_info = next_buffer;
5893                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5894         }
5895
5896         rx_ring->next_to_clean = i;
5897         cleaned_count = igb_desc_unused(rx_ring);
5898
5899         if (cleaned_count)
5900                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5901
5902         rx_ring->total_packets += total_packets;
5903         rx_ring->total_bytes += total_bytes;
5904         u64_stats_update_begin(&rx_ring->rx_syncp);
5905         rx_ring->rx_stats.packets += total_packets;
5906         rx_ring->rx_stats.bytes += total_bytes;
5907         u64_stats_update_end(&rx_ring->rx_syncp);
5908         return cleaned;
5909 }
5910
5911 /**
5912  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5913  * @rx_ring: address of the receive ring to repopulate with buffers
5914  **/
5915 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5916 {
5917         struct net_device *netdev = rx_ring->netdev;
5918         union e1000_adv_rx_desc *rx_desc;
5919         struct igb_buffer *buffer_info;
5920         struct sk_buff *skb;
5921         unsigned int i;
5922         int bufsz;
5923
5924         i = rx_ring->next_to_use;
5925         buffer_info = &rx_ring->buffer_info[i];
5926
5927         bufsz = rx_ring->rx_buffer_len;
5928
5929         while (cleaned_count--) {
5930                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5931
5932                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5933                         if (!buffer_info->page) {
5934                                 buffer_info->page = netdev_alloc_page(netdev);
5935                                 if (unlikely(!buffer_info->page)) {
5936                                         u64_stats_update_begin(&rx_ring->rx_syncp);
5937                                         rx_ring->rx_stats.alloc_failed++;
5938                                         u64_stats_update_end(&rx_ring->rx_syncp);
5939                                         goto no_buffers;
5940                                 }
5941                                 buffer_info->page_offset = 0;
5942                         } else {
5943                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5944                         }
5945                         buffer_info->page_dma =
5946                                 dma_map_page(rx_ring->dev, buffer_info->page,
5947                                              buffer_info->page_offset,
5948                                              PAGE_SIZE / 2,
5949                                              DMA_FROM_DEVICE);
5950                         if (dma_mapping_error(rx_ring->dev,
5951                                               buffer_info->page_dma)) {
5952                                 buffer_info->page_dma = 0;
5953                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5954                                 rx_ring->rx_stats.alloc_failed++;
5955                                 u64_stats_update_end(&rx_ring->rx_syncp);
5956                                 goto no_buffers;
5957                         }
5958                 }
5959
5960                 skb = buffer_info->skb;
5961                 if (!skb) {
5962                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5963                         if (unlikely(!skb)) {
5964                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5965                                 rx_ring->rx_stats.alloc_failed++;
5966                                 u64_stats_update_end(&rx_ring->rx_syncp);
5967                                 goto no_buffers;
5968                         }
5969
5970                         buffer_info->skb = skb;
5971                 }
5972                 if (!buffer_info->dma) {
5973                         buffer_info->dma = dma_map_single(rx_ring->dev,
5974                                                           skb->data,
5975                                                           bufsz,
5976                                                           DMA_FROM_DEVICE);
5977                         if (dma_mapping_error(rx_ring->dev,
5978                                               buffer_info->dma)) {
5979                                 buffer_info->dma = 0;
5980                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5981                                 rx_ring->rx_stats.alloc_failed++;
5982                                 u64_stats_update_end(&rx_ring->rx_syncp);
5983                                 goto no_buffers;
5984                         }
5985                 }
5986                 /* Refresh the desc even if buffer_addrs didn't change because
5987                  * each write-back erases this info. */
5988                 if (bufsz < IGB_RXBUFFER_1024) {
5989                         rx_desc->read.pkt_addr =
5990                              cpu_to_le64(buffer_info->page_dma);
5991                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5992                 } else {
5993                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5994                         rx_desc->read.hdr_addr = 0;
5995                 }
5996
5997                 i++;
5998                 if (i == rx_ring->count)
5999                         i = 0;
6000                 buffer_info = &rx_ring->buffer_info[i];
6001         }
6002
6003 no_buffers:
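             /* if any descriptors were refilled, advance the tail register
              * (one entry behind next_to_use) so hardware can use them
              */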
6004         if (rx_ring->next_to_use != i) {
6005                 rx_ring->next_to_use = i;
6006                 if (i == 0)
6007                         i = (rx_ring->count - 1);
6008                 else
6009                         i--;
6010
6011                 /* Force memory writes to complete before letting h/w
6012                  * know there are new descriptors to fetch.  (Only
6013                  * applicable for weak-ordered memory model archs,
6014                  * such as IA-64). */
6015                 wmb();
6016                 writel(i, rx_ring->tail);
6017         }
6018 }
6019
6020 /**
6021  * igb_mii_ioctl - handle MII ioctls on copper PHY devices
6022  * @netdev: network interface device structure
6023  * @ifr: pointer to the interface request structure
6024  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6025  **/
6026 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6027 {
6028         struct igb_adapter *adapter = netdev_priv(netdev);
6029         struct mii_ioctl_data *data = if_mii(ifr);
6030
6031         if (adapter->hw.phy.media_type != e1000_media_type_copper)
6032                 return -EOPNOTSUPP;
6033
6034         switch (cmd) {
6035         case SIOCGMIIPHY:
6036                 data->phy_id = adapter->hw.phy.addr;
6037                 break;
6038         case SIOCGMIIREG:
6039                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6040                                      &data->val_out))
6041                         return -EIO;
6042                 break;
6043         case SIOCSMIIREG:
6044         default:
6045                 return -EOPNOTSUPP;
6046         }
6047         return 0;
6048 }
6049
6050 /**
6051  * igb_hwtstamp_ioctl - control hardware time stamping
6052  * @netdev: network interface device structure
6053  * @ifr: pointer to the interface request structure
6054  * @cmd: ioctl command (SIOCSHWTSTAMP)
6055  *
6056  * Outgoing time stamping can be enabled and disabled. Play nice and
6057  * disable it when requested, although it shouldn't cause any overhead
6058  * when no packet needs it. At most one packet in the queue may be
6059  * marked for time stamping, otherwise it would be impossible to tell
6060  * for sure to which packet the hardware time stamp belongs.
6061  *
6062  * Incoming time stamping has to be configured via the hardware
6063  * filters. Not all combinations are supported, in particular event
6064  * type has to be specified. Matching the kind of event packet is
6065  * not supported, with the exception of "all V2 events regardless of
6066  * layer 2 or 4".
6067  *
6068  **/
6069 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6070                               struct ifreq *ifr, int cmd)
6071 {
6072         struct igb_adapter *adapter = netdev_priv(netdev);
6073         struct e1000_hw *hw = &adapter->hw;
6074         struct hwtstamp_config config;
6075         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6076         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6077         u32 tsync_rx_cfg = 0;
6078         bool is_l4 = false;
6079         bool is_l2 = false;
6080         u32 regval;
6081
6082         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6083                 return -EFAULT;
6084
6085         /* reserved for future extensions */
6086         if (config.flags)
6087                 return -EINVAL;
6088
6089         switch (config.tx_type) {
6090         case HWTSTAMP_TX_OFF:
6091                 tsync_tx_ctl = 0;
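                     /* fall through */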
6092         case HWTSTAMP_TX_ON:
6093                 break;
6094         default:
6095                 return -ERANGE;
6096         }
6097
6098         switch (config.rx_filter) {
6099         case HWTSTAMP_FILTER_NONE:
6100                 tsync_rx_ctl = 0;
6101                 break;
6102         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6103         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6104         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6105         case HWTSTAMP_FILTER_ALL:
6106                 /*
6107                  * register TSYNCRXCFG must be set, therefore it is not
6108                  * possible to time stamp both Sync and Delay_Req messages
6109                  * => fall back to time stamping all packets
6110                  */
6111                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6112                 config.rx_filter = HWTSTAMP_FILTER_ALL;
6113                 break;
6114         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6115                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6116                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6117                 is_l4 = true;
6118                 break;
6119         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6120                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6121                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6122                 is_l4 = true;
6123                 break;
6124         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6125         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6126                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6127                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6128                 is_l2 = true;
6129                 is_l4 = true;
6130                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6131                 break;
6132         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6133         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6134                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6135                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6136                 is_l2 = true;
6137                 is_l4 = true;
6138                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6139                 break;
6140         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6141         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6142         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6143                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6144                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6145                 is_l2 = true;
6146                 break;
6147         default:
6148                 return -ERANGE;
6149         }
6150
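             /* the 82575 has no hardware time stamping support, so only a
              * request to disable time stamping can succeed
              */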
6151         if (hw->mac.type == e1000_82575) {
6152                 if (tsync_rx_ctl | tsync_tx_ctl)
6153                         return -EINVAL;
6154                 return 0;
6155         }
6156
6157         /*
6158          * Per-packet timestamping only works if all packets are
6159          * timestamped, so enable timestamping in all packets as
6160          * long as one rx filter was configured.
6161          */
6162         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6163                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6164                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6165         }
6166
6167         /* enable/disable TX */
6168         regval = rd32(E1000_TSYNCTXCTL);
6169         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6170         regval |= tsync_tx_ctl;
6171         wr32(E1000_TSYNCTXCTL, regval);
6172
6173         /* enable/disable RX */
6174         regval = rd32(E1000_TSYNCRXCTL);
6175         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6176         regval |= tsync_rx_ctl;
6177         wr32(E1000_TSYNCRXCTL, regval);
6178
6179         /* define which PTP packets are time stamped */
6180         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6181
6182         /* define ethertype filter for timestamped packets */
6183         if (is_l2)
6184                 wr32(E1000_ETQF(3),
6185                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6186                                  E1000_ETQF_1588 | /* enable timestamping */
6187                                  ETH_P_1588));     /* 1588 eth protocol type */
6188         else
6189                 wr32(E1000_ETQF(3), 0);
6190
6191 #define PTP_PORT 319
6192         /* L4 Queue Filter[3]: filter by destination port and protocol */
6193         if (is_l4) {
6194                 u32 ftqf = (IPPROTO_UDP /* UDP */
6195                         | E1000_FTQF_VF_BP /* VF not compared */
6196                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6197                         | E1000_FTQF_MASK); /* mask all inputs */
6198                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6199
6200                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6201                 wr32(E1000_IMIREXT(3),
6202                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6203                 if (hw->mac.type == e1000_82576) {
6204                         /* enable source port check */
6205                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6206                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6207                 }
6208                 wr32(E1000_FTQF(3), ftqf);
6209         } else {
6210                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6211         }
6212         wrfl();
6213
6214         adapter->hwtstamp_config = config;
6215
6216         /* clear TX/RX time stamp registers, just to be sure */
6217         regval = rd32(E1000_TXSTMPH);
6218         regval = rd32(E1000_RXSTMPH);
6219
6220         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6221                 -EFAULT : 0;
6222 }
6223
6224 /**
6225  * igb_ioctl - dispatch device-specific ioctl requests
6226  * @netdev: network interface device structure
6227  * @ifr: pointer to the interface request structure
6228  * @cmd: ioctl command
6229  **/
6230 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6231 {
6232         switch (cmd) {
6233         case SIOCGMIIPHY:
6234         case SIOCGMIIREG:
6235         case SIOCSMIIREG:
6236                 return igb_mii_ioctl(netdev, ifr, cmd);
6237         case SIOCSHWTSTAMP:
6238                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6239         default:
6240                 return -EOPNOTSUPP;
6241         }
6242 }
6243
6244 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6245 {
6246         struct igb_adapter *adapter = hw->back;
6247         u16 cap_offset;
6248
6249         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6250         if (!cap_offset)
6251                 return -E1000_ERR_CONFIG;
6252
6253         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6254
6255         return 0;
6256 }
6257
6258 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6259 {
6260         struct igb_adapter *adapter = hw->back;
6261         u16 cap_offset;
6262
6263         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6264         if (!cap_offset)
6265                 return -E1000_ERR_CONFIG;
6266
6267         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6268
6269         return 0;
6270 }
6271
6272 static void igb_vlan_rx_register(struct net_device *netdev,
6273                                  struct vlan_group *grp)
6274 {
6275         struct igb_adapter *adapter = netdev_priv(netdev);
6276         struct e1000_hw *hw = &adapter->hw;
6277         u32 ctrl, rctl;
6278
6279         igb_irq_disable(adapter);
6280         adapter->vlgrp = grp;
6281
6282         if (grp) {
6283                 /* enable VLAN tag insert/strip */
6284                 ctrl = rd32(E1000_CTRL);
6285                 ctrl |= E1000_CTRL_VME;
6286                 wr32(E1000_CTRL, ctrl);
6287
6288                 /* Disable CFI check */
6289                 rctl = rd32(E1000_RCTL);
6290                 rctl &= ~E1000_RCTL_CFIEN;
6291                 wr32(E1000_RCTL, rctl);
6292         } else {
6293                 /* disable VLAN tag insert/strip */
6294                 ctrl = rd32(E1000_CTRL);
6295                 ctrl &= ~E1000_CTRL_VME;
6296                 wr32(E1000_CTRL, ctrl);
6297         }
6298
6299         igb_rlpml_set(adapter);
6300
6301         if (!test_bit(__IGB_DOWN, &adapter->state))
6302                 igb_irq_enable(adapter);
6303 }
6304
6305 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6306 {
6307         struct igb_adapter *adapter = netdev_priv(netdev);
6308         struct e1000_hw *hw = &adapter->hw;
6309         int pf_id = adapter->vfs_allocated_count;
6310
6311         /* attempt to add filter to vlvf array */
6312         igb_vlvf_set(adapter, vid, true, pf_id);
6313
6314         /* add the filter since PF can receive vlans w/o entry in vlvf */
6315         igb_vfta_set(hw, vid, true);
6316 }
6317
6318 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6319 {
6320         struct igb_adapter *adapter = netdev_priv(netdev);
6321         struct e1000_hw *hw = &adapter->hw;
6322         int pf_id = adapter->vfs_allocated_count;
6323         s32 err;
6324
6325         igb_irq_disable(adapter);
6326         vlan_group_set_device(adapter->vlgrp, vid, NULL);
6327
6328         if (!test_bit(__IGB_DOWN, &adapter->state))
6329                 igb_irq_enable(adapter);
6330
6331         /* remove vlan from VLVF table array */
6332         err = igb_vlvf_set(adapter, vid, false, pf_id);
6333
6334         /* if vid was not present in VLVF just remove it from table */
6335         if (err)
6336                 igb_vfta_set(hw, vid, false);
6337 }
6338
6339 static void igb_restore_vlan(struct igb_adapter *adapter)
6340 {
6341         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6342
6343         if (adapter->vlgrp) {
6344                 u16 vid;
6345                 for (vid = 0; vid < VLAN_N_VID; vid++) {
6346                         if (!vlan_group_get_device(adapter->vlgrp, vid))
6347                                 continue;
6348                         igb_vlan_rx_add_vid(adapter->netdev, vid);
6349                 }
6350         }
6351 }
6352
6353 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6354 {
6355         struct pci_dev *pdev = adapter->pdev;
6356         struct e1000_mac_info *mac = &adapter->hw.mac;
6357
6358         mac->autoneg = 0;
6359
6360         /* Make sure dplx is at most 1 bit and lsb of speed is not set
6361          * for the switch() below to work */
6362         if ((spd & 1) || (dplx & ~1))
6363                 goto err_inval;
6364
6365         /* Fiber NICs only allow 1000 Mbps full duplex */
6366         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6367             (spd != SPEED_1000 ||
6368              dplx != DUPLEX_FULL))
6369                 goto err_inval;
6370
6371         switch (spd + dplx) {
6372         case SPEED_10 + DUPLEX_HALF:
6373                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6374                 break;
6375         case SPEED_10 + DUPLEX_FULL:
6376                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6377                 break;
6378         case SPEED_100 + DUPLEX_HALF:
6379                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6380                 break;
6381         case SPEED_100 + DUPLEX_FULL:
6382                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6383                 break;
6384         case SPEED_1000 + DUPLEX_FULL:
6385                 mac->autoneg = 1;
6386                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6387                 break;
6388         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6389         default:
6390                 goto err_inval;
6391         }
6392         return 0;
6393
6394 err_inval:
6395         dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6396         return -EINVAL;
6397 }
6398
6399 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6400 {
6401         struct net_device *netdev = pci_get_drvdata(pdev);
6402         struct igb_adapter *adapter = netdev_priv(netdev);
6403         struct e1000_hw *hw = &adapter->hw;
6404         u32 ctrl, rctl, status;
6405         u32 wufc = adapter->wol;
6406 #ifdef CONFIG_PM
6407         int retval = 0;
6408 #endif
6409
6410         netif_device_detach(netdev);
6411
6412         if (netif_running(netdev))
6413                 igb_close(netdev);
6414
6415         igb_clear_interrupt_scheme(adapter);
6416
6417 #ifdef CONFIG_PM
6418         retval = pci_save_state(pdev);
6419         if (retval)
6420                 return retval;
6421 #endif
6422
6423         status = rd32(E1000_STATUS);
6424         if (status & E1000_STATUS_LU)
6425                 wufc &= ~E1000_WUFC_LNKC;
6426
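             /* if any wake-up filter is active, arm the wake-up logic;
              * otherwise clear the wake-up control registers
              */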
6427         if (wufc) {
6428                 igb_setup_rctl(adapter);
6429                 igb_set_rx_mode(netdev);
6430
6431                 /* turn on all-multi mode if wake on multicast is enabled */
6432                 if (wufc & E1000_WUFC_MC) {
6433                         rctl = rd32(E1000_RCTL);
6434                         rctl |= E1000_RCTL_MPE;
6435                         wr32(E1000_RCTL, rctl);
6436                 }
6437
6438                 ctrl = rd32(E1000_CTRL);
6439                 /* advertise wake from D3Cold */
6440                 #define E1000_CTRL_ADVD3WUC 0x00100000
6441                 /* phy power management enable */
6442                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6443                 ctrl |= E1000_CTRL_ADVD3WUC;
6444                 wr32(E1000_CTRL, ctrl);
6445
6446                 /* Allow time for pending master requests to run */
6447                 igb_disable_pcie_master(hw);
6448
6449                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6450                 wr32(E1000_WUFC, wufc);
6451         } else {
6452                 wr32(E1000_WUC, 0);
6453                 wr32(E1000_WUFC, 0);
6454         }
6455
6456         *enable_wake = wufc || adapter->en_mng_pt;
6457         if (!*enable_wake)
6458                 igb_power_down_link(adapter);
6459         else
6460                 igb_power_up_link(adapter);
6461
6462         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6463          * would have already happened in close and is redundant. */
6464         igb_release_hw_control(adapter);
6465
6466         pci_disable_device(pdev);
6467
6468         return 0;
6469 }
6470
6471 #ifdef CONFIG_PM
6472 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6473 {
6474         int retval;
6475         bool wake;
6476
6477         retval = __igb_shutdown(pdev, &wake);
6478         if (retval)
6479                 return retval;
6480
6481         if (wake) {
6482                 pci_prepare_to_sleep(pdev);
6483         } else {
6484                 pci_wake_from_d3(pdev, false);
6485                 pci_set_power_state(pdev, PCI_D3hot);
6486         }
6487
6488         return 0;
6489 }
6490
6491 static int igb_resume(struct pci_dev *pdev)
6492 {
6493         struct net_device *netdev = pci_get_drvdata(pdev);
6494         struct igb_adapter *adapter = netdev_priv(netdev);
6495         struct e1000_hw *hw = &adapter->hw;
6496         u32 err;
6497
6498         pci_set_power_state(pdev, PCI_D0);
6499         pci_restore_state(pdev);
6500         pci_save_state(pdev);
6501
6502         err = pci_enable_device_mem(pdev);
6503         if (err) {
6504                 dev_err(&pdev->dev,
6505                         "igb: Cannot enable PCI device from suspend\n");
6506                 return err;
6507         }
6508         pci_set_master(pdev);
6509
6510         pci_enable_wake(pdev, PCI_D3hot, 0);
6511         pci_enable_wake(pdev, PCI_D3cold, 0);
6512
6513         if (igb_init_interrupt_scheme(adapter)) {
6514                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6515                 return -ENOMEM;
6516         }
6517
6518         igb_reset(adapter);
6519
6520         /* let the f/w know that the h/w is now under the control of the
6521          * driver. */
6522         igb_get_hw_control(adapter);
6523
6524         wr32(E1000_WUS, ~0);
6525
6526         if (netif_running(netdev)) {
6527                 err = igb_open(netdev);
6528                 if (err)
6529                         return err;
6530         }
6531
6532         netif_device_attach(netdev);
6533
6534         return 0;
6535 }
6536 #endif
6537
6538 static void igb_shutdown(struct pci_dev *pdev)
6539 {
6540         bool wake;
6541
6542         __igb_shutdown(pdev, &wake);
6543
6544         if (system_state == SYSTEM_POWER_OFF) {
6545                 pci_wake_from_d3(pdev, wake);
6546                 pci_set_power_state(pdev, PCI_D3hot);
6547         }
6548 }
6549
6550 #ifdef CONFIG_NET_POLL_CONTROLLER
6551 /*
6552  * Polling 'interrupt' - used by things like netconsole to send skbs
6553  * without having to re-enable interrupts. It's not called while
6554  * the interrupt routine is executing.
6555  */
6556 static void igb_netpoll(struct net_device *netdev)
6557 {
6558         struct igb_adapter *adapter = netdev_priv(netdev);
6559         struct e1000_hw *hw = &adapter->hw;
6560         int i;
6561
6562         if (!adapter->msix_entries) {
6563                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6564                 igb_irq_disable(adapter);
6565                 napi_schedule(&q_vector->napi);
6566                 return;
6567         }
6568
6569         for (i = 0; i < adapter->num_q_vectors; i++) {
6570                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6571                 wr32(E1000_EIMC, q_vector->eims_value);
6572                 napi_schedule(&q_vector->napi);
6573         }
6574 }
6575 #endif /* CONFIG_NET_POLL_CONTROLLER */
6576
6577 /**
6578  * igb_io_error_detected - called when PCI error is detected
6579  * @pdev: Pointer to PCI device
6580  * @state: The current pci connection state
6581  *
6582  * This function is called after a PCI bus error affecting
6583  * this device has been detected.
6584  */
6585 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6586                                               pci_channel_state_t state)
6587 {
6588         struct net_device *netdev = pci_get_drvdata(pdev);
6589         struct igb_adapter *adapter = netdev_priv(netdev);
6590
6591         netif_device_detach(netdev);
6592
6593         if (state == pci_channel_io_perm_failure)
6594                 return PCI_ERS_RESULT_DISCONNECT;
6595
6596         if (netif_running(netdev))
6597                 igb_down(adapter);
6598         pci_disable_device(pdev);
6599
6600         /* Request a slot reset. */
6601         return PCI_ERS_RESULT_NEED_RESET;
6602 }
6603
6604 /**
6605  * igb_io_slot_reset - called after the pci bus has been reset.
6606  * @pdev: Pointer to PCI device
6607  *
6608  * Restart the card from scratch, as if from a cold-boot. Implementation
6609  * resembles the first-half of the igb_resume routine.
6610  */
6611 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6612 {
6613         struct net_device *netdev = pci_get_drvdata(pdev);
6614         struct igb_adapter *adapter = netdev_priv(netdev);
6615         struct e1000_hw *hw = &adapter->hw;
6616         pci_ers_result_t result;
6617         int err;
6618
6619         if (pci_enable_device_mem(pdev)) {
6620                 dev_err(&pdev->dev,
6621                         "Cannot re-enable PCI device after reset.\n");
6622                 result = PCI_ERS_RESULT_DISCONNECT;
6623         } else {
6624                 pci_set_master(pdev);
6625                 pci_restore_state(pdev);
6626                 pci_save_state(pdev);
6627
6628                 pci_enable_wake(pdev, PCI_D3hot, 0);
6629                 pci_enable_wake(pdev, PCI_D3cold, 0);
6630
6631                 igb_reset(adapter);
6632                 wr32(E1000_WUS, ~0);
6633                 result = PCI_ERS_RESULT_RECOVERED;
6634         }
6635
6636         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6637         if (err) {
6638                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6639                         "failed 0x%0x\n", err);
6640                 /* non-fatal, continue */
6641         }
6642
6643         return result;
6644 }
6645
6646 /**
6647  * igb_io_resume - called when traffic can start flowing again.
6648  * @pdev: Pointer to PCI device
6649  *
6650  * This callback is called when the error recovery driver tells us that
6651  * its OK to resume normal operation. Implementation resembles the
6652  * second-half of the igb_resume routine.
6653  */
6654 static void igb_io_resume(struct pci_dev *pdev)
6655 {
6656         struct net_device *netdev = pci_get_drvdata(pdev);
6657         struct igb_adapter *adapter = netdev_priv(netdev);
6658
6659         if (netif_running(netdev)) {
6660                 if (igb_up(adapter)) {
6661                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6662                         return;
6663                 }
6664         }
6665
6666         netif_device_attach(netdev);
6667
6668         /* let the f/w know that the h/w is now under the control of the
6669          * driver. */
6670         igb_get_hw_control(adapter);
6671 }
6672
6673 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6674                              u8 qsel)
6675 {
6676         u32 rar_low, rar_high;
6677         struct e1000_hw *hw = &adapter->hw;
6678
6679         /* HW expects these in little endian so we reverse the byte order
6680          * from network order (big endian) to little endian
6681          */
6682         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6683                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6684         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6685
6686         /* Indicate to hardware the Address is Valid. */
6687         rar_high |= E1000_RAH_AV;
6688
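             /* 82575 encodes the pool select as a multiple of E1000_RAH_POOL_1,
              * later MACs treat it as a bit shift
              */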
6689         if (hw->mac.type == e1000_82575)
6690                 rar_high |= E1000_RAH_POOL_1 * qsel;
6691         else
6692                 rar_high |= E1000_RAH_POOL_1 << qsel;
6693
6694         wr32(E1000_RAL(index), rar_low);
6695         wrfl();
6696         wr32(E1000_RAH(index), rar_high);
6697         wrfl();
6698 }
6699
6700 static int igb_set_vf_mac(struct igb_adapter *adapter,
6701                           int vf, unsigned char *mac_addr)
6702 {
6703         struct e1000_hw *hw = &adapter->hw;
6704         /* VF MAC addresses start at the end of the receive address
6705          * registers and move towards the first; a collision should not be possible */
6706         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6707
6708         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6709
6710         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6711
6712         return 0;
6713 }
6714
6715 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6716 {
6717         struct igb_adapter *adapter = netdev_priv(netdev);
6718         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6719                 return -EINVAL;
6720         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6721         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6722         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6723                                       " change effective.");
6724         if (test_bit(__IGB_DOWN, &adapter->state)) {
6725                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6726                          " but the PF device is not up.\n");
6727                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6728                          " attempting to use the VF device.\n");
6729         }
6730         return igb_set_vf_mac(adapter, vf, mac);
6731 }
6732
6733 static int igb_link_mbps(int internal_link_speed)
6734 {
6735         switch (internal_link_speed) {
6736         case SPEED_100:
6737                 return 100;
6738         case SPEED_1000:
6739                 return 1000;
6740         default:
6741                 return 0;
6742         }
6743 }
6744
6745 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6746                                   int link_speed)
6747 {
6748         int rf_dec, rf_int;
6749         u32 bcnrc_val;
6750
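             /* the rate factor programmed into RTTBCNRC is link_speed / tx_rate,
              * split into an integer part and a binary fraction
              */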
6751         if (tx_rate != 0) {
6752                 /* Calculate the rate factor values to set */
6753                 rf_int = link_speed / tx_rate;
6754                 rf_dec = (link_speed - (rf_int * tx_rate));
6755                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6756
6757                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6758                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6759                                E1000_RTTBCNRC_RF_INT_MASK);
6760                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6761         } else {
6762                 bcnrc_val = 0;
6763         }
6764
6765         wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6766         wr32(E1000_RTTBCNRC, bcnrc_val);
6767 }
6768
6769 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6770 {
6771         int actual_link_speed, i;
6772         bool reset_rate = false;
6773
6774         /* VF TX rate limit was not set or not supported */
6775         if ((adapter->vf_rate_link_speed == 0) ||
6776             (adapter->hw.mac.type != e1000_82576))
6777                 return;
6778
6779         actual_link_speed = igb_link_mbps(adapter->link_speed);
6780         if (actual_link_speed != adapter->vf_rate_link_speed) {
6781                 reset_rate = true;
6782                 adapter->vf_rate_link_speed = 0;
6783                 dev_info(&adapter->pdev->dev,
6784                          "Link speed has been changed. VF Transmit "
6785                          "rate is disabled\n");
6786         }
6787
6788         for (i = 0; i < adapter->vfs_allocated_count; i++) {
6789                 if (reset_rate)
6790                         adapter->vf_data[i].tx_rate = 0;
6791
6792                 igb_set_vf_rate_limit(&adapter->hw, i,
6793                                       adapter->vf_data[i].tx_rate,
6794                                       actual_link_speed);
6795         }
6796 }
6797
6798 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6799 {
6800         struct igb_adapter *adapter = netdev_priv(netdev);
6801         struct e1000_hw *hw = &adapter->hw;
6802         int actual_link_speed;
6803
6804         if (hw->mac.type != e1000_82576)
6805                 return -EOPNOTSUPP;
6806
6807         actual_link_speed = igb_link_mbps(adapter->link_speed);
6808         if ((vf >= adapter->vfs_allocated_count) ||
6809             (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6810             (tx_rate < 0) || (tx_rate > actual_link_speed))
6811                 return -EINVAL;
6812
6813         adapter->vf_rate_link_speed = actual_link_speed;
6814         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6815         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6816
6817         return 0;
6818 }
6819
6820 static int igb_ndo_get_vf_config(struct net_device *netdev,
6821                                  int vf, struct ifla_vf_info *ivi)
6822 {
6823         struct igb_adapter *adapter = netdev_priv(netdev);
6824         if (vf >= adapter->vfs_allocated_count)
6825                 return -EINVAL;
6826         ivi->vf = vf;
6827         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6828         ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6829         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6830         ivi->qos = adapter->vf_data[vf].pf_qos;
6831         return 0;
6832 }
6833
6834 static void igb_vmm_control(struct igb_adapter *adapter)
6835 {
6836         struct e1000_hw *hw = &adapter->hw;
6837         u32 reg;
6838
6839         switch (hw->mac.type) {
6840         case e1000_82575:
6841         default:
6842                 /* replication is not supported for 82575 */
6843                 return;
6844         case e1000_82576:
6845                 /* notify HW that the MAC is adding vlan tags */
6846                 reg = rd32(E1000_DTXCTL);
6847                 reg |= E1000_DTXCTL_VLAN_ADDED;
6848                 wr32(E1000_DTXCTL, reg);
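                     /* fall through */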
6849         case e1000_82580:
6850                 /* enable replication vlan tag stripping */
6851                 reg = rd32(E1000_RPLOLR);
6852                 reg |= E1000_RPLOLR_STRVLAN;
6853                 wr32(E1000_RPLOLR, reg);
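                     /* fall through */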
6854         case e1000_i350:
6855                 /* none of the above registers are supported by i350 */
6856                 break;
6857         }
6858
6859         if (adapter->vfs_allocated_count) {
6860                 igb_vmdq_set_loopback_pf(hw, true);
6861                 igb_vmdq_set_replication_pf(hw, true);
6862                 igb_vmdq_set_anti_spoofing_pf(hw, true,
6863                                                 adapter->vfs_allocated_count);
6864         } else {
6865                 igb_vmdq_set_loopback_pf(hw, false);
6866                 igb_vmdq_set_replication_pf(hw, false);
6867         }
6868 }
6869
6870 /* igb_main.c */