Merge branches 'x86-fixes-for-linus', 'sched-fixes-for-linus', 'timers-fixes-for...
[pandora-kernel.git] / drivers / net / igb / igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #ifdef CONFIG_IGB_DCA
49 #include <linux/dca.h>
50 #endif
51 #include "igb.h"
52
53 #define MAJ 3
54 #define MIN 0
55 #define BUILD 6
56 #define KFIX 2
57 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
58 __stringify(BUILD) "-k" __stringify(KFIX)
59 char igb_driver_name[] = "igb";
60 char igb_driver_version[] = DRV_VERSION;
61 static const char igb_driver_string[] =
62                                 "Intel(R) Gigabit Ethernet Network Driver";
63 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
64
65 static const struct e1000_info *igb_info_tbl[] = {
66         [board_82575] = &e1000_82575_info,
67 };
68
69 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
87         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
88         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
89         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
90         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
91         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
92         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
93         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
94         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
95         /* required last entry */
96         {0, }
97 };
98
99 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
100
101 void igb_reset(struct igb_adapter *);
102 static int igb_setup_all_tx_resources(struct igb_adapter *);
103 static int igb_setup_all_rx_resources(struct igb_adapter *);
104 static void igb_free_all_tx_resources(struct igb_adapter *);
105 static void igb_free_all_rx_resources(struct igb_adapter *);
106 static void igb_setup_mrqc(struct igb_adapter *);
107 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
108 static void __devexit igb_remove(struct pci_dev *pdev);
109 static void igb_init_hw_timer(struct igb_adapter *adapter);
110 static int igb_sw_init(struct igb_adapter *);
111 static int igb_open(struct net_device *);
112 static int igb_close(struct net_device *);
113 static void igb_configure_tx(struct igb_adapter *);
114 static void igb_configure_rx(struct igb_adapter *);
115 static void igb_clean_all_tx_rings(struct igb_adapter *);
116 static void igb_clean_all_rx_rings(struct igb_adapter *);
117 static void igb_clean_tx_ring(struct igb_ring *);
118 static void igb_clean_rx_ring(struct igb_ring *);
119 static void igb_set_rx_mode(struct net_device *);
120 static void igb_update_phy_info(unsigned long);
121 static void igb_watchdog(unsigned long);
122 static void igb_watchdog_task(struct work_struct *);
123 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
124 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
125                                                  struct rtnl_link_stats64 *stats);
126 static int igb_change_mtu(struct net_device *, int);
127 static int igb_set_mac(struct net_device *, void *);
128 static void igb_set_uta(struct igb_adapter *adapter);
129 static irqreturn_t igb_intr(int irq, void *);
130 static irqreturn_t igb_intr_msi(int irq, void *);
131 static irqreturn_t igb_msix_other(int irq, void *);
132 static irqreturn_t igb_msix_ring(int irq, void *);
133 #ifdef CONFIG_IGB_DCA
134 static void igb_update_dca(struct igb_q_vector *);
135 static void igb_setup_dca(struct igb_adapter *);
136 #endif /* CONFIG_IGB_DCA */
137 static bool igb_clean_tx_irq(struct igb_q_vector *);
138 static int igb_poll(struct napi_struct *, int);
139 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
140 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
141 static void igb_tx_timeout(struct net_device *);
142 static void igb_reset_task(struct work_struct *);
143 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
144 static void igb_vlan_rx_add_vid(struct net_device *, u16);
145 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
146 static void igb_restore_vlan(struct igb_adapter *);
147 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
148 static void igb_ping_all_vfs(struct igb_adapter *);
149 static void igb_msg_task(struct igb_adapter *);
150 static void igb_vmm_control(struct igb_adapter *);
151 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
152 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
153 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
154 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
155                                int vf, u16 vlan, u8 qos);
156 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
157 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
158                                  struct ifla_vf_info *ivi);
159 static void igb_check_vf_rate_limit(struct igb_adapter *);
160
161 #ifdef CONFIG_PM
162 static int igb_suspend(struct pci_dev *, pm_message_t);
163 static int igb_resume(struct pci_dev *);
164 #endif
165 static void igb_shutdown(struct pci_dev *);
166 #ifdef CONFIG_IGB_DCA
167 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
168 static struct notifier_block dca_notifier = {
169         .notifier_call  = igb_notify_dca,
170         .next           = NULL,
171         .priority       = 0
172 };
173 #endif
174 #ifdef CONFIG_NET_POLL_CONTROLLER
175 /* for netdump / net console */
176 static void igb_netpoll(struct net_device *);
177 #endif
178 #ifdef CONFIG_PCI_IOV
179 static unsigned int max_vfs = 0;
180 module_param(max_vfs, uint, 0);
181 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
182                  "per physical function");
183 #endif /* CONFIG_PCI_IOV */
184
185 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
186                      pci_channel_state_t);
187 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
188 static void igb_io_resume(struct pci_dev *);
189
190 static struct pci_error_handlers igb_err_handler = {
191         .error_detected = igb_io_error_detected,
192         .slot_reset = igb_io_slot_reset,
193         .resume = igb_io_resume,
194 };
195
196
197 static struct pci_driver igb_driver = {
198         .name     = igb_driver_name,
199         .id_table = igb_pci_tbl,
200         .probe    = igb_probe,
201         .remove   = __devexit_p(igb_remove),
202 #ifdef CONFIG_PM
203         /* Power Management Hooks */
204         .suspend  = igb_suspend,
205         .resume   = igb_resume,
206 #endif
207         .shutdown = igb_shutdown,
208         .err_handler = &igb_err_handler
209 };
210
211 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
212 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
213 MODULE_LICENSE("GPL");
214 MODULE_VERSION(DRV_VERSION);
215
216 struct igb_reg_info {
217         u32 ofs;
218         char *name;
219 };
220
221 static const struct igb_reg_info igb_reg_info_tbl[] = {
222
223         /* General Registers */
224         {E1000_CTRL, "CTRL"},
225         {E1000_STATUS, "STATUS"},
226         {E1000_CTRL_EXT, "CTRL_EXT"},
227
228         /* Interrupt Registers */
229         {E1000_ICR, "ICR"},
230
231         /* RX Registers */
232         {E1000_RCTL, "RCTL"},
233         {E1000_RDLEN(0), "RDLEN"},
234         {E1000_RDH(0), "RDH"},
235         {E1000_RDT(0), "RDT"},
236         {E1000_RXDCTL(0), "RXDCTL"},
237         {E1000_RDBAL(0), "RDBAL"},
238         {E1000_RDBAH(0), "RDBAH"},
239
240         /* TX Registers */
241         {E1000_TCTL, "TCTL"},
242         {E1000_TDBAL(0), "TDBAL"},
243         {E1000_TDBAH(0), "TDBAH"},
244         {E1000_TDLEN(0), "TDLEN"},
245         {E1000_TDH(0), "TDH"},
246         {E1000_TDT(0), "TDT"},
247         {E1000_TXDCTL(0), "TXDCTL"},
248         {E1000_TDFH, "TDFH"},
249         {E1000_TDFT, "TDFT"},
250         {E1000_TDFHS, "TDFHS"},
251         {E1000_TDFPC, "TDFPC"},
252
253         /* List Terminator */
254         {}
255 };
256
257 /*
258  * igb_regdump - register printout routine
259  */
260 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
261 {
262         int n = 0;
263         char rname[16];
264         u32 regs[8];
265
266         switch (reginfo->ofs) {
267         case E1000_RDLEN(0):
268                 for (n = 0; n < 4; n++)
269                         regs[n] = rd32(E1000_RDLEN(n));
270                 break;
271         case E1000_RDH(0):
272                 for (n = 0; n < 4; n++)
273                         regs[n] = rd32(E1000_RDH(n));
274                 break;
275         case E1000_RDT(0):
276                 for (n = 0; n < 4; n++)
277                         regs[n] = rd32(E1000_RDT(n));
278                 break;
279         case E1000_RXDCTL(0):
280                 for (n = 0; n < 4; n++)
281                         regs[n] = rd32(E1000_RXDCTL(n));
282                 break;
283         case E1000_RDBAL(0):
284                 for (n = 0; n < 4; n++)
285                         regs[n] = rd32(E1000_RDBAL(n));
286                 break;
287         case E1000_RDBAH(0):
288                 for (n = 0; n < 4; n++)
289                         regs[n] = rd32(E1000_RDBAH(n));
290                 break;
291         case E1000_TDBAL(0):
292                 for (n = 0; n < 4; n++)
293                         regs[n] = rd32(E1000_RDBAL(n));
294                 break;
295         case E1000_TDBAH(0):
296                 for (n = 0; n < 4; n++)
297                         regs[n] = rd32(E1000_TDBAH(n));
298                 break;
299         case E1000_TDLEN(0):
300                 for (n = 0; n < 4; n++)
301                         regs[n] = rd32(E1000_TDLEN(n));
302                 break;
303         case E1000_TDH(0):
304                 for (n = 0; n < 4; n++)
305                         regs[n] = rd32(E1000_TDH(n));
306                 break;
307         case E1000_TDT(0):
308                 for (n = 0; n < 4; n++)
309                         regs[n] = rd32(E1000_TDT(n));
310                 break;
311         case E1000_TXDCTL(0):
312                 for (n = 0; n < 4; n++)
313                         regs[n] = rd32(E1000_TXDCTL(n));
314                 break;
315         default:
316                 printk(KERN_INFO "%-15s %08x\n",
317                         reginfo->name, rd32(reginfo->ofs));
318                 return;
319         }
320
321         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
322         printk(KERN_INFO "%-15s ", rname);
323         for (n = 0; n < 4; n++)
324                 printk(KERN_CONT "%08x ", regs[n]);
325         printk(KERN_CONT "\n");
326 }
327
328 /*
329  * igb_dump - Print registers, tx-rings and rx-rings
330  */
331 static void igb_dump(struct igb_adapter *adapter)
332 {
333         struct net_device *netdev = adapter->netdev;
334         struct e1000_hw *hw = &adapter->hw;
335         struct igb_reg_info *reginfo;
336         int n = 0;
337         struct igb_ring *tx_ring;
338         union e1000_adv_tx_desc *tx_desc;
339         struct my_u0 { u64 a; u64 b; } *u0;
340         struct igb_buffer *buffer_info;
341         struct igb_ring *rx_ring;
342         union e1000_adv_rx_desc *rx_desc;
343         u32 staterr;
344         int i = 0;
345
346         if (!netif_msg_hw(adapter))
347                 return;
348
349         /* Print netdevice Info */
350         if (netdev) {
351                 dev_info(&adapter->pdev->dev, "Net device Info\n");
352                 printk(KERN_INFO "Device Name     state            "
353                         "trans_start      last_rx\n");
354                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
355                 netdev->name,
356                 netdev->state,
357                 netdev->trans_start,
358                 netdev->last_rx);
359         }
360
361         /* Print Registers */
362         dev_info(&adapter->pdev->dev, "Register Dump\n");
363         printk(KERN_INFO " Register Name   Value\n");
364         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
365              reginfo->name; reginfo++) {
366                 igb_regdump(hw, reginfo);
367         }
368
369         /* Print TX Ring Summary */
370         if (!netdev || !netif_running(netdev))
371                 goto exit;
372
373         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
374         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
375                 " leng ntw timestamp\n");
376         for (n = 0; n < adapter->num_tx_queues; n++) {
377                 tx_ring = adapter->tx_ring[n];
378                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
379                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
380                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
381                            (u64)buffer_info->dma,
382                            buffer_info->length,
383                            buffer_info->next_to_watch,
384                            (u64)buffer_info->time_stamp);
385         }
386
387         /* Print TX Rings */
388         if (!netif_msg_tx_done(adapter))
389                 goto rx_ring_summary;
390
391         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
392
393         /* Transmit Descriptor Formats
394          *
395          * Advanced Transmit Descriptor
396          *   +--------------------------------------------------------------+
397          * 0 |         Buffer Address [63:0]                                |
398          *   +--------------------------------------------------------------+
399          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
400          *   +--------------------------------------------------------------+
401          *   63      46 45    40 39 38 36 35 32 31   24             15       0
402          */
403
404         for (n = 0; n < adapter->num_tx_queues; n++) {
405                 tx_ring = adapter->tx_ring[n];
406                 printk(KERN_INFO "------------------------------------\n");
407                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
408                 printk(KERN_INFO "------------------------------------\n");
409                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
410                         "[PlPOCIStDDM Ln] [bi->dma       ] "
411                         "leng  ntw timestamp        bi->skb\n");
412
413                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
414                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
415                         buffer_info = &tx_ring->buffer_info[i];
416                         u0 = (struct my_u0 *)tx_desc;
417                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
418                                 " %04X  %3X %016llX %p", i,
419                                 le64_to_cpu(u0->a),
420                                 le64_to_cpu(u0->b),
421                                 (u64)buffer_info->dma,
422                                 buffer_info->length,
423                                 buffer_info->next_to_watch,
424                                 (u64)buffer_info->time_stamp,
425                                 buffer_info->skb);
426                         if (i == tx_ring->next_to_use &&
427                                 i == tx_ring->next_to_clean)
428                                 printk(KERN_CONT " NTC/U\n");
429                         else if (i == tx_ring->next_to_use)
430                                 printk(KERN_CONT " NTU\n");
431                         else if (i == tx_ring->next_to_clean)
432                                 printk(KERN_CONT " NTC\n");
433                         else
434                                 printk(KERN_CONT "\n");
435
436                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
437                                 print_hex_dump(KERN_INFO, "",
438                                         DUMP_PREFIX_ADDRESS,
439                                         16, 1, phys_to_virt(buffer_info->dma),
440                                         buffer_info->length, true);
441                 }
442         }
443
444         /* Print RX Rings Summary */
445 rx_ring_summary:
446         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
447         printk(KERN_INFO "Queue [NTU] [NTC]\n");
448         for (n = 0; n < adapter->num_rx_queues; n++) {
449                 rx_ring = adapter->rx_ring[n];
450                 printk(KERN_INFO " %5d %5X %5X\n", n,
451                            rx_ring->next_to_use, rx_ring->next_to_clean);
452         }
453
454         /* Print RX Rings */
455         if (!netif_msg_rx_status(adapter))
456                 goto exit;
457
458         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
459
460         /* Advanced Receive Descriptor (Read) Format
461          *    63                                           1        0
462          *    +-----------------------------------------------------+
463          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
464          *    +----------------------------------------------+------+
465          *  8 |       Header Buffer Address [63:1]           |  DD  |
466          *    +-----------------------------------------------------+
467          *
468          *
469          * Advanced Receive Descriptor (Write-Back) Format
470          *
471          *   63       48 47    32 31  30      21 20 17 16   4 3     0
472          *   +------------------------------------------------------+
473          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
474          *   | Checksum   Ident  |   |           |    | Type | Type |
475          *   +------------------------------------------------------+
476          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
477          *   +------------------------------------------------------+
478          *   63       48 47    32 31            20 19               0
479          */
480
481         for (n = 0; n < adapter->num_rx_queues; n++) {
482                 rx_ring = adapter->rx_ring[n];
483                 printk(KERN_INFO "------------------------------------\n");
484                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
485                 printk(KERN_INFO "------------------------------------\n");
486                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
487                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
488                         "<-- Adv Rx Read format\n");
489                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
490                         "[vl er S cks ln] ---------------- [bi->skb] "
491                         "<-- Adv Rx Write-Back format\n");
492
493                 for (i = 0; i < rx_ring->count; i++) {
494                         buffer_info = &rx_ring->buffer_info[i];
495                         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
496                         u0 = (struct my_u0 *)rx_desc;
497                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
498                         if (staterr & E1000_RXD_STAT_DD) {
499                                 /* Descriptor Done */
500                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
501                                         "%016llX ---------------- %p", i,
502                                         le64_to_cpu(u0->a),
503                                         le64_to_cpu(u0->b),
504                                         buffer_info->skb);
505                         } else {
506                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
507                                         "%016llX %016llX %p", i,
508                                         le64_to_cpu(u0->a),
509                                         le64_to_cpu(u0->b),
510                                         (u64)buffer_info->dma,
511                                         buffer_info->skb);
512
513                                 if (netif_msg_pktdata(adapter)) {
514                                         print_hex_dump(KERN_INFO, "",
515                                                 DUMP_PREFIX_ADDRESS,
516                                                 16, 1,
517                                                 phys_to_virt(buffer_info->dma),
518                                                 rx_ring->rx_buffer_len, true);
519                                         if (rx_ring->rx_buffer_len
520                                                 < IGB_RXBUFFER_1024)
521                                                 print_hex_dump(KERN_INFO, "",
522                                                   DUMP_PREFIX_ADDRESS,
523                                                   16, 1,
524                                                   phys_to_virt(
525                                                     buffer_info->page_dma +
526                                                     buffer_info->page_offset),
527                                                   PAGE_SIZE/2, true);
528                                 }
529                         }
530
531                         if (i == rx_ring->next_to_use)
532                                 printk(KERN_CONT " NTU\n");
533                         else if (i == rx_ring->next_to_clean)
534                                 printk(KERN_CONT " NTC\n");
535                         else
536                                 printk(KERN_CONT "\n");
537
538                 }
539         }
540
541 exit:
542         return;
543 }
544
545
546 /**
547  * igb_read_clock - read raw cycle counter (to be used by time counter)
548  */
549 static cycle_t igb_read_clock(const struct cyclecounter *tc)
550 {
551         struct igb_adapter *adapter =
552                 container_of(tc, struct igb_adapter, cycles);
553         struct e1000_hw *hw = &adapter->hw;
554         u64 stamp = 0;
555         int shift = 0;
556
557         /*
558          * The timestamp latches on lowest register read. For the 82580
559          * the lowest register is SYSTIMR instead of SYSTIML.  However we never
560          * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
561          */
562         if (hw->mac.type == e1000_82580) {
563                 stamp = rd32(E1000_SYSTIMR) >> 8;
564                 shift = IGB_82580_TSYNC_SHIFT;
565         }
566
567         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
568         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
569         return stamp;
570 }
571
572 /**
573  * igb_get_hw_dev - return device
574  * used by hardware layer to print debugging information
575  **/
576 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
577 {
578         struct igb_adapter *adapter = hw->back;
579         return adapter->netdev;
580 }
581
582 /**
583  * igb_init_module - Driver Registration Routine
584  *
585  * igb_init_module is the first routine called when the driver is
586  * loaded. All it does is register with the PCI subsystem.
587  **/
588 static int __init igb_init_module(void)
589 {
590         int ret;
591         printk(KERN_INFO "%s - version %s\n",
592                igb_driver_string, igb_driver_version);
593
594         printk(KERN_INFO "%s\n", igb_copyright);
595
596 #ifdef CONFIG_IGB_DCA
597         dca_register_notify(&dca_notifier);
598 #endif
599         ret = pci_register_driver(&igb_driver);
600         return ret;
601 }
602
603 module_init(igb_init_module);
604
605 /**
606  * igb_exit_module - Driver Exit Cleanup Routine
607  *
608  * igb_exit_module is called just before the driver is removed
609  * from memory.
610  **/
611 static void __exit igb_exit_module(void)
612 {
613 #ifdef CONFIG_IGB_DCA
614         dca_unregister_notify(&dca_notifier);
615 #endif
616         pci_unregister_driver(&igb_driver);
617 }
618
619 module_exit(igb_exit_module);
620
621 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
622 /**
623  * igb_cache_ring_register - Descriptor ring to register mapping
624  * @adapter: board private structure to initialize
625  *
626  * Once we know the feature-set enabled for the device, we'll cache
627  * the register offset the descriptor ring is assigned to.
628  **/
629 static void igb_cache_ring_register(struct igb_adapter *adapter)
630 {
631         int i = 0, j = 0;
632         u32 rbase_offset = adapter->vfs_allocated_count;
633
634         switch (adapter->hw.mac.type) {
635         case e1000_82576:
636                 /* The queues are allocated for virtualization such that VF 0
637                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
638                  * In order to avoid collision we start at the first free queue
639                  * and continue consuming queues in the same sequence
640                  */
641                 if (adapter->vfs_allocated_count) {
642                         for (; i < adapter->rss_queues; i++)
643                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
644                                                                Q_IDX_82576(i);
645                 }
646         case e1000_82575:
647         case e1000_82580:
648         case e1000_i350:
649         default:
650                 for (; i < adapter->num_rx_queues; i++)
651                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
652                 for (; j < adapter->num_tx_queues; j++)
653                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
654                 break;
655         }
656 }
657
658 static void igb_free_queues(struct igb_adapter *adapter)
659 {
660         int i;
661
662         for (i = 0; i < adapter->num_tx_queues; i++) {
663                 kfree(adapter->tx_ring[i]);
664                 adapter->tx_ring[i] = NULL;
665         }
666         for (i = 0; i < adapter->num_rx_queues; i++) {
667                 kfree(adapter->rx_ring[i]);
668                 adapter->rx_ring[i] = NULL;
669         }
670         adapter->num_rx_queues = 0;
671         adapter->num_tx_queues = 0;
672 }
673
674 /**
675  * igb_alloc_queues - Allocate memory for all rings
676  * @adapter: board private structure to initialize
677  *
678  * We allocate one ring per queue at run-time since we don't know the
679  * number of queues at compile-time.
680  **/
681 static int igb_alloc_queues(struct igb_adapter *adapter)
682 {
683         struct igb_ring *ring;
684         int i;
685
686         for (i = 0; i < adapter->num_tx_queues; i++) {
687                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
688                 if (!ring)
689                         goto err;
690                 ring->count = adapter->tx_ring_count;
691                 ring->queue_index = i;
692                 ring->dev = &adapter->pdev->dev;
693                 ring->netdev = adapter->netdev;
694                 /* For 82575, context index must be unique per ring. */
695                 if (adapter->hw.mac.type == e1000_82575)
696                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
697                 adapter->tx_ring[i] = ring;
698         }
699
700         for (i = 0; i < adapter->num_rx_queues; i++) {
701                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
702                 if (!ring)
703                         goto err;
704                 ring->count = adapter->rx_ring_count;
705                 ring->queue_index = i;
706                 ring->dev = &adapter->pdev->dev;
707                 ring->netdev = adapter->netdev;
708                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
709                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
710                 /* set flag indicating ring supports SCTP checksum offload */
711                 if (adapter->hw.mac.type >= e1000_82576)
712                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
713                 adapter->rx_ring[i] = ring;
714         }
715
716         igb_cache_ring_register(adapter);
717
718         return 0;
719
720 err:
721         igb_free_queues(adapter);
722
723         return -ENOMEM;
724 }
725
726 #define IGB_N0_QUEUE -1
727 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
728 {
729         u32 msixbm = 0;
730         struct igb_adapter *adapter = q_vector->adapter;
731         struct e1000_hw *hw = &adapter->hw;
732         u32 ivar, index;
733         int rx_queue = IGB_N0_QUEUE;
734         int tx_queue = IGB_N0_QUEUE;
735
736         if (q_vector->rx_ring)
737                 rx_queue = q_vector->rx_ring->reg_idx;
738         if (q_vector->tx_ring)
739                 tx_queue = q_vector->tx_ring->reg_idx;
740
741         switch (hw->mac.type) {
742         case e1000_82575:
743                 /* The 82575 assigns vectors using a bitmask, which matches the
744                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
745                    or more queues to a vector, we write the appropriate bits
746                    into the MSIXBM register for that vector. */
747                 if (rx_queue > IGB_N0_QUEUE)
748                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
749                 if (tx_queue > IGB_N0_QUEUE)
750                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
751                 if (!adapter->msix_entries && msix_vector == 0)
752                         msixbm |= E1000_EIMS_OTHER;
753                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
754                 q_vector->eims_value = msixbm;
755                 break;
756         case e1000_82576:
757                 /* 82576 uses a table-based method for assigning vectors.
758                    Each queue has a single entry in the table to which we write
759                    a vector number along with a "valid" bit.  Sadly, the layout
760                    of the table is somewhat counterintuitive. */
761                 if (rx_queue > IGB_N0_QUEUE) {
762                         index = (rx_queue & 0x7);
763                         ivar = array_rd32(E1000_IVAR0, index);
764                         if (rx_queue < 8) {
765                                 /* vector goes into low byte of register */
766                                 ivar = ivar & 0xFFFFFF00;
767                                 ivar |= msix_vector | E1000_IVAR_VALID;
768                         } else {
769                                 /* vector goes into third byte of register */
770                                 ivar = ivar & 0xFF00FFFF;
771                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
772                         }
773                         array_wr32(E1000_IVAR0, index, ivar);
774                 }
775                 if (tx_queue > IGB_N0_QUEUE) {
776                         index = (tx_queue & 0x7);
777                         ivar = array_rd32(E1000_IVAR0, index);
778                         if (tx_queue < 8) {
779                                 /* vector goes into second byte of register */
780                                 ivar = ivar & 0xFFFF00FF;
781                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
782                         } else {
783                                 /* vector goes into high byte of register */
784                                 ivar = ivar & 0x00FFFFFF;
785                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
786                         }
787                         array_wr32(E1000_IVAR0, index, ivar);
788                 }
789                 q_vector->eims_value = 1 << msix_vector;
790                 break;
791         case e1000_82580:
792         case e1000_i350:
793                 /* 82580 uses the same table-based approach as 82576 but has fewer
794                    entries as a result we carry over for queues greater than 4. */
795                 if (rx_queue > IGB_N0_QUEUE) {
796                         index = (rx_queue >> 1);
797                         ivar = array_rd32(E1000_IVAR0, index);
798                         if (rx_queue & 0x1) {
799                                 /* vector goes into third byte of register */
800                                 ivar = ivar & 0xFF00FFFF;
801                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
802                         } else {
803                                 /* vector goes into low byte of register */
804                                 ivar = ivar & 0xFFFFFF00;
805                                 ivar |= msix_vector | E1000_IVAR_VALID;
806                         }
807                         array_wr32(E1000_IVAR0, index, ivar);
808                 }
809                 if (tx_queue > IGB_N0_QUEUE) {
810                         index = (tx_queue >> 1);
811                         ivar = array_rd32(E1000_IVAR0, index);
812                         if (tx_queue & 0x1) {
813                                 /* vector goes into high byte of register */
814                                 ivar = ivar & 0x00FFFFFF;
815                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
816                         } else {
817                                 /* vector goes into second byte of register */
818                                 ivar = ivar & 0xFFFF00FF;
819                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
820                         }
821                         array_wr32(E1000_IVAR0, index, ivar);
822                 }
823                 q_vector->eims_value = 1 << msix_vector;
824                 break;
825         default:
826                 BUG();
827                 break;
828         }
829
830         /* add q_vector eims value to global eims_enable_mask */
831         adapter->eims_enable_mask |= q_vector->eims_value;
832
833         /* configure q_vector to set itr on first interrupt */
834         q_vector->set_itr = 1;
835 }
836
837 /**
838  * igb_configure_msix - Configure MSI-X hardware
839  *
840  * igb_configure_msix sets up the hardware to properly
841  * generate MSI-X interrupts.
842  **/
843 static void igb_configure_msix(struct igb_adapter *adapter)
844 {
845         u32 tmp;
846         int i, vector = 0;
847         struct e1000_hw *hw = &adapter->hw;
848
849         adapter->eims_enable_mask = 0;
850
851         /* set vector for other causes, i.e. link changes */
852         switch (hw->mac.type) {
853         case e1000_82575:
854                 tmp = rd32(E1000_CTRL_EXT);
855                 /* enable MSI-X PBA support*/
856                 tmp |= E1000_CTRL_EXT_PBA_CLR;
857
858                 /* Auto-Mask interrupts upon ICR read. */
859                 tmp |= E1000_CTRL_EXT_EIAME;
860                 tmp |= E1000_CTRL_EXT_IRCA;
861
862                 wr32(E1000_CTRL_EXT, tmp);
863
864                 /* enable msix_other interrupt */
865                 array_wr32(E1000_MSIXBM(0), vector++,
866                                       E1000_EIMS_OTHER);
867                 adapter->eims_other = E1000_EIMS_OTHER;
868
869                 break;
870
871         case e1000_82576:
872         case e1000_82580:
873         case e1000_i350:
874                 /* Turn on MSI-X capability first, or our settings
875                  * won't stick.  And it will take days to debug. */
876                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
877                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
878                                 E1000_GPIE_NSICR);
879
880                 /* enable msix_other interrupt */
881                 adapter->eims_other = 1 << vector;
882                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
883
884                 wr32(E1000_IVAR_MISC, tmp);
885                 break;
886         default:
887                 /* do nothing, since nothing else supports MSI-X */
888                 break;
889         } /* switch (hw->mac.type) */
890
891         adapter->eims_enable_mask |= adapter->eims_other;
892
893         for (i = 0; i < adapter->num_q_vectors; i++)
894                 igb_assign_vector(adapter->q_vector[i], vector++);
895
896         wrfl();
897 }
898
899 /**
900  * igb_request_msix - Initialize MSI-X interrupts
901  *
902  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
903  * kernel.
904  **/
905 static int igb_request_msix(struct igb_adapter *adapter)
906 {
907         struct net_device *netdev = adapter->netdev;
908         struct e1000_hw *hw = &adapter->hw;
909         int i, err = 0, vector = 0;
910
911         err = request_irq(adapter->msix_entries[vector].vector,
912                           igb_msix_other, 0, netdev->name, adapter);
913         if (err)
914                 goto out;
915         vector++;
916
917         for (i = 0; i < adapter->num_q_vectors; i++) {
918                 struct igb_q_vector *q_vector = adapter->q_vector[i];
919
920                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
921
922                 if (q_vector->rx_ring && q_vector->tx_ring)
923                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
924                                 q_vector->rx_ring->queue_index);
925                 else if (q_vector->tx_ring)
926                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
927                                 q_vector->tx_ring->queue_index);
928                 else if (q_vector->rx_ring)
929                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
930                                 q_vector->rx_ring->queue_index);
931                 else
932                         sprintf(q_vector->name, "%s-unused", netdev->name);
933
934                 err = request_irq(adapter->msix_entries[vector].vector,
935                                   igb_msix_ring, 0, q_vector->name,
936                                   q_vector);
937                 if (err)
938                         goto out;
939                 vector++;
940         }
941
942         igb_configure_msix(adapter);
943         return 0;
944 out:
945         return err;
946 }
947
948 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
949 {
950         if (adapter->msix_entries) {
951                 pci_disable_msix(adapter->pdev);
952                 kfree(adapter->msix_entries);
953                 adapter->msix_entries = NULL;
954         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
955                 pci_disable_msi(adapter->pdev);
956         }
957 }
958
959 /**
960  * igb_free_q_vectors - Free memory allocated for interrupt vectors
961  * @adapter: board private structure to initialize
962  *
963  * This function frees the memory allocated to the q_vectors.  In addition if
964  * NAPI is enabled it will delete any references to the NAPI struct prior
965  * to freeing the q_vector.
966  **/
967 static void igb_free_q_vectors(struct igb_adapter *adapter)
968 {
969         int v_idx;
970
971         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
972                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
973                 adapter->q_vector[v_idx] = NULL;
974                 if (!q_vector)
975                         continue;
976                 netif_napi_del(&q_vector->napi);
977                 kfree(q_vector);
978         }
979         adapter->num_q_vectors = 0;
980 }
981
982 /**
983  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
984  *
985  * This function resets the device so that it has 0 rx queues, tx queues, and
986  * MSI-X interrupts allocated.
987  */
988 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
989 {
990         igb_free_queues(adapter);
991         igb_free_q_vectors(adapter);
992         igb_reset_interrupt_capability(adapter);
993 }
994
995 /**
996  * igb_set_interrupt_capability - set MSI or MSI-X if supported
997  *
998  * Attempt to configure interrupts using the best available
999  * capabilities of the hardware and kernel.
1000  **/
1001 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1002 {
1003         int err;
1004         int numvecs, i;
1005
1006         /* Number of supported queues. */
1007         adapter->num_rx_queues = adapter->rss_queues;
1008         if (adapter->vfs_allocated_count)
1009                 adapter->num_tx_queues = 1;
1010         else
1011                 adapter->num_tx_queues = adapter->rss_queues;
1012
1013         /* start with one vector for every rx queue */
1014         numvecs = adapter->num_rx_queues;
1015
1016         /* if tx handler is separate add 1 for every tx queue */
1017         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1018                 numvecs += adapter->num_tx_queues;
1019
1020         /* store the number of vectors reserved for queues */
1021         adapter->num_q_vectors = numvecs;
1022
1023         /* add 1 vector for link status interrupts */
1024         numvecs++;
1025         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1026                                         GFP_KERNEL);
1027         if (!adapter->msix_entries)
1028                 goto msi_only;
1029
1030         for (i = 0; i < numvecs; i++)
1031                 adapter->msix_entries[i].entry = i;
1032
1033         err = pci_enable_msix(adapter->pdev,
1034                               adapter->msix_entries,
1035                               numvecs);
1036         if (err == 0)
1037                 goto out;
1038
1039         igb_reset_interrupt_capability(adapter);
1040
1041         /* If we can't do MSI-X, try MSI */
1042 msi_only:
1043 #ifdef CONFIG_PCI_IOV
1044         /* disable SR-IOV for non MSI-X configurations */
1045         if (adapter->vf_data) {
1046                 struct e1000_hw *hw = &adapter->hw;
1047                 /* disable iov and allow time for transactions to clear */
1048                 pci_disable_sriov(adapter->pdev);
1049                 msleep(500);
1050
1051                 kfree(adapter->vf_data);
1052                 adapter->vf_data = NULL;
1053                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1054                 msleep(100);
1055                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1056         }
1057 #endif
1058         adapter->vfs_allocated_count = 0;
1059         adapter->rss_queues = 1;
1060         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1061         adapter->num_rx_queues = 1;
1062         adapter->num_tx_queues = 1;
1063         adapter->num_q_vectors = 1;
1064         if (!pci_enable_msi(adapter->pdev))
1065                 adapter->flags |= IGB_FLAG_HAS_MSI;
1066 out:
1067         /* Notify the stack of the (possibly) reduced queue counts. */
1068         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1069         return netif_set_real_num_rx_queues(adapter->netdev,
1070                                             adapter->num_rx_queues);
1071 }
1072
1073 /**
1074  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1075  * @adapter: board private structure to initialize
1076  *
1077  * We allocate one q_vector per queue interrupt.  If allocation fails we
1078  * return -ENOMEM.
1079  **/
1080 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1081 {
1082         struct igb_q_vector *q_vector;
1083         struct e1000_hw *hw = &adapter->hw;
1084         int v_idx;
1085
1086         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1087                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1088                 if (!q_vector)
1089                         goto err_out;
1090                 q_vector->adapter = adapter;
1091                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1092                 q_vector->itr_val = IGB_START_ITR;
1093                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1094                 adapter->q_vector[v_idx] = q_vector;
1095         }
1096         return 0;
1097
1098 err_out:
1099         igb_free_q_vectors(adapter);
1100         return -ENOMEM;
1101 }
1102
1103 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1104                                       int ring_idx, int v_idx)
1105 {
1106         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1107
1108         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1109         q_vector->rx_ring->q_vector = q_vector;
1110         q_vector->itr_val = adapter->rx_itr_setting;
1111         if (q_vector->itr_val && q_vector->itr_val <= 3)
1112                 q_vector->itr_val = IGB_START_ITR;
1113 }
1114
1115 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1116                                       int ring_idx, int v_idx)
1117 {
1118         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1119
1120         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1121         q_vector->tx_ring->q_vector = q_vector;
1122         q_vector->itr_val = adapter->tx_itr_setting;
1123         if (q_vector->itr_val && q_vector->itr_val <= 3)
1124                 q_vector->itr_val = IGB_START_ITR;
1125 }
1126
1127 /**
1128  * igb_map_ring_to_vector - maps allocated queues to vectors
1129  *
1130  * This function maps the recently allocated queues to vectors.
1131  **/
1132 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1133 {
1134         int i;
1135         int v_idx = 0;
1136
1137         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1138             (adapter->num_q_vectors < adapter->num_tx_queues))
1139                 return -ENOMEM;
1140
1141         if (adapter->num_q_vectors >=
1142             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1143                 for (i = 0; i < adapter->num_rx_queues; i++)
1144                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1145                 for (i = 0; i < adapter->num_tx_queues; i++)
1146                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1147         } else {
1148                 for (i = 0; i < adapter->num_rx_queues; i++) {
1149                         if (i < adapter->num_tx_queues)
1150                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1151                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1152                 }
1153                 for (; i < adapter->num_tx_queues; i++)
1154                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1155         }
1156         return 0;
1157 }
1158
1159 /**
1160  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1161  *
1162  * This function initializes the interrupts and allocates all of the queues.
1163  **/
1164 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1165 {
1166         struct pci_dev *pdev = adapter->pdev;
1167         int err;
1168
1169         err = igb_set_interrupt_capability(adapter);
1170         if (err)
1171                 return err;
1172
1173         err = igb_alloc_q_vectors(adapter);
1174         if (err) {
1175                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1176                 goto err_alloc_q_vectors;
1177         }
1178
1179         err = igb_alloc_queues(adapter);
1180         if (err) {
1181                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1182                 goto err_alloc_queues;
1183         }
1184
1185         err = igb_map_ring_to_vector(adapter);
1186         if (err) {
1187                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1188                 goto err_map_queues;
1189         }
1190
1191
1192         return 0;
1193 err_map_queues:
1194         igb_free_queues(adapter);
1195 err_alloc_queues:
1196         igb_free_q_vectors(adapter);
1197 err_alloc_q_vectors:
1198         igb_reset_interrupt_capability(adapter);
1199         return err;
1200 }
1201
1202 /**
1203  * igb_request_irq - initialize interrupts
1204  *
1205  * Attempts to configure interrupts using the best available
1206  * capabilities of the hardware and kernel.
1207  **/
1208 static int igb_request_irq(struct igb_adapter *adapter)
1209 {
1210         struct net_device *netdev = adapter->netdev;
1211         struct pci_dev *pdev = adapter->pdev;
1212         int err = 0;
1213
1214         if (adapter->msix_entries) {
1215                 err = igb_request_msix(adapter);
1216                 if (!err)
1217                         goto request_done;
1218                 /* fall back to MSI */
1219                 igb_clear_interrupt_scheme(adapter);
1220                 if (!pci_enable_msi(adapter->pdev))
1221                         adapter->flags |= IGB_FLAG_HAS_MSI;
1222                 igb_free_all_tx_resources(adapter);
1223                 igb_free_all_rx_resources(adapter);
1224                 adapter->num_tx_queues = 1;
1225                 adapter->num_rx_queues = 1;
1226                 adapter->num_q_vectors = 1;
1227                 err = igb_alloc_q_vectors(adapter);
1228                 if (err) {
1229                         dev_err(&pdev->dev,
1230                                 "Unable to allocate memory for vectors\n");
1231                         goto request_done;
1232                 }
1233                 err = igb_alloc_queues(adapter);
1234                 if (err) {
1235                         dev_err(&pdev->dev,
1236                                 "Unable to allocate memory for queues\n");
1237                         igb_free_q_vectors(adapter);
1238                         goto request_done;
1239                 }
1240                 igb_setup_all_tx_resources(adapter);
1241                 igb_setup_all_rx_resources(adapter);
1242         } else {
1243                 igb_assign_vector(adapter->q_vector[0], 0);
1244         }
1245
1246         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1247                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1248                                   netdev->name, adapter);
1249                 if (!err)
1250                         goto request_done;
1251
1252                 /* fall back to legacy interrupts */
1253                 igb_reset_interrupt_capability(adapter);
1254                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1255         }
1256
1257         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1258                           netdev->name, adapter);
1259
1260         if (err)
1261                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1262                         err);
1263
1264 request_done:
1265         return err;
1266 }
1267
1268 static void igb_free_irq(struct igb_adapter *adapter)
1269 {
1270         if (adapter->msix_entries) {
1271                 int vector = 0, i;
1272
1273                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1274
1275                 for (i = 0; i < adapter->num_q_vectors; i++) {
1276                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1277                         free_irq(adapter->msix_entries[vector++].vector,
1278                                  q_vector);
1279                 }
1280         } else {
1281                 free_irq(adapter->pdev->irq, adapter);
1282         }
1283 }
1284
1285 /**
1286  * igb_irq_disable - Mask off interrupt generation on the NIC
1287  * @adapter: board private structure
1288  **/
1289 static void igb_irq_disable(struct igb_adapter *adapter)
1290 {
1291         struct e1000_hw *hw = &adapter->hw;
1292
1293         /*
1294          * we need to be careful when disabling interrupts.  The VFs are also
1295          * mapped into these registers and so clearing the bits can cause
1296          * issues on the VF drivers so we only need to clear what we set
1297          */
1298         if (adapter->msix_entries) {
1299                 u32 regval = rd32(E1000_EIAM);
1300                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1301                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1302                 regval = rd32(E1000_EIAC);
1303                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1304         }
1305
1306         wr32(E1000_IAM, 0);
1307         wr32(E1000_IMC, ~0);
1308         wrfl();
1309         if (adapter->msix_entries) {
1310                 int i;
1311                 for (i = 0; i < adapter->num_q_vectors; i++)
1312                         synchronize_irq(adapter->msix_entries[i].vector);
1313         } else {
1314                 synchronize_irq(adapter->pdev->irq);
1315         }
1316 }
1317
1318 /**
1319  * igb_irq_enable - Enable default interrupt generation settings
1320  * @adapter: board private structure
1321  **/
1322 static void igb_irq_enable(struct igb_adapter *adapter)
1323 {
1324         struct e1000_hw *hw = &adapter->hw;
1325
1326         if (adapter->msix_entries) {
1327                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1328                 u32 regval = rd32(E1000_EIAC);
1329                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1330                 regval = rd32(E1000_EIAM);
1331                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1332                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1333                 if (adapter->vfs_allocated_count) {
1334                         wr32(E1000_MBVFIMR, 0xFF);
1335                         ims |= E1000_IMS_VMMB;
1336                 }
1337                 if (adapter->hw.mac.type == e1000_82580)
1338                         ims |= E1000_IMS_DRSTA;
1339
1340                 wr32(E1000_IMS, ims);
1341         } else {
1342                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1343                                 E1000_IMS_DRSTA);
1344                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1345                                 E1000_IMS_DRSTA);
1346         }
1347 }
1348
1349 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1350 {
1351         struct e1000_hw *hw = &adapter->hw;
1352         u16 vid = adapter->hw.mng_cookie.vlan_id;
1353         u16 old_vid = adapter->mng_vlan_id;
1354
1355         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1356                 /* add VID to filter table */
1357                 igb_vfta_set(hw, vid, true);
1358                 adapter->mng_vlan_id = vid;
1359         } else {
1360                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1361         }
1362
1363         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1364             (vid != old_vid) &&
1365             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1366                 /* remove VID from filter table */
1367                 igb_vfta_set(hw, old_vid, false);
1368         }
1369 }
1370
1371 /**
1372  * igb_release_hw_control - release control of the h/w to f/w
1373  * @adapter: address of board private structure
1374  *
1375  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1376  * For ASF and Pass Through versions of f/w this means that the
1377  * driver is no longer loaded.
1378  *
1379  **/
1380 static void igb_release_hw_control(struct igb_adapter *adapter)
1381 {
1382         struct e1000_hw *hw = &adapter->hw;
1383         u32 ctrl_ext;
1384
1385         /* Let firmware take over control of h/w */
1386         ctrl_ext = rd32(E1000_CTRL_EXT);
1387         wr32(E1000_CTRL_EXT,
1388                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1389 }
1390
1391 /**
1392  * igb_get_hw_control - get control of the h/w from f/w
1393  * @adapter: address of board private structure
1394  *
1395  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1396  * For ASF and Pass Through versions of f/w this means that
1397  * the driver is loaded.
1398  *
1399  **/
1400 static void igb_get_hw_control(struct igb_adapter *adapter)
1401 {
1402         struct e1000_hw *hw = &adapter->hw;
1403         u32 ctrl_ext;
1404
1405         /* Let firmware know the driver has taken over */
1406         ctrl_ext = rd32(E1000_CTRL_EXT);
1407         wr32(E1000_CTRL_EXT,
1408                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1409 }
1410
1411 /**
1412  * igb_configure - configure the hardware for RX and TX
1413  * @adapter: private board structure
1414  **/
1415 static void igb_configure(struct igb_adapter *adapter)
1416 {
1417         struct net_device *netdev = adapter->netdev;
1418         int i;
1419
1420         igb_get_hw_control(adapter);
1421         igb_set_rx_mode(netdev);
1422
1423         igb_restore_vlan(adapter);
1424
1425         igb_setup_tctl(adapter);
1426         igb_setup_mrqc(adapter);
1427         igb_setup_rctl(adapter);
1428
1429         igb_configure_tx(adapter);
1430         igb_configure_rx(adapter);
1431
1432         igb_rx_fifo_flush_82575(&adapter->hw);
1433
1434         /* call igb_desc_unused which always leaves
1435          * at least 1 descriptor unused to make sure
1436          * next_to_use != next_to_clean */
1437         for (i = 0; i < adapter->num_rx_queues; i++) {
1438                 struct igb_ring *ring = adapter->rx_ring[i];
1439                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1440         }
1441 }
1442
1443 /**
1444  * igb_power_up_link - Power up the phy/serdes link
1445  * @adapter: address of board private structure
1446  **/
1447 void igb_power_up_link(struct igb_adapter *adapter)
1448 {
1449         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1450                 igb_power_up_phy_copper(&adapter->hw);
1451         else
1452                 igb_power_up_serdes_link_82575(&adapter->hw);
1453 }
1454
1455 /**
1456  * igb_power_down_link - Power down the phy/serdes link
1457  * @adapter: address of board private structure
1458  */
1459 static void igb_power_down_link(struct igb_adapter *adapter)
1460 {
1461         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1462                 igb_power_down_phy_copper_82575(&adapter->hw);
1463         else
1464                 igb_shutdown_serdes_link_82575(&adapter->hw);
1465 }
1466
1467 /**
1468  * igb_up - Open the interface and prepare it to handle traffic
1469  * @adapter: board private structure
1470  **/
1471 int igb_up(struct igb_adapter *adapter)
1472 {
1473         struct e1000_hw *hw = &adapter->hw;
1474         int i;
1475
1476         /* hardware has been reset, we need to reload some things */
1477         igb_configure(adapter);
1478
1479         clear_bit(__IGB_DOWN, &adapter->state);
1480
1481         for (i = 0; i < adapter->num_q_vectors; i++) {
1482                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1483                 napi_enable(&q_vector->napi);
1484         }
1485         if (adapter->msix_entries)
1486                 igb_configure_msix(adapter);
1487         else
1488                 igb_assign_vector(adapter->q_vector[0], 0);
1489
1490         /* Clear any pending interrupts. */
1491         rd32(E1000_ICR);
1492         igb_irq_enable(adapter);
1493
1494         /* notify VFs that reset has been completed */
1495         if (adapter->vfs_allocated_count) {
1496                 u32 reg_data = rd32(E1000_CTRL_EXT);
1497                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1498                 wr32(E1000_CTRL_EXT, reg_data);
1499         }
1500
1501         netif_tx_start_all_queues(adapter->netdev);
1502
1503         /* start the watchdog. */
1504         hw->mac.get_link_status = 1;
1505         schedule_work(&adapter->watchdog_task);
1506
1507         return 0;
1508 }
1509
1510 void igb_down(struct igb_adapter *adapter)
1511 {
1512         struct net_device *netdev = adapter->netdev;
1513         struct e1000_hw *hw = &adapter->hw;
1514         u32 tctl, rctl;
1515         int i;
1516
1517         /* signal that we're down so the interrupt handler does not
1518          * reschedule our watchdog timer */
1519         set_bit(__IGB_DOWN, &adapter->state);
1520
1521         /* disable receives in the hardware */
1522         rctl = rd32(E1000_RCTL);
1523         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1524         /* flush and sleep below */
1525
1526         netif_tx_stop_all_queues(netdev);
1527
1528         /* disable transmits in the hardware */
1529         tctl = rd32(E1000_TCTL);
1530         tctl &= ~E1000_TCTL_EN;
1531         wr32(E1000_TCTL, tctl);
1532         /* flush both disables and wait for them to finish */
1533         wrfl();
1534         msleep(10);
1535
1536         for (i = 0; i < adapter->num_q_vectors; i++) {
1537                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1538                 napi_disable(&q_vector->napi);
1539         }
1540
1541         igb_irq_disable(adapter);
1542
1543         del_timer_sync(&adapter->watchdog_timer);
1544         del_timer_sync(&adapter->phy_info_timer);
1545
1546         netif_carrier_off(netdev);
1547
1548         /* record the stats before reset*/
1549         spin_lock(&adapter->stats64_lock);
1550         igb_update_stats(adapter, &adapter->stats64);
1551         spin_unlock(&adapter->stats64_lock);
1552
1553         adapter->link_speed = 0;
1554         adapter->link_duplex = 0;
1555
1556         if (!pci_channel_offline(adapter->pdev))
1557                 igb_reset(adapter);
1558         igb_clean_all_tx_rings(adapter);
1559         igb_clean_all_rx_rings(adapter);
1560 #ifdef CONFIG_IGB_DCA
1561
1562         /* since we reset the hardware DCA settings were cleared */
1563         igb_setup_dca(adapter);
1564 #endif
1565 }
1566
1567 void igb_reinit_locked(struct igb_adapter *adapter)
1568 {
1569         WARN_ON(in_interrupt());
1570         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1571                 msleep(1);
1572         igb_down(adapter);
1573         igb_up(adapter);
1574         clear_bit(__IGB_RESETTING, &adapter->state);
1575 }
1576
1577 void igb_reset(struct igb_adapter *adapter)
1578 {
1579         struct pci_dev *pdev = adapter->pdev;
1580         struct e1000_hw *hw = &adapter->hw;
1581         struct e1000_mac_info *mac = &hw->mac;
1582         struct e1000_fc_info *fc = &hw->fc;
1583         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1584         u16 hwm;
1585
1586         /* Repartition Pba for greater than 9k mtu
1587          * To take effect CTRL.RST is required.
1588          */
1589         switch (mac->type) {
1590         case e1000_i350:
1591         case e1000_82580:
1592                 pba = rd32(E1000_RXPBS);
1593                 pba = igb_rxpbs_adjust_82580(pba);
1594                 break;
1595         case e1000_82576:
1596                 pba = rd32(E1000_RXPBS);
1597                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1598                 break;
1599         case e1000_82575:
1600         default:
1601                 pba = E1000_PBA_34K;
1602                 break;
1603         }
1604
1605         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1606             (mac->type < e1000_82576)) {
1607                 /* adjust PBA for jumbo frames */
1608                 wr32(E1000_PBA, pba);
1609
1610                 /* To maintain wire speed transmits, the Tx FIFO should be
1611                  * large enough to accommodate two full transmit packets,
1612                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1613                  * the Rx FIFO should be large enough to accommodate at least
1614                  * one full receive packet and is similarly rounded up and
1615                  * expressed in KB. */
1616                 pba = rd32(E1000_PBA);
1617                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1618                 tx_space = pba >> 16;
1619                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1620                 pba &= 0xffff;
1621                 /* the tx fifo also stores 16 bytes of information about the tx
1622                  * but don't include ethernet FCS because hardware appends it */
1623                 min_tx_space = (adapter->max_frame_size +
1624                                 sizeof(union e1000_adv_tx_desc) -
1625                                 ETH_FCS_LEN) * 2;
1626                 min_tx_space = ALIGN(min_tx_space, 1024);
1627                 min_tx_space >>= 10;
1628                 /* software strips receive CRC, so leave room for it */
1629                 min_rx_space = adapter->max_frame_size;
1630                 min_rx_space = ALIGN(min_rx_space, 1024);
1631                 min_rx_space >>= 10;
1632
1633                 /* If current Tx allocation is less than the min Tx FIFO size,
1634                  * and the min Tx FIFO size is less than the current Rx FIFO
1635                  * allocation, take space away from current Rx allocation */
1636                 if (tx_space < min_tx_space &&
1637                     ((min_tx_space - tx_space) < pba)) {
1638                         pba = pba - (min_tx_space - tx_space);
1639
1640                         /* if short on rx space, rx wins and must trump tx
1641                          * adjustment */
1642                         if (pba < min_rx_space)
1643                                 pba = min_rx_space;
1644                 }
1645                 wr32(E1000_PBA, pba);
1646         }
1647
1648         /* flow control settings */
1649         /* The high water mark must be low enough to fit one full frame
1650          * (or the size used for early receive) above it in the Rx FIFO.
1651          * Set it to the lower of:
1652          * - 90% of the Rx FIFO size, or
1653          * - the full Rx FIFO size minus one full frame */
1654         hwm = min(((pba << 10) * 9 / 10),
1655                         ((pba << 10) - 2 * adapter->max_frame_size));
1656
1657         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1658         fc->low_water = fc->high_water - 16;
1659         fc->pause_time = 0xFFFF;
1660         fc->send_xon = 1;
1661         fc->current_mode = fc->requested_mode;
1662
1663         /* disable receive for all VFs and wait one second */
1664         if (adapter->vfs_allocated_count) {
1665                 int i;
1666                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1667                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1668
1669                 /* ping all the active vfs to let them know we are going down */
1670                 igb_ping_all_vfs(adapter);
1671
1672                 /* disable transmits and receives */
1673                 wr32(E1000_VFRE, 0);
1674                 wr32(E1000_VFTE, 0);
1675         }
1676
1677         /* Allow time for pending master requests to run */
1678         hw->mac.ops.reset_hw(hw);
1679         wr32(E1000_WUC, 0);
1680
1681         if (hw->mac.ops.init_hw(hw))
1682                 dev_err(&pdev->dev, "Hardware Error\n");
1683         if (hw->mac.type > e1000_82580) {
1684                 if (adapter->flags & IGB_FLAG_DMAC) {
1685                         u32 reg;
1686
1687                         /*
1688                          * DMA Coalescing high water mark needs to be higher
1689                          * than * the * Rx threshold.  The Rx threshold is
1690                          * currently * pba - 6, so we * should use a high water
1691                          * mark of pba * - 4. */
1692                         hwm = (pba - 4) << 10;
1693
1694                         reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1695                                & E1000_DMACR_DMACTHR_MASK);
1696
1697                         /* transition to L0x or L1 if available..*/
1698                         reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1699
1700                         /* watchdog timer= +-1000 usec in 32usec intervals */
1701                         reg |= (1000 >> 5);
1702                         wr32(E1000_DMACR, reg);
1703
1704                         /* no lower threshold to disable coalescing(smart fifb)
1705                          * -UTRESH=0*/
1706                         wr32(E1000_DMCRTRH, 0);
1707
1708                         /* set hwm to PBA -  2 * max frame size */
1709                         wr32(E1000_FCRTC, hwm);
1710
1711                         /*
1712                          * This sets the time to wait before requesting tran-
1713                          * sition to * low power state to number of usecs needed
1714                          * to receive 1 512 * byte frame at gigabit line rate
1715                          */
1716                         reg = rd32(E1000_DMCTLX);
1717                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
1718
1719                         /* Delay 255 usec before entering Lx state. */
1720                         reg |= 0xFF;
1721                         wr32(E1000_DMCTLX, reg);
1722
1723                         /* free space in Tx packet buffer to wake from DMAC */
1724                         wr32(E1000_DMCTXTH,
1725                              (IGB_MIN_TXPBSIZE -
1726                              (IGB_TX_BUF_4096 + adapter->max_frame_size))
1727                              >> 6);
1728
1729                         /* make low power state decision controlled by DMAC */
1730                         reg = rd32(E1000_PCIEMISC);
1731                         reg |= E1000_PCIEMISC_LX_DECISION;
1732                         wr32(E1000_PCIEMISC, reg);
1733                 } /* end if IGB_FLAG_DMAC set */
1734         }
1735         if (hw->mac.type == e1000_82580) {
1736                 u32 reg = rd32(E1000_PCIEMISC);
1737                 wr32(E1000_PCIEMISC,
1738                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1739         }
1740         if (!netif_running(adapter->netdev))
1741                 igb_power_down_link(adapter);
1742
1743         igb_update_mng_vlan(adapter);
1744
1745         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1746         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1747
1748         igb_get_phy_info(hw);
1749 }
1750
1751 static const struct net_device_ops igb_netdev_ops = {
1752         .ndo_open               = igb_open,
1753         .ndo_stop               = igb_close,
1754         .ndo_start_xmit         = igb_xmit_frame_adv,
1755         .ndo_get_stats64        = igb_get_stats64,
1756         .ndo_set_rx_mode        = igb_set_rx_mode,
1757         .ndo_set_multicast_list = igb_set_rx_mode,
1758         .ndo_set_mac_address    = igb_set_mac,
1759         .ndo_change_mtu         = igb_change_mtu,
1760         .ndo_do_ioctl           = igb_ioctl,
1761         .ndo_tx_timeout         = igb_tx_timeout,
1762         .ndo_validate_addr      = eth_validate_addr,
1763         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1764         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1765         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1766         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1767         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1768         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1769         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1770 #ifdef CONFIG_NET_POLL_CONTROLLER
1771         .ndo_poll_controller    = igb_netpoll,
1772 #endif
1773 };
1774
1775 /**
1776  * igb_probe - Device Initialization Routine
1777  * @pdev: PCI device information struct
1778  * @ent: entry in igb_pci_tbl
1779  *
1780  * Returns 0 on success, negative on failure
1781  *
1782  * igb_probe initializes an adapter identified by a pci_dev structure.
1783  * The OS initialization, configuring of the adapter private structure,
1784  * and a hardware reset occur.
1785  **/
1786 static int __devinit igb_probe(struct pci_dev *pdev,
1787                                const struct pci_device_id *ent)
1788 {
1789         struct net_device *netdev;
1790         struct igb_adapter *adapter;
1791         struct e1000_hw *hw;
1792         u16 eeprom_data = 0;
1793         s32 ret_val;
1794         static int global_quad_port_a; /* global quad port a indication */
1795         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1796         unsigned long mmio_start, mmio_len;
1797         int err, pci_using_dac;
1798         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1799         u8 part_str[E1000_PBANUM_LENGTH];
1800
1801         /* Catch broken hardware that put the wrong VF device ID in
1802          * the PCIe SR-IOV capability.
1803          */
1804         if (pdev->is_virtfn) {
1805                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1806                      pci_name(pdev), pdev->vendor, pdev->device);
1807                 return -EINVAL;
1808         }
1809
1810         err = pci_enable_device_mem(pdev);
1811         if (err)
1812                 return err;
1813
1814         pci_using_dac = 0;
1815         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1816         if (!err) {
1817                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1818                 if (!err)
1819                         pci_using_dac = 1;
1820         } else {
1821                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1822                 if (err) {
1823                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1824                         if (err) {
1825                                 dev_err(&pdev->dev, "No usable DMA "
1826                                         "configuration, aborting\n");
1827                                 goto err_dma;
1828                         }
1829                 }
1830         }
1831
1832         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1833                                            IORESOURCE_MEM),
1834                                            igb_driver_name);
1835         if (err)
1836                 goto err_pci_reg;
1837
1838         pci_enable_pcie_error_reporting(pdev);
1839
1840         pci_set_master(pdev);
1841         pci_save_state(pdev);
1842
1843         err = -ENOMEM;
1844         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1845                                    IGB_ABS_MAX_TX_QUEUES);
1846         if (!netdev)
1847                 goto err_alloc_etherdev;
1848
1849         SET_NETDEV_DEV(netdev, &pdev->dev);
1850
1851         pci_set_drvdata(pdev, netdev);
1852         adapter = netdev_priv(netdev);
1853         adapter->netdev = netdev;
1854         adapter->pdev = pdev;
1855         hw = &adapter->hw;
1856         hw->back = adapter;
1857         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1858
1859         mmio_start = pci_resource_start(pdev, 0);
1860         mmio_len = pci_resource_len(pdev, 0);
1861
1862         err = -EIO;
1863         hw->hw_addr = ioremap(mmio_start, mmio_len);
1864         if (!hw->hw_addr)
1865                 goto err_ioremap;
1866
1867         netdev->netdev_ops = &igb_netdev_ops;
1868         igb_set_ethtool_ops(netdev);
1869         netdev->watchdog_timeo = 5 * HZ;
1870
1871         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1872
1873         netdev->mem_start = mmio_start;
1874         netdev->mem_end = mmio_start + mmio_len;
1875
1876         /* PCI config space info */
1877         hw->vendor_id = pdev->vendor;
1878         hw->device_id = pdev->device;
1879         hw->revision_id = pdev->revision;
1880         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1881         hw->subsystem_device_id = pdev->subsystem_device;
1882
1883         /* Copy the default MAC, PHY and NVM function pointers */
1884         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1885         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1886         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1887         /* Initialize skew-specific constants */
1888         err = ei->get_invariants(hw);
1889         if (err)
1890                 goto err_sw_init;
1891
1892         /* setup the private structure */
1893         err = igb_sw_init(adapter);
1894         if (err)
1895                 goto err_sw_init;
1896
1897         igb_get_bus_info_pcie(hw);
1898
1899         hw->phy.autoneg_wait_to_complete = false;
1900
1901         /* Copper options */
1902         if (hw->phy.media_type == e1000_media_type_copper) {
1903                 hw->phy.mdix = AUTO_ALL_MODES;
1904                 hw->phy.disable_polarity_correction = false;
1905                 hw->phy.ms_type = e1000_ms_hw_default;
1906         }
1907
1908         if (igb_check_reset_block(hw))
1909                 dev_info(&pdev->dev,
1910                         "PHY reset is blocked due to SOL/IDER session.\n");
1911
1912         netdev->features = NETIF_F_SG |
1913                            NETIF_F_IP_CSUM |
1914                            NETIF_F_HW_VLAN_TX |
1915                            NETIF_F_HW_VLAN_RX |
1916                            NETIF_F_HW_VLAN_FILTER;
1917
1918         netdev->features |= NETIF_F_IPV6_CSUM;
1919         netdev->features |= NETIF_F_TSO;
1920         netdev->features |= NETIF_F_TSO6;
1921         netdev->features |= NETIF_F_GRO;
1922
1923         netdev->vlan_features |= NETIF_F_TSO;
1924         netdev->vlan_features |= NETIF_F_TSO6;
1925         netdev->vlan_features |= NETIF_F_IP_CSUM;
1926         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1927         netdev->vlan_features |= NETIF_F_SG;
1928
1929         if (pci_using_dac) {
1930                 netdev->features |= NETIF_F_HIGHDMA;
1931                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1932         }
1933
1934         if (hw->mac.type >= e1000_82576)
1935                 netdev->features |= NETIF_F_SCTP_CSUM;
1936
1937         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1938
1939         /* before reading the NVM, reset the controller to put the device in a
1940          * known good starting state */
1941         hw->mac.ops.reset_hw(hw);
1942
1943         /* make sure the NVM is good */
1944         if (hw->nvm.ops.validate(hw) < 0) {
1945                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1946                 err = -EIO;
1947                 goto err_eeprom;
1948         }
1949
1950         /* copy the MAC address out of the NVM */
1951         if (hw->mac.ops.read_mac_addr(hw))
1952                 dev_err(&pdev->dev, "NVM Read Error\n");
1953
1954         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1955         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1956
1957         if (!is_valid_ether_addr(netdev->perm_addr)) {
1958                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1959                 err = -EIO;
1960                 goto err_eeprom;
1961         }
1962
1963         setup_timer(&adapter->watchdog_timer, igb_watchdog,
1964                     (unsigned long) adapter);
1965         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1966                     (unsigned long) adapter);
1967
1968         INIT_WORK(&adapter->reset_task, igb_reset_task);
1969         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1970
1971         /* Initialize link properties that are user-changeable */
1972         adapter->fc_autoneg = true;
1973         hw->mac.autoneg = true;
1974         hw->phy.autoneg_advertised = 0x2f;
1975
1976         hw->fc.requested_mode = e1000_fc_default;
1977         hw->fc.current_mode = e1000_fc_default;
1978
1979         igb_validate_mdi_setting(hw);
1980
1981         /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
1982          * enable the ACPI Magic Packet filter
1983          */
1984
1985         if (hw->bus.func == 0)
1986                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1987         else if (hw->mac.type == e1000_82580)
1988                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1989                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1990                                  &eeprom_data);
1991         else if (hw->bus.func == 1)
1992                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1993
1994         if (eeprom_data & eeprom_apme_mask)
1995                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1996
1997         /* now that we have the eeprom settings, apply the special cases where
1998          * the eeprom may be wrong or the board simply won't support wake on
1999          * lan on a particular port */
2000         switch (pdev->device) {
2001         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2002                 adapter->eeprom_wol = 0;
2003                 break;
2004         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2005         case E1000_DEV_ID_82576_FIBER:
2006         case E1000_DEV_ID_82576_SERDES:
2007                 /* Wake events only supported on port A for dual fiber
2008                  * regardless of eeprom setting */
2009                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2010                         adapter->eeprom_wol = 0;
2011                 break;
2012         case E1000_DEV_ID_82576_QUAD_COPPER:
2013         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2014                 /* if quad port adapter, disable WoL on all but port A */
2015                 if (global_quad_port_a != 0)
2016                         adapter->eeprom_wol = 0;
2017                 else
2018                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2019                 /* Reset for multiple quad port adapters */
2020                 if (++global_quad_port_a == 4)
2021                         global_quad_port_a = 0;
2022                 break;
2023         }
2024
2025         /* initialize the wol settings based on the eeprom settings */
2026         adapter->wol = adapter->eeprom_wol;
2027         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2028
2029         /* reset the hardware with the new settings */
2030         igb_reset(adapter);
2031
2032         /* let the f/w know that the h/w is now under the control of the
2033          * driver. */
2034         igb_get_hw_control(adapter);
2035
2036         strcpy(netdev->name, "eth%d");
2037         err = register_netdev(netdev);
2038         if (err)
2039                 goto err_register;
2040
2041         /* carrier off reporting is important to ethtool even BEFORE open */
2042         netif_carrier_off(netdev);
2043
2044 #ifdef CONFIG_IGB_DCA
2045         if (dca_add_requester(&pdev->dev) == 0) {
2046                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2047                 dev_info(&pdev->dev, "DCA enabled\n");
2048                 igb_setup_dca(adapter);
2049         }
2050
2051 #endif
2052         /* do hw tstamp init after resetting */
2053         igb_init_hw_timer(adapter);
2054
2055         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2056         /* print bus type/speed/width info */
2057         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2058                  netdev->name,
2059                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2060                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2061                                                             "unknown"),
2062                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2063                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2064                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2065                    "unknown"),
2066                  netdev->dev_addr);
2067
2068         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2069         if (ret_val)
2070                 strcpy(part_str, "Unknown");
2071         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2072         dev_info(&pdev->dev,
2073                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2074                 adapter->msix_entries ? "MSI-X" :
2075                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2076                 adapter->num_rx_queues, adapter->num_tx_queues);
2077         switch (hw->mac.type) {
2078         case e1000_i350:
2079                 igb_set_eee_i350(hw);
2080                 break;
2081         default:
2082                 break;
2083         }
2084         return 0;
2085
2086 err_register:
2087         igb_release_hw_control(adapter);
2088 err_eeprom:
2089         if (!igb_check_reset_block(hw))
2090                 igb_reset_phy(hw);
2091
2092         if (hw->flash_address)
2093                 iounmap(hw->flash_address);
2094 err_sw_init:
2095         igb_clear_interrupt_scheme(adapter);
2096         iounmap(hw->hw_addr);
2097 err_ioremap:
2098         free_netdev(netdev);
2099 err_alloc_etherdev:
2100         pci_release_selected_regions(pdev,
2101                                      pci_select_bars(pdev, IORESOURCE_MEM));
2102 err_pci_reg:
2103 err_dma:
2104         pci_disable_device(pdev);
2105         return err;
2106 }
2107
2108 /**
2109  * igb_remove - Device Removal Routine
2110  * @pdev: PCI device information struct
2111  *
2112  * igb_remove is called by the PCI subsystem to alert the driver
2113  * that it should release a PCI device.  The could be caused by a
2114  * Hot-Plug event, or because the driver is going to be removed from
2115  * memory.
2116  **/
2117 static void __devexit igb_remove(struct pci_dev *pdev)
2118 {
2119         struct net_device *netdev = pci_get_drvdata(pdev);
2120         struct igb_adapter *adapter = netdev_priv(netdev);
2121         struct e1000_hw *hw = &adapter->hw;
2122
2123         /*
2124          * The watchdog timer may be rescheduled, so explicitly
2125          * disable watchdog from being rescheduled.
2126          */
2127         set_bit(__IGB_DOWN, &adapter->state);
2128         del_timer_sync(&adapter->watchdog_timer);
2129         del_timer_sync(&adapter->phy_info_timer);
2130
2131         cancel_work_sync(&adapter->reset_task);
2132         cancel_work_sync(&adapter->watchdog_task);
2133
2134 #ifdef CONFIG_IGB_DCA
2135         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2136                 dev_info(&pdev->dev, "DCA disabled\n");
2137                 dca_remove_requester(&pdev->dev);
2138                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2139                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2140         }
2141 #endif
2142
2143         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2144          * would have already happened in close and is redundant. */
2145         igb_release_hw_control(adapter);
2146
2147         unregister_netdev(netdev);
2148
2149         igb_clear_interrupt_scheme(adapter);
2150
2151 #ifdef CONFIG_PCI_IOV
2152         /* reclaim resources allocated to VFs */
2153         if (adapter->vf_data) {
2154                 /* disable iov and allow time for transactions to clear */
2155                 pci_disable_sriov(pdev);
2156                 msleep(500);
2157
2158                 kfree(adapter->vf_data);
2159                 adapter->vf_data = NULL;
2160                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2161                 msleep(100);
2162                 dev_info(&pdev->dev, "IOV Disabled\n");
2163         }
2164 #endif
2165
2166         iounmap(hw->hw_addr);
2167         if (hw->flash_address)
2168                 iounmap(hw->flash_address);
2169         pci_release_selected_regions(pdev,
2170                                      pci_select_bars(pdev, IORESOURCE_MEM));
2171
2172         free_netdev(netdev);
2173
2174         pci_disable_pcie_error_reporting(pdev);
2175
2176         pci_disable_device(pdev);
2177 }
2178
2179 /**
2180  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2181  * @adapter: board private structure to initialize
2182  *
2183  * This function initializes the vf specific data storage and then attempts to
2184  * allocate the VFs.  The reason for ordering it this way is because it is much
2185  * mor expensive time wise to disable SR-IOV than it is to allocate and free
2186  * the memory for the VFs.
2187  **/
2188 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2189 {
2190 #ifdef CONFIG_PCI_IOV
2191         struct pci_dev *pdev = adapter->pdev;
2192
2193         if (adapter->vfs_allocated_count) {
2194                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2195                                            sizeof(struct vf_data_storage),
2196                                            GFP_KERNEL);
2197                 /* if allocation failed then we do not support SR-IOV */
2198                 if (!adapter->vf_data) {
2199                         adapter->vfs_allocated_count = 0;
2200                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2201                                 "Data Storage\n");
2202                 }
2203         }
2204
2205         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2206                 kfree(adapter->vf_data);
2207                 adapter->vf_data = NULL;
2208 #endif /* CONFIG_PCI_IOV */
2209                 adapter->vfs_allocated_count = 0;
2210 #ifdef CONFIG_PCI_IOV
2211         } else {
2212                 unsigned char mac_addr[ETH_ALEN];
2213                 int i;
2214                 dev_info(&pdev->dev, "%d vfs allocated\n",
2215                          adapter->vfs_allocated_count);
2216                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2217                         random_ether_addr(mac_addr);
2218                         igb_set_vf_mac(adapter, i, mac_addr);
2219                 }
2220                 /* DMA Coalescing is not supported in IOV mode. */
2221                 if (adapter->flags & IGB_FLAG_DMAC)
2222                         adapter->flags &= ~IGB_FLAG_DMAC;
2223         }
2224 #endif /* CONFIG_PCI_IOV */
2225 }
2226
2227
2228 /**
2229  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2230  * @adapter: board private structure to initialize
2231  *
2232  * igb_init_hw_timer initializes the function pointer and values for the hw
2233  * timer found in hardware.
2234  **/
2235 static void igb_init_hw_timer(struct igb_adapter *adapter)
2236 {
2237         struct e1000_hw *hw = &adapter->hw;
2238
2239         switch (hw->mac.type) {
2240         case e1000_i350:
2241         case e1000_82580:
2242                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2243                 adapter->cycles.read = igb_read_clock;
2244                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2245                 adapter->cycles.mult = 1;
2246                 /*
2247                  * The 82580 timesync updates the system timer every 8ns by 8ns
2248                  * and the value cannot be shifted.  Instead we need to shift
2249                  * the registers to generate a 64bit timer value.  As a result
2250                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2251                  * 24 in order to generate a larger value for synchronization.
2252                  */
2253                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2254                 /* disable system timer temporarily by setting bit 31 */
2255                 wr32(E1000_TSAUXC, 0x80000000);
2256                 wrfl();
2257
2258                 /* Set registers so that rollover occurs soon to test this. */
2259                 wr32(E1000_SYSTIMR, 0x00000000);
2260                 wr32(E1000_SYSTIML, 0x80000000);
2261                 wr32(E1000_SYSTIMH, 0x000000FF);
2262                 wrfl();
2263
2264                 /* enable system timer by clearing bit 31 */
2265                 wr32(E1000_TSAUXC, 0x0);
2266                 wrfl();
2267
2268                 timecounter_init(&adapter->clock,
2269                                  &adapter->cycles,
2270                                  ktime_to_ns(ktime_get_real()));
2271                 /*
2272                  * Synchronize our NIC clock against system wall clock. NIC
2273                  * time stamp reading requires ~3us per sample, each sample
2274                  * was pretty stable even under load => only require 10
2275                  * samples for each offset comparison.
2276                  */
2277                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2278                 adapter->compare.source = &adapter->clock;
2279                 adapter->compare.target = ktime_get_real;
2280                 adapter->compare.num_samples = 10;
2281                 timecompare_update(&adapter->compare, 0);
2282                 break;
2283         case e1000_82576:
2284                 /*
2285                  * Initialize hardware timer: we keep it running just in case
2286                  * that some program needs it later on.
2287                  */
2288                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2289                 adapter->cycles.read = igb_read_clock;
2290                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2291                 adapter->cycles.mult = 1;
2292                 /**
2293                  * Scale the NIC clock cycle by a large factor so that
2294                  * relatively small clock corrections can be added or
2295                  * subtracted at each clock tick. The drawbacks of a large
2296                  * factor are a) that the clock register overflows more quickly
2297                  * (not such a big deal) and b) that the increment per tick has
2298                  * to fit into 24 bits.  As a result we need to use a shift of
2299                  * 19 so we can fit a value of 16 into the TIMINCA register.
2300                  */
2301                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2302                 wr32(E1000_TIMINCA,
2303                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2304                                 (16 << IGB_82576_TSYNC_SHIFT));
2305
2306                 /* Set registers so that rollover occurs soon to test this. */
2307                 wr32(E1000_SYSTIML, 0x00000000);
2308                 wr32(E1000_SYSTIMH, 0xFF800000);
2309                 wrfl();
2310
2311                 timecounter_init(&adapter->clock,
2312                                  &adapter->cycles,
2313                                  ktime_to_ns(ktime_get_real()));
2314                 /*
2315                  * Synchronize our NIC clock against system wall clock. NIC
2316                  * time stamp reading requires ~3us per sample, each sample
2317                  * was pretty stable even under load => only require 10
2318                  * samples for each offset comparison.
2319                  */
2320                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2321                 adapter->compare.source = &adapter->clock;
2322                 adapter->compare.target = ktime_get_real;
2323                 adapter->compare.num_samples = 10;
2324                 timecompare_update(&adapter->compare, 0);
2325                 break;
2326         case e1000_82575:
2327                 /* 82575 does not support timesync */
2328         default:
2329                 break;
2330         }
2331
2332 }
2333
2334 /**
2335  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2336  * @adapter: board private structure to initialize
2337  *
2338  * igb_sw_init initializes the Adapter private data structure.
2339  * Fields are initialized based on PCI device information and
2340  * OS network device settings (MTU size).
2341  **/
2342 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2343 {
2344         struct e1000_hw *hw = &adapter->hw;
2345         struct net_device *netdev = adapter->netdev;
2346         struct pci_dev *pdev = adapter->pdev;
2347
2348         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2349
2350         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2351         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2352         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2353         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2354
2355         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2356         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2357
2358         spin_lock_init(&adapter->stats64_lock);
2359 #ifdef CONFIG_PCI_IOV
2360         switch (hw->mac.type) {
2361         case e1000_82576:
2362         case e1000_i350:
2363                 if (max_vfs > 7) {
2364                         dev_warn(&pdev->dev,
2365                                  "Maximum of 7 VFs per PF, using max\n");
2366                         adapter->vfs_allocated_count = 7;
2367                 } else
2368                         adapter->vfs_allocated_count = max_vfs;
2369                 break;
2370         default:
2371                 break;
2372         }
2373 #endif /* CONFIG_PCI_IOV */
2374         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2375
2376         /*
2377          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2378          * then we should combine the queues into a queue pair in order to
2379          * conserve interrupts due to limited supply
2380          */
2381         if ((adapter->rss_queues > 4) ||
2382             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2383                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2384
2385         /* This call may decrease the number of queues */
2386         if (igb_init_interrupt_scheme(adapter)) {
2387                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2388                 return -ENOMEM;
2389         }
2390
2391         igb_probe_vfs(adapter);
2392
2393         /* Explicitly disable IRQ since the NIC can be in any state. */
2394         igb_irq_disable(adapter);
2395
2396         if (hw->mac.type == e1000_i350)
2397                 adapter->flags &= ~IGB_FLAG_DMAC;
2398
2399         set_bit(__IGB_DOWN, &adapter->state);
2400         return 0;
2401 }
2402
2403 /**
2404  * igb_open - Called when a network interface is made active
2405  * @netdev: network interface device structure
2406  *
2407  * Returns 0 on success, negative value on failure
2408  *
2409  * The open entry point is called when a network interface is made
2410  * active by the system (IFF_UP).  At this point all resources needed
2411  * for transmit and receive operations are allocated, the interrupt
2412  * handler is registered with the OS, the watchdog timer is started,
2413  * and the stack is notified that the interface is ready.
2414  **/
2415 static int igb_open(struct net_device *netdev)
2416 {
2417         struct igb_adapter *adapter = netdev_priv(netdev);
2418         struct e1000_hw *hw = &adapter->hw;
2419         int err;
2420         int i;
2421
2422         /* disallow open during test */
2423         if (test_bit(__IGB_TESTING, &adapter->state))
2424                 return -EBUSY;
2425
2426         netif_carrier_off(netdev);
2427
2428         /* allocate transmit descriptors */
2429         err = igb_setup_all_tx_resources(adapter);
2430         if (err)
2431                 goto err_setup_tx;
2432
2433         /* allocate receive descriptors */
2434         err = igb_setup_all_rx_resources(adapter);
2435         if (err)
2436                 goto err_setup_rx;
2437
2438         igb_power_up_link(adapter);
2439
2440         /* before we allocate an interrupt, we must be ready to handle it.
2441          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2442          * as soon as we call pci_request_irq, so we have to setup our
2443          * clean_rx handler before we do so.  */
2444         igb_configure(adapter);
2445
2446         err = igb_request_irq(adapter);
2447         if (err)
2448                 goto err_req_irq;
2449
2450         /* From here on the code is the same as igb_up() */
2451         clear_bit(__IGB_DOWN, &adapter->state);
2452
2453         for (i = 0; i < adapter->num_q_vectors; i++) {
2454                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2455                 napi_enable(&q_vector->napi);
2456         }
2457
2458         /* Clear any pending interrupts. */
2459         rd32(E1000_ICR);
2460
2461         igb_irq_enable(adapter);
2462
2463         /* notify VFs that reset has been completed */
2464         if (adapter->vfs_allocated_count) {
2465                 u32 reg_data = rd32(E1000_CTRL_EXT);
2466                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2467                 wr32(E1000_CTRL_EXT, reg_data);
2468         }
2469
2470         netif_tx_start_all_queues(netdev);
2471
2472         /* start the watchdog. */
2473         hw->mac.get_link_status = 1;
2474         schedule_work(&adapter->watchdog_task);
2475
2476         return 0;
2477
2478 err_req_irq:
2479         igb_release_hw_control(adapter);
2480         igb_power_down_link(adapter);
2481         igb_free_all_rx_resources(adapter);
2482 err_setup_rx:
2483         igb_free_all_tx_resources(adapter);
2484 err_setup_tx:
2485         igb_reset(adapter);
2486
2487         return err;
2488 }
2489
2490 /**
2491  * igb_close - Disables a network interface
2492  * @netdev: network interface device structure
2493  *
2494  * Returns 0, this is not allowed to fail
2495  *
2496  * The close entry point is called when an interface is de-activated
2497  * by the OS.  The hardware is still under the driver's control, but
2498  * needs to be disabled.  A global MAC reset is issued to stop the
2499  * hardware, and all transmit and receive resources are freed.
2500  **/
2501 static int igb_close(struct net_device *netdev)
2502 {
2503         struct igb_adapter *adapter = netdev_priv(netdev);
2504
2505         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2506         igb_down(adapter);
2507
2508         igb_free_irq(adapter);
2509
2510         igb_free_all_tx_resources(adapter);
2511         igb_free_all_rx_resources(adapter);
2512
2513         return 0;
2514 }
2515
2516 /**
2517  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2518  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2519  *
2520  * Return 0 on success, negative on failure
2521  **/
2522 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2523 {
2524         struct device *dev = tx_ring->dev;
2525         int size;
2526
2527         size = sizeof(struct igb_buffer) * tx_ring->count;
2528         tx_ring->buffer_info = vzalloc(size);
2529         if (!tx_ring->buffer_info)
2530                 goto err;
2531
2532         /* round up to nearest 4K */
2533         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2534         tx_ring->size = ALIGN(tx_ring->size, 4096);
2535
2536         tx_ring->desc = dma_alloc_coherent(dev,
2537                                            tx_ring->size,
2538                                            &tx_ring->dma,
2539                                            GFP_KERNEL);
2540
2541         if (!tx_ring->desc)
2542                 goto err;
2543
2544         tx_ring->next_to_use = 0;
2545         tx_ring->next_to_clean = 0;
2546         return 0;
2547
2548 err:
2549         vfree(tx_ring->buffer_info);
2550         dev_err(dev,
2551                 "Unable to allocate memory for the transmit descriptor ring\n");
2552         return -ENOMEM;
2553 }
2554
2555 /**
2556  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2557  *                                (Descriptors) for all queues
2558  * @adapter: board private structure
2559  *
2560  * Return 0 on success, negative on failure
2561  **/
2562 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2563 {
2564         struct pci_dev *pdev = adapter->pdev;
2565         int i, err = 0;
2566
2567         for (i = 0; i < adapter->num_tx_queues; i++) {
2568                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2569                 if (err) {
2570                         dev_err(&pdev->dev,
2571                                 "Allocation for Tx Queue %u failed\n", i);
2572                         for (i--; i >= 0; i--)
2573                                 igb_free_tx_resources(adapter->tx_ring[i]);
2574                         break;
2575                 }
2576         }
2577
2578         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2579                 int r_idx = i % adapter->num_tx_queues;
2580                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2581         }
2582         return err;
2583 }
2584
2585 /**
2586  * igb_setup_tctl - configure the transmit control registers
2587  * @adapter: Board private structure
2588  **/
2589 void igb_setup_tctl(struct igb_adapter *adapter)
2590 {
2591         struct e1000_hw *hw = &adapter->hw;
2592         u32 tctl;
2593
2594         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2595         wr32(E1000_TXDCTL(0), 0);
2596
2597         /* Program the Transmit Control Register */
2598         tctl = rd32(E1000_TCTL);
2599         tctl &= ~E1000_TCTL_CT;
2600         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2601                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2602
2603         igb_config_collision_dist(hw);
2604
2605         /* Enable transmits */
2606         tctl |= E1000_TCTL_EN;
2607
2608         wr32(E1000_TCTL, tctl);
2609 }
2610
2611 /**
2612  * igb_configure_tx_ring - Configure transmit ring after Reset
2613  * @adapter: board private structure
2614  * @ring: tx ring to configure
2615  *
2616  * Configure a transmit ring after a reset.
2617  **/
2618 void igb_configure_tx_ring(struct igb_adapter *adapter,
2619                            struct igb_ring *ring)
2620 {
2621         struct e1000_hw *hw = &adapter->hw;
2622         u32 txdctl;
2623         u64 tdba = ring->dma;
2624         int reg_idx = ring->reg_idx;
2625
2626         /* disable the queue */
2627         txdctl = rd32(E1000_TXDCTL(reg_idx));
2628         wr32(E1000_TXDCTL(reg_idx),
2629                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2630         wrfl();
2631         mdelay(10);
2632
2633         wr32(E1000_TDLEN(reg_idx),
2634                         ring->count * sizeof(union e1000_adv_tx_desc));
2635         wr32(E1000_TDBAL(reg_idx),
2636                         tdba & 0x00000000ffffffffULL);
2637         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2638
2639         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2640         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2641         writel(0, ring->head);
2642         writel(0, ring->tail);
2643
2644         txdctl |= IGB_TX_PTHRESH;
2645         txdctl |= IGB_TX_HTHRESH << 8;
2646         txdctl |= IGB_TX_WTHRESH << 16;
2647
2648         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2649         wr32(E1000_TXDCTL(reg_idx), txdctl);
2650 }
2651
2652 /**
2653  * igb_configure_tx - Configure transmit Unit after Reset
2654  * @adapter: board private structure
2655  *
2656  * Configure the Tx unit of the MAC after a reset.
2657  **/
2658 static void igb_configure_tx(struct igb_adapter *adapter)
2659 {
2660         int i;
2661
2662         for (i = 0; i < adapter->num_tx_queues; i++)
2663                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2664 }
2665
2666 /**
2667  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2668  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2669  *
2670  * Returns 0 on success, negative on failure
2671  **/
2672 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2673 {
2674         struct device *dev = rx_ring->dev;
2675         int size, desc_len;
2676
2677         size = sizeof(struct igb_buffer) * rx_ring->count;
2678         rx_ring->buffer_info = vzalloc(size);
2679         if (!rx_ring->buffer_info)
2680                 goto err;
2681
2682         desc_len = sizeof(union e1000_adv_rx_desc);
2683
2684         /* Round up to nearest 4K */
2685         rx_ring->size = rx_ring->count * desc_len;
2686         rx_ring->size = ALIGN(rx_ring->size, 4096);
2687
2688         rx_ring->desc = dma_alloc_coherent(dev,
2689                                            rx_ring->size,
2690                                            &rx_ring->dma,
2691                                            GFP_KERNEL);
2692
2693         if (!rx_ring->desc)
2694                 goto err;
2695
2696         rx_ring->next_to_clean = 0;
2697         rx_ring->next_to_use = 0;
2698
2699         return 0;
2700
2701 err:
2702         vfree(rx_ring->buffer_info);
2703         rx_ring->buffer_info = NULL;
2704         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2705                 " ring\n");
2706         return -ENOMEM;
2707 }
2708
2709 /**
2710  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2711  *                                (Descriptors) for all queues
2712  * @adapter: board private structure
2713  *
2714  * Return 0 on success, negative on failure
2715  **/
2716 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2717 {
2718         struct pci_dev *pdev = adapter->pdev;
2719         int i, err = 0;
2720
2721         for (i = 0; i < adapter->num_rx_queues; i++) {
2722                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2723                 if (err) {
2724                         dev_err(&pdev->dev,
2725                                 "Allocation for Rx Queue %u failed\n", i);
2726                         for (i--; i >= 0; i--)
2727                                 igb_free_rx_resources(adapter->rx_ring[i]);
2728                         break;
2729                 }
2730         }
2731
2732         return err;
2733 }
2734
2735 /**
2736  * igb_setup_mrqc - configure the multiple receive queue control registers
2737  * @adapter: Board private structure
2738  **/
2739 static void igb_setup_mrqc(struct igb_adapter *adapter)
2740 {
2741         struct e1000_hw *hw = &adapter->hw;
2742         u32 mrqc, rxcsum;
2743         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2744         union e1000_reta {
2745                 u32 dword;
2746                 u8  bytes[4];
2747         } reta;
2748         static const u8 rsshash[40] = {
2749                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2750                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2751                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2752                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2753
2754         /* Fill out hash function seeds */
2755         for (j = 0; j < 10; j++) {
2756                 u32 rsskey = rsshash[(j * 4)];
2757                 rsskey |= rsshash[(j * 4) + 1] << 8;
2758                 rsskey |= rsshash[(j * 4) + 2] << 16;
2759                 rsskey |= rsshash[(j * 4) + 3] << 24;
2760                 array_wr32(E1000_RSSRK(0), j, rsskey);
2761         }
2762
2763         num_rx_queues = adapter->rss_queues;
2764
2765         if (adapter->vfs_allocated_count) {
2766                 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2767                 switch (hw->mac.type) {
2768                 case e1000_i350:
2769                 case e1000_82580:
2770                         num_rx_queues = 1;
2771                         shift = 0;
2772                         break;
2773                 case e1000_82576:
2774                         shift = 3;
2775                         num_rx_queues = 2;
2776                         break;
2777                 case e1000_82575:
2778                         shift = 2;
2779                         shift2 = 6;
2780                 default:
2781                         break;
2782                 }
2783         } else {
2784                 if (hw->mac.type == e1000_82575)
2785                         shift = 6;
2786         }
2787
2788         for (j = 0; j < (32 * 4); j++) {
2789                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2790                 if (shift2)
2791                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2792                 if ((j & 3) == 3)
2793                         wr32(E1000_RETA(j >> 2), reta.dword);
2794         }
2795
2796         /*
2797          * Disable raw packet checksumming so that RSS hash is placed in
2798          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2799          * offloads as they are enabled by default
2800          */
2801         rxcsum = rd32(E1000_RXCSUM);
2802         rxcsum |= E1000_RXCSUM_PCSD;
2803
2804         if (adapter->hw.mac.type >= e1000_82576)
2805                 /* Enable Receive Checksum Offload for SCTP */
2806                 rxcsum |= E1000_RXCSUM_CRCOFL;
2807
2808         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2809         wr32(E1000_RXCSUM, rxcsum);
2810
2811         /* If VMDq is enabled then we set the appropriate mode for that, else
2812          * we default to RSS so that an RSS hash is calculated per packet even
2813          * if we are only using one queue */
2814         if (adapter->vfs_allocated_count) {
2815                 if (hw->mac.type > e1000_82575) {
2816                         /* Set the default pool for the PF's first queue */
2817                         u32 vtctl = rd32(E1000_VT_CTL);
2818                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2819                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2820                         vtctl |= adapter->vfs_allocated_count <<
2821                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2822                         wr32(E1000_VT_CTL, vtctl);
2823                 }
2824                 if (adapter->rss_queues > 1)
2825                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2826                 else
2827                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2828         } else {
2829                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2830         }
2831         igb_vmm_control(adapter);
2832
2833         /*
2834          * Generate RSS hash based on TCP port numbers and/or
2835          * IPv4/v6 src and dst addresses since UDP cannot be
2836          * hashed reliably due to IP fragmentation
2837          */
2838         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2839                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2840                 E1000_MRQC_RSS_FIELD_IPV6 |
2841                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2842                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2843
2844         wr32(E1000_MRQC, mrqc);
2845 }
2846
2847 /**
2848  * igb_setup_rctl - configure the receive control registers
2849  * @adapter: Board private structure
2850  **/
2851 void igb_setup_rctl(struct igb_adapter *adapter)
2852 {
2853         struct e1000_hw *hw = &adapter->hw;
2854         u32 rctl;
2855
2856         rctl = rd32(E1000_RCTL);
2857
2858         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2859         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2860
2861         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2862                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2863
2864         /*
2865          * enable stripping of CRC. It's unlikely this will break BMC
2866          * redirection as it did with e1000. Newer features require
2867          * that the HW strips the CRC.
2868          */
2869         rctl |= E1000_RCTL_SECRC;
2870
2871         /* disable store bad packets and clear size bits. */
2872         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2873
2874         /* enable LPE to prevent packets larger than max_frame_size */
2875         rctl |= E1000_RCTL_LPE;
2876
2877         /* disable queue 0 to prevent tail write w/o re-config */
2878         wr32(E1000_RXDCTL(0), 0);
2879
2880         /* Attention!!!  For SR-IOV PF driver operations you must enable
2881          * queue drop for all VF and PF queues to prevent head of line blocking
2882          * if an un-trusted VF does not provide descriptors to hardware.
2883          */
2884         if (adapter->vfs_allocated_count) {
2885                 /* set all queue drop enable bits */
2886                 wr32(E1000_QDE, ALL_QUEUES);
2887         }
2888
2889         wr32(E1000_RCTL, rctl);
2890 }
2891
2892 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2893                                    int vfn)
2894 {
2895         struct e1000_hw *hw = &adapter->hw;
2896         u32 vmolr;
2897
2898         /* if it isn't the PF check to see if VFs are enabled and
2899          * increase the size to support vlan tags */
2900         if (vfn < adapter->vfs_allocated_count &&
2901             adapter->vf_data[vfn].vlans_enabled)
2902                 size += VLAN_TAG_SIZE;
2903
2904         vmolr = rd32(E1000_VMOLR(vfn));
2905         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2906         vmolr |= size | E1000_VMOLR_LPE;
2907         wr32(E1000_VMOLR(vfn), vmolr);
2908
2909         return 0;
2910 }
2911
2912 /**
2913  * igb_rlpml_set - set maximum receive packet size
2914  * @adapter: board private structure
2915  *
2916  * Configure maximum receivable packet size.
2917  **/
2918 static void igb_rlpml_set(struct igb_adapter *adapter)
2919 {
2920         u32 max_frame_size = adapter->max_frame_size;
2921         struct e1000_hw *hw = &adapter->hw;
2922         u16 pf_id = adapter->vfs_allocated_count;
2923
2924         if (adapter->vlgrp)
2925                 max_frame_size += VLAN_TAG_SIZE;
2926
2927         /* if vfs are enabled we set RLPML to the largest possible request
2928          * size and set the VMOLR RLPML to the size we need */
2929         if (pf_id) {
2930                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2931                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2932         }
2933
2934         wr32(E1000_RLPML, max_frame_size);
2935 }
2936
2937 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2938                                  int vfn, bool aupe)
2939 {
2940         struct e1000_hw *hw = &adapter->hw;
2941         u32 vmolr;
2942
2943         /*
2944          * This register exists only on 82576 and newer so if we are older then
2945          * we should exit and do nothing
2946          */
2947         if (hw->mac.type < e1000_82576)
2948                 return;
2949
2950         vmolr = rd32(E1000_VMOLR(vfn));
2951         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2952         if (aupe)
2953                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2954         else
2955                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2956
2957         /* clear all bits that might not be set */
2958         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2959
2960         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2961                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2962         /*
2963          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2964          * multicast packets
2965          */
2966         if (vfn <= adapter->vfs_allocated_count)
2967                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2968
2969         wr32(E1000_VMOLR(vfn), vmolr);
2970 }
2971
2972 /**
2973  * igb_configure_rx_ring - Configure a receive ring after Reset
2974  * @adapter: board private structure
2975  * @ring: receive ring to be configured
2976  *
2977  * Configure the Rx unit of the MAC after a reset.
2978  **/
2979 void igb_configure_rx_ring(struct igb_adapter *adapter,
2980                            struct igb_ring *ring)
2981 {
2982         struct e1000_hw *hw = &adapter->hw;
2983         u64 rdba = ring->dma;
2984         int reg_idx = ring->reg_idx;
2985         u32 srrctl, rxdctl;
2986
2987         /* disable the queue */
2988         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2989         wr32(E1000_RXDCTL(reg_idx),
2990                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2991
2992         /* Set DMA base address registers */
2993         wr32(E1000_RDBAL(reg_idx),
2994              rdba & 0x00000000ffffffffULL);
2995         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2996         wr32(E1000_RDLEN(reg_idx),
2997                        ring->count * sizeof(union e1000_adv_rx_desc));
2998
2999         /* initialize head and tail */
3000         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
3001         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3002         writel(0, ring->head);
3003         writel(0, ring->tail);
3004
3005         /* set descriptor configuration */
3006         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
3007                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
3008                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3009 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3010                 srrctl |= IGB_RXBUFFER_16384 >>
3011                           E1000_SRRCTL_BSIZEPKT_SHIFT;
3012 #else
3013                 srrctl |= (PAGE_SIZE / 2) >>
3014                           E1000_SRRCTL_BSIZEPKT_SHIFT;
3015 #endif
3016                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3017         } else {
3018                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
3019                          E1000_SRRCTL_BSIZEPKT_SHIFT;
3020                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3021         }
3022         if (hw->mac.type == e1000_82580)
3023                 srrctl |= E1000_SRRCTL_TIMESTAMP;
3024         /* Only set Drop Enable if we are supporting multiple queues */
3025         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3026                 srrctl |= E1000_SRRCTL_DROP_EN;
3027
3028         wr32(E1000_SRRCTL(reg_idx), srrctl);
3029
3030         /* set filtering for VMDQ pools */
3031         igb_set_vmolr(adapter, reg_idx & 0x7, true);
3032
3033         /* enable receive descriptor fetching */
3034         rxdctl = rd32(E1000_RXDCTL(reg_idx));
3035         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3036         rxdctl &= 0xFFF00000;
3037         rxdctl |= IGB_RX_PTHRESH;
3038         rxdctl |= IGB_RX_HTHRESH << 8;
3039         rxdctl |= IGB_RX_WTHRESH << 16;
3040         wr32(E1000_RXDCTL(reg_idx), rxdctl);
3041 }
3042
3043 /**
3044  * igb_configure_rx - Configure receive Unit after Reset
3045  * @adapter: board private structure
3046  *
3047  * Configure the Rx unit of the MAC after a reset.
3048  **/
3049 static void igb_configure_rx(struct igb_adapter *adapter)
3050 {
3051         int i;
3052
3053         /* set UTA to appropriate mode */
3054         igb_set_uta(adapter);
3055
3056         /* set the correct pool for the PF default MAC address in entry 0 */
3057         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3058                          adapter->vfs_allocated_count);
3059
3060         /* Setup the HW Rx Head and Tail Descriptor Pointers and
3061          * the Base and Length of the Rx Descriptor Ring */
3062         for (i = 0; i < adapter->num_rx_queues; i++)
3063                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3064 }
3065
3066 /**
3067  * igb_free_tx_resources - Free Tx Resources per Queue
3068  * @tx_ring: Tx descriptor ring for a specific queue
3069  *
3070  * Free all transmit software resources
3071  **/
3072 void igb_free_tx_resources(struct igb_ring *tx_ring)
3073 {
3074         igb_clean_tx_ring(tx_ring);
3075
3076         vfree(tx_ring->buffer_info);
3077         tx_ring->buffer_info = NULL;
3078
3079         /* if not set, then don't free */
3080         if (!tx_ring->desc)
3081                 return;
3082
3083         dma_free_coherent(tx_ring->dev, tx_ring->size,
3084                           tx_ring->desc, tx_ring->dma);
3085
3086         tx_ring->desc = NULL;
3087 }
3088
3089 /**
3090  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3091  * @adapter: board private structure
3092  *
3093  * Free all transmit software resources
3094  **/
3095 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3096 {
3097         int i;
3098
3099         for (i = 0; i < adapter->num_tx_queues; i++)
3100                 igb_free_tx_resources(adapter->tx_ring[i]);
3101 }
3102
3103 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3104                                     struct igb_buffer *buffer_info)
3105 {
3106         if (buffer_info->dma) {
3107                 if (buffer_info->mapped_as_page)
3108                         dma_unmap_page(tx_ring->dev,
3109                                         buffer_info->dma,
3110                                         buffer_info->length,
3111                                         DMA_TO_DEVICE);
3112                 else
3113                         dma_unmap_single(tx_ring->dev,
3114                                         buffer_info->dma,
3115                                         buffer_info->length,
3116                                         DMA_TO_DEVICE);
3117                 buffer_info->dma = 0;
3118         }
3119         if (buffer_info->skb) {
3120                 dev_kfree_skb_any(buffer_info->skb);
3121                 buffer_info->skb = NULL;
3122         }
3123         buffer_info->time_stamp = 0;
3124         buffer_info->length = 0;
3125         buffer_info->next_to_watch = 0;
3126         buffer_info->mapped_as_page = false;
3127 }
3128
3129 /**
3130  * igb_clean_tx_ring - Free Tx Buffers
3131  * @tx_ring: ring to be cleaned
3132  **/
3133 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3134 {
3135         struct igb_buffer *buffer_info;
3136         unsigned long size;
3137         unsigned int i;
3138
3139         if (!tx_ring->buffer_info)
3140                 return;
3141         /* Free all the Tx ring sk_buffs */
3142
3143         for (i = 0; i < tx_ring->count; i++) {
3144                 buffer_info = &tx_ring->buffer_info[i];
3145                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3146         }
3147
3148         size = sizeof(struct igb_buffer) * tx_ring->count;
3149         memset(tx_ring->buffer_info, 0, size);
3150
3151         /* Zero out the descriptor ring */
3152         memset(tx_ring->desc, 0, tx_ring->size);
3153
3154         tx_ring->next_to_use = 0;
3155         tx_ring->next_to_clean = 0;
3156 }
3157
3158 /**
3159  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3160  * @adapter: board private structure
3161  **/
3162 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3163 {
3164         int i;
3165
3166         for (i = 0; i < adapter->num_tx_queues; i++)
3167                 igb_clean_tx_ring(adapter->tx_ring[i]);
3168 }
3169
3170 /**
3171  * igb_free_rx_resources - Free Rx Resources
3172  * @rx_ring: ring to clean the resources from
3173  *
3174  * Free all receive software resources
3175  **/
3176 void igb_free_rx_resources(struct igb_ring *rx_ring)
3177 {
3178         igb_clean_rx_ring(rx_ring);
3179
3180         vfree(rx_ring->buffer_info);
3181         rx_ring->buffer_info = NULL;
3182
3183         /* if not set, then don't free */
3184         if (!rx_ring->desc)
3185                 return;
3186
3187         dma_free_coherent(rx_ring->dev, rx_ring->size,
3188                           rx_ring->desc, rx_ring->dma);
3189
3190         rx_ring->desc = NULL;
3191 }
3192
3193 /**
3194  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3195  * @adapter: board private structure
3196  *
3197  * Free all receive software resources
3198  **/
3199 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3200 {
3201         int i;
3202
3203         for (i = 0; i < adapter->num_rx_queues; i++)
3204                 igb_free_rx_resources(adapter->rx_ring[i]);
3205 }
3206
3207 /**
3208  * igb_clean_rx_ring - Free Rx Buffers per Queue
3209  * @rx_ring: ring to free buffers from
3210  **/
3211 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3212 {
3213         struct igb_buffer *buffer_info;
3214         unsigned long size;
3215         unsigned int i;
3216
3217         if (!rx_ring->buffer_info)
3218                 return;
3219
3220         /* Free all the Rx ring sk_buffs */
3221         for (i = 0; i < rx_ring->count; i++) {
3222                 buffer_info = &rx_ring->buffer_info[i];
3223                 if (buffer_info->dma) {
3224                         dma_unmap_single(rx_ring->dev,
3225                                          buffer_info->dma,
3226                                          rx_ring->rx_buffer_len,
3227                                          DMA_FROM_DEVICE);
3228                         buffer_info->dma = 0;
3229                 }
3230
3231                 if (buffer_info->skb) {
3232                         dev_kfree_skb(buffer_info->skb);
3233                         buffer_info->skb = NULL;
3234                 }
3235                 if (buffer_info->page_dma) {
3236                         dma_unmap_page(rx_ring->dev,
3237                                        buffer_info->page_dma,
3238                                        PAGE_SIZE / 2,
3239                                        DMA_FROM_DEVICE);
3240                         buffer_info->page_dma = 0;
3241                 }
3242                 if (buffer_info->page) {
3243                         put_page(buffer_info->page);
3244                         buffer_info->page = NULL;
3245                         buffer_info->page_offset = 0;
3246                 }
3247         }
3248
3249         size = sizeof(struct igb_buffer) * rx_ring->count;
3250         memset(rx_ring->buffer_info, 0, size);
3251
3252         /* Zero out the descriptor ring */
3253         memset(rx_ring->desc, 0, rx_ring->size);
3254
3255         rx_ring->next_to_clean = 0;
3256         rx_ring->next_to_use = 0;
3257 }
3258
3259 /**
3260  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3261  * @adapter: board private structure
3262  **/
3263 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3264 {
3265         int i;
3266
3267         for (i = 0; i < adapter->num_rx_queues; i++)
3268                 igb_clean_rx_ring(adapter->rx_ring[i]);
3269 }
3270
3271 /**
3272  * igb_set_mac - Change the Ethernet Address of the NIC
3273  * @netdev: network interface device structure
3274  * @p: pointer to an address structure
3275  *
3276  * Returns 0 on success, negative on failure
3277  **/
3278 static int igb_set_mac(struct net_device *netdev, void *p)
3279 {
3280         struct igb_adapter *adapter = netdev_priv(netdev);
3281         struct e1000_hw *hw = &adapter->hw;
3282         struct sockaddr *addr = p;
3283
3284         if (!is_valid_ether_addr(addr->sa_data))
3285                 return -EADDRNOTAVAIL;
3286
3287         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3288         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3289
3290         /* set the correct pool for the new PF MAC address in entry 0 */
3291         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3292                          adapter->vfs_allocated_count);
3293
3294         return 0;
3295 }
3296
3297 /**
3298  * igb_write_mc_addr_list - write multicast addresses to MTA
3299  * @netdev: network interface device structure
3300  *
3301  * Writes multicast address list to the MTA hash table.
3302  * Returns: -ENOMEM on failure
3303  *                0 on no addresses written
3304  *                X on writing X addresses to MTA
3305  **/
3306 static int igb_write_mc_addr_list(struct net_device *netdev)
3307 {
3308         struct igb_adapter *adapter = netdev_priv(netdev);
3309         struct e1000_hw *hw = &adapter->hw;
3310         struct netdev_hw_addr *ha;
3311         u8  *mta_list;
3312         int i;
3313
3314         if (netdev_mc_empty(netdev)) {
3315                 /* nothing to program, so clear mc list */
3316                 igb_update_mc_addr_list(hw, NULL, 0);
3317                 igb_restore_vf_multicasts(adapter);
3318                 return 0;
3319         }
3320
3321         mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3322         if (!mta_list)
3323                 return -ENOMEM;
3324
3325         /* The shared function expects a packed array of only addresses. */
3326         i = 0;
3327         netdev_for_each_mc_addr(ha, netdev)
3328                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3329
3330         igb_update_mc_addr_list(hw, mta_list, i);
3331         kfree(mta_list);
3332
3333         return netdev_mc_count(netdev);
3334 }
3335
3336 /**
3337  * igb_write_uc_addr_list - write unicast addresses to RAR table
3338  * @netdev: network interface device structure
3339  *
3340  * Writes unicast address list to the RAR table.
3341  * Returns: -ENOMEM on failure/insufficient address space
3342  *                0 on no addresses written
3343  *                X on writing X addresses to the RAR table
3344  **/
3345 static int igb_write_uc_addr_list(struct net_device *netdev)
3346 {
3347         struct igb_adapter *adapter = netdev_priv(netdev);
3348         struct e1000_hw *hw = &adapter->hw;
3349         unsigned int vfn = adapter->vfs_allocated_count;
3350         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3351         int count = 0;
3352
3353         /* return ENOMEM indicating insufficient memory for addresses */
3354         if (netdev_uc_count(netdev) > rar_entries)
3355                 return -ENOMEM;
3356
3357         if (!netdev_uc_empty(netdev) && rar_entries) {
3358                 struct netdev_hw_addr *ha;
3359
3360                 netdev_for_each_uc_addr(ha, netdev) {
3361                         if (!rar_entries)
3362                                 break;
3363                         igb_rar_set_qsel(adapter, ha->addr,
3364                                          rar_entries--,
3365                                          vfn);
3366                         count++;
3367                 }
3368         }
3369         /* write the addresses in reverse order to avoid write combining */
3370         for (; rar_entries > 0 ; rar_entries--) {
3371                 wr32(E1000_RAH(rar_entries), 0);
3372                 wr32(E1000_RAL(rar_entries), 0);
3373         }
3374         wrfl();
3375
3376         return count;
3377 }
3378
3379 /**
3380  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3381  * @netdev: network interface device structure
3382  *
3383  * The set_rx_mode entry point is called whenever the unicast or multicast
3384  * address lists or the network interface flags are updated.  This routine is
3385  * responsible for configuring the hardware for proper unicast, multicast,
3386  * promiscuous mode, and all-multi behavior.
3387  **/
3388 static void igb_set_rx_mode(struct net_device *netdev)
3389 {
3390         struct igb_adapter *adapter = netdev_priv(netdev);
3391         struct e1000_hw *hw = &adapter->hw;
3392         unsigned int vfn = adapter->vfs_allocated_count;
3393         u32 rctl, vmolr = 0;
3394         int count;
3395
3396         /* Check for Promiscuous and All Multicast modes */
3397         rctl = rd32(E1000_RCTL);
3398
3399         /* clear the effected bits */
3400         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3401
3402         if (netdev->flags & IFF_PROMISC) {
3403                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3404                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3405         } else {
3406                 if (netdev->flags & IFF_ALLMULTI) {
3407                         rctl |= E1000_RCTL_MPE;
3408                         vmolr |= E1000_VMOLR_MPME;
3409                 } else {
3410                         /*
3411                          * Write addresses to the MTA, if the attempt fails
3412                          * then we should just turn on promiscuous mode so
3413                          * that we can at least receive multicast traffic
3414                          */
3415                         count = igb_write_mc_addr_list(netdev);
3416                         if (count < 0) {
3417                                 rctl |= E1000_RCTL_MPE;
3418                                 vmolr |= E1000_VMOLR_MPME;
3419                         } else if (count) {
3420                                 vmolr |= E1000_VMOLR_ROMPE;
3421                         }
3422                 }
3423                 /*
3424                  * Write addresses to available RAR registers, if there is not
3425                  * sufficient space to store all the addresses then enable
3426                  * unicast promiscuous mode
3427                  */
3428                 count = igb_write_uc_addr_list(netdev);
3429                 if (count < 0) {
3430                         rctl |= E1000_RCTL_UPE;
3431                         vmolr |= E1000_VMOLR_ROPE;
3432                 }
3433                 rctl |= E1000_RCTL_VFE;
3434         }
3435         wr32(E1000_RCTL, rctl);
3436
3437         /*
3438          * In order to support SR-IOV and eventually VMDq it is necessary to set
3439          * the VMOLR to enable the appropriate modes.  Without this workaround
3440          * we will have issues with VLAN tag stripping not being done for frames
3441          * that are only arriving because we are the default pool
3442          */
3443         if (hw->mac.type < e1000_82576)
3444                 return;
3445
3446         vmolr |= rd32(E1000_VMOLR(vfn)) &
3447                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3448         wr32(E1000_VMOLR(vfn), vmolr);
3449         igb_restore_vf_multicasts(adapter);
3450 }
3451
3452 static void igb_check_wvbr(struct igb_adapter *adapter)
3453 {
3454         struct e1000_hw *hw = &adapter->hw;
3455         u32 wvbr = 0;
3456
3457         switch (hw->mac.type) {
3458         case e1000_82576:
3459         case e1000_i350:
3460                 if (!(wvbr = rd32(E1000_WVBR)))
3461                         return;
3462                 break;
3463         default:
3464                 break;
3465         }
3466
3467         adapter->wvbr |= wvbr;
3468 }
3469
3470 #define IGB_STAGGERED_QUEUE_OFFSET 8
3471
3472 static void igb_spoof_check(struct igb_adapter *adapter)
3473 {
3474         int j;
3475
3476         if (!adapter->wvbr)
3477                 return;
3478
3479         for(j = 0; j < adapter->vfs_allocated_count; j++) {
3480                 if (adapter->wvbr & (1 << j) ||
3481                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3482                         dev_warn(&adapter->pdev->dev,
3483                                 "Spoof event(s) detected on VF %d\n", j);
3484                         adapter->wvbr &=
3485                                 ~((1 << j) |
3486                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3487                 }
3488         }
3489 }
3490
3491 /* Need to wait a few seconds after link up to get diagnostic information from
3492  * the phy */
3493 static void igb_update_phy_info(unsigned long data)
3494 {
3495         struct igb_adapter *adapter = (struct igb_adapter *) data;
3496         igb_get_phy_info(&adapter->hw);
3497 }
3498
3499 /**
3500  * igb_has_link - check shared code for link and determine up/down
3501  * @adapter: pointer to driver private info
3502  **/
3503 bool igb_has_link(struct igb_adapter *adapter)
3504 {
3505         struct e1000_hw *hw = &adapter->hw;
3506         bool link_active = false;
3507         s32 ret_val = 0;
3508
3509         /* get_link_status is set on LSC (link status) interrupt or
3510          * rx sequence error interrupt.  get_link_status will stay
3511          * false until the e1000_check_for_link establishes link
3512          * for copper adapters ONLY
3513          */
3514         switch (hw->phy.media_type) {
3515         case e1000_media_type_copper:
3516                 if (hw->mac.get_link_status) {
3517                         ret_val = hw->mac.ops.check_for_link(hw);
3518                         link_active = !hw->mac.get_link_status;
3519                 } else {
3520                         link_active = true;
3521                 }
3522                 break;
3523         case e1000_media_type_internal_serdes:
3524                 ret_val = hw->mac.ops.check_for_link(hw);
3525                 link_active = hw->mac.serdes_has_link;
3526                 break;
3527         default:
3528         case e1000_media_type_unknown:
3529                 break;
3530         }
3531
3532         return link_active;
3533 }
3534
3535 /**
3536  * igb_watchdog - Timer Call-back
3537  * @data: pointer to adapter cast into an unsigned long
3538  **/
3539 static void igb_watchdog(unsigned long data)
3540 {
3541         struct igb_adapter *adapter = (struct igb_adapter *)data;
3542         /* Do the rest outside of interrupt context */
3543         schedule_work(&adapter->watchdog_task);
3544 }
3545
3546 static void igb_watchdog_task(struct work_struct *work)
3547 {
3548         struct igb_adapter *adapter = container_of(work,
3549                                                    struct igb_adapter,
3550                                                    watchdog_task);
3551         struct e1000_hw *hw = &adapter->hw;
3552         struct net_device *netdev = adapter->netdev;
3553         u32 link, ctrl_ext, thstat;
3554         int i;
3555
3556         link = igb_has_link(adapter);
3557         if (link) {
3558                 if (!netif_carrier_ok(netdev)) {
3559                         u32 ctrl;
3560                         hw->mac.ops.get_speed_and_duplex(hw,
3561                                                          &adapter->link_speed,
3562                                                          &adapter->link_duplex);
3563
3564                         ctrl = rd32(E1000_CTRL);
3565                         /* Links status message must follow this format */
3566                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3567                                  "Flow Control: %s\n",
3568                                netdev->name,
3569                                adapter->link_speed,
3570                                adapter->link_duplex == FULL_DUPLEX ?
3571                                  "Full Duplex" : "Half Duplex",
3572                                ((ctrl & E1000_CTRL_TFCE) &&
3573                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3574                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3575                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3576
3577                         /* check for thermal sensor event on i350,
3578                          * copper only */
3579                         if (hw->mac.type == e1000_i350) {
3580                                 thstat = rd32(E1000_THSTAT);
3581                                 ctrl_ext = rd32(E1000_CTRL_EXT);
3582                                 if ((hw->phy.media_type ==
3583                                      e1000_media_type_copper) && !(ctrl_ext &
3584                                      E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3585                                         if (thstat &
3586                                             E1000_THSTAT_LINK_THROTTLE) {
3587                                                 printk(KERN_INFO "igb: %s The "
3588                                                        "network adapter link "
3589                                                        "speed was downshifted "
3590                                                        "because it "
3591                                                        "overheated.\n",
3592                                                        netdev->name);
3593                                         }
3594                                 }
3595                         }
3596                         /* adjust timeout factor according to speed/duplex */
3597                         adapter->tx_timeout_factor = 1;
3598                         switch (adapter->link_speed) {
3599                         case SPEED_10:
3600                                 adapter->tx_timeout_factor = 14;
3601                                 break;
3602                         case SPEED_100:
3603                                 /* maybe add some timeout factor ? */
3604                                 break;
3605                         }
3606
3607                         netif_carrier_on(netdev);
3608
3609                         igb_ping_all_vfs(adapter);
3610                         igb_check_vf_rate_limit(adapter);
3611
3612                         /* link state has changed, schedule phy info update */
3613                         if (!test_bit(__IGB_DOWN, &adapter->state))
3614                                 mod_timer(&adapter->phy_info_timer,
3615                                           round_jiffies(jiffies + 2 * HZ));
3616                 }
3617         } else {
3618                 if (netif_carrier_ok(netdev)) {
3619                         adapter->link_speed = 0;
3620                         adapter->link_duplex = 0;
3621                         /* check for thermal sensor event on i350
3622                          * copper only*/
3623                         if (hw->mac.type == e1000_i350) {
3624                                 thstat = rd32(E1000_THSTAT);
3625                                 ctrl_ext = rd32(E1000_CTRL_EXT);
3626                                 if ((hw->phy.media_type ==
3627                                      e1000_media_type_copper) && !(ctrl_ext &
3628                                      E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3629                                         if (thstat & E1000_THSTAT_PWR_DOWN) {
3630                                                 printk(KERN_ERR "igb: %s The "
3631                                                 "network adapter was stopped "
3632                                                 "because it overheated.\n",
3633                                                 netdev->name);
3634                                         }
3635                                 }
3636                         }
3637                         /* Links status message must follow this format */
3638                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3639                                netdev->name);
3640                         netif_carrier_off(netdev);
3641
3642                         igb_ping_all_vfs(adapter);
3643
3644                         /* link state has changed, schedule phy info update */
3645                         if (!test_bit(__IGB_DOWN, &adapter->state))
3646                                 mod_timer(&adapter->phy_info_timer,
3647                                           round_jiffies(jiffies + 2 * HZ));
3648                 }
3649         }
3650
3651         spin_lock(&adapter->stats64_lock);
3652         igb_update_stats(adapter, &adapter->stats64);
3653         spin_unlock(&adapter->stats64_lock);
3654
3655         for (i = 0; i < adapter->num_tx_queues; i++) {
3656                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3657                 if (!netif_carrier_ok(netdev)) {
3658                         /* We've lost link, so the controller stops DMA,
3659                          * but we've got queued Tx work that's never going
3660                          * to get done, so reset controller to flush Tx.
3661                          * (Do the reset outside of interrupt context). */
3662                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3663                                 adapter->tx_timeout_count++;
3664                                 schedule_work(&adapter->reset_task);
3665                                 /* return immediately since reset is imminent */
3666                                 return;
3667                         }
3668                 }
3669
3670                 /* Force detection of hung controller every watchdog period */
3671                 tx_ring->detect_tx_hung = true;
3672         }
3673
3674         /* Cause software interrupt to ensure rx ring is cleaned */
3675         if (adapter->msix_entries) {
3676                 u32 eics = 0;
3677                 for (i = 0; i < adapter->num_q_vectors; i++) {
3678                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3679                         eics |= q_vector->eims_value;
3680                 }
3681                 wr32(E1000_EICS, eics);
3682         } else {
3683                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3684         }
3685
3686         igb_spoof_check(adapter);
3687
3688         /* Reset the timer */
3689         if (!test_bit(__IGB_DOWN, &adapter->state))
3690                 mod_timer(&adapter->watchdog_timer,
3691                           round_jiffies(jiffies + 2 * HZ));
3692 }
3693
3694 enum latency_range {
3695         lowest_latency = 0,
3696         low_latency = 1,
3697         bulk_latency = 2,
3698         latency_invalid = 255
3699 };
3700
3701 /**
3702  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3703  *
3704  *      Stores a new ITR value based on strictly on packet size.  This
3705  *      algorithm is less sophisticated than that used in igb_update_itr,
3706  *      due to the difficulty of synchronizing statistics across multiple
3707  *      receive rings.  The divisors and thresholds used by this function
3708  *      were determined based on theoretical maximum wire speed and testing
3709  *      data, in order to minimize response time while increasing bulk
3710  *      throughput.
3711  *      This functionality is controlled by the InterruptThrottleRate module
3712  *      parameter (see igb_param.c)
3713  *      NOTE:  This function is called only when operating in a multiqueue
3714  *             receive environment.
3715  * @q_vector: pointer to q_vector
3716  **/
3717 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3718 {
3719         int new_val = q_vector->itr_val;
3720         int avg_wire_size = 0;
3721         struct igb_adapter *adapter = q_vector->adapter;
3722         struct igb_ring *ring;
3723         unsigned int packets;
3724
3725         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3726          * ints/sec - ITR timer value of 120 ticks.
3727          */
3728         if (adapter->link_speed != SPEED_1000) {
3729                 new_val = 976;
3730                 goto set_itr_val;
3731         }
3732
3733         ring = q_vector->rx_ring;
3734         if (ring) {
3735                 packets = ACCESS_ONCE(ring->total_packets);
3736
3737                 if (packets)
3738                         avg_wire_size = ring->total_bytes / packets;
3739         }
3740
3741         ring = q_vector->tx_ring;
3742         if (ring) {
3743                 packets = ACCESS_ONCE(ring->total_packets);
3744
3745                 if (packets)
3746                         avg_wire_size = max_t(u32, avg_wire_size,
3747                                               ring->total_bytes / packets);
3748         }
3749
3750         /* if avg_wire_size isn't set no work was done */
3751         if (!avg_wire_size)
3752                 goto clear_counts;
3753
3754         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3755         avg_wire_size += 24;
3756
3757         /* Don't starve jumbo frames */
3758         avg_wire_size = min(avg_wire_size, 3000);
3759
3760         /* Give a little boost to mid-size frames */
3761         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3762                 new_val = avg_wire_size / 3;
3763         else
3764                 new_val = avg_wire_size / 2;
3765
3766         /* when in itr mode 3 do not exceed 20K ints/sec */
3767         if (adapter->rx_itr_setting == 3 && new_val < 196)
3768                 new_val = 196;
3769
3770 set_itr_val:
3771         if (new_val != q_vector->itr_val) {
3772                 q_vector->itr_val = new_val;
3773                 q_vector->set_itr = 1;
3774         }
3775 clear_counts:
3776         if (q_vector->rx_ring) {
3777                 q_vector->rx_ring->total_bytes = 0;
3778                 q_vector->rx_ring->total_packets = 0;
3779         }
3780         if (q_vector->tx_ring) {
3781                 q_vector->tx_ring->total_bytes = 0;
3782                 q_vector->tx_ring->total_packets = 0;
3783         }
3784 }
3785
3786 /**
3787  * igb_update_itr - update the dynamic ITR value based on statistics
3788  *      Stores a new ITR value based on packets and byte
3789  *      counts during the last interrupt.  The advantage of per interrupt
3790  *      computation is faster updates and more accurate ITR for the current
3791  *      traffic pattern.  Constants in this function were computed
3792  *      based on theoretical maximum wire speed and thresholds were set based
3793  *      on testing data as well as attempting to minimize response time
3794  *      while increasing bulk throughput.
3795  *      this functionality is controlled by the InterruptThrottleRate module
3796  *      parameter (see igb_param.c)
3797  *      NOTE:  These calculations are only valid when operating in a single-
3798  *             queue environment.
3799  * @adapter: pointer to adapter
3800  * @itr_setting: current q_vector->itr_val
3801  * @packets: the number of packets during this measurement interval
3802  * @bytes: the number of bytes during this measurement interval
3803  **/
3804 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3805                                    int packets, int bytes)
3806 {
3807         unsigned int retval = itr_setting;
3808
3809         if (packets == 0)
3810                 goto update_itr_done;
3811
3812         switch (itr_setting) {
3813         case lowest_latency:
3814                 /* handle TSO and jumbo frames */
3815                 if (bytes/packets > 8000)
3816                         retval = bulk_latency;
3817                 else if ((packets < 5) && (bytes > 512))
3818                         retval = low_latency;
3819                 break;
3820         case low_latency:  /* 50 usec aka 20000 ints/s */
3821                 if (bytes > 10000) {
3822                         /* this if handles the TSO accounting */
3823                         if (bytes/packets > 8000) {
3824                                 retval = bulk_latency;
3825                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3826                                 retval = bulk_latency;
3827                         } else if ((packets > 35)) {
3828                                 retval = lowest_latency;
3829                         }
3830                 } else if (bytes/packets > 2000) {
3831                         retval = bulk_latency;
3832                 } else if (packets <= 2 && bytes < 512) {
3833                         retval = lowest_latency;
3834                 }
3835                 break;
3836         case bulk_latency: /* 250 usec aka 4000 ints/s */
3837                 if (bytes > 25000) {
3838                         if (packets > 35)
3839                                 retval = low_latency;
3840                 } else if (bytes < 1500) {
3841                         retval = low_latency;
3842                 }
3843                 break;
3844         }
3845
3846 update_itr_done:
3847         return retval;
3848 }
3849
3850 static void igb_set_itr(struct igb_adapter *adapter)
3851 {
3852         struct igb_q_vector *q_vector = adapter->q_vector[0];
3853         u16 current_itr;
3854         u32 new_itr = q_vector->itr_val;
3855
3856         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3857         if (adapter->link_speed != SPEED_1000) {
3858                 current_itr = 0;
3859                 new_itr = 4000;
3860                 goto set_itr_now;
3861         }
3862
3863         adapter->rx_itr = igb_update_itr(adapter,
3864                                     adapter->rx_itr,
3865                                     q_vector->rx_ring->total_packets,
3866                                     q_vector->rx_ring->total_bytes);
3867
3868         adapter->tx_itr = igb_update_itr(adapter,
3869                                     adapter->tx_itr,
3870                                     q_vector->tx_ring->total_packets,
3871                                     q_vector->tx_ring->total_bytes);
3872         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3873
3874         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3875         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3876                 current_itr = low_latency;
3877
3878         switch (current_itr) {
3879         /* counts and packets in update_itr are dependent on these numbers */
3880         case lowest_latency:
3881                 new_itr = 56;  /* aka 70,000 ints/sec */
3882                 break;
3883         case low_latency:
3884                 new_itr = 196; /* aka 20,000 ints/sec */
3885                 break;
3886         case bulk_latency:
3887                 new_itr = 980; /* aka 4,000 ints/sec */
3888                 break;
3889         default:
3890                 break;
3891         }
3892
3893 set_itr_now:
3894         q_vector->rx_ring->total_bytes = 0;
3895         q_vector->rx_ring->total_packets = 0;
3896         q_vector->tx_ring->total_bytes = 0;
3897         q_vector->tx_ring->total_packets = 0;
3898
3899         if (new_itr != q_vector->itr_val) {
3900                 /* this attempts to bias the interrupt rate towards Bulk
3901                  * by adding intermediate steps when interrupt rate is
3902                  * increasing */
3903                 new_itr = new_itr > q_vector->itr_val ?
3904                              max((new_itr * q_vector->itr_val) /
3905                                  (new_itr + (q_vector->itr_val >> 2)),
3906                                  new_itr) :
3907                              new_itr;
3908                 /* Don't write the value here; it resets the adapter's
3909                  * internal timer, and causes us to delay far longer than
3910                  * we should between interrupts.  Instead, we write the ITR
3911                  * value at the beginning of the next interrupt so the timing
3912                  * ends up being correct.
3913                  */
3914                 q_vector->itr_val = new_itr;
3915                 q_vector->set_itr = 1;
3916         }
3917 }
3918
3919 #define IGB_TX_FLAGS_CSUM               0x00000001
3920 #define IGB_TX_FLAGS_VLAN               0x00000002
3921 #define IGB_TX_FLAGS_TSO                0x00000004
3922 #define IGB_TX_FLAGS_IPV4               0x00000008
3923 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3924 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3925 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3926
3927 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3928                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3929 {
3930         struct e1000_adv_tx_context_desc *context_desc;
3931         unsigned int i;
3932         int err;
3933         struct igb_buffer *buffer_info;
3934         u32 info = 0, tu_cmd = 0;
3935         u32 mss_l4len_idx;
3936         u8 l4len;
3937
3938         if (skb_header_cloned(skb)) {
3939                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3940                 if (err)
3941                         return err;
3942         }
3943
3944         l4len = tcp_hdrlen(skb);
3945         *hdr_len += l4len;
3946
3947         if (skb->protocol == htons(ETH_P_IP)) {
3948                 struct iphdr *iph = ip_hdr(skb);
3949                 iph->tot_len = 0;
3950                 iph->check = 0;
3951                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3952                                                          iph->daddr, 0,
3953                                                          IPPROTO_TCP,
3954                                                          0);
3955         } else if (skb_is_gso_v6(skb)) {
3956                 ipv6_hdr(skb)->payload_len = 0;
3957                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3958                                                        &ipv6_hdr(skb)->daddr,
3959                                                        0, IPPROTO_TCP, 0);
3960         }
3961
3962         i = tx_ring->next_to_use;
3963
3964         buffer_info = &tx_ring->buffer_info[i];
3965         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3966         /* VLAN MACLEN IPLEN */
3967         if (tx_flags & IGB_TX_FLAGS_VLAN)
3968                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3969         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3970         *hdr_len += skb_network_offset(skb);
3971         info |= skb_network_header_len(skb);
3972         *hdr_len += skb_network_header_len(skb);
3973         context_desc->vlan_macip_lens = cpu_to_le32(info);
3974
3975         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3976         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3977
3978         if (skb->protocol == htons(ETH_P_IP))
3979                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3980         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3981
3982         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3983
3984         /* MSS L4LEN IDX */
3985         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3986         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3987
3988         /* For 82575, context index must be unique per ring. */
3989         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3990                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3991
3992         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3993         context_desc->seqnum_seed = 0;
3994
3995         buffer_info->time_stamp = jiffies;
3996         buffer_info->next_to_watch = i;
3997         buffer_info->dma = 0;
3998         i++;
3999         if (i == tx_ring->count)
4000                 i = 0;
4001
4002         tx_ring->next_to_use = i;
4003
4004         return true;
4005 }
4006
4007 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
4008                                    struct sk_buff *skb, u32 tx_flags)
4009 {
4010         struct e1000_adv_tx_context_desc *context_desc;
4011         struct device *dev = tx_ring->dev;
4012         struct igb_buffer *buffer_info;
4013         u32 info = 0, tu_cmd = 0;
4014         unsigned int i;
4015
4016         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4017             (tx_flags & IGB_TX_FLAGS_VLAN)) {
4018                 i = tx_ring->next_to_use;
4019                 buffer_info = &tx_ring->buffer_info[i];
4020                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4021
4022                 if (tx_flags & IGB_TX_FLAGS_VLAN)
4023                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4024
4025                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4026                 if (skb->ip_summed == CHECKSUM_PARTIAL)
4027                         info |= skb_network_header_len(skb);
4028
4029                 context_desc->vlan_macip_lens = cpu_to_le32(info);
4030
4031                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4032
4033                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
4034                         __be16 protocol;
4035
4036                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4037                                 const struct vlan_ethhdr *vhdr =
4038                                           (const struct vlan_ethhdr*)skb->data;
4039
4040                                 protocol = vhdr->h_vlan_encapsulated_proto;
4041                         } else {
4042                                 protocol = skb->protocol;
4043                         }
4044
4045                         switch (protocol) {
4046                         case cpu_to_be16(ETH_P_IP):
4047                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4048                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4049                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4050                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4051                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4052                                 break;
4053                         case cpu_to_be16(ETH_P_IPV6):
4054                                 /* XXX what about other V6 headers?? */
4055                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4056                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4057                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4058                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4059                                 break;
4060                         default:
4061                                 if (unlikely(net_ratelimit()))
4062                                         dev_warn(dev,
4063                                             "partial checksum but proto=%x!\n",
4064                                             skb->protocol);
4065                                 break;
4066                         }
4067                 }
4068
4069                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4070                 context_desc->seqnum_seed = 0;
4071                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4072                         context_desc->mss_l4len_idx =
4073                                 cpu_to_le32(tx_ring->reg_idx << 4);
4074
4075                 buffer_info->time_stamp = jiffies;
4076                 buffer_info->next_to_watch = i;
4077                 buffer_info->dma = 0;
4078
4079                 i++;
4080                 if (i == tx_ring->count)
4081                         i = 0;
4082                 tx_ring->next_to_use = i;
4083
4084                 return true;
4085         }
4086         return false;
4087 }
4088
4089 #define IGB_MAX_TXD_PWR 16
4090 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
4091
4092 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
4093                                  unsigned int first)
4094 {
4095         struct igb_buffer *buffer_info;
4096         struct device *dev = tx_ring->dev;
4097         unsigned int hlen = skb_headlen(skb);
4098         unsigned int count = 0, i;
4099         unsigned int f;
4100         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4101
4102         i = tx_ring->next_to_use;
4103
4104         buffer_info = &tx_ring->buffer_info[i];
4105         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4106         buffer_info->length = hlen;
4107         /* set time_stamp *before* dma to help avoid a possible race */
4108         buffer_info->time_stamp = jiffies;
4109         buffer_info->next_to_watch = i;
4110         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4111                                           DMA_TO_DEVICE);
4112         if (dma_mapping_error(dev, buffer_info->dma))
4113                 goto dma_error;
4114
4115         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4116                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4117                 unsigned int len = frag->size;
4118
4119                 count++;
4120                 i++;
4121                 if (i == tx_ring->count)
4122                         i = 0;
4123
4124                 buffer_info = &tx_ring->buffer_info[i];
4125                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4126                 buffer_info->length = len;
4127                 buffer_info->time_stamp = jiffies;
4128                 buffer_info->next_to_watch = i;
4129                 buffer_info->mapped_as_page = true;
4130                 buffer_info->dma = dma_map_page(dev,
4131                                                 frag->page,
4132                                                 frag->page_offset,
4133                                                 len,
4134                                                 DMA_TO_DEVICE);
4135                 if (dma_mapping_error(dev, buffer_info->dma))
4136                         goto dma_error;
4137
4138         }
4139
4140         tx_ring->buffer_info[i].skb = skb;
4141         tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4142         /* multiply data chunks by size of headers */
4143         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4144         tx_ring->buffer_info[i].gso_segs = gso_segs;
4145         tx_ring->buffer_info[first].next_to_watch = i;
4146
4147         return ++count;
4148
4149 dma_error:
4150         dev_err(dev, "TX DMA map failed\n");
4151
4152         /* clear timestamp and dma mappings for failed buffer_info mapping */
4153         buffer_info->dma = 0;
4154         buffer_info->time_stamp = 0;
4155         buffer_info->length = 0;
4156         buffer_info->next_to_watch = 0;
4157         buffer_info->mapped_as_page = false;
4158
4159         /* clear timestamp and dma mappings for remaining portion of packet */
4160         while (count--) {
4161                 if (i == 0)
4162                         i = tx_ring->count;
4163                 i--;
4164                 buffer_info = &tx_ring->buffer_info[i];
4165                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4166         }
4167
4168         return 0;
4169 }
4170
4171 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4172                                     u32 tx_flags, int count, u32 paylen,
4173                                     u8 hdr_len)
4174 {
4175         union e1000_adv_tx_desc *tx_desc;
4176         struct igb_buffer *buffer_info;
4177         u32 olinfo_status = 0, cmd_type_len;
4178         unsigned int i = tx_ring->next_to_use;
4179
4180         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4181                         E1000_ADVTXD_DCMD_DEXT);
4182
4183         if (tx_flags & IGB_TX_FLAGS_VLAN)
4184                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4185
4186         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4187                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4188
4189         if (tx_flags & IGB_TX_FLAGS_TSO) {
4190                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4191
4192                 /* insert tcp checksum */
4193                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4194
4195                 /* insert ip checksum */
4196                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4197                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4198
4199         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4200                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4201         }
4202
4203         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4204             (tx_flags & (IGB_TX_FLAGS_CSUM |
4205                          IGB_TX_FLAGS_TSO |
4206                          IGB_TX_FLAGS_VLAN)))
4207                 olinfo_status |= tx_ring->reg_idx << 4;
4208
4209         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4210
4211         do {
4212                 buffer_info = &tx_ring->buffer_info[i];
4213                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4214                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4215                 tx_desc->read.cmd_type_len =
4216                         cpu_to_le32(cmd_type_len | buffer_info->length);
4217                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4218                 count--;
4219                 i++;
4220                 if (i == tx_ring->count)
4221                         i = 0;
4222         } while (count > 0);
4223
4224         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4225         /* Force memory writes to complete before letting h/w
4226          * know there are new descriptors to fetch.  (Only
4227          * applicable for weak-ordered memory model archs,
4228          * such as IA-64). */
4229         wmb();
4230
4231         tx_ring->next_to_use = i;
4232         writel(i, tx_ring->tail);
4233         /* we need this if more than one processor can write to our tail
4234          * at a time, it syncronizes IO on IA64/Altix systems */
4235         mmiowb();
4236 }
4237
4238 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4239 {
4240         struct net_device *netdev = tx_ring->netdev;
4241
4242         netif_stop_subqueue(netdev, tx_ring->queue_index);
4243
4244         /* Herbert's original patch had:
4245          *  smp_mb__after_netif_stop_queue();
4246          * but since that doesn't exist yet, just open code it. */
4247         smp_mb();
4248
4249         /* We need to check again in a case another CPU has just
4250          * made room available. */
4251         if (igb_desc_unused(tx_ring) < size)
4252                 return -EBUSY;
4253
4254         /* A reprieve! */
4255         netif_wake_subqueue(netdev, tx_ring->queue_index);
4256
4257         u64_stats_update_begin(&tx_ring->tx_syncp2);
4258         tx_ring->tx_stats.restart_queue2++;
4259         u64_stats_update_end(&tx_ring->tx_syncp2);
4260
4261         return 0;
4262 }
4263
4264 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4265 {
4266         if (igb_desc_unused(tx_ring) >= size)
4267                 return 0;
4268         return __igb_maybe_stop_tx(tx_ring, size);
4269 }
4270
4271 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4272                                     struct igb_ring *tx_ring)
4273 {
4274         int tso = 0, count;
4275         u32 tx_flags = 0;
4276         u16 first;
4277         u8 hdr_len = 0;
4278
4279         /* need: 1 descriptor per page,
4280          *       + 2 desc gap to keep tail from touching head,
4281          *       + 1 desc for skb->data,
4282          *       + 1 desc for context descriptor,
4283          * otherwise try next time */
4284         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4285                 /* this is a hard error */
4286                 return NETDEV_TX_BUSY;
4287         }
4288
4289         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4290                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4291                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4292         }
4293
4294         if (vlan_tx_tag_present(skb)) {
4295                 tx_flags |= IGB_TX_FLAGS_VLAN;
4296                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4297         }
4298
4299         if (skb->protocol == htons(ETH_P_IP))
4300                 tx_flags |= IGB_TX_FLAGS_IPV4;
4301
4302         first = tx_ring->next_to_use;
4303         if (skb_is_gso(skb)) {
4304                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4305
4306                 if (tso < 0) {
4307                         dev_kfree_skb_any(skb);
4308                         return NETDEV_TX_OK;
4309                 }
4310         }
4311
4312         if (tso)
4313                 tx_flags |= IGB_TX_FLAGS_TSO;
4314         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4315                  (skb->ip_summed == CHECKSUM_PARTIAL))
4316                 tx_flags |= IGB_TX_FLAGS_CSUM;
4317
4318         /*
4319          * count reflects descriptors mapped, if 0 or less then mapping error
4320          * has occurred and we need to rewind the descriptor queue
4321          */
4322         count = igb_tx_map_adv(tx_ring, skb, first);
4323         if (!count) {
4324                 dev_kfree_skb_any(skb);
4325                 tx_ring->buffer_info[first].time_stamp = 0;
4326                 tx_ring->next_to_use = first;
4327                 return NETDEV_TX_OK;
4328         }
4329
4330         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4331
4332         /* Make sure there is space in the ring for the next send. */
4333         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4334
4335         return NETDEV_TX_OK;
4336 }
4337
4338 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4339                                       struct net_device *netdev)
4340 {
4341         struct igb_adapter *adapter = netdev_priv(netdev);
4342         struct igb_ring *tx_ring;
4343         int r_idx = 0;
4344
4345         if (test_bit(__IGB_DOWN, &adapter->state)) {
4346                 dev_kfree_skb_any(skb);
4347                 return NETDEV_TX_OK;
4348         }
4349
4350         if (skb->len <= 0) {
4351                 dev_kfree_skb_any(skb);
4352                 return NETDEV_TX_OK;
4353         }
4354
4355         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4356         tx_ring = adapter->multi_tx_table[r_idx];
4357
4358         /* This goes back to the question of how to logically map a tx queue
4359          * to a flow.  Right now, performance is impacted slightly negatively
4360          * if using multiple tx queues.  If the stack breaks away from a
4361          * single qdisc implementation, we can look at this again. */
4362         return igb_xmit_frame_ring_adv(skb, tx_ring);
4363 }
4364
4365 /**
4366  * igb_tx_timeout - Respond to a Tx Hang
4367  * @netdev: network interface device structure
4368  **/
4369 static void igb_tx_timeout(struct net_device *netdev)
4370 {
4371         struct igb_adapter *adapter = netdev_priv(netdev);
4372         struct e1000_hw *hw = &adapter->hw;
4373
4374         /* Do the reset outside of interrupt context */
4375         adapter->tx_timeout_count++;
4376
4377         if (hw->mac.type == e1000_82580)
4378                 hw->dev_spec._82575.global_device_reset = true;
4379
4380         schedule_work(&adapter->reset_task);
4381         wr32(E1000_EICS,
4382              (adapter->eims_enable_mask & ~adapter->eims_other));
4383 }
4384
4385 static void igb_reset_task(struct work_struct *work)
4386 {
4387         struct igb_adapter *adapter;
4388         adapter = container_of(work, struct igb_adapter, reset_task);
4389
4390         igb_dump(adapter);
4391         netdev_err(adapter->netdev, "Reset adapter\n");
4392         igb_reinit_locked(adapter);
4393 }
4394
4395 /**
4396  * igb_get_stats64 - Get System Network Statistics
4397  * @netdev: network interface device structure
4398  * @stats: rtnl_link_stats64 pointer
4399  *
4400  **/
4401 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4402                                                  struct rtnl_link_stats64 *stats)
4403 {
4404         struct igb_adapter *adapter = netdev_priv(netdev);
4405
4406         spin_lock(&adapter->stats64_lock);
4407         igb_update_stats(adapter, &adapter->stats64);
4408         memcpy(stats, &adapter->stats64, sizeof(*stats));
4409         spin_unlock(&adapter->stats64_lock);
4410
4411         return stats;
4412 }
4413
4414 /**
4415  * igb_change_mtu - Change the Maximum Transfer Unit
4416  * @netdev: network interface device structure
4417  * @new_mtu: new value for maximum frame size
4418  *
4419  * Returns 0 on success, negative on failure
4420  **/
4421 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4422 {
4423         struct igb_adapter *adapter = netdev_priv(netdev);
4424         struct pci_dev *pdev = adapter->pdev;
4425         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4426         u32 rx_buffer_len, i;
4427
4428         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4429                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4430                 return -EINVAL;
4431         }
4432
4433         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4434                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4435                 return -EINVAL;
4436         }
4437
4438         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4439                 msleep(1);
4440
4441         /* igb_down has a dependency on max_frame_size */
4442         adapter->max_frame_size = max_frame;
4443
4444         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4445          * means we reserve 2 more, this pushes us to allocate from the next
4446          * larger slab size.
4447          * i.e. RXBUFFER_2048 --> size-4096 slab
4448          */
4449
4450         if (adapter->hw.mac.type == e1000_82580)
4451                 max_frame += IGB_TS_HDR_LEN;
4452
4453         if (max_frame <= IGB_RXBUFFER_1024)
4454                 rx_buffer_len = IGB_RXBUFFER_1024;
4455         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4456                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4457         else
4458                 rx_buffer_len = IGB_RXBUFFER_128;
4459
4460         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4461              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4462                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4463
4464         if ((adapter->hw.mac.type == e1000_82580) &&
4465             (rx_buffer_len == IGB_RXBUFFER_128))
4466                 rx_buffer_len += IGB_RXBUFFER_64;
4467
4468         if (netif_running(netdev))
4469                 igb_down(adapter);
4470
4471         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4472                  netdev->mtu, new_mtu);
4473         netdev->mtu = new_mtu;
4474
4475         for (i = 0; i < adapter->num_rx_queues; i++)
4476                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4477
4478         if (netif_running(netdev))
4479                 igb_up(adapter);
4480         else
4481                 igb_reset(adapter);
4482
4483         clear_bit(__IGB_RESETTING, &adapter->state);
4484
4485         return 0;
4486 }
4487
4488 /**
4489  * igb_update_stats - Update the board statistics counters
4490  * @adapter: board private structure
4491  **/
4492
4493 void igb_update_stats(struct igb_adapter *adapter,
4494                       struct rtnl_link_stats64 *net_stats)
4495 {
4496         struct e1000_hw *hw = &adapter->hw;
4497         struct pci_dev *pdev = adapter->pdev;
4498         u32 reg, mpc;
4499         u16 phy_tmp;
4500         int i;
4501         u64 bytes, packets;
4502         unsigned int start;
4503         u64 _bytes, _packets;
4504
4505 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4506
4507         /*
4508          * Prevent stats update while adapter is being reset, or if the pci
4509          * connection is down.
4510          */
4511         if (adapter->link_speed == 0)
4512                 return;
4513         if (pci_channel_offline(pdev))
4514                 return;
4515
4516         bytes = 0;
4517         packets = 0;
4518         for (i = 0; i < adapter->num_rx_queues; i++) {
4519                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4520                 struct igb_ring *ring = adapter->rx_ring[i];
4521
4522                 ring->rx_stats.drops += rqdpc_tmp;
4523                 net_stats->rx_fifo_errors += rqdpc_tmp;
4524
4525                 do {
4526                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4527                         _bytes = ring->rx_stats.bytes;
4528                         _packets = ring->rx_stats.packets;
4529                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4530                 bytes += _bytes;
4531                 packets += _packets;
4532         }
4533
4534         net_stats->rx_bytes = bytes;
4535         net_stats->rx_packets = packets;
4536
4537         bytes = 0;
4538         packets = 0;
4539         for (i = 0; i < adapter->num_tx_queues; i++) {
4540                 struct igb_ring *ring = adapter->tx_ring[i];
4541                 do {
4542                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4543                         _bytes = ring->tx_stats.bytes;
4544                         _packets = ring->tx_stats.packets;
4545                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4546                 bytes += _bytes;
4547                 packets += _packets;
4548         }
4549         net_stats->tx_bytes = bytes;
4550         net_stats->tx_packets = packets;
4551
4552         /* read stats registers */
4553         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4554         adapter->stats.gprc += rd32(E1000_GPRC);
4555         adapter->stats.gorc += rd32(E1000_GORCL);
4556         rd32(E1000_GORCH); /* clear GORCL */
4557         adapter->stats.bprc += rd32(E1000_BPRC);
4558         adapter->stats.mprc += rd32(E1000_MPRC);
4559         adapter->stats.roc += rd32(E1000_ROC);
4560
4561         adapter->stats.prc64 += rd32(E1000_PRC64);
4562         adapter->stats.prc127 += rd32(E1000_PRC127);
4563         adapter->stats.prc255 += rd32(E1000_PRC255);
4564         adapter->stats.prc511 += rd32(E1000_PRC511);
4565         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4566         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4567         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4568         adapter->stats.sec += rd32(E1000_SEC);
4569
4570         mpc = rd32(E1000_MPC);
4571         adapter->stats.mpc += mpc;
4572         net_stats->rx_fifo_errors += mpc;
4573         adapter->stats.scc += rd32(E1000_SCC);
4574         adapter->stats.ecol += rd32(E1000_ECOL);
4575         adapter->stats.mcc += rd32(E1000_MCC);
4576         adapter->stats.latecol += rd32(E1000_LATECOL);
4577         adapter->stats.dc += rd32(E1000_DC);
4578         adapter->stats.rlec += rd32(E1000_RLEC);
4579         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4580         adapter->stats.xontxc += rd32(E1000_XONTXC);
4581         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4582         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4583         adapter->stats.fcruc += rd32(E1000_FCRUC);
4584         adapter->stats.gptc += rd32(E1000_GPTC);
4585         adapter->stats.gotc += rd32(E1000_GOTCL);
4586         rd32(E1000_GOTCH); /* clear GOTCL */
4587         adapter->stats.rnbc += rd32(E1000_RNBC);
4588         adapter->stats.ruc += rd32(E1000_RUC);
4589         adapter->stats.rfc += rd32(E1000_RFC);
4590         adapter->stats.rjc += rd32(E1000_RJC);
4591         adapter->stats.tor += rd32(E1000_TORH);
4592         adapter->stats.tot += rd32(E1000_TOTH);
4593         adapter->stats.tpr += rd32(E1000_TPR);
4594
4595         adapter->stats.ptc64 += rd32(E1000_PTC64);
4596         adapter->stats.ptc127 += rd32(E1000_PTC127);
4597         adapter->stats.ptc255 += rd32(E1000_PTC255);
4598         adapter->stats.ptc511 += rd32(E1000_PTC511);
4599         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4600         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4601
4602         adapter->stats.mptc += rd32(E1000_MPTC);
4603         adapter->stats.bptc += rd32(E1000_BPTC);
4604
4605         adapter->stats.tpt += rd32(E1000_TPT);
4606         adapter->stats.colc += rd32(E1000_COLC);
4607
4608         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4609         /* read internal phy specific stats */
4610         reg = rd32(E1000_CTRL_EXT);
4611         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4612                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4613                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4614         }
4615
4616         adapter->stats.tsctc += rd32(E1000_TSCTC);
4617         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4618
4619         adapter->stats.iac += rd32(E1000_IAC);
4620         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4621         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4622         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4623         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4624         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4625         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4626         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4627         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4628
4629         /* Fill out the OS statistics structure */
4630         net_stats->multicast = adapter->stats.mprc;
4631         net_stats->collisions = adapter->stats.colc;
4632
4633         /* Rx Errors */
4634
4635         /* RLEC on some newer hardware can be incorrect so build
4636          * our own version based on RUC and ROC */
4637         net_stats->rx_errors = adapter->stats.rxerrc +
4638                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4639                 adapter->stats.ruc + adapter->stats.roc +
4640                 adapter->stats.cexterr;
4641         net_stats->rx_length_errors = adapter->stats.ruc +
4642                                       adapter->stats.roc;
4643         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4644         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4645         net_stats->rx_missed_errors = adapter->stats.mpc;
4646
4647         /* Tx Errors */
4648         net_stats->tx_errors = adapter->stats.ecol +
4649                                adapter->stats.latecol;
4650         net_stats->tx_aborted_errors = adapter->stats.ecol;
4651         net_stats->tx_window_errors = adapter->stats.latecol;
4652         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4653
4654         /* Tx Dropped needs to be maintained elsewhere */
4655
4656         /* Phy Stats */
4657         if (hw->phy.media_type == e1000_media_type_copper) {
4658                 if ((adapter->link_speed == SPEED_1000) &&
4659                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4660                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4661                         adapter->phy_stats.idle_errors += phy_tmp;
4662                 }
4663         }
4664
4665         /* Management Stats */
4666         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4667         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4668         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4669
4670         /* OS2BMC Stats */
4671         reg = rd32(E1000_MANC);
4672         if (reg & E1000_MANC_EN_BMC2OS) {
4673                 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4674                 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4675                 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4676                 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4677         }
4678 }
4679
4680 static irqreturn_t igb_msix_other(int irq, void *data)
4681 {
4682         struct igb_adapter *adapter = data;
4683         struct e1000_hw *hw = &adapter->hw;
4684         u32 icr = rd32(E1000_ICR);
4685         /* reading ICR causes bit 31 of EICR to be cleared */
4686
4687         if (icr & E1000_ICR_DRSTA)
4688                 schedule_work(&adapter->reset_task);
4689
4690         if (icr & E1000_ICR_DOUTSYNC) {
4691                 /* HW is reporting DMA is out of sync */
4692                 adapter->stats.doosync++;
4693                 /* The DMA Out of Sync is also indication of a spoof event
4694                  * in IOV mode. Check the Wrong VM Behavior register to
4695                  * see if it is really a spoof event. */
4696                 igb_check_wvbr(adapter);
4697         }
4698
4699         /* Check for a mailbox event */
4700         if (icr & E1000_ICR_VMMB)
4701                 igb_msg_task(adapter);
4702
4703         if (icr & E1000_ICR_LSC) {
4704                 hw->mac.get_link_status = 1;
4705                 /* guard against interrupt when we're going down */
4706                 if (!test_bit(__IGB_DOWN, &adapter->state))
4707                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4708         }
4709
4710         if (adapter->vfs_allocated_count)
4711                 wr32(E1000_IMS, E1000_IMS_LSC |
4712                                 E1000_IMS_VMMB |
4713                                 E1000_IMS_DOUTSYNC);
4714         else
4715                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4716         wr32(E1000_EIMS, adapter->eims_other);
4717
4718         return IRQ_HANDLED;
4719 }
4720
4721 static void igb_write_itr(struct igb_q_vector *q_vector)
4722 {
4723         struct igb_adapter *adapter = q_vector->adapter;
4724         u32 itr_val = q_vector->itr_val & 0x7FFC;
4725
4726         if (!q_vector->set_itr)
4727                 return;
4728
4729         if (!itr_val)
4730                 itr_val = 0x4;
4731
4732         if (adapter->hw.mac.type == e1000_82575)
4733                 itr_val |= itr_val << 16;
4734         else
4735                 itr_val |= 0x8000000;
4736
4737         writel(itr_val, q_vector->itr_register);
4738         q_vector->set_itr = 0;
4739 }
4740
4741 static irqreturn_t igb_msix_ring(int irq, void *data)
4742 {
4743         struct igb_q_vector *q_vector = data;
4744
4745         /* Write the ITR value calculated from the previous interrupt. */
4746         igb_write_itr(q_vector);
4747
4748         napi_schedule(&q_vector->napi);
4749
4750         return IRQ_HANDLED;
4751 }
4752
4753 #ifdef CONFIG_IGB_DCA
4754 static void igb_update_dca(struct igb_q_vector *q_vector)
4755 {
4756         struct igb_adapter *adapter = q_vector->adapter;
4757         struct e1000_hw *hw = &adapter->hw;
4758         int cpu = get_cpu();
4759
4760         if (q_vector->cpu == cpu)
4761                 goto out_no_update;
4762
4763         if (q_vector->tx_ring) {
4764                 int q = q_vector->tx_ring->reg_idx;
4765                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4766                 if (hw->mac.type == e1000_82575) {
4767                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4768                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4769                 } else {
4770                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4771                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4772                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4773                 }
4774                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4775                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4776         }
4777         if (q_vector->rx_ring) {
4778                 int q = q_vector->rx_ring->reg_idx;
4779                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4780                 if (hw->mac.type == e1000_82575) {
4781                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4782                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4783                 } else {
4784                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4785                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4786                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4787                 }
4788                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4789                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4790                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4791                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4792         }
4793         q_vector->cpu = cpu;
4794 out_no_update:
4795         put_cpu();
4796 }
4797
4798 static void igb_setup_dca(struct igb_adapter *adapter)
4799 {
4800         struct e1000_hw *hw = &adapter->hw;
4801         int i;
4802
4803         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4804                 return;
4805
4806         /* Always use CB2 mode, difference is masked in the CB driver. */
4807         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4808
4809         for (i = 0; i < adapter->num_q_vectors; i++) {
4810                 adapter->q_vector[i]->cpu = -1;
4811                 igb_update_dca(adapter->q_vector[i]);
4812         }
4813 }
4814
4815 static int __igb_notify_dca(struct device *dev, void *data)
4816 {
4817         struct net_device *netdev = dev_get_drvdata(dev);
4818         struct igb_adapter *adapter = netdev_priv(netdev);
4819         struct pci_dev *pdev = adapter->pdev;
4820         struct e1000_hw *hw = &adapter->hw;
4821         unsigned long event = *(unsigned long *)data;
4822
4823         switch (event) {
4824         case DCA_PROVIDER_ADD:
4825                 /* if already enabled, don't do it again */
4826                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4827                         break;
4828                 if (dca_add_requester(dev) == 0) {
4829                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4830                         dev_info(&pdev->dev, "DCA enabled\n");
4831                         igb_setup_dca(adapter);
4832                         break;
4833                 }
4834                 /* Fall Through since DCA is disabled. */
4835         case DCA_PROVIDER_REMOVE:
4836                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4837                         /* without this a class_device is left
4838                          * hanging around in the sysfs model */
4839                         dca_remove_requester(dev);
4840                         dev_info(&pdev->dev, "DCA disabled\n");
4841                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4842                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4843                 }
4844                 break;
4845         }
4846
4847         return 0;
4848 }
4849
4850 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4851                           void *p)
4852 {
4853         int ret_val;
4854
4855         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4856                                          __igb_notify_dca);
4857
4858         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4859 }
4860 #endif /* CONFIG_IGB_DCA */
4861
4862 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4863 {
4864         struct e1000_hw *hw = &adapter->hw;
4865         u32 ping;
4866         int i;
4867
4868         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4869                 ping = E1000_PF_CONTROL_MSG;
4870                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4871                         ping |= E1000_VT_MSGTYPE_CTS;
4872                 igb_write_mbx(hw, &ping, 1, i);
4873         }
4874 }
4875
4876 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4877 {
4878         struct e1000_hw *hw = &adapter->hw;
4879         u32 vmolr = rd32(E1000_VMOLR(vf));
4880         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4881
4882         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4883                             IGB_VF_FLAG_MULTI_PROMISC);
4884         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4885
4886         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4887                 vmolr |= E1000_VMOLR_MPME;
4888                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4889                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4890         } else {
4891                 /*
4892                  * if we have hashes and we are clearing a multicast promisc
4893                  * flag we need to write the hashes to the MTA as this step
4894                  * was previously skipped
4895                  */
4896                 if (vf_data->num_vf_mc_hashes > 30) {
4897                         vmolr |= E1000_VMOLR_MPME;
4898                 } else if (vf_data->num_vf_mc_hashes) {
4899                         int j;
4900                         vmolr |= E1000_VMOLR_ROMPE;
4901                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4902                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4903                 }
4904         }
4905
4906         wr32(E1000_VMOLR(vf), vmolr);
4907
4908         /* there are flags left unprocessed, likely not supported */
4909         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4910                 return -EINVAL;
4911
4912         return 0;
4913
4914 }
4915
4916 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4917                                   u32 *msgbuf, u32 vf)
4918 {
4919         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4920         u16 *hash_list = (u16 *)&msgbuf[1];
4921         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4922         int i;
4923
4924         /* salt away the number of multicast addresses assigned
4925          * to this VF for later use to restore when the PF multi cast
4926          * list changes
4927          */
4928         vf_data->num_vf_mc_hashes = n;
4929
4930         /* only up to 30 hash values supported */
4931         if (n > 30)
4932                 n = 30;
4933
4934         /* store the hashes for later use */
4935         for (i = 0; i < n; i++)
4936                 vf_data->vf_mc_hashes[i] = hash_list[i];
4937
4938         /* Flush and reset the mta with the new values */
4939         igb_set_rx_mode(adapter->netdev);
4940
4941         return 0;
4942 }
4943
4944 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4945 {
4946         struct e1000_hw *hw = &adapter->hw;
4947         struct vf_data_storage *vf_data;
4948         int i, j;
4949
4950         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4951                 u32 vmolr = rd32(E1000_VMOLR(i));
4952                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4953
4954                 vf_data = &adapter->vf_data[i];
4955
4956                 if ((vf_data->num_vf_mc_hashes > 30) ||
4957                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4958                         vmolr |= E1000_VMOLR_MPME;
4959                 } else if (vf_data->num_vf_mc_hashes) {
4960                         vmolr |= E1000_VMOLR_ROMPE;
4961                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4962                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4963                 }
4964                 wr32(E1000_VMOLR(i), vmolr);
4965         }
4966 }
4967
4968 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4969 {
4970         struct e1000_hw *hw = &adapter->hw;
4971         u32 pool_mask, reg, vid;
4972         int i;
4973
4974         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4975
4976         /* Find the vlan filter for this id */
4977         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4978                 reg = rd32(E1000_VLVF(i));
4979
4980                 /* remove the vf from the pool */
4981                 reg &= ~pool_mask;
4982
4983                 /* if pool is empty then remove entry from vfta */
4984                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4985                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4986                         reg = 0;
4987                         vid = reg & E1000_VLVF_VLANID_MASK;
4988                         igb_vfta_set(hw, vid, false);
4989                 }
4990
4991                 wr32(E1000_VLVF(i), reg);
4992         }
4993
4994         adapter->vf_data[vf].vlans_enabled = 0;
4995 }
4996
4997 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4998 {
4999         struct e1000_hw *hw = &adapter->hw;
5000         u32 reg, i;
5001
5002         /* The vlvf table only exists on 82576 hardware and newer */
5003         if (hw->mac.type < e1000_82576)
5004                 return -1;
5005
5006         /* we only need to do this if VMDq is enabled */
5007         if (!adapter->vfs_allocated_count)
5008                 return -1;
5009
5010         /* Find the vlan filter for this id */
5011         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5012                 reg = rd32(E1000_VLVF(i));
5013                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5014                     vid == (reg & E1000_VLVF_VLANID_MASK))
5015                         break;
5016         }
5017
5018         if (add) {
5019                 if (i == E1000_VLVF_ARRAY_SIZE) {
5020                         /* Did not find a matching VLAN ID entry that was
5021                          * enabled.  Search for a free filter entry, i.e.
5022                          * one without the enable bit set
5023                          */
5024                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5025                                 reg = rd32(E1000_VLVF(i));
5026                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5027                                         break;
5028                         }
5029                 }
5030                 if (i < E1000_VLVF_ARRAY_SIZE) {
5031                         /* Found an enabled/available entry */
5032                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5033
5034                         /* if !enabled we need to set this up in vfta */
5035                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5036                                 /* add VID to filter table */
5037                                 igb_vfta_set(hw, vid, true);
5038                                 reg |= E1000_VLVF_VLANID_ENABLE;
5039                         }
5040                         reg &= ~E1000_VLVF_VLANID_MASK;
5041                         reg |= vid;
5042                         wr32(E1000_VLVF(i), reg);
5043
5044                         /* do not modify RLPML for PF devices */
5045                         if (vf >= adapter->vfs_allocated_count)
5046                                 return 0;
5047
5048                         if (!adapter->vf_data[vf].vlans_enabled) {
5049                                 u32 size;
5050                                 reg = rd32(E1000_VMOLR(vf));
5051                                 size = reg & E1000_VMOLR_RLPML_MASK;
5052                                 size += 4;
5053                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5054                                 reg |= size;
5055                                 wr32(E1000_VMOLR(vf), reg);
5056                         }
5057
5058                         adapter->vf_data[vf].vlans_enabled++;
5059                         return 0;
5060                 }
5061         } else {
5062                 if (i < E1000_VLVF_ARRAY_SIZE) {
5063                         /* remove vf from the pool */
5064                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5065                         /* if pool is empty then remove entry from vfta */
5066                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5067                                 reg = 0;
5068                                 igb_vfta_set(hw, vid, false);
5069                         }
5070                         wr32(E1000_VLVF(i), reg);
5071
5072                         /* do not modify RLPML for PF devices */
5073                         if (vf >= adapter->vfs_allocated_count)
5074                                 return 0;
5075
5076                         adapter->vf_data[vf].vlans_enabled--;
5077                         if (!adapter->vf_data[vf].vlans_enabled) {
5078                                 u32 size;
5079                                 reg = rd32(E1000_VMOLR(vf));
5080                                 size = reg & E1000_VMOLR_RLPML_MASK;
5081                                 size -= 4;
5082                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5083                                 reg |= size;
5084                                 wr32(E1000_VMOLR(vf), reg);
5085                         }
5086                 }
5087         }
5088         return 0;
5089 }
5090
5091 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5092 {
5093         struct e1000_hw *hw = &adapter->hw;
5094
5095         if (vid)
5096                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5097         else
5098                 wr32(E1000_VMVIR(vf), 0);
5099 }
5100
5101 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5102                                int vf, u16 vlan, u8 qos)
5103 {
5104         int err = 0;
5105         struct igb_adapter *adapter = netdev_priv(netdev);
5106
5107         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5108                 return -EINVAL;
5109         if (vlan || qos) {
5110                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5111                 if (err)
5112                         goto out;
5113                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5114                 igb_set_vmolr(adapter, vf, !vlan);
5115                 adapter->vf_data[vf].pf_vlan = vlan;
5116                 adapter->vf_data[vf].pf_qos = qos;
5117                 dev_info(&adapter->pdev->dev,
5118                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5119                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5120                         dev_warn(&adapter->pdev->dev,
5121                                  "The VF VLAN has been set,"
5122                                  " but the PF device is not up.\n");
5123                         dev_warn(&adapter->pdev->dev,
5124                                  "Bring the PF device up before"
5125                                  " attempting to use the VF device.\n");
5126                 }
5127         } else {
5128                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5129                                    false, vf);
5130                 igb_set_vmvir(adapter, vlan, vf);
5131                 igb_set_vmolr(adapter, vf, true);
5132                 adapter->vf_data[vf].pf_vlan = 0;
5133                 adapter->vf_data[vf].pf_qos = 0;
5134        }
5135 out:
5136        return err;
5137 }
5138
5139 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5140 {
5141         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5142         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5143
5144         return igb_vlvf_set(adapter, vid, add, vf);
5145 }
5146
5147 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5148 {
5149         /* clear flags - except flag that indicates PF has set the MAC */
5150         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5151         adapter->vf_data[vf].last_nack = jiffies;
5152
5153         /* reset offloads to defaults */
5154         igb_set_vmolr(adapter, vf, true);
5155
5156         /* reset vlans for device */
5157         igb_clear_vf_vfta(adapter, vf);
5158         if (adapter->vf_data[vf].pf_vlan)
5159                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5160                                     adapter->vf_data[vf].pf_vlan,
5161                                     adapter->vf_data[vf].pf_qos);
5162         else
5163                 igb_clear_vf_vfta(adapter, vf);
5164
5165         /* reset multicast table array for vf */
5166         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5167
5168         /* Flush and reset the mta with the new values */
5169         igb_set_rx_mode(adapter->netdev);
5170 }
5171
5172 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5173 {
5174         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5175
5176         /* generate a new mac address as we were hotplug removed/added */
5177         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5178                 random_ether_addr(vf_mac);
5179
5180         /* process remaining reset events */
5181         igb_vf_reset(adapter, vf);
5182 }
5183
5184 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5185 {
5186         struct e1000_hw *hw = &adapter->hw;
5187         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5188         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5189         u32 reg, msgbuf[3];
5190         u8 *addr = (u8 *)(&msgbuf[1]);
5191
5192         /* process all the same items cleared in a function level reset */
5193         igb_vf_reset(adapter, vf);
5194
5195         /* set vf mac address */
5196         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5197
5198         /* enable transmit and receive for vf */
5199         reg = rd32(E1000_VFTE);
5200         wr32(E1000_VFTE, reg | (1 << vf));
5201         reg = rd32(E1000_VFRE);
5202         wr32(E1000_VFRE, reg | (1 << vf));
5203
5204         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5205
5206         /* reply to reset with ack and vf mac address */
5207         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5208         memcpy(addr, vf_mac, 6);
5209         igb_write_mbx(hw, msgbuf, 3, vf);
5210 }
5211
5212 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5213 {
5214         /*
5215          * The VF MAC Address is stored in a packed array of bytes
5216          * starting at the second 32 bit word of the msg array
5217          */
5218         unsigned char *addr = (char *)&msg[1];
5219         int err = -1;
5220
5221         if (is_valid_ether_addr(addr))
5222                 err = igb_set_vf_mac(adapter, vf, addr);
5223
5224         return err;
5225 }
5226
5227 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5228 {
5229         struct e1000_hw *hw = &adapter->hw;
5230         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5231         u32 msg = E1000_VT_MSGTYPE_NACK;
5232
5233         /* if device isn't clear to send it shouldn't be reading either */
5234         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5235             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5236                 igb_write_mbx(hw, &msg, 1, vf);
5237                 vf_data->last_nack = jiffies;
5238         }
5239 }
5240
5241 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5242 {
5243         struct pci_dev *pdev = adapter->pdev;
5244         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5245         struct e1000_hw *hw = &adapter->hw;
5246         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5247         s32 retval;
5248
5249         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5250
5251         if (retval) {
5252                 /* if receive failed revoke VF CTS stats and restart init */
5253                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5254                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5255                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5256                         return;
5257                 goto out;
5258         }
5259
5260         /* this is a message we already processed, do nothing */
5261         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5262                 return;
5263
5264         /*
5265          * until the vf completes a reset it should not be
5266          * allowed to start any configuration.
5267          */
5268
5269         if (msgbuf[0] == E1000_VF_RESET) {
5270                 igb_vf_reset_msg(adapter, vf);
5271                 return;
5272         }
5273
5274         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5275                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5276                         return;
5277                 retval = -1;
5278                 goto out;
5279         }
5280
5281         switch ((msgbuf[0] & 0xFFFF)) {
5282         case E1000_VF_SET_MAC_ADDR:
5283                 retval = -EINVAL;
5284                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5285                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5286                 else
5287                         dev_warn(&pdev->dev,
5288                                  "VF %d attempted to override administratively "
5289                                  "set MAC address\nReload the VF driver to "
5290                                  "resume operations\n", vf);
5291                 break;
5292         case E1000_VF_SET_PROMISC:
5293                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5294                 break;
5295         case E1000_VF_SET_MULTICAST:
5296                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5297                 break;
5298         case E1000_VF_SET_LPE:
5299                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5300                 break;
5301         case E1000_VF_SET_VLAN:
5302                 retval = -1;
5303                 if (vf_data->pf_vlan)
5304                         dev_warn(&pdev->dev,
5305                                  "VF %d attempted to override administratively "
5306                                  "set VLAN tag\nReload the VF driver to "
5307                                  "resume operations\n", vf);
5308                 else
5309                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5310                 break;
5311         default:
5312                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5313                 retval = -1;
5314                 break;
5315         }
5316
5317         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5318 out:
5319         /* notify the VF of the results of what it sent us */
5320         if (retval)
5321                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5322         else
5323                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5324
5325         igb_write_mbx(hw, msgbuf, 1, vf);
5326 }
5327
5328 static void igb_msg_task(struct igb_adapter *adapter)
5329 {
5330         struct e1000_hw *hw = &adapter->hw;
5331         u32 vf;
5332
5333         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5334                 /* process any reset requests */
5335                 if (!igb_check_for_rst(hw, vf))
5336                         igb_vf_reset_event(adapter, vf);
5337
5338                 /* process any messages pending */
5339                 if (!igb_check_for_msg(hw, vf))
5340                         igb_rcv_msg_from_vf(adapter, vf);
5341
5342                 /* process any acks */
5343                 if (!igb_check_for_ack(hw, vf))
5344                         igb_rcv_ack_from_vf(adapter, vf);
5345         }
5346 }
5347
5348 /**
5349  *  igb_set_uta - Set unicast filter table address
5350  *  @adapter: board private structure
5351  *
5352  *  The unicast table address is a register array of 32-bit registers.
5353  *  The table is meant to be used in a way similar to how the MTA is used
5354  *  however due to certain limitations in the hardware it is necessary to
5355  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5356  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5357  **/
5358 static void igb_set_uta(struct igb_adapter *adapter)
5359 {
5360         struct e1000_hw *hw = &adapter->hw;
5361         int i;
5362
5363         /* The UTA table only exists on 82576 hardware and newer */
5364         if (hw->mac.type < e1000_82576)
5365                 return;
5366
5367         /* we only need to do this if VMDq is enabled */
5368         if (!adapter->vfs_allocated_count)
5369                 return;
5370
5371         for (i = 0; i < hw->mac.uta_reg_count; i++)
5372                 array_wr32(E1000_UTA, i, ~0);
5373 }
5374
5375 /**
5376  * igb_intr_msi - Interrupt Handler
5377  * @irq: interrupt number
5378  * @data: pointer to a network interface device structure
5379  **/
5380 static irqreturn_t igb_intr_msi(int irq, void *data)
5381 {
5382         struct igb_adapter *adapter = data;
5383         struct igb_q_vector *q_vector = adapter->q_vector[0];
5384         struct e1000_hw *hw = &adapter->hw;
5385         /* read ICR disables interrupts using IAM */
5386         u32 icr = rd32(E1000_ICR);
5387
5388         igb_write_itr(q_vector);
5389
5390         if (icr & E1000_ICR_DRSTA)
5391                 schedule_work(&adapter->reset_task);
5392
5393         if (icr & E1000_ICR_DOUTSYNC) {
5394                 /* HW is reporting DMA is out of sync */
5395                 adapter->stats.doosync++;
5396         }
5397
5398         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5399                 hw->mac.get_link_status = 1;
5400                 if (!test_bit(__IGB_DOWN, &adapter->state))
5401                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5402         }
5403
5404         napi_schedule(&q_vector->napi);
5405
5406         return IRQ_HANDLED;
5407 }
5408
5409 /**
5410  * igb_intr - Legacy Interrupt Handler
5411  * @irq: interrupt number
5412  * @data: pointer to a network interface device structure
5413  **/
5414 static irqreturn_t igb_intr(int irq, void *data)
5415 {
5416         struct igb_adapter *adapter = data;
5417         struct igb_q_vector *q_vector = adapter->q_vector[0];
5418         struct e1000_hw *hw = &adapter->hw;
5419         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5420          * need for the IMC write */
5421         u32 icr = rd32(E1000_ICR);
5422         if (!icr)
5423                 return IRQ_NONE;  /* Not our interrupt */
5424
5425         igb_write_itr(q_vector);
5426
5427         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5428          * not set, then the adapter didn't send an interrupt */
5429         if (!(icr & E1000_ICR_INT_ASSERTED))
5430                 return IRQ_NONE;
5431
5432         if (icr & E1000_ICR_DRSTA)
5433                 schedule_work(&adapter->reset_task);
5434
5435         if (icr & E1000_ICR_DOUTSYNC) {
5436                 /* HW is reporting DMA is out of sync */
5437                 adapter->stats.doosync++;
5438         }
5439
5440         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5441                 hw->mac.get_link_status = 1;
5442                 /* guard against interrupt when we're going down */
5443                 if (!test_bit(__IGB_DOWN, &adapter->state))
5444                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5445         }
5446
5447         napi_schedule(&q_vector->napi);
5448
5449         return IRQ_HANDLED;
5450 }
5451
5452 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5453 {
5454         struct igb_adapter *adapter = q_vector->adapter;
5455         struct e1000_hw *hw = &adapter->hw;
5456
5457         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5458             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5459                 if (!adapter->msix_entries)
5460                         igb_set_itr(adapter);
5461                 else
5462                         igb_update_ring_itr(q_vector);
5463         }
5464
5465         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5466                 if (adapter->msix_entries)
5467                         wr32(E1000_EIMS, q_vector->eims_value);
5468                 else
5469                         igb_irq_enable(adapter);
5470         }
5471 }
5472
5473 /**
5474  * igb_poll - NAPI Rx polling callback
5475  * @napi: napi polling structure
5476  * @budget: count of how many packets we should handle
5477  **/
5478 static int igb_poll(struct napi_struct *napi, int budget)
5479 {
5480         struct igb_q_vector *q_vector = container_of(napi,
5481                                                      struct igb_q_vector,
5482                                                      napi);
5483         int tx_clean_complete = 1, work_done = 0;
5484
5485 #ifdef CONFIG_IGB_DCA
5486         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5487                 igb_update_dca(q_vector);
5488 #endif
5489         if (q_vector->tx_ring)
5490                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5491
5492         if (q_vector->rx_ring)
5493                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5494
5495         if (!tx_clean_complete)
5496                 work_done = budget;
5497
5498         /* If not enough Rx work done, exit the polling mode */
5499         if (work_done < budget) {
5500                 napi_complete(napi);
5501                 igb_ring_irq_enable(q_vector);
5502         }
5503
5504         return work_done;
5505 }
5506
5507 /**
5508  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5509  * @adapter: board private structure
5510  * @shhwtstamps: timestamp structure to update
5511  * @regval: unsigned 64bit system time value.
5512  *
5513  * We need to convert the system time value stored in the RX/TXSTMP registers
5514  * into a hwtstamp which can be used by the upper level timestamping functions
5515  */
5516 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5517                                    struct skb_shared_hwtstamps *shhwtstamps,
5518                                    u64 regval)
5519 {
5520         u64 ns;
5521
5522         /*
5523          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5524          * 24 to match clock shift we setup earlier.
5525          */
5526         if (adapter->hw.mac.type == e1000_82580)
5527                 regval <<= IGB_82580_TSYNC_SHIFT;
5528
5529         ns = timecounter_cyc2time(&adapter->clock, regval);
5530         timecompare_update(&adapter->compare, ns);
5531         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5532         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5533         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5534 }
5535
5536 /**
5537  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5538  * @q_vector: pointer to q_vector containing needed info
5539  * @buffer: pointer to igb_buffer structure
5540  *
5541  * If we were asked to do hardware stamping and such a time stamp is
5542  * available, then it must have been for this skb here because we only
5543  * allow only one such packet into the queue.
5544  */
5545 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5546 {
5547         struct igb_adapter *adapter = q_vector->adapter;
5548         struct e1000_hw *hw = &adapter->hw;
5549         struct skb_shared_hwtstamps shhwtstamps;
5550         u64 regval;
5551
5552         /* if skb does not support hw timestamp or TX stamp not valid exit */
5553         if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5554             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5555                 return;
5556
5557         regval = rd32(E1000_TXSTMPL);
5558         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5559
5560         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5561         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5562 }
5563
5564 /**
5565  * igb_clean_tx_irq - Reclaim resources after transmit completes
5566  * @q_vector: pointer to q_vector containing needed info
5567  * returns true if ring is completely cleaned
5568  **/
5569 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5570 {
5571         struct igb_adapter *adapter = q_vector->adapter;
5572         struct igb_ring *tx_ring = q_vector->tx_ring;
5573         struct net_device *netdev = tx_ring->netdev;
5574         struct e1000_hw *hw = &adapter->hw;
5575         struct igb_buffer *buffer_info;
5576         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5577         unsigned int total_bytes = 0, total_packets = 0;
5578         unsigned int i, eop, count = 0;
5579         bool cleaned = false;
5580
5581         i = tx_ring->next_to_clean;
5582         eop = tx_ring->buffer_info[i].next_to_watch;
5583         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5584
5585         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5586                (count < tx_ring->count)) {
5587                 rmb();  /* read buffer_info after eop_desc status */
5588                 for (cleaned = false; !cleaned; count++) {
5589                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5590                         buffer_info = &tx_ring->buffer_info[i];
5591                         cleaned = (i == eop);
5592
5593                         if (buffer_info->skb) {
5594                                 total_bytes += buffer_info->bytecount;
5595                                 /* gso_segs is currently only valid for tcp */
5596                                 total_packets += buffer_info->gso_segs;
5597                                 igb_tx_hwtstamp(q_vector, buffer_info);
5598                         }
5599
5600                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5601                         tx_desc->wb.status = 0;
5602
5603                         i++;
5604                         if (i == tx_ring->count)
5605                                 i = 0;
5606                 }
5607                 eop = tx_ring->buffer_info[i].next_to_watch;
5608                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5609         }
5610
5611         tx_ring->next_to_clean = i;
5612
5613         if (unlikely(count &&
5614                      netif_carrier_ok(netdev) &&
5615                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5616                 /* Make sure that anybody stopping the queue after this
5617                  * sees the new next_to_clean.
5618                  */
5619                 smp_mb();
5620                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5621                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5622                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5623
5624                         u64_stats_update_begin(&tx_ring->tx_syncp);
5625                         tx_ring->tx_stats.restart_queue++;
5626                         u64_stats_update_end(&tx_ring->tx_syncp);
5627                 }
5628         }
5629
5630         if (tx_ring->detect_tx_hung) {
5631                 /* Detect a transmit hang in hardware, this serializes the
5632                  * check with the clearing of time_stamp and movement of i */
5633                 tx_ring->detect_tx_hung = false;
5634                 if (tx_ring->buffer_info[i].time_stamp &&
5635                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5636                                (adapter->tx_timeout_factor * HZ)) &&
5637                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5638
5639                         /* detected Tx unit hang */
5640                         dev_err(tx_ring->dev,
5641                                 "Detected Tx Unit Hang\n"
5642                                 "  Tx Queue             <%d>\n"
5643                                 "  TDH                  <%x>\n"
5644                                 "  TDT                  <%x>\n"
5645                                 "  next_to_use          <%x>\n"
5646                                 "  next_to_clean        <%x>\n"
5647                                 "buffer_info[next_to_clean]\n"
5648                                 "  time_stamp           <%lx>\n"
5649                                 "  next_to_watch        <%x>\n"
5650                                 "  jiffies              <%lx>\n"
5651                                 "  desc.status          <%x>\n",
5652                                 tx_ring->queue_index,
5653                                 readl(tx_ring->head),
5654                                 readl(tx_ring->tail),
5655                                 tx_ring->next_to_use,
5656                                 tx_ring->next_to_clean,
5657                                 tx_ring->buffer_info[eop].time_stamp,
5658                                 eop,
5659                                 jiffies,
5660                                 eop_desc->wb.status);
5661                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5662                 }
5663         }
5664         tx_ring->total_bytes += total_bytes;
5665         tx_ring->total_packets += total_packets;
5666         u64_stats_update_begin(&tx_ring->tx_syncp);
5667         tx_ring->tx_stats.bytes += total_bytes;
5668         tx_ring->tx_stats.packets += total_packets;
5669         u64_stats_update_end(&tx_ring->tx_syncp);
5670         return count < tx_ring->count;
5671 }
5672
5673 /**
5674  * igb_receive_skb - helper function to handle rx indications
5675  * @q_vector: structure containing interrupt and ring information
5676  * @skb: packet to send up
5677  * @vlan_tag: vlan tag for packet
5678  **/
5679 static void igb_receive_skb(struct igb_q_vector *q_vector,
5680                             struct sk_buff *skb,
5681                             u16 vlan_tag)
5682 {
5683         struct igb_adapter *adapter = q_vector->adapter;
5684
5685         if (vlan_tag && adapter->vlgrp)
5686                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5687                                  vlan_tag, skb);
5688         else
5689                 napi_gro_receive(&q_vector->napi, skb);
5690 }
5691
5692 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5693                                        u32 status_err, struct sk_buff *skb)
5694 {
5695         skb_checksum_none_assert(skb);
5696
5697         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5698         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5699              (status_err & E1000_RXD_STAT_IXSM))
5700                 return;
5701
5702         /* TCP/UDP checksum error bit is set */
5703         if (status_err &
5704             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5705                 /*
5706                  * work around errata with sctp packets where the TCPE aka
5707                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5708                  * packets, (aka let the stack check the crc32c)
5709                  */
5710                 if ((skb->len == 60) &&
5711                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5712                         u64_stats_update_begin(&ring->rx_syncp);
5713                         ring->rx_stats.csum_err++;
5714                         u64_stats_update_end(&ring->rx_syncp);
5715                 }
5716                 /* let the stack verify checksum errors */
5717                 return;
5718         }
5719         /* It must be a TCP or UDP packet with a valid checksum */
5720         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5721                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5722
5723         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5724 }
5725
5726 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5727                                    struct sk_buff *skb)
5728 {
5729         struct igb_adapter *adapter = q_vector->adapter;
5730         struct e1000_hw *hw = &adapter->hw;
5731         u64 regval;
5732
5733         /*
5734          * If this bit is set, then the RX registers contain the time stamp. No
5735          * other packet will be time stamped until we read these registers, so
5736          * read the registers to make them available again. Because only one
5737          * packet can be time stamped at a time, we know that the register
5738          * values must belong to this one here and therefore we don't need to
5739          * compare any of the additional attributes stored for it.
5740          *
5741          * If nothing went wrong, then it should have a shared tx_flags that we
5742          * can turn into a skb_shared_hwtstamps.
5743          */
5744         if (staterr & E1000_RXDADV_STAT_TSIP) {
5745                 u32 *stamp = (u32 *)skb->data;
5746                 regval = le32_to_cpu(*(stamp + 2));
5747                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5748                 skb_pull(skb, IGB_TS_HDR_LEN);
5749         } else {
5750                 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5751                         return;
5752
5753                 regval = rd32(E1000_RXSTMPL);
5754                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5755         }
5756
5757         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5758 }
5759 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5760                                union e1000_adv_rx_desc *rx_desc)
5761 {
5762         /* HW will not DMA in data larger than the given buffer, even if it
5763          * parses the (NFS, of course) header to be larger.  In that case, it
5764          * fills the header buffer and spills the rest into the page.
5765          */
5766         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5767                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5768         if (hlen > rx_ring->rx_buffer_len)
5769                 hlen = rx_ring->rx_buffer_len;
5770         return hlen;
5771 }
5772
5773 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5774                                  int *work_done, int budget)
5775 {
5776         struct igb_ring *rx_ring = q_vector->rx_ring;
5777         struct net_device *netdev = rx_ring->netdev;
5778         struct device *dev = rx_ring->dev;
5779         union e1000_adv_rx_desc *rx_desc , *next_rxd;
5780         struct igb_buffer *buffer_info , *next_buffer;
5781         struct sk_buff *skb;
5782         bool cleaned = false;
5783         int cleaned_count = 0;
5784         int current_node = numa_node_id();
5785         unsigned int total_bytes = 0, total_packets = 0;
5786         unsigned int i;
5787         u32 staterr;
5788         u16 length;
5789         u16 vlan_tag;
5790
5791         i = rx_ring->next_to_clean;
5792         buffer_info = &rx_ring->buffer_info[i];
5793         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5794         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5795
5796         while (staterr & E1000_RXD_STAT_DD) {
5797                 if (*work_done >= budget)
5798                         break;
5799                 (*work_done)++;
5800                 rmb(); /* read descriptor and rx_buffer_info after status DD */
5801
5802                 skb = buffer_info->skb;
5803                 prefetch(skb->data - NET_IP_ALIGN);
5804                 buffer_info->skb = NULL;
5805
5806                 i++;
5807                 if (i == rx_ring->count)
5808                         i = 0;
5809
5810                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5811                 prefetch(next_rxd);
5812                 next_buffer = &rx_ring->buffer_info[i];
5813
5814                 length = le16_to_cpu(rx_desc->wb.upper.length);
5815                 cleaned = true;
5816                 cleaned_count++;
5817
5818                 if (buffer_info->dma) {
5819                         dma_unmap_single(dev, buffer_info->dma,
5820                                          rx_ring->rx_buffer_len,
5821                                          DMA_FROM_DEVICE);
5822                         buffer_info->dma = 0;
5823                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5824                                 skb_put(skb, length);
5825                                 goto send_up;
5826                         }
5827                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5828                 }
5829
5830                 if (length) {
5831                         dma_unmap_page(dev, buffer_info->page_dma,
5832                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5833                         buffer_info->page_dma = 0;
5834
5835                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5836                                                 buffer_info->page,
5837                                                 buffer_info->page_offset,
5838                                                 length);
5839
5840                         if ((page_count(buffer_info->page) != 1) ||
5841                             (page_to_nid(buffer_info->page) != current_node))
5842                                 buffer_info->page = NULL;
5843                         else
5844                                 get_page(buffer_info->page);
5845
5846                         skb->len += length;
5847                         skb->data_len += length;
5848                         skb->truesize += length;
5849                 }
5850
5851                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5852                         buffer_info->skb = next_buffer->skb;
5853                         buffer_info->dma = next_buffer->dma;
5854                         next_buffer->skb = skb;
5855                         next_buffer->dma = 0;
5856                         goto next_desc;
5857                 }
5858 send_up:
5859                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5860                         dev_kfree_skb_irq(skb);
5861                         goto next_desc;
5862                 }
5863
5864                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5865                         igb_rx_hwtstamp(q_vector, staterr, skb);
5866                 total_bytes += skb->len;
5867                 total_packets++;
5868
5869                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5870
5871                 skb->protocol = eth_type_trans(skb, netdev);
5872                 skb_record_rx_queue(skb, rx_ring->queue_index);
5873
5874                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5875                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5876
5877                 igb_receive_skb(q_vector, skb, vlan_tag);
5878
5879 next_desc:
5880                 rx_desc->wb.upper.status_error = 0;
5881
5882                 /* return some buffers to hardware, one at a time is too slow */
5883                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5884                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5885                         cleaned_count = 0;
5886                 }
5887
5888                 /* use prefetched values */
5889                 rx_desc = next_rxd;
5890                 buffer_info = next_buffer;
5891                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5892         }
5893
5894         rx_ring->next_to_clean = i;
5895         cleaned_count = igb_desc_unused(rx_ring);
5896
5897         if (cleaned_count)
5898                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5899
5900         rx_ring->total_packets += total_packets;
5901         rx_ring->total_bytes += total_bytes;
5902         u64_stats_update_begin(&rx_ring->rx_syncp);
5903         rx_ring->rx_stats.packets += total_packets;
5904         rx_ring->rx_stats.bytes += total_bytes;
5905         u64_stats_update_end(&rx_ring->rx_syncp);
5906         return cleaned;
5907 }
5908
5909 /**
5910  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5911  * @adapter: address of board private structure
5912  **/
5913 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5914 {
5915         struct net_device *netdev = rx_ring->netdev;
5916         union e1000_adv_rx_desc *rx_desc;
5917         struct igb_buffer *buffer_info;
5918         struct sk_buff *skb;
5919         unsigned int i;
5920         int bufsz;
5921
5922         i = rx_ring->next_to_use;
5923         buffer_info = &rx_ring->buffer_info[i];
5924
5925         bufsz = rx_ring->rx_buffer_len;
5926
5927         while (cleaned_count--) {
5928                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5929
5930                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5931                         if (!buffer_info->page) {
5932                                 buffer_info->page = netdev_alloc_page(netdev);
5933                                 if (unlikely(!buffer_info->page)) {
5934                                         u64_stats_update_begin(&rx_ring->rx_syncp);
5935                                         rx_ring->rx_stats.alloc_failed++;
5936                                         u64_stats_update_end(&rx_ring->rx_syncp);
5937                                         goto no_buffers;
5938                                 }
5939                                 buffer_info->page_offset = 0;
5940                         } else {
5941                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5942                         }
5943                         buffer_info->page_dma =
5944                                 dma_map_page(rx_ring->dev, buffer_info->page,
5945                                              buffer_info->page_offset,
5946                                              PAGE_SIZE / 2,
5947                                              DMA_FROM_DEVICE);
5948                         if (dma_mapping_error(rx_ring->dev,
5949                                               buffer_info->page_dma)) {
5950                                 buffer_info->page_dma = 0;
5951                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5952                                 rx_ring->rx_stats.alloc_failed++;
5953                                 u64_stats_update_end(&rx_ring->rx_syncp);
5954                                 goto no_buffers;
5955                         }
5956                 }
5957
5958                 skb = buffer_info->skb;
5959                 if (!skb) {
5960                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5961                         if (unlikely(!skb)) {
5962                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5963                                 rx_ring->rx_stats.alloc_failed++;
5964                                 u64_stats_update_end(&rx_ring->rx_syncp);
5965                                 goto no_buffers;
5966                         }
5967
5968                         buffer_info->skb = skb;
5969                 }
5970                 if (!buffer_info->dma) {
5971                         buffer_info->dma = dma_map_single(rx_ring->dev,
5972                                                           skb->data,
5973                                                           bufsz,
5974                                                           DMA_FROM_DEVICE);
5975                         if (dma_mapping_error(rx_ring->dev,
5976                                               buffer_info->dma)) {
5977                                 buffer_info->dma = 0;
5978                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5979                                 rx_ring->rx_stats.alloc_failed++;
5980                                 u64_stats_update_end(&rx_ring->rx_syncp);
5981                                 goto no_buffers;
5982                         }
5983                 }
5984                 /* Refresh the desc even if buffer_addrs didn't change because
5985                  * each write-back erases this info. */
5986                 if (bufsz < IGB_RXBUFFER_1024) {
5987                         rx_desc->read.pkt_addr =
5988                              cpu_to_le64(buffer_info->page_dma);
5989                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5990                 } else {
5991                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5992                         rx_desc->read.hdr_addr = 0;
5993                 }
5994
5995                 i++;
5996                 if (i == rx_ring->count)
5997                         i = 0;
5998                 buffer_info = &rx_ring->buffer_info[i];
5999         }
6000
6001 no_buffers:
6002         if (rx_ring->next_to_use != i) {
6003                 rx_ring->next_to_use = i;
6004                 if (i == 0)
6005                         i = (rx_ring->count - 1);
6006                 else
6007                         i--;
6008
6009                 /* Force memory writes to complete before letting h/w
6010                  * know there are new descriptors to fetch.  (Only
6011                  * applicable for weak-ordered memory model archs,
6012                  * such as IA-64). */
6013                 wmb();
6014                 writel(i, rx_ring->tail);
6015         }
6016 }
6017
6018 /**
6019  * igb_mii_ioctl -
6020  * @netdev:
6021  * @ifreq:
6022  * @cmd:
6023  **/
6024 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6025 {
6026         struct igb_adapter *adapter = netdev_priv(netdev);
6027         struct mii_ioctl_data *data = if_mii(ifr);
6028
6029         if (adapter->hw.phy.media_type != e1000_media_type_copper)
6030                 return -EOPNOTSUPP;
6031
6032         switch (cmd) {
6033         case SIOCGMIIPHY:
6034                 data->phy_id = adapter->hw.phy.addr;
6035                 break;
6036         case SIOCGMIIREG:
6037                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6038                                      &data->val_out))
6039                         return -EIO;
6040                 break;
6041         case SIOCSMIIREG:
6042         default:
6043                 return -EOPNOTSUPP;
6044         }
6045         return 0;
6046 }
6047
6048 /**
6049  * igb_hwtstamp_ioctl - control hardware time stamping
6050  * @netdev:
6051  * @ifreq:
6052  * @cmd:
6053  *
6054  * Outgoing time stamping can be enabled and disabled. Play nice and
6055  * disable it when requested, although it shouldn't case any overhead
6056  * when no packet needs it. At most one packet in the queue may be
6057  * marked for time stamping, otherwise it would be impossible to tell
6058  * for sure to which packet the hardware time stamp belongs.
6059  *
6060  * Incoming time stamping has to be configured via the hardware
6061  * filters. Not all combinations are supported, in particular event
6062  * type has to be specified. Matching the kind of event packet is
6063  * not supported, with the exception of "all V2 events regardless of
6064  * level 2 or 4".
6065  *
6066  **/
6067 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6068                               struct ifreq *ifr, int cmd)
6069 {
6070         struct igb_adapter *adapter = netdev_priv(netdev);
6071         struct e1000_hw *hw = &adapter->hw;
6072         struct hwtstamp_config config;
6073         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6074         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6075         u32 tsync_rx_cfg = 0;
6076         bool is_l4 = false;
6077         bool is_l2 = false;
6078         u32 regval;
6079
6080         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6081                 return -EFAULT;
6082
6083         /* reserved for future extensions */
6084         if (config.flags)
6085                 return -EINVAL;
6086
6087         switch (config.tx_type) {
6088         case HWTSTAMP_TX_OFF:
6089                 tsync_tx_ctl = 0;
6090         case HWTSTAMP_TX_ON:
6091                 break;
6092         default:
6093                 return -ERANGE;
6094         }
6095
6096         switch (config.rx_filter) {
6097         case HWTSTAMP_FILTER_NONE:
6098                 tsync_rx_ctl = 0;
6099                 break;
6100         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6101         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6102         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6103         case HWTSTAMP_FILTER_ALL:
6104                 /*
6105                  * register TSYNCRXCFG must be set, therefore it is not
6106                  * possible to time stamp both Sync and Delay_Req messages
6107                  * => fall back to time stamping all packets
6108                  */
6109                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6110                 config.rx_filter = HWTSTAMP_FILTER_ALL;
6111                 break;
6112         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6113                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6114                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6115                 is_l4 = true;
6116                 break;
6117         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6118                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6119                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6120                 is_l4 = true;
6121                 break;
6122         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6123         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6124                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6125                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6126                 is_l2 = true;
6127                 is_l4 = true;
6128                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6129                 break;
6130         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6131         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6132                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6133                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6134                 is_l2 = true;
6135                 is_l4 = true;
6136                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6137                 break;
6138         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6139         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6140         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6141                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6142                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6143                 is_l2 = true;
6144                 break;
6145         default:
6146                 return -ERANGE;
6147         }
6148
6149         if (hw->mac.type == e1000_82575) {
6150                 if (tsync_rx_ctl | tsync_tx_ctl)
6151                         return -EINVAL;
6152                 return 0;
6153         }
6154
6155         /*
6156          * Per-packet timestamping only works if all packets are
6157          * timestamped, so enable timestamping in all packets as
6158          * long as one rx filter was configured.
6159          */
6160         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6161                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6162                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6163         }
6164
6165         /* enable/disable TX */
6166         regval = rd32(E1000_TSYNCTXCTL);
6167         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6168         regval |= tsync_tx_ctl;
6169         wr32(E1000_TSYNCTXCTL, regval);
6170
6171         /* enable/disable RX */
6172         regval = rd32(E1000_TSYNCRXCTL);
6173         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6174         regval |= tsync_rx_ctl;
6175         wr32(E1000_TSYNCRXCTL, regval);
6176
6177         /* define which PTP packets are time stamped */
6178         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6179
6180         /* define ethertype filter for timestamped packets */
6181         if (is_l2)
6182                 wr32(E1000_ETQF(3),
6183                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6184                                  E1000_ETQF_1588 | /* enable timestamping */
6185                                  ETH_P_1588));     /* 1588 eth protocol type */
6186         else
6187                 wr32(E1000_ETQF(3), 0);
6188
6189 #define PTP_PORT 319
6190         /* L4 Queue Filter[3]: filter by destination port and protocol */
6191         if (is_l4) {
6192                 u32 ftqf = (IPPROTO_UDP /* UDP */
6193                         | E1000_FTQF_VF_BP /* VF not compared */
6194                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6195                         | E1000_FTQF_MASK); /* mask all inputs */
6196                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6197
6198                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6199                 wr32(E1000_IMIREXT(3),
6200                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6201                 if (hw->mac.type == e1000_82576) {
6202                         /* enable source port check */
6203                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6204                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6205                 }
6206                 wr32(E1000_FTQF(3), ftqf);
6207         } else {
6208                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6209         }
6210         wrfl();
6211
6212         adapter->hwtstamp_config = config;
6213
6214         /* clear TX/RX time stamp registers, just to be sure */
6215         regval = rd32(E1000_TXSTMPH);
6216         regval = rd32(E1000_RXSTMPH);
6217
6218         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6219                 -EFAULT : 0;
6220 }
6221
6222 /**
6223  * igb_ioctl -
6224  * @netdev:
6225  * @ifreq:
6226  * @cmd:
6227  **/
6228 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6229 {
6230         switch (cmd) {
6231         case SIOCGMIIPHY:
6232         case SIOCGMIIREG:
6233         case SIOCSMIIREG:
6234                 return igb_mii_ioctl(netdev, ifr, cmd);
6235         case SIOCSHWTSTAMP:
6236                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6237         default:
6238                 return -EOPNOTSUPP;
6239         }
6240 }
6241
6242 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6243 {
6244         struct igb_adapter *adapter = hw->back;
6245         u16 cap_offset;
6246
6247         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6248         if (!cap_offset)
6249                 return -E1000_ERR_CONFIG;
6250
6251         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6252
6253         return 0;
6254 }
6255
6256 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6257 {
6258         struct igb_adapter *adapter = hw->back;
6259         u16 cap_offset;
6260
6261         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6262         if (!cap_offset)
6263                 return -E1000_ERR_CONFIG;
6264
6265         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6266
6267         return 0;
6268 }
6269
6270 static void igb_vlan_rx_register(struct net_device *netdev,
6271                                  struct vlan_group *grp)
6272 {
6273         struct igb_adapter *adapter = netdev_priv(netdev);
6274         struct e1000_hw *hw = &adapter->hw;
6275         u32 ctrl, rctl;
6276
6277         igb_irq_disable(adapter);
6278         adapter->vlgrp = grp;
6279
6280         if (grp) {
6281                 /* enable VLAN tag insert/strip */
6282                 ctrl = rd32(E1000_CTRL);
6283                 ctrl |= E1000_CTRL_VME;
6284                 wr32(E1000_CTRL, ctrl);
6285
6286                 /* Disable CFI check */
6287                 rctl = rd32(E1000_RCTL);
6288                 rctl &= ~E1000_RCTL_CFIEN;
6289                 wr32(E1000_RCTL, rctl);
6290         } else {
6291                 /* disable VLAN tag insert/strip */
6292                 ctrl = rd32(E1000_CTRL);
6293                 ctrl &= ~E1000_CTRL_VME;
6294                 wr32(E1000_CTRL, ctrl);
6295         }
6296
6297         igb_rlpml_set(adapter);
6298
6299         if (!test_bit(__IGB_DOWN, &adapter->state))
6300                 igb_irq_enable(adapter);
6301 }
6302
6303 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6304 {
6305         struct igb_adapter *adapter = netdev_priv(netdev);
6306         struct e1000_hw *hw = &adapter->hw;
6307         int pf_id = adapter->vfs_allocated_count;
6308
6309         /* attempt to add filter to vlvf array */
6310         igb_vlvf_set(adapter, vid, true, pf_id);
6311
6312         /* add the filter since PF can receive vlans w/o entry in vlvf */
6313         igb_vfta_set(hw, vid, true);
6314 }
6315
6316 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6317 {
6318         struct igb_adapter *adapter = netdev_priv(netdev);
6319         struct e1000_hw *hw = &adapter->hw;
6320         int pf_id = adapter->vfs_allocated_count;
6321         s32 err;
6322
6323         igb_irq_disable(adapter);
6324         vlan_group_set_device(adapter->vlgrp, vid, NULL);
6325
6326         if (!test_bit(__IGB_DOWN, &adapter->state))
6327                 igb_irq_enable(adapter);
6328
6329         /* remove vlan from VLVF table array */
6330         err = igb_vlvf_set(adapter, vid, false, pf_id);
6331
6332         /* if vid was not present in VLVF just remove it from table */
6333         if (err)
6334                 igb_vfta_set(hw, vid, false);
6335 }
6336
6337 static void igb_restore_vlan(struct igb_adapter *adapter)
6338 {
6339         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6340
6341         if (adapter->vlgrp) {
6342                 u16 vid;
6343                 for (vid = 0; vid < VLAN_N_VID; vid++) {
6344                         if (!vlan_group_get_device(adapter->vlgrp, vid))
6345                                 continue;
6346                         igb_vlan_rx_add_vid(adapter->netdev, vid);
6347                 }
6348         }
6349 }
6350
6351 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
6352 {
6353         struct pci_dev *pdev = adapter->pdev;
6354         struct e1000_mac_info *mac = &adapter->hw.mac;
6355
6356         mac->autoneg = 0;
6357
6358         /* Fiber NIC's only allow 1000 Gbps Full duplex */
6359         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6360                 spddplx != (SPEED_1000 + DUPLEX_FULL)) {
6361                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6362                 return -EINVAL;
6363         }
6364
6365         switch (spddplx) {
6366         case SPEED_10 + DUPLEX_HALF:
6367                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6368                 break;
6369         case SPEED_10 + DUPLEX_FULL:
6370                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6371                 break;
6372         case SPEED_100 + DUPLEX_HALF:
6373                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6374                 break;
6375         case SPEED_100 + DUPLEX_FULL:
6376                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6377                 break;
6378         case SPEED_1000 + DUPLEX_FULL:
6379                 mac->autoneg = 1;
6380                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6381                 break;
6382         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6383         default:
6384                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6385                 return -EINVAL;
6386         }
6387         return 0;
6388 }
6389
6390 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6391 {
6392         struct net_device *netdev = pci_get_drvdata(pdev);
6393         struct igb_adapter *adapter = netdev_priv(netdev);
6394         struct e1000_hw *hw = &adapter->hw;
6395         u32 ctrl, rctl, status;
6396         u32 wufc = adapter->wol;
6397 #ifdef CONFIG_PM
6398         int retval = 0;
6399 #endif
6400
6401         netif_device_detach(netdev);
6402
6403         if (netif_running(netdev))
6404                 igb_close(netdev);
6405
6406         igb_clear_interrupt_scheme(adapter);
6407
6408 #ifdef CONFIG_PM
6409         retval = pci_save_state(pdev);
6410         if (retval)
6411                 return retval;
6412 #endif
6413
6414         status = rd32(E1000_STATUS);
6415         if (status & E1000_STATUS_LU)
6416                 wufc &= ~E1000_WUFC_LNKC;
6417
6418         if (wufc) {
6419                 igb_setup_rctl(adapter);
6420                 igb_set_rx_mode(netdev);
6421
6422                 /* turn on all-multi mode if wake on multicast is enabled */
6423                 if (wufc & E1000_WUFC_MC) {
6424                         rctl = rd32(E1000_RCTL);
6425                         rctl |= E1000_RCTL_MPE;
6426                         wr32(E1000_RCTL, rctl);
6427                 }
6428
6429                 ctrl = rd32(E1000_CTRL);
6430                 /* advertise wake from D3Cold */
6431                 #define E1000_CTRL_ADVD3WUC 0x00100000
6432                 /* phy power management enable */
6433                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6434                 ctrl |= E1000_CTRL_ADVD3WUC;
6435                 wr32(E1000_CTRL, ctrl);
6436
6437                 /* Allow time for pending master requests to run */
6438                 igb_disable_pcie_master(hw);
6439
6440                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6441                 wr32(E1000_WUFC, wufc);
6442         } else {
6443                 wr32(E1000_WUC, 0);
6444                 wr32(E1000_WUFC, 0);
6445         }
6446
6447         *enable_wake = wufc || adapter->en_mng_pt;
6448         if (!*enable_wake)
6449                 igb_power_down_link(adapter);
6450         else
6451                 igb_power_up_link(adapter);
6452
6453         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6454          * would have already happened in close and is redundant. */
6455         igb_release_hw_control(adapter);
6456
6457         pci_disable_device(pdev);
6458
6459         return 0;
6460 }
6461
6462 #ifdef CONFIG_PM
6463 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6464 {
6465         int retval;
6466         bool wake;
6467
6468         retval = __igb_shutdown(pdev, &wake);
6469         if (retval)
6470                 return retval;
6471
6472         if (wake) {
6473                 pci_prepare_to_sleep(pdev);
6474         } else {
6475                 pci_wake_from_d3(pdev, false);
6476                 pci_set_power_state(pdev, PCI_D3hot);
6477         }
6478
6479         return 0;
6480 }
6481
6482 static int igb_resume(struct pci_dev *pdev)
6483 {
6484         struct net_device *netdev = pci_get_drvdata(pdev);
6485         struct igb_adapter *adapter = netdev_priv(netdev);
6486         struct e1000_hw *hw = &adapter->hw;
6487         u32 err;
6488
6489         pci_set_power_state(pdev, PCI_D0);
6490         pci_restore_state(pdev);
6491         pci_save_state(pdev);
6492
6493         err = pci_enable_device_mem(pdev);
6494         if (err) {
6495                 dev_err(&pdev->dev,
6496                         "igb: Cannot enable PCI device from suspend\n");
6497                 return err;
6498         }
6499         pci_set_master(pdev);
6500
6501         pci_enable_wake(pdev, PCI_D3hot, 0);
6502         pci_enable_wake(pdev, PCI_D3cold, 0);
6503
6504         if (igb_init_interrupt_scheme(adapter)) {
6505                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6506                 return -ENOMEM;
6507         }
6508
6509         igb_reset(adapter);
6510
6511         /* let the f/w know that the h/w is now under the control of the
6512          * driver. */
6513         igb_get_hw_control(adapter);
6514
6515         wr32(E1000_WUS, ~0);
6516
6517         if (netif_running(netdev)) {
6518                 err = igb_open(netdev);
6519                 if (err)
6520                         return err;
6521         }
6522
6523         netif_device_attach(netdev);
6524
6525         return 0;
6526 }
6527 #endif
6528
6529 static void igb_shutdown(struct pci_dev *pdev)
6530 {
6531         bool wake;
6532
6533         __igb_shutdown(pdev, &wake);
6534
6535         if (system_state == SYSTEM_POWER_OFF) {
6536                 pci_wake_from_d3(pdev, wake);
6537                 pci_set_power_state(pdev, PCI_D3hot);
6538         }
6539 }
6540
6541 #ifdef CONFIG_NET_POLL_CONTROLLER
6542 /*
6543  * Polling 'interrupt' - used by things like netconsole to send skbs
6544  * without having to re-enable interrupts. It's not called while
6545  * the interrupt routine is executing.
6546  */
6547 static void igb_netpoll(struct net_device *netdev)
6548 {
6549         struct igb_adapter *adapter = netdev_priv(netdev);
6550         struct e1000_hw *hw = &adapter->hw;
6551         int i;
6552
6553         if (!adapter->msix_entries) {
6554                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6555                 igb_irq_disable(adapter);
6556                 napi_schedule(&q_vector->napi);
6557                 return;
6558         }
6559
6560         for (i = 0; i < adapter->num_q_vectors; i++) {
6561                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6562                 wr32(E1000_EIMC, q_vector->eims_value);
6563                 napi_schedule(&q_vector->napi);
6564         }
6565 }
6566 #endif /* CONFIG_NET_POLL_CONTROLLER */
6567
6568 /**
6569  * igb_io_error_detected - called when PCI error is detected
6570  * @pdev: Pointer to PCI device
6571  * @state: The current pci connection state
6572  *
6573  * This function is called after a PCI bus error affecting
6574  * this device has been detected.
6575  */
6576 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6577                                               pci_channel_state_t state)
6578 {
6579         struct net_device *netdev = pci_get_drvdata(pdev);
6580         struct igb_adapter *adapter = netdev_priv(netdev);
6581
6582         netif_device_detach(netdev);
6583
6584         if (state == pci_channel_io_perm_failure)
6585                 return PCI_ERS_RESULT_DISCONNECT;
6586
6587         if (netif_running(netdev))
6588                 igb_down(adapter);
6589         pci_disable_device(pdev);
6590
6591         /* Request a slot slot reset. */
6592         return PCI_ERS_RESULT_NEED_RESET;
6593 }
6594
6595 /**
6596  * igb_io_slot_reset - called after the pci bus has been reset.
6597  * @pdev: Pointer to PCI device
6598  *
6599  * Restart the card from scratch, as if from a cold-boot. Implementation
6600  * resembles the first-half of the igb_resume routine.
6601  */
6602 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6603 {
6604         struct net_device *netdev = pci_get_drvdata(pdev);
6605         struct igb_adapter *adapter = netdev_priv(netdev);
6606         struct e1000_hw *hw = &adapter->hw;
6607         pci_ers_result_t result;
6608         int err;
6609
6610         if (pci_enable_device_mem(pdev)) {
6611                 dev_err(&pdev->dev,
6612                         "Cannot re-enable PCI device after reset.\n");
6613                 result = PCI_ERS_RESULT_DISCONNECT;
6614         } else {
6615                 pci_set_master(pdev);
6616                 pci_restore_state(pdev);
6617                 pci_save_state(pdev);
6618
6619                 pci_enable_wake(pdev, PCI_D3hot, 0);
6620                 pci_enable_wake(pdev, PCI_D3cold, 0);
6621
6622                 igb_reset(adapter);
6623                 wr32(E1000_WUS, ~0);
6624                 result = PCI_ERS_RESULT_RECOVERED;
6625         }
6626
6627         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6628         if (err) {
6629                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6630                         "failed 0x%0x\n", err);
6631                 /* non-fatal, continue */
6632         }
6633
6634         return result;
6635 }
6636
6637 /**
6638  * igb_io_resume - called when traffic can start flowing again.
6639  * @pdev: Pointer to PCI device
6640  *
6641  * This callback is called when the error recovery driver tells us that
6642  * its OK to resume normal operation. Implementation resembles the
6643  * second-half of the igb_resume routine.
6644  */
6645 static void igb_io_resume(struct pci_dev *pdev)
6646 {
6647         struct net_device *netdev = pci_get_drvdata(pdev);
6648         struct igb_adapter *adapter = netdev_priv(netdev);
6649
6650         if (netif_running(netdev)) {
6651                 if (igb_up(adapter)) {
6652                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6653                         return;
6654                 }
6655         }
6656
6657         netif_device_attach(netdev);
6658
6659         /* let the f/w know that the h/w is now under the control of the
6660          * driver. */
6661         igb_get_hw_control(adapter);
6662 }
6663
6664 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6665                              u8 qsel)
6666 {
6667         u32 rar_low, rar_high;
6668         struct e1000_hw *hw = &adapter->hw;
6669
6670         /* HW expects these in little endian so we reverse the byte order
6671          * from network order (big endian) to little endian
6672          */
6673         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6674                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6675         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6676
6677         /* Indicate to hardware the Address is Valid. */
6678         rar_high |= E1000_RAH_AV;
6679
6680         if (hw->mac.type == e1000_82575)
6681                 rar_high |= E1000_RAH_POOL_1 * qsel;
6682         else
6683                 rar_high |= E1000_RAH_POOL_1 << qsel;
6684
6685         wr32(E1000_RAL(index), rar_low);
6686         wrfl();
6687         wr32(E1000_RAH(index), rar_high);
6688         wrfl();
6689 }
6690
6691 static int igb_set_vf_mac(struct igb_adapter *adapter,
6692                           int vf, unsigned char *mac_addr)
6693 {
6694         struct e1000_hw *hw = &adapter->hw;
6695         /* VF MAC addresses start at end of receive addresses and moves
6696          * torwards the first, as a result a collision should not be possible */
6697         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6698
6699         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6700
6701         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6702
6703         return 0;
6704 }
6705
6706 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6707 {
6708         struct igb_adapter *adapter = netdev_priv(netdev);
6709         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6710                 return -EINVAL;
6711         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6712         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6713         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6714                                       " change effective.");
6715         if (test_bit(__IGB_DOWN, &adapter->state)) {
6716                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6717                          " but the PF device is not up.\n");
6718                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6719                          " attempting to use the VF device.\n");
6720         }
6721         return igb_set_vf_mac(adapter, vf, mac);
6722 }
6723
6724 static int igb_link_mbps(int internal_link_speed)
6725 {
6726         switch (internal_link_speed) {
6727         case SPEED_100:
6728                 return 100;
6729         case SPEED_1000:
6730                 return 1000;
6731         default:
6732                 return 0;
6733         }
6734 }
6735
6736 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6737                                   int link_speed)
6738 {
6739         int rf_dec, rf_int;
6740         u32 bcnrc_val;
6741
6742         if (tx_rate != 0) {
6743                 /* Calculate the rate factor values to set */
6744                 rf_int = link_speed / tx_rate;
6745                 rf_dec = (link_speed - (rf_int * tx_rate));
6746                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6747
6748                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6749                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6750                                E1000_RTTBCNRC_RF_INT_MASK);
6751                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6752         } else {
6753                 bcnrc_val = 0;
6754         }
6755
6756         wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6757         wr32(E1000_RTTBCNRC, bcnrc_val);
6758 }
6759
6760 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6761 {
6762         int actual_link_speed, i;
6763         bool reset_rate = false;
6764
6765         /* VF TX rate limit was not set or not supported */
6766         if ((adapter->vf_rate_link_speed == 0) ||
6767             (adapter->hw.mac.type != e1000_82576))
6768                 return;
6769
6770         actual_link_speed = igb_link_mbps(adapter->link_speed);
6771         if (actual_link_speed != adapter->vf_rate_link_speed) {
6772                 reset_rate = true;
6773                 adapter->vf_rate_link_speed = 0;
6774                 dev_info(&adapter->pdev->dev,
6775                          "Link speed has been changed. VF Transmit "
6776                          "rate is disabled\n");
6777         }
6778
6779         for (i = 0; i < adapter->vfs_allocated_count; i++) {
6780                 if (reset_rate)
6781                         adapter->vf_data[i].tx_rate = 0;
6782
6783                 igb_set_vf_rate_limit(&adapter->hw, i,
6784                                       adapter->vf_data[i].tx_rate,
6785                                       actual_link_speed);
6786         }
6787 }
6788
6789 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6790 {
6791         struct igb_adapter *adapter = netdev_priv(netdev);
6792         struct e1000_hw *hw = &adapter->hw;
6793         int actual_link_speed;
6794
6795         if (hw->mac.type != e1000_82576)
6796                 return -EOPNOTSUPP;
6797
6798         actual_link_speed = igb_link_mbps(adapter->link_speed);
6799         if ((vf >= adapter->vfs_allocated_count) ||
6800             (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6801             (tx_rate < 0) || (tx_rate > actual_link_speed))
6802                 return -EINVAL;
6803
6804         adapter->vf_rate_link_speed = actual_link_speed;
6805         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6806         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6807
6808         return 0;
6809 }
6810
6811 static int igb_ndo_get_vf_config(struct net_device *netdev,
6812                                  int vf, struct ifla_vf_info *ivi)
6813 {
6814         struct igb_adapter *adapter = netdev_priv(netdev);
6815         if (vf >= adapter->vfs_allocated_count)
6816                 return -EINVAL;
6817         ivi->vf = vf;
6818         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6819         ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6820         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6821         ivi->qos = adapter->vf_data[vf].pf_qos;
6822         return 0;
6823 }
6824
6825 static void igb_vmm_control(struct igb_adapter *adapter)
6826 {
6827         struct e1000_hw *hw = &adapter->hw;
6828         u32 reg;
6829
6830         switch (hw->mac.type) {
6831         case e1000_82575:
6832         default:
6833                 /* replication is not supported for 82575 */
6834                 return;
6835         case e1000_82576:
6836                 /* notify HW that the MAC is adding vlan tags */
6837                 reg = rd32(E1000_DTXCTL);
6838                 reg |= E1000_DTXCTL_VLAN_ADDED;
6839                 wr32(E1000_DTXCTL, reg);
6840         case e1000_82580:
6841                 /* enable replication vlan tag stripping */
6842                 reg = rd32(E1000_RPLOLR);
6843                 reg |= E1000_RPLOLR_STRVLAN;
6844                 wr32(E1000_RPLOLR, reg);
6845         case e1000_i350:
6846                 /* none of the above registers are supported by i350 */
6847                 break;
6848         }
6849
6850         if (adapter->vfs_allocated_count) {
6851                 igb_vmdq_set_loopback_pf(hw, true);
6852                 igb_vmdq_set_replication_pf(hw, true);
6853                 igb_vmdq_set_anti_spoofing_pf(hw, true,
6854                                                 adapter->vfs_allocated_count);
6855         } else {
6856                 igb_vmdq_set_loopback_pf(hw, false);
6857                 igb_vmdq_set_replication_pf(hw, false);
6858         }
6859 }
6860
6861 /* igb_main.c */