1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #ifdef CONFIG_IGB_DCA
49 #include <linux/dca.h>
50 #endif
51 #include "igb.h"
52
53 #define DRV_VERSION "2.1.0-k2"
54 char igb_driver_name[] = "igb";
55 char igb_driver_version[] = DRV_VERSION;
56 static const char igb_driver_string[] =
57                                 "Intel(R) Gigabit Ethernet Network Driver";
58 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
59
60 static const struct e1000_info *igb_info_tbl[] = {
61         [board_82575] = &e1000_82575_info,
62 };
63
64 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
87         /* required last entry */
88         {0, }
89 };
90
91 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
92
93 void igb_reset(struct igb_adapter *);
94 static int igb_setup_all_tx_resources(struct igb_adapter *);
95 static int igb_setup_all_rx_resources(struct igb_adapter *);
96 static void igb_free_all_tx_resources(struct igb_adapter *);
97 static void igb_free_all_rx_resources(struct igb_adapter *);
98 static void igb_setup_mrqc(struct igb_adapter *);
99 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
100 static void __devexit igb_remove(struct pci_dev *pdev);
101 static int igb_sw_init(struct igb_adapter *);
102 static int igb_open(struct net_device *);
103 static int igb_close(struct net_device *);
104 static void igb_configure_tx(struct igb_adapter *);
105 static void igb_configure_rx(struct igb_adapter *);
106 static void igb_clean_all_tx_rings(struct igb_adapter *);
107 static void igb_clean_all_rx_rings(struct igb_adapter *);
108 static void igb_clean_tx_ring(struct igb_ring *);
109 static void igb_clean_rx_ring(struct igb_ring *);
110 static void igb_set_rx_mode(struct net_device *);
111 static void igb_update_phy_info(unsigned long);
112 static void igb_watchdog(unsigned long);
113 static void igb_watchdog_task(struct work_struct *);
114 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
115 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
116                                                  struct rtnl_link_stats64 *stats);
117 static int igb_change_mtu(struct net_device *, int);
118 static int igb_set_mac(struct net_device *, void *);
119 static void igb_set_uta(struct igb_adapter *adapter);
120 static irqreturn_t igb_intr(int irq, void *);
121 static irqreturn_t igb_intr_msi(int irq, void *);
122 static irqreturn_t igb_msix_other(int irq, void *);
123 static irqreturn_t igb_msix_ring(int irq, void *);
124 #ifdef CONFIG_IGB_DCA
125 static void igb_update_dca(struct igb_q_vector *);
126 static void igb_setup_dca(struct igb_adapter *);
127 #endif /* CONFIG_IGB_DCA */
128 static bool igb_clean_tx_irq(struct igb_q_vector *);
129 static int igb_poll(struct napi_struct *, int);
130 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
131 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
132 static void igb_tx_timeout(struct net_device *);
133 static void igb_reset_task(struct work_struct *);
134 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
135 static void igb_vlan_rx_add_vid(struct net_device *, u16);
136 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
137 static void igb_restore_vlan(struct igb_adapter *);
138 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
139 static void igb_ping_all_vfs(struct igb_adapter *);
140 static void igb_msg_task(struct igb_adapter *);
141 static void igb_vmm_control(struct igb_adapter *);
142 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
143 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
144 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
145 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
146                                int vf, u16 vlan, u8 qos);
147 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
148 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
149                                  struct ifla_vf_info *ivi);
150
151 #ifdef CONFIG_PM
152 static int igb_suspend(struct pci_dev *, pm_message_t);
153 static int igb_resume(struct pci_dev *);
154 #endif
155 static void igb_shutdown(struct pci_dev *);
156 #ifdef CONFIG_IGB_DCA
157 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
158 static struct notifier_block dca_notifier = {
159         .notifier_call  = igb_notify_dca,
160         .next           = NULL,
161         .priority       = 0
162 };
163 #endif
164 #ifdef CONFIG_NET_POLL_CONTROLLER
165 /* for netdump / net console */
166 static void igb_netpoll(struct net_device *);
167 #endif
168 #ifdef CONFIG_PCI_IOV
169 static unsigned int max_vfs;
170 module_param(max_vfs, uint, 0);
171 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
172                  "per physical function");
173 #endif /* CONFIG_PCI_IOV */
174
175 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
176                      pci_channel_state_t);
177 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
178 static void igb_io_resume(struct pci_dev *);
179
180 static struct pci_error_handlers igb_err_handler = {
181         .error_detected = igb_io_error_detected,
182         .slot_reset = igb_io_slot_reset,
183         .resume = igb_io_resume,
184 };
185
186
187 static struct pci_driver igb_driver = {
188         .name     = igb_driver_name,
189         .id_table = igb_pci_tbl,
190         .probe    = igb_probe,
191         .remove   = __devexit_p(igb_remove),
192 #ifdef CONFIG_PM
193         /* Power Management Hooks */
194         .suspend  = igb_suspend,
195         .resume   = igb_resume,
196 #endif
197         .shutdown = igb_shutdown,
198         .err_handler = &igb_err_handler
199 };
200
201 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
202 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
203 MODULE_LICENSE("GPL");
204 MODULE_VERSION(DRV_VERSION);
205
206 struct igb_reg_info {
207         u32 ofs;
208         char *name;
209 };
210
211 static const struct igb_reg_info igb_reg_info_tbl[] = {
212
213         /* General Registers */
214         {E1000_CTRL, "CTRL"},
215         {E1000_STATUS, "STATUS"},
216         {E1000_CTRL_EXT, "CTRL_EXT"},
217
218         /* Interrupt Registers */
219         {E1000_ICR, "ICR"},
220
221         /* RX Registers */
222         {E1000_RCTL, "RCTL"},
223         {E1000_RDLEN(0), "RDLEN"},
224         {E1000_RDH(0), "RDH"},
225         {E1000_RDT(0), "RDT"},
226         {E1000_RXDCTL(0), "RXDCTL"},
227         {E1000_RDBAL(0), "RDBAL"},
228         {E1000_RDBAH(0), "RDBAH"},
229
230         /* TX Registers */
231         {E1000_TCTL, "TCTL"},
232         {E1000_TDBAL(0), "TDBAL"},
233         {E1000_TDBAH(0), "TDBAH"},
234         {E1000_TDLEN(0), "TDLEN"},
235         {E1000_TDH(0), "TDH"},
236         {E1000_TDT(0), "TDT"},
237         {E1000_TXDCTL(0), "TXDCTL"},
238         {E1000_TDFH, "TDFH"},
239         {E1000_TDFT, "TDFT"},
240         {E1000_TDFHS, "TDFHS"},
241         {E1000_TDFPC, "TDFPC"},
242
243         /* List Terminator */
244         {}
245 };
246
247 /*
248  * igb_regdump - register printout routine
249  */
250 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
251 {
252         int n = 0;
253         char rname[16];
254         u32 regs[8];
255
256         switch (reginfo->ofs) {
257         case E1000_RDLEN(0):
258                 for (n = 0; n < 4; n++)
259                         regs[n] = rd32(E1000_RDLEN(n));
260                 break;
261         case E1000_RDH(0):
262                 for (n = 0; n < 4; n++)
263                         regs[n] = rd32(E1000_RDH(n));
264                 break;
265         case E1000_RDT(0):
266                 for (n = 0; n < 4; n++)
267                         regs[n] = rd32(E1000_RDT(n));
268                 break;
269         case E1000_RXDCTL(0):
270                 for (n = 0; n < 4; n++)
271                         regs[n] = rd32(E1000_RXDCTL(n));
272                 break;
273         case E1000_RDBAL(0):
274                 for (n = 0; n < 4; n++)
275                         regs[n] = rd32(E1000_RDBAL(n));
276                 break;
277         case E1000_RDBAH(0):
278                 for (n = 0; n < 4; n++)
279                         regs[n] = rd32(E1000_RDBAH(n));
280                 break;
281         case E1000_TDBAL(0):
282                 for (n = 0; n < 4; n++)
283                         regs[n] = rd32(E1000_TDBAL(n));
284                 break;
285         case E1000_TDBAH(0):
286                 for (n = 0; n < 4; n++)
287                         regs[n] = rd32(E1000_TDBAH(n));
288                 break;
289         case E1000_TDLEN(0):
290                 for (n = 0; n < 4; n++)
291                         regs[n] = rd32(E1000_TDLEN(n));
292                 break;
293         case E1000_TDH(0):
294                 for (n = 0; n < 4; n++)
295                         regs[n] = rd32(E1000_TDH(n));
296                 break;
297         case E1000_TDT(0):
298                 for (n = 0; n < 4; n++)
299                         regs[n] = rd32(E1000_TDT(n));
300                 break;
301         case E1000_TXDCTL(0):
302                 for (n = 0; n < 4; n++)
303                         regs[n] = rd32(E1000_TXDCTL(n));
304                 break;
305         default:
306                 printk(KERN_INFO "%-15s %08x\n",
307                         reginfo->name, rd32(reginfo->ofs));
308                 return;
309         }
310
311         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
312         printk(KERN_INFO "%-15s ", rname);
313         for (n = 0; n < 4; n++)
314                 printk(KERN_CONT "%08x ", regs[n]);
315         printk(KERN_CONT "\n");
316 }
317
318 /*
319  * igb_dump - Print registers, tx-rings and rx-rings
320  */
321 static void igb_dump(struct igb_adapter *adapter)
322 {
323         struct net_device *netdev = adapter->netdev;
324         struct e1000_hw *hw = &adapter->hw;
325         struct igb_reg_info *reginfo;
326         int n = 0;
327         struct igb_ring *tx_ring;
328         union e1000_adv_tx_desc *tx_desc;
329         struct my_u0 { u64 a; u64 b; } *u0;
330         struct igb_buffer *buffer_info;
331         struct igb_ring *rx_ring;
332         union e1000_adv_rx_desc *rx_desc;
333         u32 staterr;
334         int i = 0;
335
336         if (!netif_msg_hw(adapter))
337                 return;
338
339         /* Print netdevice Info */
340         if (netdev) {
341                 dev_info(&adapter->pdev->dev, "Net device Info\n");
342                 printk(KERN_INFO "Device Name     state            "
343                         "trans_start      last_rx\n");
344                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
345                 netdev->name,
346                 netdev->state,
347                 netdev->trans_start,
348                 netdev->last_rx);
349         }
350
351         /* Print Registers */
352         dev_info(&adapter->pdev->dev, "Register Dump\n");
353         printk(KERN_INFO " Register Name   Value\n");
354         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
355              reginfo->name; reginfo++) {
356                 igb_regdump(hw, reginfo);
357         }
358
359         /* Print TX Ring Summary */
360         if (!netdev || !netif_running(netdev))
361                 goto exit;
362
363         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
364         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
365                 " leng ntw timestamp\n");
366         for (n = 0; n < adapter->num_tx_queues; n++) {
367                 tx_ring = adapter->tx_ring[n];
368                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
369                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
370                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
371                            (u64)buffer_info->dma,
372                            buffer_info->length,
373                            buffer_info->next_to_watch,
374                            (u64)buffer_info->time_stamp);
375         }
376
377         /* Print TX Rings */
378         if (!netif_msg_tx_done(adapter))
379                 goto rx_ring_summary;
380
381         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
382
383         /* Transmit Descriptor Formats
384          *
385          * Advanced Transmit Descriptor
386          *   +--------------------------------------------------------------+
387          * 0 |         Buffer Address [63:0]                                |
388          *   +--------------------------------------------------------------+
389          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
390          *   +--------------------------------------------------------------+
391          *   63      46 45    40 39 38 36 35 32 31   24             15       0
392          */
393
394         for (n = 0; n < adapter->num_tx_queues; n++) {
395                 tx_ring = adapter->tx_ring[n];
396                 printk(KERN_INFO "------------------------------------\n");
397                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
398                 printk(KERN_INFO "------------------------------------\n");
399                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
400                         "[PlPOCIStDDM Ln] [bi->dma       ] "
401                         "leng  ntw timestamp        bi->skb\n");
402
403                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
404                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
405                         buffer_info = &tx_ring->buffer_info[i];
406                         u0 = (struct my_u0 *)tx_desc;
407                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
408                                 " %04X  %3X %016llX %p", i,
409                                 le64_to_cpu(u0->a),
410                                 le64_to_cpu(u0->b),
411                                 (u64)buffer_info->dma,
412                                 buffer_info->length,
413                                 buffer_info->next_to_watch,
414                                 (u64)buffer_info->time_stamp,
415                                 buffer_info->skb);
416                         if (i == tx_ring->next_to_use &&
417                                 i == tx_ring->next_to_clean)
418                                 printk(KERN_CONT " NTC/U\n");
419                         else if (i == tx_ring->next_to_use)
420                                 printk(KERN_CONT " NTU\n");
421                         else if (i == tx_ring->next_to_clean)
422                                 printk(KERN_CONT " NTC\n");
423                         else
424                                 printk(KERN_CONT "\n");
425
426                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
427                                 print_hex_dump(KERN_INFO, "",
428                                         DUMP_PREFIX_ADDRESS,
429                                         16, 1, phys_to_virt(buffer_info->dma),
430                                         buffer_info->length, true);
431                 }
432         }
433
434         /* Print RX Rings Summary */
435 rx_ring_summary:
436         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
437         printk(KERN_INFO "Queue [NTU] [NTC]\n");
438         for (n = 0; n < adapter->num_rx_queues; n++) {
439                 rx_ring = adapter->rx_ring[n];
440                 printk(KERN_INFO " %5d %5X %5X\n", n,
441                            rx_ring->next_to_use, rx_ring->next_to_clean);
442         }
443
444         /* Print RX Rings */
445         if (!netif_msg_rx_status(adapter))
446                 goto exit;
447
448         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
449
450         /* Advanced Receive Descriptor (Read) Format
451          *    63                                           1        0
452          *    +-----------------------------------------------------+
453          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
454          *    +----------------------------------------------+------+
455          *  8 |       Header Buffer Address [63:1]           |  DD  |
456          *    +-----------------------------------------------------+
457          *
458          *
459          * Advanced Receive Descriptor (Write-Back) Format
460          *
461          *   63       48 47    32 31  30      21 20 17 16   4 3     0
462          *   +------------------------------------------------------+
463          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
464          *   | Checksum   Ident  |   |           |    | Type | Type |
465          *   +------------------------------------------------------+
466          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
467          *   +------------------------------------------------------+
468          *   63       48 47    32 31            20 19               0
469          */
470
471         for (n = 0; n < adapter->num_rx_queues; n++) {
472                 rx_ring = adapter->rx_ring[n];
473                 printk(KERN_INFO "------------------------------------\n");
474                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
475                 printk(KERN_INFO "------------------------------------\n");
476                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
477                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
478                         "<-- Adv Rx Read format\n");
479                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
480                         "[vl er S cks ln] ---------------- [bi->skb] "
481                         "<-- Adv Rx Write-Back format\n");
482
483                 for (i = 0; i < rx_ring->count; i++) {
484                         buffer_info = &rx_ring->buffer_info[i];
485                         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
486                         u0 = (struct my_u0 *)rx_desc;
487                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
488                         if (staterr & E1000_RXD_STAT_DD) {
489                                 /* Descriptor Done */
490                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
491                                         "%016llX ---------------- %p", i,
492                                         le64_to_cpu(u0->a),
493                                         le64_to_cpu(u0->b),
494                                         buffer_info->skb);
495                         } else {
496                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
497                                         "%016llX %016llX %p", i,
498                                         le64_to_cpu(u0->a),
499                                         le64_to_cpu(u0->b),
500                                         (u64)buffer_info->dma,
501                                         buffer_info->skb);
502
503                                 if (netif_msg_pktdata(adapter)) {
504                                         print_hex_dump(KERN_INFO, "",
505                                                 DUMP_PREFIX_ADDRESS,
506                                                 16, 1,
507                                                 phys_to_virt(buffer_info->dma),
508                                                 rx_ring->rx_buffer_len, true);
509                                         if (rx_ring->rx_buffer_len
510                                                 < IGB_RXBUFFER_1024)
511                                                 print_hex_dump(KERN_INFO, "",
512                                                   DUMP_PREFIX_ADDRESS,
513                                                   16, 1,
514                                                   phys_to_virt(
515                                                     buffer_info->page_dma +
516                                                     buffer_info->page_offset),
517                                                   PAGE_SIZE/2, true);
518                                 }
519                         }
520
521                         if (i == rx_ring->next_to_use)
522                                 printk(KERN_CONT " NTU\n");
523                         else if (i == rx_ring->next_to_clean)
524                                 printk(KERN_CONT " NTC\n");
525                         else
526                                 printk(KERN_CONT "\n");
527
528                 }
529         }
530
531 exit:
532         return;
533 }
534
535
536 /**
537  * igb_read_clock - read raw cycle counter (to be used by time counter)
538  */
539 static cycle_t igb_read_clock(const struct cyclecounter *tc)
540 {
541         struct igb_adapter *adapter =
542                 container_of(tc, struct igb_adapter, cycles);
543         struct e1000_hw *hw = &adapter->hw;
544         u64 stamp = 0;
545         int shift = 0;
546
547         /*
548          * The timestamp latches on the lowest register read. For the 82580
549          * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
550          * adjusted TIMINCA, so SYSTIMR always reads as all 0s and can be ignored.
551          */
552         if (hw->mac.type == e1000_82580) {
553                 stamp = rd32(E1000_SYSTIMR) >> 8;
554                 shift = IGB_82580_TSYNC_SHIFT;
555         }
556
557         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
558         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
559         return stamp;
560 }
561
562 /**
563  * igb_get_hw_dev - return the net_device backing the hardware
564  * Used by the hardware layer to print debugging information.
565  **/
566 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
567 {
568         struct igb_adapter *adapter = hw->back;
569         return adapter->netdev;
570 }
571
572 /**
573  * igb_init_module - Driver Registration Routine
574  *
575  * igb_init_module is the first routine called when the driver is
576  * loaded. All it does is register with the PCI subsystem.
577  **/
578 static int __init igb_init_module(void)
579 {
580         int ret;
581         printk(KERN_INFO "%s - version %s\n",
582                igb_driver_string, igb_driver_version);
583
584         printk(KERN_INFO "%s\n", igb_copyright);
585
586 #ifdef CONFIG_IGB_DCA
587         dca_register_notify(&dca_notifier);
588 #endif
589         ret = pci_register_driver(&igb_driver);
590         return ret;
591 }
592
593 module_init(igb_init_module);
594
595 /**
596  * igb_exit_module - Driver Exit Cleanup Routine
597  *
598  * igb_exit_module is called just before the driver is removed
599  * from memory.
600  **/
601 static void __exit igb_exit_module(void)
602 {
603 #ifdef CONFIG_IGB_DCA
604         dca_unregister_notify(&dca_notifier);
605 #endif
606         pci_unregister_driver(&igb_driver);
607 }
608
609 module_exit(igb_exit_module);
610
611 #define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
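/*
 * Illustration: Q_IDX_82576(0) = 0, Q_IDX_82576(1) = 8, Q_IDX_82576(2) = 1,
 * Q_IDX_82576(3) = 9, ... - i.e. it alternates between the low and high
 * halves of the queue space, matching the (n, n + 8) pairs handed to VFs.
 */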
612 /**
613  * igb_cache_ring_register - Descriptor ring to register mapping
614  * @adapter: board private structure to initialize
615  *
616  * Once we know the feature-set enabled for the device, we'll cache
617  * the register offset the descriptor ring is assigned to.
618  **/
619 static void igb_cache_ring_register(struct igb_adapter *adapter)
620 {
621         int i = 0, j = 0;
622         u32 rbase_offset = adapter->vfs_allocated_count;
623
624         switch (adapter->hw.mac.type) {
625         case e1000_82576:
626                 /* The queues are allocated for virtualization such that VF 0
627                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
628                  * To avoid collisions, the PF starts at the first free queue
629                  * and continues consuming queues in the same sequence.
630                  */
631                 if (adapter->vfs_allocated_count) {
632                         for (; i < adapter->rss_queues; i++)
633                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
634                                                                Q_IDX_82576(i);
635                 }
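                /* Fall through - remaining rings use the sequential mapping below */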
636         case e1000_82575:
637         case e1000_82580:
638         case e1000_i350:
639         default:
640                 for (; i < adapter->num_rx_queues; i++)
641                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
642                 for (; j < adapter->num_tx_queues; j++)
643                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
644                 break;
645         }
646 }
647
648 static void igb_free_queues(struct igb_adapter *adapter)
649 {
650         int i;
651
652         for (i = 0; i < adapter->num_tx_queues; i++) {
653                 kfree(adapter->tx_ring[i]);
654                 adapter->tx_ring[i] = NULL;
655         }
656         for (i = 0; i < adapter->num_rx_queues; i++) {
657                 kfree(adapter->rx_ring[i]);
658                 adapter->rx_ring[i] = NULL;
659         }
660         adapter->num_rx_queues = 0;
661         adapter->num_tx_queues = 0;
662 }
663
664 /**
665  * igb_alloc_queues - Allocate memory for all rings
666  * @adapter: board private structure to initialize
667  *
668  * We allocate one ring per queue at run-time since we don't know the
669  * number of queues at compile-time.
670  **/
671 static int igb_alloc_queues(struct igb_adapter *adapter)
672 {
673         struct igb_ring *ring;
674         int i;
675
676         for (i = 0; i < adapter->num_tx_queues; i++) {
677                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
678                 if (!ring)
679                         goto err;
680                 ring->count = adapter->tx_ring_count;
681                 ring->queue_index = i;
682                 ring->dev = &adapter->pdev->dev;
683                 ring->netdev = adapter->netdev;
684                 /* For 82575, context index must be unique per ring. */
685                 if (adapter->hw.mac.type == e1000_82575)
686                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
687                 adapter->tx_ring[i] = ring;
688         }
689
690         for (i = 0; i < adapter->num_rx_queues; i++) {
691                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
692                 if (!ring)
693                         goto err;
694                 ring->count = adapter->rx_ring_count;
695                 ring->queue_index = i;
696                 ring->dev = &adapter->pdev->dev;
697                 ring->netdev = adapter->netdev;
698                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
699                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
700                 /* set flag indicating ring supports SCTP checksum offload */
701                 if (adapter->hw.mac.type >= e1000_82576)
702                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
703                 adapter->rx_ring[i] = ring;
704         }
705
706         igb_cache_ring_register(adapter);
707
708         return 0;
709
710 err:
711         igb_free_queues(adapter);
712
713         return -ENOMEM;
714 }
715
716 #define IGB_N0_QUEUE (-1)
717 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
718 {
719         u32 msixbm = 0;
720         struct igb_adapter *adapter = q_vector->adapter;
721         struct e1000_hw *hw = &adapter->hw;
722         u32 ivar, index;
723         int rx_queue = IGB_N0_QUEUE;
724         int tx_queue = IGB_N0_QUEUE;
725
726         if (q_vector->rx_ring)
727                 rx_queue = q_vector->rx_ring->reg_idx;
728         if (q_vector->tx_ring)
729                 tx_queue = q_vector->tx_ring->reg_idx;
730
731         switch (hw->mac.type) {
732         case e1000_82575:
733                 /* The 82575 assigns vectors using a bitmask, which matches the
734                  * bitmask for the EICR/EIMS/EIMC registers.  To assign one
735                  * or more queues to a vector, we write the appropriate bits
736                  * into the MSIXBM register for that vector. */
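                /*
                 * Example (derived from the code below): pairing RX queue 0 and
                 * TX queue 0 on vector 0 of a non-MSI-X setup writes
                 * E1000_EICR_RX_QUEUE0 | E1000_EICR_TX_QUEUE0 | E1000_EIMS_OTHER
                 * into MSIXBM(0).
                 */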
737                 if (rx_queue > IGB_N0_QUEUE)
738                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
739                 if (tx_queue > IGB_N0_QUEUE)
740                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
741                 if (!adapter->msix_entries && msix_vector == 0)
742                         msixbm |= E1000_EIMS_OTHER;
743                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
744                 q_vector->eims_value = msixbm;
745                 break;
746         case e1000_82576:
747                 /* 82576 uses a table-based method for assigning vectors.
748                  * Each queue has a single entry in the table to which we write
749                  * a vector number along with a "valid" bit.  Sadly, the layout
750                  * of the table is somewhat counterintuitive. */
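                /*
                 * Concretely (from the shifts below), IVAR0 entry n packs:
                 *   byte 0: RX queue n      byte 1: TX queue n
                 *   byte 2: RX queue n+8    byte 3: TX queue n+8
                 */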
751                 if (rx_queue > IGB_N0_QUEUE) {
752                         index = (rx_queue & 0x7);
753                         ivar = array_rd32(E1000_IVAR0, index);
754                         if (rx_queue < 8) {
755                                 /* vector goes into low byte of register */
756                                 ivar = ivar & 0xFFFFFF00;
757                                 ivar |= msix_vector | E1000_IVAR_VALID;
758                         } else {
759                                 /* vector goes into third byte of register */
760                                 ivar = ivar & 0xFF00FFFF;
761                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
762                         }
763                         array_wr32(E1000_IVAR0, index, ivar);
764                 }
765                 if (tx_queue > IGB_N0_QUEUE) {
766                         index = (tx_queue & 0x7);
767                         ivar = array_rd32(E1000_IVAR0, index);
768                         if (tx_queue < 8) {
769                                 /* vector goes into second byte of register */
770                                 ivar = ivar & 0xFFFF00FF;
771                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
772                         } else {
773                                 /* vector goes into high byte of register */
774                                 ivar = ivar & 0x00FFFFFF;
775                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
776                         }
777                         array_wr32(E1000_IVAR0, index, ivar);
778                 }
779                 q_vector->eims_value = 1 << msix_vector;
780                 break;
781         case e1000_82580:
782         case e1000_i350:
783                 /* The 82580 uses the same table-based approach as the 82576,
784                  * but it has fewer IVAR entries, so two queues share each
785                  * entry, with odd-numbered queues mapped to the upper bytes. */
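                /*
                 * Concretely (from the shifts below), IVAR0 entry n packs:
                 *   byte 0: RX queue 2n      byte 1: TX queue 2n
                 *   byte 2: RX queue 2n+1    byte 3: TX queue 2n+1
                 */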
785                 if (rx_queue > IGB_N0_QUEUE) {
786                         index = (rx_queue >> 1);
787                         ivar = array_rd32(E1000_IVAR0, index);
788                         if (rx_queue & 0x1) {
789                                 /* vector goes into third byte of register */
790                                 ivar = ivar & 0xFF00FFFF;
791                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
792                         } else {
793                                 /* vector goes into low byte of register */
794                                 ivar = ivar & 0xFFFFFF00;
795                                 ivar |= msix_vector | E1000_IVAR_VALID;
796                         }
797                         array_wr32(E1000_IVAR0, index, ivar);
798                 }
799                 if (tx_queue > IGB_N0_QUEUE) {
800                         index = (tx_queue >> 1);
801                         ivar = array_rd32(E1000_IVAR0, index);
802                         if (tx_queue & 0x1) {
803                                 /* vector goes into high byte of register */
804                                 ivar = ivar & 0x00FFFFFF;
805                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
806                         } else {
807                                 /* vector goes into second byte of register */
808                                 ivar = ivar & 0xFFFF00FF;
809                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
810                         }
811                         array_wr32(E1000_IVAR0, index, ivar);
812                 }
813                 q_vector->eims_value = 1 << msix_vector;
814                 break;
815         default:
816                 BUG();
817                 break;
818         }
819
820         /* add q_vector eims value to global eims_enable_mask */
821         adapter->eims_enable_mask |= q_vector->eims_value;
822
823         /* configure q_vector to set itr on first interrupt */
824         q_vector->set_itr = 1;
825 }
826
827 /**
828  * igb_configure_msix - Configure MSI-X hardware
829  *
830  * igb_configure_msix sets up the hardware to properly
831  * generate MSI-X interrupts.
832  **/
833 static void igb_configure_msix(struct igb_adapter *adapter)
834 {
835         u32 tmp;
836         int i, vector = 0;
837         struct e1000_hw *hw = &adapter->hw;
838
839         adapter->eims_enable_mask = 0;
840
841         /* set vector for other causes, i.e. link changes */
842         switch (hw->mac.type) {
843         case e1000_82575:
844                 tmp = rd32(E1000_CTRL_EXT);
845                 /* enable MSI-X PBA support */
846                 tmp |= E1000_CTRL_EXT_PBA_CLR;
847
848                 /* Auto-Mask interrupts upon ICR read. */
849                 tmp |= E1000_CTRL_EXT_EIAME;
850                 tmp |= E1000_CTRL_EXT_IRCA;
851
852                 wr32(E1000_CTRL_EXT, tmp);
853
854                 /* enable msix_other interrupt */
855                 array_wr32(E1000_MSIXBM(0), vector++,
856                                       E1000_EIMS_OTHER);
857                 adapter->eims_other = E1000_EIMS_OTHER;
858
859                 break;
860
861         case e1000_82576:
862         case e1000_82580:
863         case e1000_i350:
864                 /* Turn on MSI-X capability first, or our settings
865                  * won't stick.  And it will take days to debug. */
866                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
867                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
868                                 E1000_GPIE_NSICR);
869
870                 /* enable msix_other interrupt */
871                 adapter->eims_other = 1 << vector;
872                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
873
874                 wr32(E1000_IVAR_MISC, tmp);
875                 break;
876         default:
877                 /* do nothing, since nothing else supports MSI-X */
878                 break;
879         } /* switch (hw->mac.type) */
880
881         adapter->eims_enable_mask |= adapter->eims_other;
882
883         for (i = 0; i < adapter->num_q_vectors; i++)
884                 igb_assign_vector(adapter->q_vector[i], vector++);
885
886         wrfl();
887 }
888
889 /**
890  * igb_request_msix - Initialize MSI-X interrupts
891  *
892  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
893  * kernel.
894  **/
895 static int igb_request_msix(struct igb_adapter *adapter)
896 {
897         struct net_device *netdev = adapter->netdev;
898         struct e1000_hw *hw = &adapter->hw;
899         int i, err = 0, vector = 0;
900
901         err = request_irq(adapter->msix_entries[vector].vector,
902                           igb_msix_other, 0, netdev->name, adapter);
903         if (err)
904                 goto out;
905         vector++;
906
907         for (i = 0; i < adapter->num_q_vectors; i++) {
908                 struct igb_q_vector *q_vector = adapter->q_vector[i];
909
910                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
911
912                 if (q_vector->rx_ring && q_vector->tx_ring)
913                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
914                                 q_vector->rx_ring->queue_index);
915                 else if (q_vector->tx_ring)
916                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
917                                 q_vector->tx_ring->queue_index);
918                 else if (q_vector->rx_ring)
919                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
920                                 q_vector->rx_ring->queue_index);
921                 else
922                         sprintf(q_vector->name, "%s-unused", netdev->name);
923
924                 err = request_irq(adapter->msix_entries[vector].vector,
925                                   igb_msix_ring, 0, q_vector->name,
926                                   q_vector);
927                 if (err)
928                         goto out;
929                 vector++;
930         }
931
932         igb_configure_msix(adapter);
933         return 0;
934 out:
935         return err;
936 }
937
938 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
939 {
940         if (adapter->msix_entries) {
941                 pci_disable_msix(adapter->pdev);
942                 kfree(adapter->msix_entries);
943                 adapter->msix_entries = NULL;
944         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
945                 pci_disable_msi(adapter->pdev);
946         }
947 }
948
949 /**
950  * igb_free_q_vectors - Free memory allocated for interrupt vectors
951  * @adapter: board private structure to initialize
952  *
953  * This function frees the memory allocated to the q_vectors.  In addition if
954  * NAPI is enabled it will delete any references to the NAPI struct prior
955  * to freeing the q_vector.
956  **/
957 static void igb_free_q_vectors(struct igb_adapter *adapter)
958 {
959         int v_idx;
960
961         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
962                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
963                 adapter->q_vector[v_idx] = NULL;
964                 if (!q_vector)
965                         continue;
966                 netif_napi_del(&q_vector->napi);
967                 kfree(q_vector);
968         }
969         adapter->num_q_vectors = 0;
970 }
971
972 /**
973  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
974  *
975  * This function resets the device so that it has 0 rx queues, tx queues, and
976  * MSI-X interrupts allocated.
977  */
978 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
979 {
980         igb_free_queues(adapter);
981         igb_free_q_vectors(adapter);
982         igb_reset_interrupt_capability(adapter);
983 }
984
985 /**
986  * igb_set_interrupt_capability - set MSI or MSI-X if supported
987  *
988  * Attempt to configure interrupts using the best available
989  * capabilities of the hardware and kernel.
990  **/
991 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
992 {
993         int err;
994         int numvecs, i;
995
996         /* Number of supported queues. */
997         adapter->num_rx_queues = adapter->rss_queues;
998         if (adapter->vfs_allocated_count)
999                 adapter->num_tx_queues = 1;
1000         else
1001                 adapter->num_tx_queues = adapter->rss_queues;
1002
1003         /* start with one vector for every rx queue */
1004         numvecs = adapter->num_rx_queues;
1005
1006         /* if tx handler is separate add 1 for every tx queue */
1007         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1008                 numvecs += adapter->num_tx_queues;
1009
1010         /* store the number of vectors reserved for queues */
1011         adapter->num_q_vectors = numvecs;
1012
1013         /* add 1 vector for link status interrupts */
1014         numvecs++;
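        /*
         * Worked example: with 4 RSS queues and unpaired TX handlers this
         * requests 4 RX + 4 TX + 1 link-status = 9 MSI-X vectors; with
         * IGB_FLAG_QUEUE_PAIRS set it would be 4 + 1 = 5.
         */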
1015         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1016                                         GFP_KERNEL);
1017         if (!adapter->msix_entries)
1018                 goto msi_only;
1019
1020         for (i = 0; i < numvecs; i++)
1021                 adapter->msix_entries[i].entry = i;
1022
1023         err = pci_enable_msix(adapter->pdev,
1024                               adapter->msix_entries,
1025                               numvecs);
1026         if (err == 0)
1027                 goto out;
1028
1029         igb_reset_interrupt_capability(adapter);
1030
1031         /* If we can't do MSI-X, try MSI */
1032 msi_only:
1033 #ifdef CONFIG_PCI_IOV
1034         /* disable SR-IOV for non-MSI-X configurations */
1035         if (adapter->vf_data) {
1036                 struct e1000_hw *hw = &adapter->hw;
1037                 /* disable iov and allow time for transactions to clear */
1038                 pci_disable_sriov(adapter->pdev);
1039                 msleep(500);
1040
1041                 kfree(adapter->vf_data);
1042                 adapter->vf_data = NULL;
1043                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1044                 msleep(100);
1045                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1046         }
1047 #endif
1048         adapter->vfs_allocated_count = 0;
1049         adapter->rss_queues = 1;
1050         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1051         adapter->num_rx_queues = 1;
1052         adapter->num_tx_queues = 1;
1053         adapter->num_q_vectors = 1;
1054         if (!pci_enable_msi(adapter->pdev))
1055                 adapter->flags |= IGB_FLAG_HAS_MSI;
1056 out:
1057         /* Notify the stack of the (possibly) reduced queue counts. */
1058         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1059         return netif_set_real_num_rx_queues(adapter->netdev,
1060                                             adapter->num_rx_queues);
1061 }
1062
1063 /**
1064  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1065  * @adapter: board private structure to initialize
1066  *
1067  * We allocate one q_vector per queue interrupt.  If allocation fails we
1068  * return -ENOMEM.
1069  **/
1070 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1071 {
1072         struct igb_q_vector *q_vector;
1073         struct e1000_hw *hw = &adapter->hw;
1074         int v_idx;
1075
1076         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1077                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1078                 if (!q_vector)
1079                         goto err_out;
1080                 q_vector->adapter = adapter;
1081                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1082                 q_vector->itr_val = IGB_START_ITR;
1083                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1084                 adapter->q_vector[v_idx] = q_vector;
1085         }
1086         return 0;
1087
1088 err_out:
1089         igb_free_q_vectors(adapter);
1090         return -ENOMEM;
1091 }
1092
1093 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1094                                       int ring_idx, int v_idx)
1095 {
1096         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1097
1098         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1099         q_vector->rx_ring->q_vector = q_vector;
1100         q_vector->itr_val = adapter->rx_itr_setting;
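        /* a setting of 1-3 selects a dynamic-ITR mode rather than an actual
         * interval, so seed the vector with the default starting rate */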
1101         if (q_vector->itr_val && q_vector->itr_val <= 3)
1102                 q_vector->itr_val = IGB_START_ITR;
1103 }
1104
1105 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1106                                       int ring_idx, int v_idx)
1107 {
1108         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1109
1110         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1111         q_vector->tx_ring->q_vector = q_vector;
1112         q_vector->itr_val = adapter->tx_itr_setting;
1113         if (q_vector->itr_val && q_vector->itr_val <= 3)
1114                 q_vector->itr_val = IGB_START_ITR;
1115 }
1116
1117 /**
1118  * igb_map_ring_to_vector - maps allocated queues to vectors
1119  *
1120  * This function maps the recently allocated queues to vectors.
1121  **/
1122 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1123 {
1124         int i;
1125         int v_idx = 0;
1126
1127         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1128             (adapter->num_q_vectors < adapter->num_tx_queues))
1129                 return -ENOMEM;
1130
1131         if (adapter->num_q_vectors >=
1132             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1133                 for (i = 0; i < adapter->num_rx_queues; i++)
1134                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1135                 for (i = 0; i < adapter->num_tx_queues; i++)
1136                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1137         } else {
1138                 for (i = 0; i < adapter->num_rx_queues; i++) {
1139                         if (i < adapter->num_tx_queues)
1140                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1141                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1142                 }
1143                 for (; i < adapter->num_tx_queues; i++)
1144                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1145         }
1146         return 0;
1147 }
1148
1149 /**
1150  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1151  *
1152  * This function initializes the interrupts and allocates all of the queues.
1153  **/
1154 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1155 {
1156         struct pci_dev *pdev = adapter->pdev;
1157         int err;
1158
1159         err = igb_set_interrupt_capability(adapter);
1160         if (err)
1161                 return err;
1162
1163         err = igb_alloc_q_vectors(adapter);
1164         if (err) {
1165                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1166                 goto err_alloc_q_vectors;
1167         }
1168
1169         err = igb_alloc_queues(adapter);
1170         if (err) {
1171                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1172                 goto err_alloc_queues;
1173         }
1174
1175         err = igb_map_ring_to_vector(adapter);
1176         if (err) {
1177                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1178                 goto err_map_queues;
1179         }
1180
1181
1182         return 0;
1183 err_map_queues:
1184         igb_free_queues(adapter);
1185 err_alloc_queues:
1186         igb_free_q_vectors(adapter);
1187 err_alloc_q_vectors:
1188         igb_reset_interrupt_capability(adapter);
1189         return err;
1190 }
1191
1192 /**
1193  * igb_request_irq - initialize interrupts
1194  *
1195  * Attempts to configure interrupts using the best available
1196  * capabilities of the hardware and kernel.
1197  **/
1198 static int igb_request_irq(struct igb_adapter *adapter)
1199 {
1200         struct net_device *netdev = adapter->netdev;
1201         struct pci_dev *pdev = adapter->pdev;
1202         int err = 0;
1203
1204         if (adapter->msix_entries) {
1205                 err = igb_request_msix(adapter);
1206                 if (!err)
1207                         goto request_done;
1208                 /* fall back to MSI */
1209                 igb_clear_interrupt_scheme(adapter);
1210                 if (!pci_enable_msi(adapter->pdev))
1211                         adapter->flags |= IGB_FLAG_HAS_MSI;
1212                 igb_free_all_tx_resources(adapter);
1213                 igb_free_all_rx_resources(adapter);
1214                 adapter->num_tx_queues = 1;
1215                 adapter->num_rx_queues = 1;
1216                 adapter->num_q_vectors = 1;
1217                 err = igb_alloc_q_vectors(adapter);
1218                 if (err) {
1219                         dev_err(&pdev->dev,
1220                                 "Unable to allocate memory for vectors\n");
1221                         goto request_done;
1222                 }
1223                 err = igb_alloc_queues(adapter);
1224                 if (err) {
1225                         dev_err(&pdev->dev,
1226                                 "Unable to allocate memory for queues\n");
1227                         igb_free_q_vectors(adapter);
1228                         goto request_done;
1229                 }
1230                 igb_setup_all_tx_resources(adapter);
1231                 igb_setup_all_rx_resources(adapter);
1232         } else {
1233                 igb_assign_vector(adapter->q_vector[0], 0);
1234         }
1235
1236         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1237                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1238                                   netdev->name, adapter);
1239                 if (!err)
1240                         goto request_done;
1241
1242                 /* fall back to legacy interrupts */
1243                 igb_reset_interrupt_capability(adapter);
1244                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1245         }
1246
1247         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1248                           netdev->name, adapter);
1249
1250         if (err)
1251                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1252                         err);
1253
1254 request_done:
1255         return err;
1256 }
1257
1258 static void igb_free_irq(struct igb_adapter *adapter)
1259 {
1260         if (adapter->msix_entries) {
1261                 int vector = 0, i;
1262
1263                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1264
1265                 for (i = 0; i < adapter->num_q_vectors; i++) {
1266                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1267                         free_irq(adapter->msix_entries[vector++].vector,
1268                                  q_vector);
1269                 }
1270         } else {
1271                 free_irq(adapter->pdev->irq, adapter);
1272         }
1273 }
1274
1275 /**
1276  * igb_irq_disable - Mask off interrupt generation on the NIC
1277  * @adapter: board private structure
1278  **/
1279 static void igb_irq_disable(struct igb_adapter *adapter)
1280 {
1281         struct e1000_hw *hw = &adapter->hw;
1282
1283         /*
1284          * We need to be careful when disabling interrupts.  The VFs are also
1285          * mapped into these registers, and clearing bits indiscriminately can
1286          * disturb the VF drivers, so clear only the bits we set.
1287          */
1288         if (adapter->msix_entries) {
1289                 u32 regval = rd32(E1000_EIAM);
1290                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1291                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1292                 regval = rd32(E1000_EIAC);
1293                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1294         }
1295
1296         wr32(E1000_IAM, 0);
1297         wr32(E1000_IMC, ~0);
1298         wrfl();
1299         if (adapter->msix_entries) {
1300                 int i;
1301                 for (i = 0; i < adapter->num_q_vectors; i++)
1302                         synchronize_irq(adapter->msix_entries[i].vector);
1303         } else {
1304                 synchronize_irq(adapter->pdev->irq);
1305         }
1306 }
1307
1308 /**
1309  * igb_irq_enable - Enable default interrupt generation settings
1310  * @adapter: board private structure
1311  **/
1312 static void igb_irq_enable(struct igb_adapter *adapter)
1313 {
1314         struct e1000_hw *hw = &adapter->hw;
1315
1316         if (adapter->msix_entries) {
1317                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1318                 u32 regval = rd32(E1000_EIAC);
1319                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1320                 regval = rd32(E1000_EIAM);
1321                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1322                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1323                 if (adapter->vfs_allocated_count) {
1324                         wr32(E1000_MBVFIMR, 0xFF);
1325                         ims |= E1000_IMS_VMMB;
1326                 }
1327                 if (adapter->hw.mac.type == e1000_82580)
1328                         ims |= E1000_IMS_DRSTA;
1329
1330                 wr32(E1000_IMS, ims);
1331         } else {
1332                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1333                                 E1000_IMS_DRSTA);
1334                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1335                                 E1000_IMS_DRSTA);
1336         }
1337 }
1338
1339 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1340 {
1341         struct e1000_hw *hw = &adapter->hw;
1342         u16 vid = adapter->hw.mng_cookie.vlan_id;
1343         u16 old_vid = adapter->mng_vlan_id;
1344
1345         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1346                 /* add VID to filter table */
1347                 igb_vfta_set(hw, vid, true);
1348                 adapter->mng_vlan_id = vid;
1349         } else {
1350                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1351         }
1352
1353         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1354             (vid != old_vid) &&
1355             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1356                 /* remove VID from filter table */
1357                 igb_vfta_set(hw, old_vid, false);
1358         }
1359 }
1360
1361 /**
1362  * igb_release_hw_control - release control of the h/w to f/w
1363  * @adapter: address of board private structure
1364  *
1365  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1366  * For ASF and Pass Through versions of f/w this means that the
1367  * driver is no longer loaded.
1368  *
1369  **/
1370 static void igb_release_hw_control(struct igb_adapter *adapter)
1371 {
1372         struct e1000_hw *hw = &adapter->hw;
1373         u32 ctrl_ext;
1374
1375         /* Let firmware take over control of h/w */
1376         ctrl_ext = rd32(E1000_CTRL_EXT);
1377         wr32(E1000_CTRL_EXT,
1378                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1379 }
1380
1381 /**
1382  * igb_get_hw_control - get control of the h/w from f/w
1383  * @adapter: address of board private structure
1384  *
1385  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1386  * For ASF and Pass Through versions of f/w this means that
1387  * the driver is loaded.
1388  *
1389  **/
1390 static void igb_get_hw_control(struct igb_adapter *adapter)
1391 {
1392         struct e1000_hw *hw = &adapter->hw;
1393         u32 ctrl_ext;
1394
1395         /* Let firmware know the driver has taken over */
1396         ctrl_ext = rd32(E1000_CTRL_EXT);
1397         wr32(E1000_CTRL_EXT,
1398                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1399 }
1400
1401 /**
1402  * igb_configure - configure the hardware for RX and TX
1403  * @adapter: private board structure
1404  **/
1405 static void igb_configure(struct igb_adapter *adapter)
1406 {
1407         struct net_device *netdev = adapter->netdev;
1408         int i;
1409
1410         igb_get_hw_control(adapter);
1411         igb_set_rx_mode(netdev);
1412
1413         igb_restore_vlan(adapter);
1414
1415         igb_setup_tctl(adapter);
1416         igb_setup_mrqc(adapter);
1417         igb_setup_rctl(adapter);
1418
1419         igb_configure_tx(adapter);
1420         igb_configure_rx(adapter);
1421
1422         igb_rx_fifo_flush_82575(&adapter->hw);
1423
1424         /* call igb_desc_unused(), which always leaves
1425          * at least 1 descriptor unused to ensure
1426          * next_to_use != next_to_clean */
1427         for (i = 0; i < adapter->num_rx_queues; i++) {
1428                 struct igb_ring *ring = adapter->rx_ring[i];
1429                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1430         }
1431 }
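/*
 * For reference, a sketch of the igb_desc_unused() helper used above
 * (illustrative; the actual helper is defined in igb.h):
 *
 *	static inline int igb_desc_unused(struct igb_ring *ring)
 *	{
 *		if (ring->next_to_clean > ring->next_to_use)
 *			return ring->next_to_clean - ring->next_to_use - 1;
 *		return ring->count + ring->next_to_clean - ring->next_to_use - 1;
 *	}
 *
 * Keeping one slot empty ensures next_to_use never catches up with
 * next_to_clean, so a full ring can be told apart from an empty one.
 */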
1432
1433 /**
1434  * igb_power_up_link - Power up the phy/serdes link
1435  * @adapter: address of board private structure
1436  **/
1437 void igb_power_up_link(struct igb_adapter *adapter)
1438 {
1439         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1440                 igb_power_up_phy_copper(&adapter->hw);
1441         else
1442                 igb_power_up_serdes_link_82575(&adapter->hw);
1443 }
1444
1445 /**
1446  * igb_power_down_link - Power down the phy/serdes link
1447  * @adapter: address of board private structure
1448  */
1449 static void igb_power_down_link(struct igb_adapter *adapter)
1450 {
1451         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1452                 igb_power_down_phy_copper_82575(&adapter->hw);
1453         else
1454                 igb_shutdown_serdes_link_82575(&adapter->hw);
1455 }
1456
1457 /**
1458  * igb_up - Open the interface and prepare it to handle traffic
1459  * @adapter: board private structure
1460  **/
1461 int igb_up(struct igb_adapter *adapter)
1462 {
1463         struct e1000_hw *hw = &adapter->hw;
1464         int i;
1465
1466         /* hardware has been reset, we need to reload some things */
1467         igb_configure(adapter);
1468
1469         clear_bit(__IGB_DOWN, &adapter->state);
1470
1471         for (i = 0; i < adapter->num_q_vectors; i++) {
1472                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1473                 napi_enable(&q_vector->napi);
1474         }
1475         if (adapter->msix_entries)
1476                 igb_configure_msix(adapter);
1477         else
1478                 igb_assign_vector(adapter->q_vector[0], 0);
1479
1480         /* Clear any pending interrupts. */
1481         rd32(E1000_ICR);
1482         igb_irq_enable(adapter);
1483
1484         /* notify VFs that reset has been completed */
1485         if (adapter->vfs_allocated_count) {
1486                 u32 reg_data = rd32(E1000_CTRL_EXT);
1487                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1488                 wr32(E1000_CTRL_EXT, reg_data);
1489         }
1490
1491         netif_tx_start_all_queues(adapter->netdev);
1492
1493         /* start the watchdog. */
1494         hw->mac.get_link_status = 1;
1495         schedule_work(&adapter->watchdog_task);
1496
1497         return 0;
1498 }
1499
1500 void igb_down(struct igb_adapter *adapter)
1501 {
1502         struct net_device *netdev = adapter->netdev;
1503         struct e1000_hw *hw = &adapter->hw;
1504         u32 tctl, rctl;
1505         int i;
1506
1507         /* signal that we're down so the interrupt handler does not
1508          * reschedule our watchdog timer */
1509         set_bit(__IGB_DOWN, &adapter->state);
1510
1511         /* disable receives in the hardware */
1512         rctl = rd32(E1000_RCTL);
1513         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1514         /* flush and sleep below */
1515
1516         netif_tx_stop_all_queues(netdev);
1517
1518         /* disable transmits in the hardware */
1519         tctl = rd32(E1000_TCTL);
1520         tctl &= ~E1000_TCTL_EN;
1521         wr32(E1000_TCTL, tctl);
1522         /* flush both disables and wait for them to finish */
1523         wrfl();
1524         msleep(10);
1525
1526         for (i = 0; i < adapter->num_q_vectors; i++) {
1527                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1528                 napi_disable(&q_vector->napi);
1529         }
1530
1531         igb_irq_disable(adapter);
1532
1533         del_timer_sync(&adapter->watchdog_timer);
1534         del_timer_sync(&adapter->phy_info_timer);
1535
1536         netif_carrier_off(netdev);
1537
1538         /* record the stats before reset */
1539         spin_lock(&adapter->stats64_lock);
1540         igb_update_stats(adapter, &adapter->stats64);
1541         spin_unlock(&adapter->stats64_lock);
1542
1543         adapter->link_speed = 0;
1544         adapter->link_duplex = 0;
1545
1546         if (!pci_channel_offline(adapter->pdev))
1547                 igb_reset(adapter);
1548         igb_clean_all_tx_rings(adapter);
1549         igb_clean_all_rx_rings(adapter);
1550 #ifdef CONFIG_IGB_DCA
1551
1552         /* since we reset the hardware DCA settings were cleared */
1553         igb_setup_dca(adapter);
1554 #endif
1555 }
1556
1557 void igb_reinit_locked(struct igb_adapter *adapter)
1558 {
1559         WARN_ON(in_interrupt());
1560         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1561                 msleep(1);
1562         igb_down(adapter);
1563         igb_up(adapter);
1564         clear_bit(__IGB_RESETTING, &adapter->state);
1565 }
1566
1567 void igb_reset(struct igb_adapter *adapter)
1568 {
1569         struct pci_dev *pdev = adapter->pdev;
1570         struct e1000_hw *hw = &adapter->hw;
1571         struct e1000_mac_info *mac = &hw->mac;
1572         struct e1000_fc_info *fc = &hw->fc;
1573         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1574         u16 hwm;
1575
1576         /* Repartition the PBA for MTUs greater than 9K.
1577          * CTRL.RST must be asserted for this to take effect.
1578          */
1579         switch (mac->type) {
1580         case e1000_i350:
1581         case e1000_82580:
1582                 pba = rd32(E1000_RXPBS);
1583                 pba = igb_rxpbs_adjust_82580(pba);
1584                 break;
1585         case e1000_82576:
1586                 pba = rd32(E1000_RXPBS);
1587                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1588                 break;
1589         case e1000_82575:
1590         default:
1591                 pba = E1000_PBA_34K;
1592                 break;
1593         }
1594
1595         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1596             (mac->type < e1000_82576)) {
1597                 /* adjust PBA for jumbo frames */
1598                 wr32(E1000_PBA, pba);
1599
1600                 /* To maintain wire speed transmits, the Tx FIFO should be
1601                  * large enough to accommodate two full transmit packets,
1602                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1603                  * the Rx FIFO should be large enough to accommodate at least
1604                  * one full receive packet and is similarly rounded up and
1605                  * expressed in KB. */
1606                 pba = rd32(E1000_PBA);
1607                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1608                 tx_space = pba >> 16;
1609                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1610                 pba &= 0xffff;
1611                 /* the Tx FIFO also stores 16 bytes of information per packet,
1612                  * but needn't include the Ethernet FCS since hardware appends it */
1613                 min_tx_space = (adapter->max_frame_size +
1614                                 sizeof(union e1000_adv_tx_desc) -
1615                                 ETH_FCS_LEN) * 2;
1616                 min_tx_space = ALIGN(min_tx_space, 1024);
1617                 min_tx_space >>= 10;
1618                 /* software strips receive CRC, so leave room for it */
1619                 min_rx_space = adapter->max_frame_size;
1620                 min_rx_space = ALIGN(min_rx_space, 1024);
1621                 min_rx_space >>= 10;
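                /*
                 * Worked example (illustrative, assuming a standard 1500-byte
                 * MTU): max_frame_size is 1518, so min_tx_space becomes
                 * (1518 + 16 - 4) * 2 = 3060 bytes, aligned up to 3072 and
                 * shifted down to 3KB, while min_rx_space is 1518 aligned up
                 * to 2048, i.e. 2KB.
                 */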
1622
1623                 /* If current Tx allocation is less than the min Tx FIFO size,
1624                  * and the min Tx FIFO size is less than the current Rx FIFO
1625                  * allocation, take space away from current Rx allocation */
1626                 if (tx_space < min_tx_space &&
1627                     ((min_tx_space - tx_space) < pba)) {
1628                         pba = pba - (min_tx_space - tx_space);
1629
1630                         /* if short on rx space, rx wins and must trump tx
1631                          * adjustment */
1632                         if (pba < min_rx_space)
1633                                 pba = min_rx_space;
1634                 }
1635                 wr32(E1000_PBA, pba);
1636         }
1637
1638         /* flow control settings */
1639         /* The high water mark must be low enough to fit one full frame
1640          * (or the size used for early receive) above it in the Rx FIFO.
1641          * Set it to the lower of:
1642          * - 90% of the Rx FIFO size, or
1643          * - the full Rx FIFO size minus two full frames */
1644         hwm = min(((pba << 10) * 9 / 10),
1645                         ((pba << 10) - 2 * adapter->max_frame_size));
1646
1647         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1648         fc->low_water = fc->high_water - 16;
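        /*
         * Illustrative numbers, assuming the 82575 default 34KB PBA and a
         * 1518-byte max frame: pba << 10 = 34816; 90% of that is 31334,
         * and 34816 - 2 * 1518 = 31780, so hwm = 31334.  Masking to
         * 16-byte granularity gives high_water = 31328, low_water = 31312.
         */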
1649         fc->pause_time = 0xFFFF;
1650         fc->send_xon = 1;
1651         fc->current_mode = fc->requested_mode;
1652
1653         /* reset VF state, then disable transmits and receives for all VFs */
1654         if (adapter->vfs_allocated_count) {
1655                 int i;
1656                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1657                         adapter->vf_data[i].flags = 0;
1658
1659                 /* ping all the active vfs to let them know we are going down */
1660                 igb_ping_all_vfs(adapter);
1661
1662                 /* disable transmits and receives */
1663                 wr32(E1000_VFRE, 0);
1664                 wr32(E1000_VFTE, 0);
1665         }
1666
1667         /* Allow time for pending master requests to run */
1668         hw->mac.ops.reset_hw(hw);
1669         wr32(E1000_WUC, 0);
1670
1671         if (hw->mac.ops.init_hw(hw))
1672                 dev_err(&pdev->dev, "Hardware Error\n");
1673
1674         if (hw->mac.type == e1000_82580) {
1675                 u32 reg = rd32(E1000_PCIEMISC);
1676                 wr32(E1000_PCIEMISC,
1677                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1678         }
1679         if (!netif_running(adapter->netdev))
1680                 igb_power_down_link(adapter);
1681
1682         igb_update_mng_vlan(adapter);
1683
1684         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1685         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1686
1687         igb_get_phy_info(hw);
1688 }
1689
1690 static const struct net_device_ops igb_netdev_ops = {
1691         .ndo_open               = igb_open,
1692         .ndo_stop               = igb_close,
1693         .ndo_start_xmit         = igb_xmit_frame_adv,
1694         .ndo_get_stats64        = igb_get_stats64,
1695         .ndo_set_rx_mode        = igb_set_rx_mode,
1696         .ndo_set_multicast_list = igb_set_rx_mode,
1697         .ndo_set_mac_address    = igb_set_mac,
1698         .ndo_change_mtu         = igb_change_mtu,
1699         .ndo_do_ioctl           = igb_ioctl,
1700         .ndo_tx_timeout         = igb_tx_timeout,
1701         .ndo_validate_addr      = eth_validate_addr,
1702         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1703         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1704         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1705         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1706         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1707         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1708         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1709 #ifdef CONFIG_NET_POLL_CONTROLLER
1710         .ndo_poll_controller    = igb_netpoll,
1711 #endif
1712 };
1713
1714 /**
1715  * igb_probe - Device Initialization Routine
1716  * @pdev: PCI device information struct
1717  * @ent: entry in igb_pci_tbl
1718  *
1719  * Returns 0 on success, negative on failure
1720  *
1721  * igb_probe initializes an adapter identified by a pci_dev structure.
1722  * The OS initialization, configuring of the adapter private structure,
1723  * and a hardware reset occur.
1724  **/
1725 static int __devinit igb_probe(struct pci_dev *pdev,
1726                                const struct pci_device_id *ent)
1727 {
1728         struct net_device *netdev;
1729         struct igb_adapter *adapter;
1730         struct e1000_hw *hw;
1731         u16 eeprom_data = 0;
1732         static int global_quad_port_a; /* global quad port a indication */
1733         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1734         unsigned long mmio_start, mmio_len;
1735         int err, pci_using_dac;
1736         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1737         u32 part_num;
1738
1739         /* Catch broken hardware that put the wrong VF device ID in
1740          * the PCIe SR-IOV capability.
1741          */
1742         if (pdev->is_virtfn) {
1743                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1744                      pci_name(pdev), pdev->vendor, pdev->device);
1745                 return -EINVAL;
1746         }
1747
1748         err = pci_enable_device_mem(pdev);
1749         if (err)
1750                 return err;
1751
1752         pci_using_dac = 0;
1753         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1754         if (!err) {
1755                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1756                 if (!err)
1757                         pci_using_dac = 1;
1758         } else {
1759                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1760                 if (err) {
1761                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1762                         if (err) {
1763                                 dev_err(&pdev->dev, "No usable DMA "
1764                                         "configuration, aborting\n");
1765                                 goto err_dma;
1766                         }
1767                 }
1768         }
1769
1770         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1771                                            IORESOURCE_MEM),
1772                                            igb_driver_name);
1773         if (err)
1774                 goto err_pci_reg;
1775
1776         pci_enable_pcie_error_reporting(pdev);
1777
1778         pci_set_master(pdev);
1779         pci_save_state(pdev);
1780
1781         err = -ENOMEM;
1782         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1783                                    IGB_ABS_MAX_TX_QUEUES);
1784         if (!netdev)
1785                 goto err_alloc_etherdev;
1786
1787         SET_NETDEV_DEV(netdev, &pdev->dev);
1788
1789         pci_set_drvdata(pdev, netdev);
1790         adapter = netdev_priv(netdev);
1791         adapter->netdev = netdev;
1792         adapter->pdev = pdev;
1793         hw = &adapter->hw;
1794         hw->back = adapter;
1795         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1796
1797         mmio_start = pci_resource_start(pdev, 0);
1798         mmio_len = pci_resource_len(pdev, 0);
1799
1800         err = -EIO;
1801         hw->hw_addr = ioremap(mmio_start, mmio_len);
1802         if (!hw->hw_addr)
1803                 goto err_ioremap;
1804
1805         netdev->netdev_ops = &igb_netdev_ops;
1806         igb_set_ethtool_ops(netdev);
1807         netdev->watchdog_timeo = 5 * HZ;
1808
1809         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1810
1811         netdev->mem_start = mmio_start;
1812         netdev->mem_end = mmio_start + mmio_len;
1813
1814         /* PCI config space info */
1815         hw->vendor_id = pdev->vendor;
1816         hw->device_id = pdev->device;
1817         hw->revision_id = pdev->revision;
1818         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1819         hw->subsystem_device_id = pdev->subsystem_device;
1820
1821         /* Copy the default MAC, PHY and NVM function pointers */
1822         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1823         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1824         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1825         /* Initialize skew-specific constants */
1826         err = ei->get_invariants(hw);
1827         if (err)
1828                 goto err_sw_init;
1829
1830         /* setup the private structure */
1831         err = igb_sw_init(adapter);
1832         if (err)
1833                 goto err_sw_init;
1834
1835         igb_get_bus_info_pcie(hw);
1836
1837         hw->phy.autoneg_wait_to_complete = false;
1838
1839         /* Copper options */
1840         if (hw->phy.media_type == e1000_media_type_copper) {
1841                 hw->phy.mdix = AUTO_ALL_MODES;
1842                 hw->phy.disable_polarity_correction = false;
1843                 hw->phy.ms_type = e1000_ms_hw_default;
1844         }
1845
1846         if (igb_check_reset_block(hw))
1847                 dev_info(&pdev->dev,
1848                         "PHY reset is blocked due to SOL/IDER session.\n");
1849
1850         netdev->features = NETIF_F_SG |
1851                            NETIF_F_IP_CSUM |
1852                            NETIF_F_HW_VLAN_TX |
1853                            NETIF_F_HW_VLAN_RX |
1854                            NETIF_F_HW_VLAN_FILTER;
1855
1856         netdev->features |= NETIF_F_IPV6_CSUM;
1857         netdev->features |= NETIF_F_TSO;
1858         netdev->features |= NETIF_F_TSO6;
1859         netdev->features |= NETIF_F_GRO;
1860
1861         netdev->vlan_features |= NETIF_F_TSO;
1862         netdev->vlan_features |= NETIF_F_TSO6;
1863         netdev->vlan_features |= NETIF_F_IP_CSUM;
1864         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1865         netdev->vlan_features |= NETIF_F_SG;
1866
1867         if (pci_using_dac) {
1868                 netdev->features |= NETIF_F_HIGHDMA;
1869                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1870         }
1871
1872         if (hw->mac.type >= e1000_82576)
1873                 netdev->features |= NETIF_F_SCTP_CSUM;
1874
1875         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1876
1877         /* before reading the NVM, reset the controller to put the device in a
1878          * known good starting state */
1879         hw->mac.ops.reset_hw(hw);
1880
1881         /* make sure the NVM is good */
1882         if (igb_validate_nvm_checksum(hw) < 0) {
1883                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1884                 err = -EIO;
1885                 goto err_eeprom;
1886         }
1887
1888         /* copy the MAC address out of the NVM */
1889         if (hw->mac.ops.read_mac_addr(hw))
1890                 dev_err(&pdev->dev, "NVM Read Error\n");
1891
1892         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1893         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1894
1895         if (!is_valid_ether_addr(netdev->perm_addr)) {
1896                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1897                 err = -EIO;
1898                 goto err_eeprom;
1899         }
1900
1901         setup_timer(&adapter->watchdog_timer, igb_watchdog,
1902                     (unsigned long) adapter);
1903         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1904                     (unsigned long) adapter);
1905
1906         INIT_WORK(&adapter->reset_task, igb_reset_task);
1907         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1908
1909         /* Initialize link properties that are user-changeable */
1910         adapter->fc_autoneg = true;
1911         hw->mac.autoneg = true;
1912         hw->phy.autoneg_advertised = 0x2f;
1913
1914         hw->fc.requested_mode = e1000_fc_default;
1915         hw->fc.current_mode = e1000_fc_default;
1916
1917         igb_validate_mdi_setting(hw);
1918
1919         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1920          * enable the ACPI Magic Packet filter.
1921          */
1922
1923         if (hw->bus.func == 0)
1924                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1925         else if (hw->mac.type == e1000_82580)
1926                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1927                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1928                                  &eeprom_data);
1929         else if (hw->bus.func == 1)
1930                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1931
1932         if (eeprom_data & eeprom_apme_mask)
1933                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1934
1935         /* now that we have the eeprom settings, apply the special cases where
1936          * the eeprom may be wrong or the board simply won't support wake on
1937          * lan on a particular port */
1938         switch (pdev->device) {
1939         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1940                 adapter->eeprom_wol = 0;
1941                 break;
1942         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1943         case E1000_DEV_ID_82576_FIBER:
1944         case E1000_DEV_ID_82576_SERDES:
1945                 /* Wake events are only supported on port A for dual fiber
1946                  * adapters, regardless of the eeprom setting */
1947                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1948                         adapter->eeprom_wol = 0;
1949                 break;
1950         case E1000_DEV_ID_82576_QUAD_COPPER:
1951         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
1952                 /* if quad port adapter, disable WoL on all but port A */
1953                 if (global_quad_port_a != 0)
1954                         adapter->eeprom_wol = 0;
1955                 else
1956                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1957                 /* Reset for multiple quad port adapters */
1958                 if (++global_quad_port_a == 4)
1959                         global_quad_port_a = 0;
1960                 break;
1961         }
1962
1963         /* initialize the wol settings based on the eeprom settings */
1964         adapter->wol = adapter->eeprom_wol;
1965         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1966
1967         /* reset the hardware with the new settings */
1968         igb_reset(adapter);
1969
1970         /* let the f/w know that the h/w is now under the control of the
1971          * driver. */
1972         igb_get_hw_control(adapter);
1973
1974         strcpy(netdev->name, "eth%d");
1975         err = register_netdev(netdev);
1976         if (err)
1977                 goto err_register;
1978
1979         /* carrier off reporting is important to ethtool even BEFORE open */
1980         netif_carrier_off(netdev);
1981
1982 #ifdef CONFIG_IGB_DCA
1983         if (dca_add_requester(&pdev->dev) == 0) {
1984                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1985                 dev_info(&pdev->dev, "DCA enabled\n");
1986                 igb_setup_dca(adapter);
1987         }
1988
1989 #endif
1990         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1991         /* print bus type/speed/width info */
1992         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1993                  netdev->name,
1994                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1995                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
1996                                                             "unknown"),
1997                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1998                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1999                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2000                    "unknown"),
2001                  netdev->dev_addr);
2002
2003         igb_read_part_num(hw, &part_num);
2004         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
2005                 (part_num >> 8), (part_num & 0xff));
2006
2007         dev_info(&pdev->dev,
2008                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2009                 adapter->msix_entries ? "MSI-X" :
2010                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2011                 adapter->num_rx_queues, adapter->num_tx_queues);
2012
2013         return 0;
2014
2015 err_register:
2016         igb_release_hw_control(adapter);
2017 err_eeprom:
2018         if (!igb_check_reset_block(hw))
2019                 igb_reset_phy(hw);
2020
2021         if (hw->flash_address)
2022                 iounmap(hw->flash_address);
2023 err_sw_init:
2024         igb_clear_interrupt_scheme(adapter);
2025         iounmap(hw->hw_addr);
2026 err_ioremap:
2027         free_netdev(netdev);
2028 err_alloc_etherdev:
2029         pci_release_selected_regions(pdev,
2030                                      pci_select_bars(pdev, IORESOURCE_MEM));
2031 err_pci_reg:
2032 err_dma:
2033         pci_disable_device(pdev);
2034         return err;
2035 }
2036
2037 /**
2038  * igb_remove - Device Removal Routine
2039  * @pdev: PCI device information struct
2040  *
2041  * igb_remove is called by the PCI subsystem to alert the driver
2042  * that it should release a PCI device.  This could be caused by a
2043  * Hot-Plug event, or because the driver is going to be removed from
2044  * memory.
2045  **/
2046 static void __devexit igb_remove(struct pci_dev *pdev)
2047 {
2048         struct net_device *netdev = pci_get_drvdata(pdev);
2049         struct igb_adapter *adapter = netdev_priv(netdev);
2050         struct e1000_hw *hw = &adapter->hw;
2051
2052         /* flush_scheduled_work() may run work that reschedules our watchdog
2053          * task, so explicitly disable the watchdog from being rescheduled */
2054         set_bit(__IGB_DOWN, &adapter->state);
2055         del_timer_sync(&adapter->watchdog_timer);
2056         del_timer_sync(&adapter->phy_info_timer);
2057
2058         flush_scheduled_work();
2059
2060 #ifdef CONFIG_IGB_DCA
2061         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2062                 dev_info(&pdev->dev, "DCA disabled\n");
2063                 dca_remove_requester(&pdev->dev);
2064                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2065                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2066         }
2067 #endif
2068
2069         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2070          * would have already happened in close and is redundant. */
2071         igb_release_hw_control(adapter);
2072
2073         unregister_netdev(netdev);
2074
2075         igb_clear_interrupt_scheme(adapter);
2076
2077 #ifdef CONFIG_PCI_IOV
2078         /* reclaim resources allocated to VFs */
2079         if (adapter->vf_data) {
2080                 /* disable iov and allow time for transactions to clear */
2081                 pci_disable_sriov(pdev);
2082                 msleep(500);
2083
2084                 kfree(adapter->vf_data);
2085                 adapter->vf_data = NULL;
2086                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2087                 msleep(100);
2088                 dev_info(&pdev->dev, "IOV Disabled\n");
2089         }
2090 #endif
2091
2092         iounmap(hw->hw_addr);
2093         if (hw->flash_address)
2094                 iounmap(hw->flash_address);
2095         pci_release_selected_regions(pdev,
2096                                      pci_select_bars(pdev, IORESOURCE_MEM));
2097
2098         free_netdev(netdev);
2099
2100         pci_disable_pcie_error_reporting(pdev);
2101
2102         pci_disable_device(pdev);
2103 }
2104
2105 /**
2106  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2107  * @adapter: board private structure to initialize
2108  *
2109  * This function initializes the vf specific data storage and then attempts to
2110  * allocate the VFs.  The reason for ordering it this way is because it is much
2111  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2112  * the memory for the VFs.
2113  **/
2114 static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
2115 {
2116 #ifdef CONFIG_PCI_IOV
2117         struct pci_dev *pdev = adapter->pdev;
2118
2119         if (adapter->vfs_allocated_count) {
2120                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2121                                            sizeof(struct vf_data_storage),
2122                                            GFP_KERNEL);
2123                 /* if allocation failed then we do not support SR-IOV */
2124                 if (!adapter->vf_data) {
2125                         adapter->vfs_allocated_count = 0;
2126                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2127                                 "Data Storage\n");
2128                 }
2129         }
2130
2131         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2132                 kfree(adapter->vf_data);
2133                 adapter->vf_data = NULL;
2134 #endif /* CONFIG_PCI_IOV */
2135                 adapter->vfs_allocated_count = 0;
2136 #ifdef CONFIG_PCI_IOV
2137         } else {
2138                 unsigned char mac_addr[ETH_ALEN];
2139                 int i;
2140                 dev_info(&pdev->dev, "%d vfs allocated\n",
2141                          adapter->vfs_allocated_count);
2142                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2143                         random_ether_addr(mac_addr);
2144                         igb_set_vf_mac(adapter, i, mac_addr);
2145                 }
2146         }
2147 #endif /* CONFIG_PCI_IOV */
2148 }
2149
2150
2151 /**
2152  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2153  * @adapter: board private structure to initialize
2154  *
2155  * igb_init_hw_timer initializes the function pointer and values for the hw
2156  * timer found in hardware.
2157  **/
2158 static void igb_init_hw_timer(struct igb_adapter *adapter)
2159 {
2160         struct e1000_hw *hw = &adapter->hw;
2161
2162         switch (hw->mac.type) {
2163         case e1000_i350:
2164         case e1000_82580:
2165                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2166                 adapter->cycles.read = igb_read_clock;
2167                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2168                 adapter->cycles.mult = 1;
2169                 /*
2170                  * The 82580 timesync advances the system timer by 8ns every 8ns
2171                  * and the value cannot be shifted.  Instead we need to shift
2172                  * the registers to generate a 64bit timer value.  As a result
2173                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2174                  * 24 in order to generate a larger value for synchronization.
2175                  */
2176                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2177                 /* disable system timer temporarily by setting bit 31 */
2178                 wr32(E1000_TSAUXC, 0x80000000);
2179                 wrfl();
2180
2181                 /* Set registers so that rollover occurs soon to test this. */
2182                 wr32(E1000_SYSTIMR, 0x00000000);
2183                 wr32(E1000_SYSTIML, 0x80000000);
2184                 wr32(E1000_SYSTIMH, 0x000000FF);
2185                 wrfl();
2186
2187                 /* enable system timer by clearing bit 31 */
2188                 wr32(E1000_TSAUXC, 0x0);
2189                 wrfl();
2190
2191                 timecounter_init(&adapter->clock,
2192                                  &adapter->cycles,
2193                                  ktime_to_ns(ktime_get_real()));
2194                 /*
2195                  * Synchronize our NIC clock against system wall clock. NIC
2196                  * time stamp reading requires ~3us per sample, and each sample
2197                  * proved stable even under load, so we only require 10
2198                  * samples for each offset comparison.
2199                  */
2200                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2201                 adapter->compare.source = &adapter->clock;
2202                 adapter->compare.target = ktime_get_real;
2203                 adapter->compare.num_samples = 10;
2204                 timecompare_update(&adapter->compare, 0);
2205                 break;
2206         case e1000_82576:
2207                 /*
2208                  * Initialize hardware timer: we keep it running just in case
2209                  * that some program needs it later on.
2210                  */
2211                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2212                 adapter->cycles.read = igb_read_clock;
2213                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2214                 adapter->cycles.mult = 1;
2215                 /*
2216                  * Scale the NIC clock cycle by a large factor so that
2217                  * relatively small clock corrections can be added or
2218                  * subtracted at each clock tick. The drawbacks of a large
2219                  * factor are a) that the clock register overflows more quickly
2220                  * (not such a big deal) and b) that the increment per tick has
2221                  * to fit into 24 bits.  As a result we need to use a shift of
2222                  * 19 so we can fit a value of 16 into the TIMINCA register.
2223                  */
2224                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2225                 wr32(E1000_TIMINCA,
2226                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2227                                 (16 << IGB_82576_TSYNC_SHIFT));
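                /*
                 * Sanity check on the shift choice (illustrative): with a
                 * shift of 19, the per-tick increment written above is
                 * 16 << 19 = 0x800000, which just fits in TIMINCA's 24-bit
                 * increment field.
                 */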
2228
2229                 /* Set registers so that rollover occurs soon to test this. */
2230                 wr32(E1000_SYSTIML, 0x00000000);
2231                 wr32(E1000_SYSTIMH, 0xFF800000);
2232                 wrfl();
2233
2234                 timecounter_init(&adapter->clock,
2235                                  &adapter->cycles,
2236                                  ktime_to_ns(ktime_get_real()));
2237                 /*
2238                  * Synchronize our NIC clock against system wall clock. NIC
2239                  * time stamp reading requires ~3us per sample, and each sample
2240                  * proved stable even under load, so we only require 10
2241                  * samples for each offset comparison.
2242                  */
2243                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2244                 adapter->compare.source = &adapter->clock;
2245                 adapter->compare.target = ktime_get_real;
2246                 adapter->compare.num_samples = 10;
2247                 timecompare_update(&adapter->compare, 0);
2248                 break;
2249         case e1000_82575:
2250                 /* 82575 does not support timesync */
2251         default:
2252                 break;
2253         }
2254
2255 }
2256
2257 /**
2258  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2259  * @adapter: board private structure to initialize
2260  *
2261  * igb_sw_init initializes the Adapter private data structure.
2262  * Fields are initialized based on PCI device information and
2263  * OS network device settings (MTU size).
2264  **/
2265 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2266 {
2267         struct e1000_hw *hw = &adapter->hw;
2268         struct net_device *netdev = adapter->netdev;
2269         struct pci_dev *pdev = adapter->pdev;
2270
2271         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2272
2273         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2274         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2275         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2276         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2277
2278         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2279         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2280
2281         spin_lock_init(&adapter->stats64_lock);
2282 #ifdef CONFIG_PCI_IOV
2283         if (hw->mac.type == e1000_82576)
2284                 adapter->vfs_allocated_count = (max_vfs > 7) ? 7 : max_vfs;
2285
2286 #endif /* CONFIG_PCI_IOV */
2287         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2288
2289         /*
2290          * if rss_queues > 4, or if VFs are being allocated while more than
2291          * one rss_queue is in use, combine the queues into queue pairs in
2292          * order to conserve interrupts due to the limited vector supply
2293          */
2294         if ((adapter->rss_queues > 4) ||
2295             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2296                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
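
        /*
         * Illustrative interrupt budget: with 8 RSS queues, separate Tx and
         * Rx vectors would need 16 queue interrupts plus one for link and
         * other causes; pairing each Tx/Rx ring on one vector halves that
         * to 8 + 1.
         */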
2297
2298         /* This call may decrease the number of queues */
2299         if (igb_init_interrupt_scheme(adapter)) {
2300                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2301                 return -ENOMEM;
2302         }
2303
2304         igb_init_hw_timer(adapter);
2305         igb_probe_vfs(adapter);
2306
2307         /* Explicitly disable IRQ since the NIC can be in any state. */
2308         igb_irq_disable(adapter);
2309
2310         set_bit(__IGB_DOWN, &adapter->state);
2311         return 0;
2312 }
2313
2314 /**
2315  * igb_open - Called when a network interface is made active
2316  * @netdev: network interface device structure
2317  *
2318  * Returns 0 on success, negative value on failure
2319  *
2320  * The open entry point is called when a network interface is made
2321  * active by the system (IFF_UP).  At this point all resources needed
2322  * for transmit and receive operations are allocated, the interrupt
2323  * handler is registered with the OS, the watchdog timer is started,
2324  * and the stack is notified that the interface is ready.
2325  **/
2326 static int igb_open(struct net_device *netdev)
2327 {
2328         struct igb_adapter *adapter = netdev_priv(netdev);
2329         struct e1000_hw *hw = &adapter->hw;
2330         int err;
2331         int i;
2332
2333         /* disallow open during test */
2334         if (test_bit(__IGB_TESTING, &adapter->state))
2335                 return -EBUSY;
2336
2337         netif_carrier_off(netdev);
2338
2339         /* allocate transmit descriptors */
2340         err = igb_setup_all_tx_resources(adapter);
2341         if (err)
2342                 goto err_setup_tx;
2343
2344         /* allocate receive descriptors */
2345         err = igb_setup_all_rx_resources(adapter);
2346         if (err)
2347                 goto err_setup_rx;
2348
2349         igb_power_up_link(adapter);
2350
2351         /* before we allocate an interrupt, we must be ready to handle it.
2352          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2353          * as soon as we call request_irq(), so we have to setup our
2354          * clean_rx handler before we do so.  */
2355         igb_configure(adapter);
2356
2357         err = igb_request_irq(adapter);
2358         if (err)
2359                 goto err_req_irq;
2360
2361         /* From here on the code is the same as igb_up() */
2362         clear_bit(__IGB_DOWN, &adapter->state);
2363
2364         for (i = 0; i < adapter->num_q_vectors; i++) {
2365                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2366                 napi_enable(&q_vector->napi);
2367         }
2368
2369         /* Clear any pending interrupts. */
2370         rd32(E1000_ICR);
2371
2372         igb_irq_enable(adapter);
2373
2374         /* notify VFs that reset has been completed */
2375         if (adapter->vfs_allocated_count) {
2376                 u32 reg_data = rd32(E1000_CTRL_EXT);
2377                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2378                 wr32(E1000_CTRL_EXT, reg_data);
2379         }
2380
2381         netif_tx_start_all_queues(netdev);
2382
2383         /* start the watchdog. */
2384         hw->mac.get_link_status = 1;
2385         schedule_work(&adapter->watchdog_task);
2386
2387         return 0;
2388
2389 err_req_irq:
2390         igb_release_hw_control(adapter);
2391         igb_power_down_link(adapter);
2392         igb_free_all_rx_resources(adapter);
2393 err_setup_rx:
2394         igb_free_all_tx_resources(adapter);
2395 err_setup_tx:
2396         igb_reset(adapter);
2397
2398         return err;
2399 }
2400
2401 /**
2402  * igb_close - Disables a network interface
2403  * @netdev: network interface device structure
2404  *
2405  * Returns 0, this is not allowed to fail
2406  *
2407  * The close entry point is called when an interface is de-activated
2408  * by the OS.  The hardware is still under the driver's control, but
2409  * needs to be disabled.  A global MAC reset is issued to stop the
2410  * hardware, and all transmit and receive resources are freed.
2411  **/
2412 static int igb_close(struct net_device *netdev)
2413 {
2414         struct igb_adapter *adapter = netdev_priv(netdev);
2415
2416         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2417         igb_down(adapter);
2418
2419         igb_free_irq(adapter);
2420
2421         igb_free_all_tx_resources(adapter);
2422         igb_free_all_rx_resources(adapter);
2423
2424         return 0;
2425 }
2426
2427 /**
2428  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2429  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2430  *
2431  * Return 0 on success, negative on failure
2432  **/
2433 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2434 {
2435         struct device *dev = tx_ring->dev;
2436         int size;
2437
2438         size = sizeof(struct igb_buffer) * tx_ring->count;
2439         tx_ring->buffer_info = vmalloc(size);
2440         if (!tx_ring->buffer_info)
2441                 goto err;
2442         memset(tx_ring->buffer_info, 0, size);
2443
2444         /* round up to nearest 4K */
2445         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2446         tx_ring->size = ALIGN(tx_ring->size, 4096);
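
        /*
         * e.g. with the default 256 descriptors (IGB_DEFAULT_TXD), the ring
         * is 256 * 16 = 4096 bytes and already 4K-aligned (illustrative).
         */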
2447
2448         tx_ring->desc = dma_alloc_coherent(dev,
2449                                            tx_ring->size,
2450                                            &tx_ring->dma,
2451                                            GFP_KERNEL);
2452
2453         if (!tx_ring->desc)
2454                 goto err;
2455
2456         tx_ring->next_to_use = 0;
2457         tx_ring->next_to_clean = 0;
2458         return 0;
2459
2460 err:
2461         vfree(tx_ring->buffer_info);
2462         dev_err(dev,
2463                 "Unable to allocate memory for the transmit descriptor ring\n");
2464         return -ENOMEM;
2465 }
2466
2467 /**
2468  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2469  *                                (Descriptors) for all queues
2470  * @adapter: board private structure
2471  *
2472  * Return 0 on success, negative on failure
2473  **/
2474 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2475 {
2476         struct pci_dev *pdev = adapter->pdev;
2477         int i, err = 0;
2478
2479         for (i = 0; i < adapter->num_tx_queues; i++) {
2480                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2481                 if (err) {
2482                         dev_err(&pdev->dev,
2483                                 "Allocation for Tx Queue %u failed\n", i);
2484                         for (i--; i >= 0; i--)
2485                                 igb_free_tx_resources(adapter->tx_ring[i]);
2486                         break;
2487                 }
2488         }
2489
2490         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2491                 int r_idx = i % adapter->num_tx_queues;
2492                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2493         }
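
        /*
         * Illustrative mapping: with 4 Tx queues, multi_tx_table holds rings
         * 0,1,2,3,0,1,2,3,... so every select-queue index below
         * IGB_ABS_MAX_TX_QUEUES lands on a valid ring.
         */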
2494         return err;
2495 }
2496
2497 /**
2498  * igb_setup_tctl - configure the transmit control registers
2499  * @adapter: Board private structure
2500  **/
2501 void igb_setup_tctl(struct igb_adapter *adapter)
2502 {
2503         struct e1000_hw *hw = &adapter->hw;
2504         u32 tctl;
2505
2506         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2507         wr32(E1000_TXDCTL(0), 0);
2508
2509         /* Program the Transmit Control Register */
2510         tctl = rd32(E1000_TCTL);
2511         tctl &= ~E1000_TCTL_CT;
2512         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2513                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2514
2515         igb_config_collision_dist(hw);
2516
2517         /* Enable transmits */
2518         tctl |= E1000_TCTL_EN;
2519
2520         wr32(E1000_TCTL, tctl);
2521 }
2522
2523 /**
2524  * igb_configure_tx_ring - Configure transmit ring after Reset
2525  * @adapter: board private structure
2526  * @ring: tx ring to configure
2527  *
2528  * Configure a transmit ring after a reset.
2529  **/
2530 void igb_configure_tx_ring(struct igb_adapter *adapter,
2531                            struct igb_ring *ring)
2532 {
2533         struct e1000_hw *hw = &adapter->hw;
2534         u32 txdctl;
2535         u64 tdba = ring->dma;
2536         int reg_idx = ring->reg_idx;
2537
2538         /* disable the queue */
2539         txdctl = rd32(E1000_TXDCTL(reg_idx));
2540         wr32(E1000_TXDCTL(reg_idx),
2541                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2542         wrfl();
2543         mdelay(10);
2544
2545         wr32(E1000_TDLEN(reg_idx),
2546                         ring->count * sizeof(union e1000_adv_tx_desc));
2547         wr32(E1000_TDBAL(reg_idx),
2548                         tdba & 0x00000000ffffffffULL);
2549         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2550
2551         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2552         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2553         writel(0, ring->head);
2554         writel(0, ring->tail);
2555
2556         txdctl |= IGB_TX_PTHRESH;
2557         txdctl |= IGB_TX_HTHRESH << 8;
2558         txdctl |= IGB_TX_WTHRESH << 16;
2559
2560         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2561         wr32(E1000_TXDCTL(reg_idx), txdctl);
2562 }
2563
2564 /**
2565  * igb_configure_tx - Configure transmit Unit after Reset
2566  * @adapter: board private structure
2567  *
2568  * Configure the Tx unit of the MAC after a reset.
2569  **/
2570 static void igb_configure_tx(struct igb_adapter *adapter)
2571 {
2572         int i;
2573
2574         for (i = 0; i < adapter->num_tx_queues; i++)
2575                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2576 }
2577
2578 /**
2579  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2580  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2581  *
2582  * Returns 0 on success, negative on failure
2583  **/
2584 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2585 {
2586         struct device *dev = rx_ring->dev;
2587         int size, desc_len;
2588
2589         size = sizeof(struct igb_buffer) * rx_ring->count;
2590         rx_ring->buffer_info = vmalloc(size);
2591         if (!rx_ring->buffer_info)
2592                 goto err;
2593         memset(rx_ring->buffer_info, 0, size);
2594
2595         desc_len = sizeof(union e1000_adv_rx_desc);
2596
2597         /* Round up to nearest 4K */
2598         rx_ring->size = rx_ring->count * desc_len;
2599         rx_ring->size = ALIGN(rx_ring->size, 4096);
2600
2601         rx_ring->desc = dma_alloc_coherent(dev,
2602                                            rx_ring->size,
2603                                            &rx_ring->dma,
2604                                            GFP_KERNEL);
2605
2606         if (!rx_ring->desc)
2607                 goto err;
2608
2609         rx_ring->next_to_clean = 0;
2610         rx_ring->next_to_use = 0;
2611
2612         return 0;
2613
2614 err:
2615         vfree(rx_ring->buffer_info);
2616         rx_ring->buffer_info = NULL;
2617         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2618                 " ring\n");
2619         return -ENOMEM;
2620 }
2621
2622 /**
2623  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2624  *                                (Descriptors) for all queues
2625  * @adapter: board private structure
2626  *
2627  * Return 0 on success, negative on failure
2628  **/
2629 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2630 {
2631         struct pci_dev *pdev = adapter->pdev;
2632         int i, err = 0;
2633
2634         for (i = 0; i < adapter->num_rx_queues; i++) {
2635                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2636                 if (err) {
2637                         dev_err(&pdev->dev,
2638                                 "Allocation for Rx Queue %u failed\n", i);
2639                         for (i--; i >= 0; i--)
2640                                 igb_free_rx_resources(adapter->rx_ring[i]);
2641                         break;
2642                 }
2643         }
2644
2645         return err;
2646 }
2647
2648 /**
2649  * igb_setup_mrqc - configure the multiple receive queue control registers
2650  * @adapter: Board private structure
2651  **/
2652 static void igb_setup_mrqc(struct igb_adapter *adapter)
2653 {
2654         struct e1000_hw *hw = &adapter->hw;
2655         u32 mrqc, rxcsum;
2656         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2657         union e1000_reta {
2658                 u32 dword;
2659                 u8  bytes[4];
2660         } reta;
2661         static const u8 rsshash[40] = {
2662                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2663                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2664                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2665                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2666
2667         /* Fill out hash function seeds */
2668         for (j = 0; j < 10; j++) {
2669                 u32 rsskey = rsshash[(j * 4)];
2670                 rsskey |= rsshash[(j * 4) + 1] << 8;
2671                 rsskey |= rsshash[(j * 4) + 2] << 16;
2672                 rsskey |= rsshash[(j * 4) + 3] << 24;
2673                 array_wr32(E1000_RSSRK(0), j, rsskey);
2674         }
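
        /*
         * Each RSSRK write above packs four key bytes little-endian; e.g.
         * the first dword becomes 0xda565a6d from bytes 6d 5a 56 da
         * (illustrative).
         */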
2675
2676         num_rx_queues = adapter->rss_queues;
2677
2678         if (adapter->vfs_allocated_count) {
2679                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2680                 switch (hw->mac.type) {
2681                 case e1000_i350:
2682                 case e1000_82580:
2683                         num_rx_queues = 1;
2684                         shift = 0;
2685                         break;
2686                 case e1000_82576:
2687                         shift = 3;
2688                         num_rx_queues = 2;
2689                         break;
2690                 case e1000_82575:
2691                         shift = 2;
2692                         shift2 = 6;     /* fall through */
2693                 default:
2694                         break;
2695                 }
2696         } else {
2697                 if (hw->mac.type == e1000_82575)
2698                         shift = 6;
2699         }
2700
2701         for (j = 0; j < (32 * 4); j++) {
2702                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2703                 if (shift2)
2704                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2705                 if ((j & 3) == 3)
2706                         wr32(E1000_RETA(j >> 2), reta.dword);
2707         }
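
        /*
         * Illustrative RETA layout: with num_rx_queues = 4 and shift = 0,
         * the 128 byte-wide entries above cycle 0,1,2,3,0,1,... and every
         * fourth byte completes a dword written to one of the 32 RETA
         * registers.
         */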
2708
2709         /*
2710          * Disable raw packet checksumming so that RSS hash is placed in
2711          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2712          * offloads as they are enabled by default
2713          */
2714         rxcsum = rd32(E1000_RXCSUM);
2715         rxcsum |= E1000_RXCSUM_PCSD;
2716
2717         if (adapter->hw.mac.type >= e1000_82576)
2718                 /* Enable Receive Checksum Offload for SCTP */
2719                 rxcsum |= E1000_RXCSUM_CRCOFL;
2720
2721         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2722         wr32(E1000_RXCSUM, rxcsum);
2723
2724         /* If VMDq is enabled then we set the appropriate mode for that, else
2725          * we default to RSS so that an RSS hash is calculated per packet even
2726          * if we are only using one queue */
2727         if (adapter->vfs_allocated_count) {
2728                 if (hw->mac.type > e1000_82575) {
2729                         /* Set the default pool for the PF's first queue */
2730                         u32 vtctl = rd32(E1000_VT_CTL);
2731                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2732                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2733                         vtctl |= adapter->vfs_allocated_count <<
2734                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2735                         wr32(E1000_VT_CTL, vtctl);
2736                 }
2737                 if (adapter->rss_queues > 1)
2738                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2739                 else
2740                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2741         } else {
2742                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2743         }
2744         igb_vmm_control(adapter);
2745
2746         /*
2747          * Generate RSS hash based on TCP port numbers and/or
2748          * IPv4/v6 src and dst addresses since UDP cannot be
2749          * hashed reliably due to IP fragmentation
2750          */
2751         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2752                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2753                 E1000_MRQC_RSS_FIELD_IPV6 |
2754                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2755                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2756
2757         wr32(E1000_MRQC, mrqc);
2758 }
2759
2760 /**
2761  * igb_setup_rctl - configure the receive control registers
2762  * @adapter: Board private structure
2763  **/
2764 void igb_setup_rctl(struct igb_adapter *adapter)
2765 {
2766         struct e1000_hw *hw = &adapter->hw;
2767         u32 rctl;
2768
2769         rctl = rd32(E1000_RCTL);
2770
2771         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2772         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2773
2774         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2775                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2776
2777         /*
2778          * enable stripping of CRC. It's unlikely this will break BMC
2779          * redirection as it did with e1000. Newer features require
2780          * that the HW strips the CRC.
2781          */
2782         rctl |= E1000_RCTL_SECRC;
2783
2784         /* disable store bad packets and clear size bits. */
2785         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2786
2787         /* enable LPE to prevent packets larger than max_frame_size */
2788         rctl |= E1000_RCTL_LPE;
2789
2790         /* disable queue 0 to prevent tail write w/o re-config */
2791         wr32(E1000_RXDCTL(0), 0);
2792
2793         /* Attention!!!  For SR-IOV PF driver operations you must enable
2794          * queue drop for all VF and PF queues to prevent head of line blocking
2795          * if an un-trusted VF does not provide descriptors to hardware.
2796          */
2797         if (adapter->vfs_allocated_count) {
2798                 /* set all queue drop enable bits */
2799                 wr32(E1000_QDE, ALL_QUEUES);
2800         }
2801
2802         wr32(E1000_RCTL, rctl);
2803 }
2804
2805 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2806                                    int vfn)
2807 {
2808         struct e1000_hw *hw = &adapter->hw;
2809         u32 vmolr;
2810
2811         /* if it isn't the PF, check to see if VFs are enabled and
2812          * increase the size to support vlan tags */
2813         if (vfn < adapter->vfs_allocated_count &&
2814             adapter->vf_data[vfn].vlans_enabled)
2815                 size += VLAN_TAG_SIZE;
2816
2817         vmolr = rd32(E1000_VMOLR(vfn));
2818         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2819         vmolr |= size | E1000_VMOLR_LPE;
2820         wr32(E1000_VMOLR(vfn), vmolr);
2821
2822         return 0;
2823 }
2824
2825 /**
2826  * igb_rlpml_set - set maximum receive packet size
2827  * @adapter: board private structure
2828  *
2829  * Configure maximum receivable packet size.
2830  **/
2831 static void igb_rlpml_set(struct igb_adapter *adapter)
2832 {
2833         u32 max_frame_size = adapter->max_frame_size;
2834         struct e1000_hw *hw = &adapter->hw;
2835         u16 pf_id = adapter->vfs_allocated_count;
2836
2837         if (adapter->vlgrp)
2838                 max_frame_size += VLAN_TAG_SIZE;
2839
2840         /* if vfs are enabled, we set RLPML to the largest possible request
2841          * size and set the VMOLR RLPML to the size we need */
2842         if (pf_id) {
2843                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2844                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2845         }
2846
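        /*
         * e.g. a 1500-byte MTU gives a max_frame_size of 1518 (MTU +
         * Ethernet header + FCS), or 1522 once a vlan group is registered;
         * with VFs active the global RLPML is instead opened up to
         * MAX_JUMBO_FRAME_SIZE and each pool is clamped individually via
         * its VMOLR.RLPML field (sketch values for a standard MTU).
         */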
2847         wr32(E1000_RLPML, max_frame_size);
2848 }
2849
2850 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2851                                  int vfn, bool aupe)
2852 {
2853         struct e1000_hw *hw = &adapter->hw;
2854         u32 vmolr;
2855
2856         /*
2857          * This register exists only on 82576 and newer, so on older MACs
2858          * we should exit and do nothing
2859          */
2860         if (hw->mac.type < e1000_82576)
2861                 return;
2862
2863         vmolr = rd32(E1000_VMOLR(vfn));
2864         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2865         if (aupe)
2866                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2867         else
2868                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2869
2870         /* clear all bits that might not be set */
2871         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2872
2873         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2874                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
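        /*
         * pool numbering note: with N VFs allocated, pools 0..N-1 belong
         * to the VFs and pool N is the PF, so the test above enables RSS
         * only on the PF pool
         */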
2875         /*
2876          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2877          * multicast packets
2878          */
2879         if (vfn <= adapter->vfs_allocated_count)
2880                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2881
2882         wr32(E1000_VMOLR(vfn), vmolr);
2883 }
2884
2885 /**
2886  * igb_configure_rx_ring - Configure a receive ring after Reset
2887  * @adapter: board private structure
2888  * @ring: receive ring to be configured
2889  *
2890  * Configure the Rx unit of the MAC after a reset.
2891  **/
2892 void igb_configure_rx_ring(struct igb_adapter *adapter,
2893                            struct igb_ring *ring)
2894 {
2895         struct e1000_hw *hw = &adapter->hw;
2896         u64 rdba = ring->dma;
2897         int reg_idx = ring->reg_idx;
2898         u32 srrctl, rxdctl;
2899
2900         /* disable the queue */
2901         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2902         wr32(E1000_RXDCTL(reg_idx),
2903                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2904
2905         /* Set DMA base address registers */
2906         wr32(E1000_RDBAL(reg_idx),
2907              rdba & 0x00000000ffffffffULL);
2908         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2909         wr32(E1000_RDLEN(reg_idx),
2910                        ring->count * sizeof(union e1000_adv_rx_desc));
2911
2912         /* initialize head and tail */
2913         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2914         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2915         writel(0, ring->head);
2916         writel(0, ring->tail);
2917
2918         /* set descriptor configuration */
2919         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2920                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2921                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2922 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2923                 srrctl |= IGB_RXBUFFER_16384 >>
2924                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2925 #else
2926                 srrctl |= (PAGE_SIZE / 2) >>
2927                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2928 #endif
2929                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2930         } else {
2931                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2932                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2933                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2934         }
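        /*
         * SRRCTL sizes are in hardware units (assuming the usual shift
         * definitions of 10 and 2): BSIZEPKT counts 1 KB blocks, so a
         * 2048-byte rx_buffer_len encodes as 2, while BSIZEHDRSIZE counts
         * 64-byte blocks at bit 8, which is why the header-split path
         * shifts bytes left by 2 (bytes << 2 == (bytes / 64) << 8; a
         * 128-byte header buffer encodes as 2).
         */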
2935         if (hw->mac.type == e1000_82580)
2936                 srrctl |= E1000_SRRCTL_TIMESTAMP;
2937         /* Only set Drop Enable if we are supporting multiple queues */
2938         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2939                 srrctl |= E1000_SRRCTL_DROP_EN;
2940
2941         wr32(E1000_SRRCTL(reg_idx), srrctl);
2942
2943         /* set filtering for VMDQ pools */
2944         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2945
2946         /* enable receive descriptor fetching */
2947         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2948         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2949         rxdctl &= 0xFFF00000;
2950         rxdctl |= IGB_RX_PTHRESH;
2951         rxdctl |= IGB_RX_HTHRESH << 8;
2952         rxdctl |= IGB_RX_WTHRESH << 16;
2953         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2954 }
2955
2956 /**
2957  * igb_configure_rx - Configure receive Unit after Reset
2958  * @adapter: board private structure
2959  *
2960  * Configure the Rx unit of the MAC after a reset.
2961  **/
2962 static void igb_configure_rx(struct igb_adapter *adapter)
2963 {
2964         int i;
2965
2966         /* set UTA to appropriate mode */
2967         igb_set_uta(adapter);
2968
2969         /* set the correct pool for the PF default MAC address in entry 0 */
2970         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2971                          adapter->vfs_allocated_count);
2972
2973         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2974          * the Base and Length of the Rx Descriptor Ring */
2975         for (i = 0; i < adapter->num_rx_queues; i++)
2976                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2977 }
2978
2979 /**
2980  * igb_free_tx_resources - Free Tx Resources per Queue
2981  * @tx_ring: Tx descriptor ring for a specific queue
2982  *
2983  * Free all transmit software resources
2984  **/
2985 void igb_free_tx_resources(struct igb_ring *tx_ring)
2986 {
2987         igb_clean_tx_ring(tx_ring);
2988
2989         vfree(tx_ring->buffer_info);
2990         tx_ring->buffer_info = NULL;
2991
2992         /* if not set, then don't free */
2993         if (!tx_ring->desc)
2994                 return;
2995
2996         dma_free_coherent(tx_ring->dev, tx_ring->size,
2997                           tx_ring->desc, tx_ring->dma);
2998
2999         tx_ring->desc = NULL;
3000 }
3001
3002 /**
3003  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3004  * @adapter: board private structure
3005  *
3006  * Free all transmit software resources
3007  **/
3008 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3009 {
3010         int i;
3011
3012         for (i = 0; i < adapter->num_tx_queues; i++)
3013                 igb_free_tx_resources(adapter->tx_ring[i]);
3014 }
3015
3016 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3017                                     struct igb_buffer *buffer_info)
3018 {
3019         if (buffer_info->dma) {
3020                 if (buffer_info->mapped_as_page)
3021                         dma_unmap_page(tx_ring->dev,
3022                                         buffer_info->dma,
3023                                         buffer_info->length,
3024                                         DMA_TO_DEVICE);
3025                 else
3026                         dma_unmap_single(tx_ring->dev,
3027                                         buffer_info->dma,
3028                                         buffer_info->length,
3029                                         DMA_TO_DEVICE);
3030                 buffer_info->dma = 0;
3031         }
3032         if (buffer_info->skb) {
3033                 dev_kfree_skb_any(buffer_info->skb);
3034                 buffer_info->skb = NULL;
3035         }
3036         buffer_info->time_stamp = 0;
3037         buffer_info->length = 0;
3038         buffer_info->next_to_watch = 0;
3039         buffer_info->mapped_as_page = false;
3040 }
3041
3042 /**
3043  * igb_clean_tx_ring - Free Tx Buffers
3044  * @tx_ring: ring to be cleaned
3045  **/
3046 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3047 {
3048         struct igb_buffer *buffer_info;
3049         unsigned long size;
3050         unsigned int i;
3051
3052         if (!tx_ring->buffer_info)
3053                 return;
3054
3055         /* Free all the Tx ring sk_buffs */
3056         for (i = 0; i < tx_ring->count; i++) {
3057                 buffer_info = &tx_ring->buffer_info[i];
3058                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3059         }
3060
3061         size = sizeof(struct igb_buffer) * tx_ring->count;
3062         memset(tx_ring->buffer_info, 0, size);
3063
3064         /* Zero out the descriptor ring */
3065         memset(tx_ring->desc, 0, tx_ring->size);
3066
3067         tx_ring->next_to_use = 0;
3068         tx_ring->next_to_clean = 0;
3069 }
3070
3071 /**
3072  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3073  * @adapter: board private structure
3074  **/
3075 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3076 {
3077         int i;
3078
3079         for (i = 0; i < adapter->num_tx_queues; i++)
3080                 igb_clean_tx_ring(adapter->tx_ring[i]);
3081 }
3082
3083 /**
3084  * igb_free_rx_resources - Free Rx Resources
3085  * @rx_ring: ring to clean the resources from
3086  *
3087  * Free all receive software resources
3088  **/
3089 void igb_free_rx_resources(struct igb_ring *rx_ring)
3090 {
3091         igb_clean_rx_ring(rx_ring);
3092
3093         vfree(rx_ring->buffer_info);
3094         rx_ring->buffer_info = NULL;
3095
3096         /* if not set, then don't free */
3097         if (!rx_ring->desc)
3098                 return;
3099
3100         dma_free_coherent(rx_ring->dev, rx_ring->size,
3101                           rx_ring->desc, rx_ring->dma);
3102
3103         rx_ring->desc = NULL;
3104 }
3105
3106 /**
3107  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3108  * @adapter: board private structure
3109  *
3110  * Free all receive software resources
3111  **/
3112 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3113 {
3114         int i;
3115
3116         for (i = 0; i < adapter->num_rx_queues; i++)
3117                 igb_free_rx_resources(adapter->rx_ring[i]);
3118 }
3119
3120 /**
3121  * igb_clean_rx_ring - Free Rx Buffers per Queue
3122  * @rx_ring: ring to free buffers from
3123  **/
3124 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3125 {
3126         struct igb_buffer *buffer_info;
3127         unsigned long size;
3128         unsigned int i;
3129
3130         if (!rx_ring->buffer_info)
3131                 return;
3132
3133         /* Free all the Rx ring sk_buffs */
3134         for (i = 0; i < rx_ring->count; i++) {
3135                 buffer_info = &rx_ring->buffer_info[i];
3136                 if (buffer_info->dma) {
3137                         dma_unmap_single(rx_ring->dev,
3138                                          buffer_info->dma,
3139                                          rx_ring->rx_buffer_len,
3140                                          DMA_FROM_DEVICE);
3141                         buffer_info->dma = 0;
3142                 }
3143
3144                 if (buffer_info->skb) {
3145                         dev_kfree_skb(buffer_info->skb);
3146                         buffer_info->skb = NULL;
3147                 }
3148                 if (buffer_info->page_dma) {
3149                         dma_unmap_page(rx_ring->dev,
3150                                        buffer_info->page_dma,
3151                                        PAGE_SIZE / 2,
3152                                        DMA_FROM_DEVICE);
3153                         buffer_info->page_dma = 0;
3154                 }
3155                 if (buffer_info->page) {
3156                         put_page(buffer_info->page);
3157                         buffer_info->page = NULL;
3158                         buffer_info->page_offset = 0;
3159                 }
3160         }
3161
3162         size = sizeof(struct igb_buffer) * rx_ring->count;
3163         memset(rx_ring->buffer_info, 0, size);
3164
3165         /* Zero out the descriptor ring */
3166         memset(rx_ring->desc, 0, rx_ring->size);
3167
3168         rx_ring->next_to_clean = 0;
3169         rx_ring->next_to_use = 0;
3170 }
3171
3172 /**
3173  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3174  * @adapter: board private structure
3175  **/
3176 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3177 {
3178         int i;
3179
3180         for (i = 0; i < adapter->num_rx_queues; i++)
3181                 igb_clean_rx_ring(adapter->rx_ring[i]);
3182 }
3183
3184 /**
3185  * igb_set_mac - Change the Ethernet Address of the NIC
3186  * @netdev: network interface device structure
3187  * @p: pointer to an address structure
3188  *
3189  * Returns 0 on success, negative on failure
3190  **/
3191 static int igb_set_mac(struct net_device *netdev, void *p)
3192 {
3193         struct igb_adapter *adapter = netdev_priv(netdev);
3194         struct e1000_hw *hw = &adapter->hw;
3195         struct sockaddr *addr = p;
3196
3197         if (!is_valid_ether_addr(addr->sa_data))
3198                 return -EADDRNOTAVAIL;
3199
3200         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3201         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3202
3203         /* set the correct pool for the new PF MAC address in entry 0 */
3204         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3205                          adapter->vfs_allocated_count);
3206
3207         return 0;
3208 }
3209
3210 /**
3211  * igb_write_mc_addr_list - write multicast addresses to MTA
3212  * @netdev: network interface device structure
3213  *
3214  * Writes multicast address list to the MTA hash table.
3215  * Returns: -ENOMEM on failure
3216  *                0 on no addresses written
3217  *                X on writing X addresses to MTA
3218  **/
3219 static int igb_write_mc_addr_list(struct net_device *netdev)
3220 {
3221         struct igb_adapter *adapter = netdev_priv(netdev);
3222         struct e1000_hw *hw = &adapter->hw;
3223         struct netdev_hw_addr *ha;
3224         u8  *mta_list;
3225         int i;
3226
3227         if (netdev_mc_empty(netdev)) {
3228                 /* nothing to program, so clear mc list */
3229                 igb_update_mc_addr_list(hw, NULL, 0);
3230                 igb_restore_vf_multicasts(adapter);
3231                 return 0;
3232         }
3233
3234         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3235         if (!mta_list)
3236                 return -ENOMEM;
3237
3238         /* The shared function expects a packed array of only addresses. */
3239         i = 0;
3240         netdev_for_each_mc_addr(ha, netdev)
3241                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3242
3243         igb_update_mc_addr_list(hw, mta_list, i);
3244         kfree(mta_list);
3245
3246         return netdev_mc_count(netdev);
3247 }
3248
3249 /**
3250  * igb_write_uc_addr_list - write unicast addresses to RAR table
3251  * @netdev: network interface device structure
3252  *
3253  * Writes unicast address list to the RAR table.
3254  * Returns: -ENOMEM on failure/insufficient address space
3255  *                0 on no addresses written
3256  *                X on writing X addresses to the RAR table
3257  **/
3258 static int igb_write_uc_addr_list(struct net_device *netdev)
3259 {
3260         struct igb_adapter *adapter = netdev_priv(netdev);
3261         struct e1000_hw *hw = &adapter->hw;
3262         unsigned int vfn = adapter->vfs_allocated_count;
3263         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3264         int count = 0;
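        /* entry 0 holds the PF default MAC and one entry is reserved per
         * VF MAC; e.g. on a part with a 24-entry RAR table and 7 VFs,
         * 24 - (7 + 1) = 16 slots remain for extra unicast addresses
         * (entry counts here are illustrative) */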
3265
3266         /* return ENOMEM indicating insufficient memory for addresses */
3267         if (netdev_uc_count(netdev) > rar_entries)
3268                 return -ENOMEM;
3269
3270         if (!netdev_uc_empty(netdev) && rar_entries) {
3271                 struct netdev_hw_addr *ha;
3272
3273                 netdev_for_each_uc_addr(ha, netdev) {
3274                         if (!rar_entries)
3275                                 break;
3276                         igb_rar_set_qsel(adapter, ha->addr,
3277                                          rar_entries--,
3278                                          vfn);
3279                         count++;
3280                 }
3281         }
3282         /* write the addresses in reverse order to avoid write combining */
3283         for (; rar_entries > 0 ; rar_entries--) {
3284                 wr32(E1000_RAH(rar_entries), 0);
3285                 wr32(E1000_RAL(rar_entries), 0);
3286         }
3287         wrfl();
3288
3289         return count;
3290 }
3291
3292 /**
3293  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3294  * @netdev: network interface device structure
3295  *
3296  * The set_rx_mode entry point is called whenever the unicast or multicast
3297  * address lists or the network interface flags are updated.  This routine is
3298  * responsible for configuring the hardware for proper unicast, multicast,
3299  * promiscuous mode, and all-multi behavior.
3300  **/
3301 static void igb_set_rx_mode(struct net_device *netdev)
3302 {
3303         struct igb_adapter *adapter = netdev_priv(netdev);
3304         struct e1000_hw *hw = &adapter->hw;
3305         unsigned int vfn = adapter->vfs_allocated_count;
3306         u32 rctl, vmolr = 0;
3307         int count;
3308
3309         /* Check for Promiscuous and All Multicast modes */
3310         rctl = rd32(E1000_RCTL);
3311
3312         /* clear the affected bits */
3313         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3314
3315         if (netdev->flags & IFF_PROMISC) {
3316                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3317                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3318         } else {
3319                 if (netdev->flags & IFF_ALLMULTI) {
3320                         rctl |= E1000_RCTL_MPE;
3321                         vmolr |= E1000_VMOLR_MPME;
3322                 } else {
3323                         /*
3324                          * Write addresses to the MTA; if the attempt fails
3325                          * then we should just turn on promiscuous mode so
3326                          * that we can at least receive multicast traffic
3327                          */
3328                         count = igb_write_mc_addr_list(netdev);
3329                         if (count < 0) {
3330                                 rctl |= E1000_RCTL_MPE;
3331                                 vmolr |= E1000_VMOLR_MPME;
3332                         } else if (count) {
3333                                 vmolr |= E1000_VMOLR_ROMPE;
3334                         }
3335                 }
3336                 /*
3337                  * Write addresses to available RAR registers; if there is not
3338                  * sufficient space to store all the addresses then enable
3339                  * unicast promiscuous mode
3340                  */
3341                 count = igb_write_uc_addr_list(netdev);
3342                 if (count < 0) {
3343                         rctl |= E1000_RCTL_UPE;
3344                         vmolr |= E1000_VMOLR_ROPE;
3345                 }
3346                 rctl |= E1000_RCTL_VFE;
3347         }
3348         wr32(E1000_RCTL, rctl);
3349
3350         /*
3351          * In order to support SR-IOV and eventually VMDq it is necessary to set
3352          * the VMOLR to enable the appropriate modes.  Without this workaround
3353          * we will have issues with VLAN tag stripping not being done for frames
3354          * that are only arriving because we are the default pool
3355          */
3356         if (hw->mac.type < e1000_82576)
3357                 return;
3358
3359         vmolr |= rd32(E1000_VMOLR(vfn)) &
3360                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3361         wr32(E1000_VMOLR(vfn), vmolr);
3362         igb_restore_vf_multicasts(adapter);
3363 }
3364
3365 /* Need to wait a few seconds after link up to get diagnostic information from
3366  * the phy */
3367 static void igb_update_phy_info(unsigned long data)
3368 {
3369         struct igb_adapter *adapter = (struct igb_adapter *) data;
3370         igb_get_phy_info(&adapter->hw);
3371 }
3372
3373 /**
3374  * igb_has_link - check shared code for link and determine up/down
3375  * @adapter: pointer to driver private info
3376  **/
3377 bool igb_has_link(struct igb_adapter *adapter)
3378 {
3379         struct e1000_hw *hw = &adapter->hw;
3380         bool link_active = false;
3381         s32 ret_val = 0;
3382
3383         /* get_link_status is set on LSC (link status) interrupt or
3384          * rx sequence error interrupt.  get_link_status will stay
3385          * true until e1000_check_for_link establishes link
3386          * for copper adapters ONLY
3387          */
3388         switch (hw->phy.media_type) {
3389         case e1000_media_type_copper:
3390                 if (hw->mac.get_link_status) {
3391                         ret_val = hw->mac.ops.check_for_link(hw);
3392                         link_active = !hw->mac.get_link_status;
3393                 } else {
3394                         link_active = true;
3395                 }
3396                 break;
3397         case e1000_media_type_internal_serdes:
3398                 ret_val = hw->mac.ops.check_for_link(hw);
3399                 link_active = hw->mac.serdes_has_link;
3400                 break;
3401         default:
3402         case e1000_media_type_unknown:
3403                 break;
3404         }
3405
3406         return link_active;
3407 }
3408
3409 /**
3410  * igb_watchdog - Timer Call-back
3411  * @data: pointer to adapter cast into an unsigned long
3412  **/
3413 static void igb_watchdog(unsigned long data)
3414 {
3415         struct igb_adapter *adapter = (struct igb_adapter *)data;
3416         /* Do the rest outside of interrupt context */
3417         schedule_work(&adapter->watchdog_task);
3418 }
3419
3420 static void igb_watchdog_task(struct work_struct *work)
3421 {
3422         struct igb_adapter *adapter = container_of(work,
3423                                                    struct igb_adapter,
3424                                                    watchdog_task);
3425         struct e1000_hw *hw = &adapter->hw;
3426         struct net_device *netdev = adapter->netdev;
3427         u32 link;
3428         int i;
3429
3430         link = igb_has_link(adapter);
3431         if (link) {
3432                 if (!netif_carrier_ok(netdev)) {
3433                         u32 ctrl;
3434                         hw->mac.ops.get_speed_and_duplex(hw,
3435                                                          &adapter->link_speed,
3436                                                          &adapter->link_duplex);
3437
3438                         ctrl = rd32(E1000_CTRL);
3439                         /* Link status message must follow this format */
3440                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3441                                  "Flow Control: %s\n",
3442                                netdev->name,
3443                                adapter->link_speed,
3444                                adapter->link_duplex == FULL_DUPLEX ?
3445                                  "Full Duplex" : "Half Duplex",
3446                                ((ctrl & E1000_CTRL_TFCE) &&
3447                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3448                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3449                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3450
3451                         /* adjust timeout factor according to speed/duplex */
3452                         adapter->tx_timeout_factor = 1;
3453                         switch (adapter->link_speed) {
3454                         case SPEED_10:
3455                                 adapter->tx_timeout_factor = 14;
3456                                 break;
3457                         case SPEED_100:
3458                                 /* maybe add some timeout factor ? */
3459                                 break;
3460                         }
3461
3462                         netif_carrier_on(netdev);
3463
3464                         igb_ping_all_vfs(adapter);
3465
3466                         /* link state has changed, schedule phy info update */
3467                         if (!test_bit(__IGB_DOWN, &adapter->state))
3468                                 mod_timer(&adapter->phy_info_timer,
3469                                           round_jiffies(jiffies + 2 * HZ));
3470                 }
3471         } else {
3472                 if (netif_carrier_ok(netdev)) {
3473                         adapter->link_speed = 0;
3474                         adapter->link_duplex = 0;
3475                         /* Link status message must follow this format */
3476                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3477                                netdev->name);
3478                         netif_carrier_off(netdev);
3479
3480                         igb_ping_all_vfs(adapter);
3481
3482                         /* link state has changed, schedule phy info update */
3483                         if (!test_bit(__IGB_DOWN, &adapter->state))
3484                                 mod_timer(&adapter->phy_info_timer,
3485                                           round_jiffies(jiffies + 2 * HZ));
3486                 }
3487         }
3488
3489         spin_lock(&adapter->stats64_lock);
3490         igb_update_stats(adapter, &adapter->stats64);
3491         spin_unlock(&adapter->stats64_lock);
3492
3493         for (i = 0; i < adapter->num_tx_queues; i++) {
3494                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3495                 if (!netif_carrier_ok(netdev)) {
3496                         /* We've lost link, so the controller stops DMA,
3497                          * but we've got queued Tx work that's never going
3498                          * to get done, so reset controller to flush Tx.
3499                          * (Do the reset outside of interrupt context). */
3500                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3501                                 adapter->tx_timeout_count++;
3502                                 schedule_work(&adapter->reset_task);
3503                                 /* return immediately since reset is imminent */
3504                                 return;
3505                         }
3506                 }
3507
3508                 /* Force detection of hung controller every watchdog period */
3509                 tx_ring->detect_tx_hung = true;
3510         }
3511
3512         /* Cause software interrupt to ensure rx ring is cleaned */
3513         if (adapter->msix_entries) {
3514                 u32 eics = 0;
3515                 for (i = 0; i < adapter->num_q_vectors; i++) {
3516                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3517                         eics |= q_vector->eims_value;
3518                 }
3519                 wr32(E1000_EICS, eics);
3520         } else {
3521                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3522         }
3523
3524         /* Reset the timer */
3525         if (!test_bit(__IGB_DOWN, &adapter->state))
3526                 mod_timer(&adapter->watchdog_timer,
3527                           round_jiffies(jiffies + 2 * HZ));
3528 }
3529
3530 enum latency_range {
3531         lowest_latency = 0,
3532         low_latency = 1,
3533         bulk_latency = 2,
3534         latency_invalid = 255
3535 };
3536
3537 /**
3538  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3539  *
3540  *      Stores a new ITR value based strictly on packet size.  This
3541  *      algorithm is less sophisticated than that used in igb_update_itr,
3542  *      due to the difficulty of synchronizing statistics across multiple
3543  *      receive rings.  The divisors and thresholds used by this function
3544  *      were determined based on theoretical maximum wire speed and testing
3545  *      data, in order to minimize response time while increasing bulk
3546  *      throughput.
3547  *      This functionality is controlled by the InterruptThrottleRate module
3548  *      parameter (see igb_param.c)
3549  *      NOTE:  This function is called only when operating in a multiqueue
3550  *             receive environment.
3551  * @q_vector: pointer to q_vector
3552  **/
3553 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3554 {
3555         int new_val = q_vector->itr_val;
3556         int avg_wire_size = 0;
3557         struct igb_adapter *adapter = q_vector->adapter;
3558         struct igb_ring *ring;
3559         unsigned int packets;
3560
3561         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3562          * ints/sec - an ITR value of 976 (~250 usecs).
3563          */
3564         if (adapter->link_speed != SPEED_1000) {
3565                 new_val = 976;
3566                 goto set_itr_val;
3567         }
3568
3569         ring = q_vector->rx_ring;
3570         if (ring) {
3571                 packets = ACCESS_ONCE(ring->total_packets);
3572
3573                 if (packets)
3574                         avg_wire_size = ring->total_bytes / packets;
3575         }
3576
3577         ring = q_vector->tx_ring;
3578         if (ring) {
3579                 packets = ACCESS_ONCE(ring->total_packets);
3580
3581                 if (packets)
3582                         avg_wire_size = max_t(u32, avg_wire_size,
3583                                               ring->total_bytes / packets);
3584         }
3585
3586         /* if avg_wire_size isn't set no work was done */
3587         if (!avg_wire_size)
3588                 goto clear_counts;
3589
3590         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3591         avg_wire_size += 24;
3592
3593         /* Don't starve jumbo frames */
3594         avg_wire_size = min(avg_wire_size, 3000);
3595
3596         /* Give a little boost to mid-size frames */
3597         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3598                 new_val = avg_wire_size / 3;
3599         else
3600                 new_val = avg_wire_size / 2;
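        /*
         * new_val counts in ~256 ns units (consistent with the
         * 56 == 70,000 ints/sec mapping used in igb_set_itr below);
         * e.g. steady 1500-byte frames give avg_wire_size 1524 after the
         * +24 adjustment, so new_val becomes 762, roughly 5100 ints/sec
         */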
3601
3602         /* when in itr mode 3 do not exceed 20K ints/sec */
3603         if (adapter->rx_itr_setting == 3 && new_val < 196)
3604                 new_val = 196;
3605
3606 set_itr_val:
3607         if (new_val != q_vector->itr_val) {
3608                 q_vector->itr_val = new_val;
3609                 q_vector->set_itr = 1;
3610         }
3611 clear_counts:
3612         if (q_vector->rx_ring) {
3613                 q_vector->rx_ring->total_bytes = 0;
3614                 q_vector->rx_ring->total_packets = 0;
3615         }
3616         if (q_vector->tx_ring) {
3617                 q_vector->tx_ring->total_bytes = 0;
3618                 q_vector->tx_ring->total_packets = 0;
3619         }
3620 }
3621
3622 /**
3623  * igb_update_itr - update the dynamic ITR value based on statistics
3624  *      Stores a new ITR value based on packets and byte
3625  *      counts during the last interrupt.  The advantage of per interrupt
3626  *      computation is faster updates and more accurate ITR for the current
3627  *      traffic pattern.  Constants in this function were computed
3628  *      based on theoretical maximum wire speed and thresholds were set based
3629  *      on testing data as well as attempting to minimize response time
3630  *      while increasing bulk throughput.
3631  *      this functionality is controlled by the InterruptThrottleRate module
3632  *      parameter (see igb_param.c)
3633  *      NOTE:  These calculations are only valid when operating in a single-
3634  *             queue environment.
3635  * @adapter: pointer to adapter
3636  * @itr_setting: current q_vector->itr_val
3637  * @packets: the number of packets during this measurement interval
3638  * @bytes: the number of bytes during this measurement interval
3639  **/
3640 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3641                                    int packets, int bytes)
3642 {
3643         unsigned int retval = itr_setting;
3644
3645         if (packets == 0)
3646                 goto update_itr_done;
3647
3648         switch (itr_setting) {
3649         case lowest_latency:
3650                 /* handle TSO and jumbo frames */
3651                 if (bytes/packets > 8000)
3652                         retval = bulk_latency;
3653                 else if ((packets < 5) && (bytes > 512))
3654                         retval = low_latency;
3655                 break;
3656         case low_latency:  /* 50 usec aka 20000 ints/s */
3657                 if (bytes > 10000) {
3658                         /* this if handles the TSO accounting */
3659                         if (bytes/packets > 8000) {
3660                                 retval = bulk_latency;
3661                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3662                                 retval = bulk_latency;
3663                         } else if ((packets > 35)) {
3664                                 retval = lowest_latency;
3665                         }
3666                 } else if (bytes/packets > 2000) {
3667                         retval = bulk_latency;
3668                 } else if (packets <= 2 && bytes < 512) {
3669                         retval = lowest_latency;
3670                 }
3671                 break;
3672         case bulk_latency: /* 250 usec aka 4000 ints/s */
3673                 if (bytes > 25000) {
3674                         if (packets > 35)
3675                                 retval = low_latency;
3676                 } else if (bytes < 1500) {
3677                         retval = low_latency;
3678                 }
3679                 break;
3680         }
3681
3682 update_itr_done:
3683         return retval;
3684 }
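/*
 * Example walk-through: a queue that moved 60,000 bytes in 40 packets
 * while in low_latency has bytes > 10000 and bytes/packets == 1500 > 1200,
 * so igb_update_itr() demotes it to bulk_latency; 2 packets totalling
 * 400 bytes would instead promote it to lowest_latency.
 */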
3685
3686 static void igb_set_itr(struct igb_adapter *adapter)
3687 {
3688         struct igb_q_vector *q_vector = adapter->q_vector[0];
3689         u16 current_itr;
3690         u32 new_itr = q_vector->itr_val;
3691
3692         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3693         if (adapter->link_speed != SPEED_1000) {
3694                 current_itr = 0;
3695                 new_itr = 4000;
3696                 goto set_itr_now;
3697         }
3698
3699         adapter->rx_itr = igb_update_itr(adapter,
3700                                     adapter->rx_itr,
3701                                     q_vector->rx_ring->total_packets,
3702                                     q_vector->rx_ring->total_bytes);
3703
3704         adapter->tx_itr = igb_update_itr(adapter,
3705                                     adapter->tx_itr,
3706                                     q_vector->tx_ring->total_packets,
3707                                     q_vector->tx_ring->total_bytes);
3708         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3709
3710         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3711         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3712                 current_itr = low_latency;
3713
3714         switch (current_itr) {
3715         /* counts and packets in update_itr are dependent on these numbers */
3716         case lowest_latency:
3717                 new_itr = 56;  /* aka 70,000 ints/sec */
3718                 break;
3719         case low_latency:
3720                 new_itr = 196; /* aka 20,000 ints/sec */
3721                 break;
3722         case bulk_latency:
3723                 new_itr = 980; /* aka 4,000 ints/sec */
3724                 break;
3725         default:
3726                 break;
3727         }
3728
3729 set_itr_now:
3730         q_vector->rx_ring->total_bytes = 0;
3731         q_vector->rx_ring->total_packets = 0;
3732         q_vector->tx_ring->total_bytes = 0;
3733         q_vector->tx_ring->total_packets = 0;
3734
3735         if (new_itr != q_vector->itr_val) {
3736                 /* this attempts to bias the interrupt rate towards Bulk
3737                  * by adding intermediate steps when interrupt rate is
3738                  * increasing */
3739                 new_itr = new_itr > q_vector->itr_val ?
3740                              max((new_itr * q_vector->itr_val) /
3741                                  (new_itr + (q_vector->itr_val >> 2)),
3742                                  new_itr) :
3743                              new_itr;
3744                 /* Don't write the value here; it resets the adapter's
3745                  * internal timer, and causes us to delay far longer than
3746                  * we should between interrupts.  Instead, we write the ITR
3747                  * value at the beginning of the next interrupt so the timing
3748                  * ends up being correct.
3749                  */
3750                 q_vector->itr_val = new_itr;
3751                 q_vector->set_itr = 1;
3752         }
3753 }
3754
3755 #define IGB_TX_FLAGS_CSUM               0x00000001
3756 #define IGB_TX_FLAGS_VLAN               0x00000002
3757 #define IGB_TX_FLAGS_TSO                0x00000004
3758 #define IGB_TX_FLAGS_IPV4               0x00000008
3759 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3760 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3761 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3762
3763 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3764                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3765 {
3766         struct e1000_adv_tx_context_desc *context_desc;
3767         unsigned int i;
3768         int err;
3769         struct igb_buffer *buffer_info;
3770         u32 info = 0, tu_cmd = 0;
3771         u32 mss_l4len_idx;
3772         u8 l4len;
3773
3774         if (skb_header_cloned(skb)) {
3775                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3776                 if (err)
3777                         return err;
3778         }
3779
3780         l4len = tcp_hdrlen(skb);
3781         *hdr_len += l4len;
3782
3783         if (skb->protocol == htons(ETH_P_IP)) {
3784                 struct iphdr *iph = ip_hdr(skb);
3785                 iph->tot_len = 0;
3786                 iph->check = 0;
3787                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3788                                                          iph->daddr, 0,
3789                                                          IPPROTO_TCP,
3790                                                          0);
3791         } else if (skb_is_gso_v6(skb)) {
3792                 ipv6_hdr(skb)->payload_len = 0;
3793                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3794                                                        &ipv6_hdr(skb)->daddr,
3795                                                        0, IPPROTO_TCP, 0);
3796         }
3797
3798         i = tx_ring->next_to_use;
3799
3800         buffer_info = &tx_ring->buffer_info[i];
3801         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3802         /* VLAN MACLEN IPLEN */
3803         if (tx_flags & IGB_TX_FLAGS_VLAN)
3804                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3805         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3806         *hdr_len += skb_network_offset(skb);
3807         info |= skb_network_header_len(skb);
3808         *hdr_len += skb_network_header_len(skb);
3809         context_desc->vlan_macip_lens = cpu_to_le32(info);
3810
3811         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3812         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3813
3814         if (skb->protocol == htons(ETH_P_IP))
3815                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3816         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3817
3818         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3819
3820         /* MSS L4LEN IDX */
3821         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3822         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3823
3824         /* For 82575, context index must be unique per ring. */
3825         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3826                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3827
3828         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3829         context_desc->seqnum_seed = 0;
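        /*
         * sketch for a typical IPv4 TSO frame (14-byte MAC, 20-byte IP,
         * 20-byte TCP headers, MSS 1448): vlan_macip_lens holds MACLEN 14
         * at bit 9 and IPLEN 20 in the low bits, while mss_l4len_idx
         * holds MSS 1448 at bit 16 and L4LEN 20 at bit 8 (per the
         * E1000_ADVTXD_* shift definitions)
         */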
3830
3831         buffer_info->time_stamp = jiffies;
3832         buffer_info->next_to_watch = i;
3833         buffer_info->dma = 0;
3834         i++;
3835         if (i == tx_ring->count)
3836                 i = 0;
3837
3838         tx_ring->next_to_use = i;
3839
3840         return true;
3841 }
3842
3843 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3844                                    struct sk_buff *skb, u32 tx_flags)
3845 {
3846         struct e1000_adv_tx_context_desc *context_desc;
3847         struct device *dev = tx_ring->dev;
3848         struct igb_buffer *buffer_info;
3849         u32 info = 0, tu_cmd = 0;
3850         unsigned int i;
3851
3852         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3853             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3854                 i = tx_ring->next_to_use;
3855                 buffer_info = &tx_ring->buffer_info[i];
3856                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3857
3858                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3859                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3860
3861                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3862                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3863                         info |= skb_network_header_len(skb);
3864
3865                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3866
3867                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3868
3869                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3870                         __be16 protocol;
3871
3872                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3873                                 const struct vlan_ethhdr *vhdr =
3874                                           (const struct vlan_ethhdr*)skb->data;
3875
3876                                 protocol = vhdr->h_vlan_encapsulated_proto;
3877                         } else {
3878                                 protocol = skb->protocol;
3879                         }
3880
3881                         switch (protocol) {
3882                         case cpu_to_be16(ETH_P_IP):
3883                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3884                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3885                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3886                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3887                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3888                                 break;
3889                         case cpu_to_be16(ETH_P_IPV6):
3890                                 /* XXX what about other V6 headers?? */
3891                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3892                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3893                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3894                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3895                                 break;
3896                         default:
3897                                 if (unlikely(net_ratelimit()))
3898                                         dev_warn(dev,
3899                                             "partial checksum but proto=%x!\n",
3900                                             skb->protocol);
3901                                 break;
3902                         }
3903                 }
3904
3905                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3906                 context_desc->seqnum_seed = 0;
3907                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3908                         context_desc->mss_l4len_idx =
3909                                 cpu_to_le32(tx_ring->reg_idx << 4);
3910
3911                 buffer_info->time_stamp = jiffies;
3912                 buffer_info->next_to_watch = i;
3913                 buffer_info->dma = 0;
3914
3915                 i++;
3916                 if (i == tx_ring->count)
3917                         i = 0;
3918                 tx_ring->next_to_use = i;
3919
3920                 return true;
3921         }
3922         return false;
3923 }
3924
3925 #define IGB_MAX_TXD_PWR 16
3926 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3927
3928 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3929                                  unsigned int first)
3930 {
3931         struct igb_buffer *buffer_info;
3932         struct device *dev = tx_ring->dev;
3933         unsigned int hlen = skb_headlen(skb);
3934         unsigned int count = 0, i;
3935         unsigned int f;
3936         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3937
3938         i = tx_ring->next_to_use;
3939
3940         buffer_info = &tx_ring->buffer_info[i];
3941         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
3942         buffer_info->length = hlen;
3943         /* set time_stamp *before* dma to help avoid a possible race */
3944         buffer_info->time_stamp = jiffies;
3945         buffer_info->next_to_watch = i;
3946         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
3947                                           DMA_TO_DEVICE);
3948         if (dma_mapping_error(dev, buffer_info->dma))
3949                 goto dma_error;
3950
3951         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3952                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
3953                 unsigned int len = frag->size;
3954
3955                 count++;
3956                 i++;
3957                 if (i == tx_ring->count)
3958                         i = 0;
3959
3960                 buffer_info = &tx_ring->buffer_info[i];
3961                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3962                 buffer_info->length = len;
3963                 buffer_info->time_stamp = jiffies;
3964                 buffer_info->next_to_watch = i;
3965                 buffer_info->mapped_as_page = true;
3966                 buffer_info->dma = dma_map_page(dev,
3967                                                 frag->page,
3968                                                 frag->page_offset,
3969                                                 len,
3970                                                 DMA_TO_DEVICE);
3971                 if (dma_mapping_error(dev, buffer_info->dma))
3972                         goto dma_error;
3973
3974         }
3975
3976         tx_ring->buffer_info[i].skb = skb;
3977         tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
3978         /* account for the headers replicated into each additional segment */
3979         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
3980         tx_ring->buffer_info[i].gso_segs = gso_segs;
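        /*
         * e.g. a 60,054-byte TSO skb whose 54 header bytes sit in the
         * linear area, split into 42 segments: bytecount =
         * (42 - 1) * 54 + 60054, the total that actually reaches the
         * wire since the headers are replicated into every segment
         * (sketch values only)
         */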
3981         tx_ring->buffer_info[first].next_to_watch = i;
3982
3983         return ++count;
3984
3985 dma_error:
3986         dev_err(dev, "TX DMA map failed\n");
3987
3988         /* clear timestamp and dma mappings for failed buffer_info mapping */
3989         buffer_info->dma = 0;
3990         buffer_info->time_stamp = 0;
3991         buffer_info->length = 0;
3992         buffer_info->next_to_watch = 0;
3993         buffer_info->mapped_as_page = false;
3994
3995         /* clear timestamp and dma mappings for remaining portion of packet */
3996         while (count--) {
3997                 if (i == 0)
3998                         i = tx_ring->count;
3999                 i--;
4000                 buffer_info = &tx_ring->buffer_info[i];
4001                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4002         }
4003
4004         return 0;
4005 }
4006
4007 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4008                                     u32 tx_flags, int count, u32 paylen,
4009                                     u8 hdr_len)
4010 {
4011         union e1000_adv_tx_desc *tx_desc;
4012         struct igb_buffer *buffer_info;
4013         u32 olinfo_status = 0, cmd_type_len;
4014         unsigned int i = tx_ring->next_to_use;
4015
4016         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4017                         E1000_ADVTXD_DCMD_DEXT);
4018
4019         if (tx_flags & IGB_TX_FLAGS_VLAN)
4020                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4021
4022         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4023                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4024
4025         if (tx_flags & IGB_TX_FLAGS_TSO) {
4026                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4027
4028                 /* insert tcp checksum */
4029                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4030
4031                 /* insert ip checksum */
4032                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4033                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4034
4035         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4036                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4037         }
4038
4039         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4040             (tx_flags & (IGB_TX_FLAGS_CSUM |
4041                          IGB_TX_FLAGS_TSO |
4042                          IGB_TX_FLAGS_VLAN)))
4043                 olinfo_status |= tx_ring->reg_idx << 4;
4044
4045         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
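        /* e.g. for a 60,054-byte TSO skb with hdr_len 54, the PAYLEN
         * field (bit 14 and up) carries 60,000, the L4 payload the
         * hardware will segment (sketch values) */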
4046
4047         do {
4048                 buffer_info = &tx_ring->buffer_info[i];
4049                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4050                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4051                 tx_desc->read.cmd_type_len =
4052                         cpu_to_le32(cmd_type_len | buffer_info->length);
4053                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4054                 count--;
4055                 i++;
4056                 if (i == tx_ring->count)
4057                         i = 0;
4058         } while (count > 0);
4059
4060         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4061         /* Force memory writes to complete before letting h/w
4062          * know there are new descriptors to fetch.  (Only
4063          * applicable for weak-ordered memory model archs,
4064          * such as IA-64). */
4065         wmb();
4066
4067         tx_ring->next_to_use = i;
4068         writel(i, tx_ring->tail);
4069         /* we need this if more than one processor can write to our tail
4070          * at a time; it synchronizes IO on IA64/Altix systems */
4071         mmiowb();
4072 }
4073
4074 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4075 {
4076         struct net_device *netdev = tx_ring->netdev;
4077
4078         netif_stop_subqueue(netdev, tx_ring->queue_index);
4079
4080         /* Herbert's original patch had:
4081          *  smp_mb__after_netif_stop_queue();
4082          * but since that doesn't exist yet, just open code it. */
4083         smp_mb();
4084
4085         /* We need to check again in case another CPU has just
4086          * made room available. */
4087         if (igb_desc_unused(tx_ring) < size)
4088                 return -EBUSY;
4089
4090         /* A reprieve! */
4091         netif_wake_subqueue(netdev, tx_ring->queue_index);
4092
4093         u64_stats_update_begin(&tx_ring->tx_syncp2);
4094         tx_ring->tx_stats.restart_queue2++;
4095         u64_stats_update_end(&tx_ring->tx_syncp2);
4096
4097         return 0;
4098 }
4099
4100 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4101 {
4102         if (igb_desc_unused(tx_ring) >= size)
4103                 return 0;
4104         return __igb_maybe_stop_tx(tx_ring, size);
4105 }
4106
4107 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4108                                     struct igb_ring *tx_ring)
4109 {
4110         int tso = 0, count;
4111         u32 tx_flags = 0;
4112         u16 first;
4113         u8 hdr_len = 0;
4114
4115         /* need: 1 descriptor per page,
4116          *       + 2 desc gap to keep tail from touching head,
4117          *       + 1 desc for skb->data,
4118          *       + 1 desc for context descriptor,
4119          * otherwise try next time */
4120         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4121                 /* this is a hard error */
4122                 return NETDEV_TX_BUSY;
4123         }
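        /*
         * e.g. a 3-fragment skb reserves 3 + 4 = 7 descriptors: three for
         * the paged fragments, one for skb->data, one for the context
         * descriptor, plus the two-descriptor gap that keeps tail from
         * touching head
         */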
4124
4125         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4126                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4127                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4128         }
4129
4130         if (vlan_tx_tag_present(skb)) {
4131                 tx_flags |= IGB_TX_FLAGS_VLAN;
4132                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4133         }
4134
4135         if (skb->protocol == htons(ETH_P_IP))
4136                 tx_flags |= IGB_TX_FLAGS_IPV4;
4137
4138         first = tx_ring->next_to_use;
4139         if (skb_is_gso(skb)) {
4140                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4141
4142                 if (tso < 0) {
4143                         dev_kfree_skb_any(skb);
4144                         return NETDEV_TX_OK;
4145                 }
4146         }
4147
4148         if (tso)
4149                 tx_flags |= IGB_TX_FLAGS_TSO;
4150         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4151                  (skb->ip_summed == CHECKSUM_PARTIAL))
4152                 tx_flags |= IGB_TX_FLAGS_CSUM;
4153
4154         /*
4155          * count reflects descriptors mapped; if 0 then a mapping error
4156          * has occurred and we need to rewind the descriptor queue
4157          */
4158         count = igb_tx_map_adv(tx_ring, skb, first);
4159         if (!count) {
4160                 dev_kfree_skb_any(skb);
4161                 tx_ring->buffer_info[first].time_stamp = 0;
4162                 tx_ring->next_to_use = first;
4163                 return NETDEV_TX_OK;
4164         }
4165
4166         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4167
4168         /* Make sure there is space in the ring for the next send. */
4169         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4170
4171         return NETDEV_TX_OK;
4172 }
4173
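/**
 * igb_xmit_frame_adv - transmit entry point
 * @skb: buffer to transmit
 * @netdev: network interface device structure
 *
 * Drops the skb if the adapter is going down or the skb is empty; otherwise
 * selects a Tx ring based on the skb's queue mapping and hands the skb to
 * igb_xmit_frame_ring_adv().
 **/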
4174 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4175                                       struct net_device *netdev)
4176 {
4177         struct igb_adapter *adapter = netdev_priv(netdev);
4178         struct igb_ring *tx_ring;
4179         int r_idx = 0;
4180
4181         if (test_bit(__IGB_DOWN, &adapter->state)) {
4182                 dev_kfree_skb_any(skb);
4183                 return NETDEV_TX_OK;
4184         }
4185
4186         if (skb->len <= 0) {
4187                 dev_kfree_skb_any(skb);
4188                 return NETDEV_TX_OK;
4189         }
4190
4191         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4192         tx_ring = adapter->multi_tx_table[r_idx];
4193
4194         /* This goes back to the question of how to logically map a tx queue
4195          * to a flow.  Right now, using multiple tx queues slightly degrades
4196          * performance.  If the stack breaks away from a single qdisc
4197          * implementation, we can look at this again. */
4198         return igb_xmit_frame_ring_adv(skb, tx_ring);
4199 }
4200
4201 /**
4202  * igb_tx_timeout - Respond to a Tx Hang
4203  * @netdev: network interface device structure
4204  **/
4205 static void igb_tx_timeout(struct net_device *netdev)
4206 {
4207         struct igb_adapter *adapter = netdev_priv(netdev);
4208         struct e1000_hw *hw = &adapter->hw;
4209
4210         /* Do the reset outside of interrupt context */
4211         adapter->tx_timeout_count++;
4212
4213         if (hw->mac.type == e1000_82580)
4214                 hw->dev_spec._82575.global_device_reset = true;
4215
4216         schedule_work(&adapter->reset_task);
4217         wr32(E1000_EICS,
4218              (adapter->eims_enable_mask & ~adapter->eims_other));
4219 }
4220
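/**
 * igb_reset_task - reset the adapter outside of interrupt context
 * @work: pointer to our reset_task work_struct
 **/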
4221 static void igb_reset_task(struct work_struct *work)
4222 {
4223         struct igb_adapter *adapter;
4224         adapter = container_of(work, struct igb_adapter, reset_task);
4225
4226         igb_dump(adapter);
4227         netdev_err(adapter->netdev, "Reset adapter\n");
4228         igb_reinit_locked(adapter);
4229 }
4230
4231 /**
4232  * igb_get_stats64 - Get System Network Statistics
4233  * @netdev: network interface device structure
4234  * @stats: rtnl_link_stats64 pointer
4235  *
4236  **/
4237 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4238                                                  struct rtnl_link_stats64 *stats)
4239 {
4240         struct igb_adapter *adapter = netdev_priv(netdev);
4241
4242         spin_lock(&adapter->stats64_lock);
4243         igb_update_stats(adapter, &adapter->stats64);
4244         memcpy(stats, &adapter->stats64, sizeof(*stats));
4245         spin_unlock(&adapter->stats64_lock);
4246
4247         return stats;
4248 }
4249
4250 /**
4251  * igb_change_mtu - Change the Maximum Transfer Unit
4252  * @netdev: network interface device structure
4253  * @new_mtu: new value for maximum frame size
4254  *
4255  * Returns 0 on success, negative on failure
4256  **/
4257 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4258 {
4259         struct igb_adapter *adapter = netdev_priv(netdev);
4260         struct pci_dev *pdev = adapter->pdev;
4261         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4262         u32 rx_buffer_len, i;
4263
4264         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4265                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4266                 return -EINVAL;
4267         }
4268
4269         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4270                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4271                 return -EINVAL;
4272         }
4273
4274         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4275                 msleep(1);
4276
4277         /* igb_down has a dependency on max_frame_size */
4278         adapter->max_frame_size = max_frame;
4279
4280         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4281          * means we reserve 2 more; this pushes us to allocate from the next
4282          * larger slab size,
4283          * i.e. RXBUFFER_2048 --> size-4096 slab
4284          */
4285
4286         if (adapter->hw.mac.type == e1000_82580)
4287                 max_frame += IGB_TS_HDR_LEN;
4288
4289         if (max_frame <= IGB_RXBUFFER_1024)
4290                 rx_buffer_len = IGB_RXBUFFER_1024;
4291         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4292                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4293         else
4294                 rx_buffer_len = IGB_RXBUFFER_128;
4295
4296         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4297              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4298                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4299
4300         if ((adapter->hw.mac.type == e1000_82580) &&
4301             (rx_buffer_len == IGB_RXBUFFER_128))
4302                 rx_buffer_len += IGB_RXBUFFER_64;
4303
4304         if (netif_running(netdev))
4305                 igb_down(adapter);
4306
4307         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4308                  netdev->mtu, new_mtu);
4309         netdev->mtu = new_mtu;
4310
4311         for (i = 0; i < adapter->num_rx_queues; i++)
4312                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4313
4314         if (netif_running(netdev))
4315                 igb_up(adapter);
4316         else
4317                 igb_reset(adapter);
4318
4319         clear_bit(__IGB_RESETTING, &adapter->state);
4320
4321         return 0;
4322 }
4323
4324 /**
4325  * igb_update_stats - Update the board statistics counters
4326  * @adapter: board private structure
4327  **/
4328
4329 void igb_update_stats(struct igb_adapter *adapter,
4330                       struct rtnl_link_stats64 *net_stats)
4331 {
4332         struct e1000_hw *hw = &adapter->hw;
4333         struct pci_dev *pdev = adapter->pdev;
4334         u32 reg, mpc;
4335         u16 phy_tmp;
4336         int i;
4337         u64 bytes, packets;
4338         unsigned int start;
4339         u64 _bytes, _packets;
4340
4341 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4342
4343         /*
4344          * Prevent stats update while adapter is being reset, or if the pci
4345          * connection is down.
4346          */
4347         if (adapter->link_speed == 0)
4348                 return;
4349         if (pci_channel_offline(pdev))
4350                 return;
4351
4352         bytes = 0;
4353         packets = 0;
4354         for (i = 0; i < adapter->num_rx_queues; i++) {
4355                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4356                 struct igb_ring *ring = adapter->rx_ring[i];
4357
4358                 ring->rx_stats.drops += rqdpc_tmp;
4359                 net_stats->rx_fifo_errors += rqdpc_tmp;
4360
4361                 do {
4362                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4363                         _bytes = ring->rx_stats.bytes;
4364                         _packets = ring->rx_stats.packets;
4365                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4366                 bytes += _bytes;
4367                 packets += _packets;
4368         }
4369
4370         net_stats->rx_bytes = bytes;
4371         net_stats->rx_packets = packets;
4372
4373         bytes = 0;
4374         packets = 0;
4375         for (i = 0; i < adapter->num_tx_queues; i++) {
4376                 struct igb_ring *ring = adapter->tx_ring[i];
4377                 do {
4378                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4379                         _bytes = ring->tx_stats.bytes;
4380                         _packets = ring->tx_stats.packets;
4381                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4382                 bytes += _bytes;
4383                 packets += _packets;
4384         }
4385         net_stats->tx_bytes = bytes;
4386         net_stats->tx_packets = packets;
4387
4388         /* read stats registers */
4389         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4390         adapter->stats.gprc += rd32(E1000_GPRC);
4391         adapter->stats.gorc += rd32(E1000_GORCL);
4392         rd32(E1000_GORCH); /* clear GORCL */
4393         adapter->stats.bprc += rd32(E1000_BPRC);
4394         adapter->stats.mprc += rd32(E1000_MPRC);
4395         adapter->stats.roc += rd32(E1000_ROC);
4396
4397         adapter->stats.prc64 += rd32(E1000_PRC64);
4398         adapter->stats.prc127 += rd32(E1000_PRC127);
4399         adapter->stats.prc255 += rd32(E1000_PRC255);
4400         adapter->stats.prc511 += rd32(E1000_PRC511);
4401         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4402         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4403         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4404         adapter->stats.sec += rd32(E1000_SEC);
4405
4406         mpc = rd32(E1000_MPC);
4407         adapter->stats.mpc += mpc;
4408         net_stats->rx_fifo_errors += mpc;
4409         adapter->stats.scc += rd32(E1000_SCC);
4410         adapter->stats.ecol += rd32(E1000_ECOL);
4411         adapter->stats.mcc += rd32(E1000_MCC);
4412         adapter->stats.latecol += rd32(E1000_LATECOL);
4413         adapter->stats.dc += rd32(E1000_DC);
4414         adapter->stats.rlec += rd32(E1000_RLEC);
4415         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4416         adapter->stats.xontxc += rd32(E1000_XONTXC);
4417         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4418         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4419         adapter->stats.fcruc += rd32(E1000_FCRUC);
4420         adapter->stats.gptc += rd32(E1000_GPTC);
4421         adapter->stats.gotc += rd32(E1000_GOTCL);
4422         rd32(E1000_GOTCH); /* clear GOTCL */
4423         adapter->stats.rnbc += rd32(E1000_RNBC);
4424         adapter->stats.ruc += rd32(E1000_RUC);
4425         adapter->stats.rfc += rd32(E1000_RFC);
4426         adapter->stats.rjc += rd32(E1000_RJC);
4427         adapter->stats.tor += rd32(E1000_TORH);
4428         adapter->stats.tot += rd32(E1000_TOTH);
4429         adapter->stats.tpr += rd32(E1000_TPR);
4430
4431         adapter->stats.ptc64 += rd32(E1000_PTC64);
4432         adapter->stats.ptc127 += rd32(E1000_PTC127);
4433         adapter->stats.ptc255 += rd32(E1000_PTC255);
4434         adapter->stats.ptc511 += rd32(E1000_PTC511);
4435         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4436         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4437
4438         adapter->stats.mptc += rd32(E1000_MPTC);
4439         adapter->stats.bptc += rd32(E1000_BPTC);
4440
4441         adapter->stats.tpt += rd32(E1000_TPT);
4442         adapter->stats.colc += rd32(E1000_COLC);
4443
4444         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4445         /* read internal phy specific stats */
4446         reg = rd32(E1000_CTRL_EXT);
4447         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4448                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4449                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4450         }
4451
4452         adapter->stats.tsctc += rd32(E1000_TSCTC);
4453         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4454
4455         adapter->stats.iac += rd32(E1000_IAC);
4456         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4457         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4458         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4459         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4460         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4461         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4462         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4463         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4464
4465         /* Fill out the OS statistics structure */
4466         net_stats->multicast = adapter->stats.mprc;
4467         net_stats->collisions = adapter->stats.colc;
4468
4469         /* Rx Errors */
4470
4471         /* RLEC on some newer hardware can be incorrect so build
4472          * our own version based on RUC and ROC */
4473         net_stats->rx_errors = adapter->stats.rxerrc +
4474                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4475                 adapter->stats.ruc + adapter->stats.roc +
4476                 adapter->stats.cexterr;
4477         net_stats->rx_length_errors = adapter->stats.ruc +
4478                                       adapter->stats.roc;
4479         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4480         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4481         net_stats->rx_missed_errors = adapter->stats.mpc;
4482
4483         /* Tx Errors */
4484         net_stats->tx_errors = adapter->stats.ecol +
4485                                adapter->stats.latecol;
4486         net_stats->tx_aborted_errors = adapter->stats.ecol;
4487         net_stats->tx_window_errors = adapter->stats.latecol;
4488         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4489
4490         /* Tx Dropped needs to be maintained elsewhere */
4491
4492         /* Phy Stats */
4493         if (hw->phy.media_type == e1000_media_type_copper) {
4494                 if ((adapter->link_speed == SPEED_1000) &&
4495                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4496                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4497                         adapter->phy_stats.idle_errors += phy_tmp;
4498                 }
4499         }
4500
4501         /* Management Stats */
4502         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4503         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4504         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4505 }
4506
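/**
 * igb_msix_other - MSI-X handler for link, mailbox and error events
 * @irq: interrupt number
 * @data: pointer to our adapter structure
 **/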
4507 static irqreturn_t igb_msix_other(int irq, void *data)
4508 {
4509         struct igb_adapter *adapter = data;
4510         struct e1000_hw *hw = &adapter->hw;
4511         u32 icr = rd32(E1000_ICR);
4512         /* reading ICR causes bit 31 of EICR to be cleared */
4513
4514         if (icr & E1000_ICR_DRSTA)
4515                 schedule_work(&adapter->reset_task);
4516
4517         if (icr & E1000_ICR_DOUTSYNC) {
4518                 /* HW is reporting DMA is out of sync */
4519                 adapter->stats.doosync++;
4520         }
4521
4522         /* Check for a mailbox event */
4523         if (icr & E1000_ICR_VMMB)
4524                 igb_msg_task(adapter);
4525
4526         if (icr & E1000_ICR_LSC) {
4527                 hw->mac.get_link_status = 1;
4528                 /* guard against interrupt when we're going down */
4529                 if (!test_bit(__IGB_DOWN, &adapter->state))
4530                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4531         }
4532
4533         if (adapter->vfs_allocated_count)
4534                 wr32(E1000_IMS, E1000_IMS_LSC |
4535                                 E1000_IMS_VMMB |
4536                                 E1000_IMS_DOUTSYNC);
4537         else
4538                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4539         wr32(E1000_EIMS, adapter->eims_other);
4540
4541         return IRQ_HANDLED;
4542 }
4543
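/**
 * igb_write_itr - write a pending interrupt throttle rate to the hardware
 * @q_vector: vector whose ITR register is updated
 **/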
4544 static void igb_write_itr(struct igb_q_vector *q_vector)
4545 {
4546         struct igb_adapter *adapter = q_vector->adapter;
4547         u32 itr_val = q_vector->itr_val & 0x7FFC;
4548
4549         if (!q_vector->set_itr)
4550                 return;
4551
4552         if (!itr_val)
4553                 itr_val = 0x4;
4554
4555         if (adapter->hw.mac.type == e1000_82575)
4556                 itr_val |= itr_val << 16;
4557         else
4558                 itr_val |= 0x8000000;
4559
4560         writel(itr_val, q_vector->itr_register);
4561         q_vector->set_itr = 0;
4562 }
4563
4564 static irqreturn_t igb_msix_ring(int irq, void *data)
4565 {
4566         struct igb_q_vector *q_vector = data;
4567
4568         /* Write the ITR value calculated from the previous interrupt. */
4569         igb_write_itr(q_vector);
4570
4571         napi_schedule(&q_vector->napi);
4572
4573         return IRQ_HANDLED;
4574 }
4575
4576 #ifdef CONFIG_IGB_DCA
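/**
 * igb_update_dca - retarget DCA hints at the CPU servicing this vector
 * @q_vector: vector whose Tx/Rx rings are updated
 **/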
4577 static void igb_update_dca(struct igb_q_vector *q_vector)
4578 {
4579         struct igb_adapter *adapter = q_vector->adapter;
4580         struct e1000_hw *hw = &adapter->hw;
4581         int cpu = get_cpu();
4582
4583         if (q_vector->cpu == cpu)
4584                 goto out_no_update;
4585
4586         if (q_vector->tx_ring) {
4587                 int q = q_vector->tx_ring->reg_idx;
4588                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4589                 if (hw->mac.type == e1000_82575) {
4590                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4591                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4592                 } else {
4593                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4594                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4595                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4596                 }
4597                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4598                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4599         }
4600         if (q_vector->rx_ring) {
4601                 int q = q_vector->rx_ring->reg_idx;
4602                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4603                 if (hw->mac.type == e1000_82575) {
4604                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4605                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4606                 } else {
4607                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4608                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4609                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4610                 }
4611                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4612                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4613                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4614                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4615         }
4616         q_vector->cpu = cpu;
4617 out_no_update:
4618         put_cpu();
4619 }
4620
4621 static void igb_setup_dca(struct igb_adapter *adapter)
4622 {
4623         struct e1000_hw *hw = &adapter->hw;
4624         int i;
4625
4626         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4627                 return;
4628
4629         /* Always use CB2 mode, difference is masked in the CB driver. */
4630         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4631
4632         for (i = 0; i < adapter->num_q_vectors; i++) {
4633                 adapter->q_vector[i]->cpu = -1;
4634                 igb_update_dca(adapter->q_vector[i]);
4635         }
4636 }
4637
4638 static int __igb_notify_dca(struct device *dev, void *data)
4639 {
4640         struct net_device *netdev = dev_get_drvdata(dev);
4641         struct igb_adapter *adapter = netdev_priv(netdev);
4642         struct pci_dev *pdev = adapter->pdev;
4643         struct e1000_hw *hw = &adapter->hw;
4644         unsigned long event = *(unsigned long *)data;
4645
4646         switch (event) {
4647         case DCA_PROVIDER_ADD:
4648                 /* if already enabled, don't do it again */
4649                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4650                         break;
4651                 if (dca_add_requester(dev) == 0) {
4652                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4653                         dev_info(&pdev->dev, "DCA enabled\n");
4654                         igb_setup_dca(adapter);
4655                         break;
4656                 }
4657                 /* Fall Through since DCA is disabled. */
4658         case DCA_PROVIDER_REMOVE:
4659                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4660                         /* without this a class_device is left
4661                          * hanging around in the sysfs model */
4662                         dca_remove_requester(dev);
4663                         dev_info(&pdev->dev, "DCA disabled\n");
4664                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4665                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4666                 }
4667                 break;
4668         }
4669
4670         return 0;
4671 }
4672
4673 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4674                           void *p)
4675 {
4676         int ret_val;
4677
4678         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4679                                          __igb_notify_dca);
4680
4681         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4682 }
4683 #endif /* CONFIG_IGB_DCA */
4684
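/**
 * igb_ping_all_vfs - send a control message to every allocated VF
 * @adapter: board private structure
 **/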
4685 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4686 {
4687         struct e1000_hw *hw = &adapter->hw;
4688         u32 ping;
4689         int i;
4690
4691         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4692                 ping = E1000_PF_CONTROL_MSG;
4693                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4694                         ping |= E1000_VT_MSGTYPE_CTS;
4695                 igb_write_mbx(hw, &ping, 1, i);
4696         }
4697 }
4698
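/**
 * igb_set_vf_promisc - handle a VF request to change promiscuous modes
 * @adapter: board private structure
 * @msgbuf: mailbox message from the VF
 * @vf: VF number
 **/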
4699 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4700 {
4701         struct e1000_hw *hw = &adapter->hw;
4702         u32 vmolr = rd32(E1000_VMOLR(vf));
4703         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4704
4705         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4706                             IGB_VF_FLAG_MULTI_PROMISC);
4707         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4708
4709         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4710                 vmolr |= E1000_VMOLR_MPME;
4711                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4712                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4713         } else {
4714                 /*
4715                  * if we have hashes and we are clearing a multicast promisc
4716                  * flag we need to write the hashes to the MTA as this step
4717                  * was previously skipped
4718                  */
4719                 if (vf_data->num_vf_mc_hashes > 30) {
4720                         vmolr |= E1000_VMOLR_MPME;
4721                 } else if (vf_data->num_vf_mc_hashes) {
4722                         int j;
4723                         vmolr |= E1000_VMOLR_ROMPE;
4724                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4725                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4726                 }
4727         }
4728
4729         wr32(E1000_VMOLR(vf), vmolr);
4730
4731         /* if there are flags left unprocessed, they are likely not supported */
4732         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4733                 return -EINVAL;
4734
4735         return 0;
4737 }
4738
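/**
 * igb_set_vf_multicasts - store and apply a VF multicast hash list
 * @adapter: board private structure
 * @msgbuf: mailbox message containing the hash list
 * @vf: VF number
 **/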
4739 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4740                                   u32 *msgbuf, u32 vf)
4741 {
4742         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4743         u16 *hash_list = (u16 *)&msgbuf[1];
4744         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4745         int i;
4746
4747         /* salt away the number of multicast addresses assigned
4748          * to this VF for later use to restore when the PF multicast
4749          * list changes
4750          */
4751         vf_data->num_vf_mc_hashes = n;
4752
4753         /* only up to 30 hash values supported */
4754         if (n > 30)
4755                 n = 30;
4756
4757         /* store the hashes for later use */
4758         for (i = 0; i < n; i++)
4759                 vf_data->vf_mc_hashes[i] = hash_list[i];
4760
4761         /* Flush and reset the mta with the new values */
4762         igb_set_rx_mode(adapter->netdev);
4763
4764         return 0;
4765 }
4766
4767 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4768 {
4769         struct e1000_hw *hw = &adapter->hw;
4770         struct vf_data_storage *vf_data;
4771         int i, j;
4772
4773         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4774                 u32 vmolr = rd32(E1000_VMOLR(i));
4775                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4776
4777                 vf_data = &adapter->vf_data[i];
4778
4779                 if ((vf_data->num_vf_mc_hashes > 30) ||
4780                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4781                         vmolr |= E1000_VMOLR_MPME;
4782                 } else if (vf_data->num_vf_mc_hashes) {
4783                         vmolr |= E1000_VMOLR_ROMPE;
4784                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4785                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4786                 }
4787                 wr32(E1000_VMOLR(i), vmolr);
4788         }
4789 }
4790
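/**
 * igb_clear_vf_vfta - remove a VF from every VLAN filter pool it is in
 * @adapter: board private structure
 * @vf: VF number
 **/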
4791 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4792 {
4793         struct e1000_hw *hw = &adapter->hw;
4794         u32 pool_mask, reg, vid;
4795         int i;
4796
4797         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4798
4799         /* Find the vlan filter for this id */
4800         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4801                 reg = rd32(E1000_VLVF(i));
4802
4803                 /* remove the vf from the pool */
4804                 reg &= ~pool_mask;
4805
4806                 /* if pool is empty then remove entry from vfta */
4807                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4808                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4809                         vid = reg & E1000_VLVF_VLANID_MASK;
4810                         igb_vfta_set(hw, vid, false);
4811                         reg = 0;
4812                 }
4813
4814                 wr32(E1000_VLVF(i), reg);
4815         }
4816
4817         adapter->vf_data[vf].vlans_enabled = 0;
4818 }
4819
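/**
 * igb_vlvf_set - add or remove a pool from the VLVF entry for a VLAN id
 * @adapter: board private structure
 * @vid: VLAN id
 * @add: true to add the pool to the filter, false to remove it
 * @vf: VF (or PF pool) number
 **/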
4820 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4821 {
4822         struct e1000_hw *hw = &adapter->hw;
4823         u32 reg, i;
4824
4825         /* The vlvf table only exists on 82576 hardware and newer */
4826         if (hw->mac.type < e1000_82576)
4827                 return -1;
4828
4829         /* we only need to do this if VMDq is enabled */
4830         if (!adapter->vfs_allocated_count)
4831                 return -1;
4832
4833         /* Find the vlan filter for this id */
4834         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4835                 reg = rd32(E1000_VLVF(i));
4836                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4837                     vid == (reg & E1000_VLVF_VLANID_MASK))
4838                         break;
4839         }
4840
4841         if (add) {
4842                 if (i == E1000_VLVF_ARRAY_SIZE) {
4843                         /* Did not find a matching VLAN ID entry that was
4844                          * enabled.  Search for a free filter entry, i.e.
4845                          * one without the enable bit set
4846                          */
4847                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4848                                 reg = rd32(E1000_VLVF(i));
4849                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4850                                         break;
4851                         }
4852                 }
4853                 if (i < E1000_VLVF_ARRAY_SIZE) {
4854                         /* Found an enabled/available entry */
4855                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4856
4857                         /* if !enabled we need to set this up in vfta */
4858                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4859                                 /* add VID to filter table */
4860                                 igb_vfta_set(hw, vid, true);
4861                                 reg |= E1000_VLVF_VLANID_ENABLE;
4862                         }
4863                         reg &= ~E1000_VLVF_VLANID_MASK;
4864                         reg |= vid;
4865                         wr32(E1000_VLVF(i), reg);
4866
4867                         /* do not modify RLPML for PF devices */
4868                         if (vf >= adapter->vfs_allocated_count)
4869                                 return 0;
4870
4871                         if (!adapter->vf_data[vf].vlans_enabled) {
4872                                 u32 size;
4873                                 reg = rd32(E1000_VMOLR(vf));
4874                                 size = reg & E1000_VMOLR_RLPML_MASK;
4875                                 size += 4;
4876                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4877                                 reg |= size;
4878                                 wr32(E1000_VMOLR(vf), reg);
4879                         }
4880
4881                         adapter->vf_data[vf].vlans_enabled++;
4882                         return 0;
4883                 }
4884         } else {
4885                 if (i < E1000_VLVF_ARRAY_SIZE) {
4886                         /* remove vf from the pool */
4887                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4888                         /* if pool is empty then remove entry from vfta */
4889                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4890                                 reg = 0;
4891                                 igb_vfta_set(hw, vid, false);
4892                         }
4893                         wr32(E1000_VLVF(i), reg);
4894
4895                         /* do not modify RLPML for PF devices */
4896                         if (vf >= adapter->vfs_allocated_count)
4897                                 return 0;
4898
4899                         adapter->vf_data[vf].vlans_enabled--;
4900                         if (!adapter->vf_data[vf].vlans_enabled) {
4901                                 u32 size;
4902                                 reg = rd32(E1000_VMOLR(vf));
4903                                 size = reg & E1000_VMOLR_RLPML_MASK;
4904                                 size -= 4;
4905                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4906                                 reg |= size;
4907                                 wr32(E1000_VMOLR(vf), reg);
4908                         }
4909                 }
4910         }
4911         return 0;
4912 }
4913
4914 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4915 {
4916         struct e1000_hw *hw = &adapter->hw;
4917
4918         if (vid)
4919                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4920         else
4921                 wr32(E1000_VMVIR(vf), 0);
4922 }
4923
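/**
 * igb_ndo_set_vf_vlan - assign a port VLAN and priority to a VF
 * @netdev: network interface device structure
 * @vf: VF number
 * @vlan: VLAN id, 0 to clear the port VLAN
 * @qos: priority for the VLAN tag
 **/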
4924 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4925                                int vf, u16 vlan, u8 qos)
4926 {
4927         int err = 0;
4928         struct igb_adapter *adapter = netdev_priv(netdev);
4929
4930         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4931                 return -EINVAL;
4932         if (vlan || qos) {
4933                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4934                 if (err)
4935                         goto out;
4936                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4937                 igb_set_vmolr(adapter, vf, !vlan);
4938                 adapter->vf_data[vf].pf_vlan = vlan;
4939                 adapter->vf_data[vf].pf_qos = qos;
4940                 dev_info(&adapter->pdev->dev,
4941                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4942                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4943                         dev_warn(&adapter->pdev->dev,
4944                                  "The VF VLAN has been set,"
4945                                  " but the PF device is not up.\n");
4946                         dev_warn(&adapter->pdev->dev,
4947                                  "Bring the PF device up before"
4948                                  " attempting to use the VF device.\n");
4949                 }
4950         } else {
4951                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4952                                    false, vf);
4953                 igb_set_vmvir(adapter, vlan, vf);
4954                 igb_set_vmolr(adapter, vf, true);
4955                 adapter->vf_data[vf].pf_vlan = 0;
4956                 adapter->vf_data[vf].pf_qos = 0;
4957         }
4958 out:
4959         return err;
4960 }
4961
4962 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4963 {
4964         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4965         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4966
4967         return igb_vlvf_set(adapter, vid, add, vf);
4968 }
4969
4970 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4971 {
4972         /* clear flags */
4973         adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4974         adapter->vf_data[vf].last_nack = jiffies;
4975
4976         /* reset offloads to defaults */
4977         igb_set_vmolr(adapter, vf, true);
4978
4979         /* reset vlans for device */
4980         igb_clear_vf_vfta(adapter, vf);
4981         if (adapter->vf_data[vf].pf_vlan)
4982                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4983                                     adapter->vf_data[vf].pf_vlan,
4984                                     adapter->vf_data[vf].pf_qos);
4985         else
4986                 igb_clear_vf_vfta(adapter, vf);
4987
4988         /* reset multicast table array for vf */
4989         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4990
4991         /* Flush and reset the mta with the new values */
4992         igb_set_rx_mode(adapter->netdev);
4993 }
4994
4995 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4996 {
4997         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4998
4999         /* generate a new mac address as we were hotplug removed/added */
5000         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5001                 random_ether_addr(vf_mac);
5002
5003         /* process remaining reset events */
5004         igb_vf_reset(adapter, vf);
5005 }
5006
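/**
 * igb_vf_reset_msg - respond to a VF reset request
 * @adapter: board private structure
 * @vf: VF number
 *
 * Resets the VF state, programs its MAC address, enables Tx/Rx for the VF
 * and replies with an ACK carrying the MAC address.
 **/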
5007 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5008 {
5009         struct e1000_hw *hw = &adapter->hw;
5010         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5011         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5012         u32 reg, msgbuf[3];
5013         u8 *addr = (u8 *)(&msgbuf[1]);
5014
5015         /* process all the same items cleared in a function level reset */
5016         igb_vf_reset(adapter, vf);
5017
5018         /* set vf mac address */
5019         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5020
5021         /* enable transmit and receive for vf */
5022         reg = rd32(E1000_VFTE);
5023         wr32(E1000_VFTE, reg | (1 << vf));
5024         reg = rd32(E1000_VFRE);
5025         wr32(E1000_VFRE, reg | (1 << vf));
5026
5027         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
5028
5029         /* reply to reset with ack and vf mac address */
5030         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5031         memcpy(addr, vf_mac, 6);
5032         igb_write_mbx(hw, msgbuf, 3, vf);
5033 }
5034
5035 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5036 {
5037         /*
5038          * The VF MAC Address is stored in a packed array of bytes
5039          * starting at the second 32 bit word of the msg array
5040          */
5041         unsigned char *addr = (unsigned char *)&msg[1];
5042         int err = -1;
5043
5044         if (is_valid_ether_addr(addr))
5045                 err = igb_set_vf_mac(adapter, vf, addr);
5046
5047         return err;
5048 }
5049
5050 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5051 {
5052         struct e1000_hw *hw = &adapter->hw;
5053         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5054         u32 msg = E1000_VT_MSGTYPE_NACK;
5055
5056         /* if device isn't clear to send it shouldn't be reading either */
5057         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5058             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5059                 igb_write_mbx(hw, &msg, 1, vf);
5060                 vf_data->last_nack = jiffies;
5061         }
5062 }
5063
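/**
 * igb_rcv_msg_from_vf - read and dispatch a pending VF mailbox message
 * @adapter: board private structure
 * @vf: VF number
 **/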
5064 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5065 {
5066         struct pci_dev *pdev = adapter->pdev;
5067         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5068         struct e1000_hw *hw = &adapter->hw;
5069         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5070         s32 retval;
5071
5072         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5073
5074         if (retval) {
5075                 /* if receive failed revoke VF CTS stats and restart init */
5076                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5077                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5078                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5079                         return;
5080                 goto out;
5081         }
5082
5083         /* this is a message we already processed, do nothing */
5084         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5085                 return;
5086
5087         /*
5088          * until the vf completes a reset it should not be
5089          * allowed to start any configuration.
5090          */
5091
5092         if (msgbuf[0] == E1000_VF_RESET) {
5093                 igb_vf_reset_msg(adapter, vf);
5094                 return;
5095         }
5096
5097         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5098                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5099                         return;
5100                 retval = -1;
5101                 goto out;
5102         }
5103
5104         switch (msgbuf[0] & 0xFFFF) {
5105         case E1000_VF_SET_MAC_ADDR:
5106                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5107                 break;
5108         case E1000_VF_SET_PROMISC:
5109                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5110                 break;
5111         case E1000_VF_SET_MULTICAST:
5112                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5113                 break;
5114         case E1000_VF_SET_LPE:
5115                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5116                 break;
5117         case E1000_VF_SET_VLAN:
5118                 if (adapter->vf_data[vf].pf_vlan)
5119                         retval = -1;
5120                 else
5121                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5122                 break;
5123         default:
5124                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5125                 retval = -1;
5126                 break;
5127         }
5128
5129         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5130 out:
5131         /* notify the VF of the results of what it sent us */
5132         if (retval)
5133                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5134         else
5135                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5136
5137         igb_write_mbx(hw, msgbuf, 1, vf);
5138 }
5139
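/**
 * igb_msg_task - service reset requests, messages and acks from all VFs
 * @adapter: board private structure
 **/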
5140 static void igb_msg_task(struct igb_adapter *adapter)
5141 {
5142         struct e1000_hw *hw = &adapter->hw;
5143         u32 vf;
5144
5145         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5146                 /* process any reset requests */
5147                 if (!igb_check_for_rst(hw, vf))
5148                         igb_vf_reset_event(adapter, vf);
5149
5150                 /* process any messages pending */
5151                 if (!igb_check_for_msg(hw, vf))
5152                         igb_rcv_msg_from_vf(adapter, vf);
5153
5154                 /* process any acks */
5155                 if (!igb_check_for_ack(hw, vf))
5156                         igb_rcv_ack_from_vf(adapter, vf);
5157         }
5158 }
5159
5160 /**
5161  *  igb_set_uta - Set unicast filter table address
5162  *  @adapter: board private structure
5163  *
5164  *  The unicast table address is a register array of 32-bit registers.
5165  *  The table is meant to be used in a way similar to how the MTA is used,
5166  *  however due to certain limitations in the hardware it is necessary to
5167  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5168  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5169  **/
5170 static void igb_set_uta(struct igb_adapter *adapter)
5171 {
5172         struct e1000_hw *hw = &adapter->hw;
5173         int i;
5174
5175         /* The UTA table only exists on 82576 hardware and newer */
5176         if (hw->mac.type < e1000_82576)
5177                 return;
5178
5179         /* we only need to do this if VMDq is enabled */
5180         if (!adapter->vfs_allocated_count)
5181                 return;
5182
5183         for (i = 0; i < hw->mac.uta_reg_count; i++)
5184                 array_wr32(E1000_UTA, i, ~0);
5185 }
5186
5187 /**
5188  * igb_intr_msi - Interrupt Handler
5189  * @irq: interrupt number
5190  * @data: pointer to a network interface device structure
5191  **/
5192 static irqreturn_t igb_intr_msi(int irq, void *data)
5193 {
5194         struct igb_adapter *adapter = data;
5195         struct igb_q_vector *q_vector = adapter->q_vector[0];
5196         struct e1000_hw *hw = &adapter->hw;
5197         /* read ICR disables interrupts using IAM */
5198         u32 icr = rd32(E1000_ICR);
5199
5200         igb_write_itr(q_vector);
5201
5202         if (icr & E1000_ICR_DRSTA)
5203                 schedule_work(&adapter->reset_task);
5204
5205         if (icr & E1000_ICR_DOUTSYNC) {
5206                 /* HW is reporting DMA is out of sync */
5207                 adapter->stats.doosync++;
5208         }
5209
5210         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5211                 hw->mac.get_link_status = 1;
5212                 if (!test_bit(__IGB_DOWN, &adapter->state))
5213                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5214         }
5215
5216         napi_schedule(&q_vector->napi);
5217
5218         return IRQ_HANDLED;
5219 }
5220
5221 /**
5222  * igb_intr - Legacy Interrupt Handler
5223  * @irq: interrupt number
5224  * @data: pointer to a network interface device structure
5225  **/
5226 static irqreturn_t igb_intr(int irq, void *data)
5227 {
5228         struct igb_adapter *adapter = data;
5229         struct igb_q_vector *q_vector = adapter->q_vector[0];
5230         struct e1000_hw *hw = &adapter->hw;
5231         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5232          * need for the IMC write */
5233         u32 icr = rd32(E1000_ICR);
5234         if (!icr)
5235                 return IRQ_NONE;  /* Not our interrupt */
5236
5237         igb_write_itr(q_vector);
5238
5239         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5240          * not set, then the adapter didn't send an interrupt */
5241         if (!(icr & E1000_ICR_INT_ASSERTED))
5242                 return IRQ_NONE;
5243
5244         if (icr & E1000_ICR_DRSTA)
5245                 schedule_work(&adapter->reset_task);
5246
5247         if (icr & E1000_ICR_DOUTSYNC) {
5248                 /* HW is reporting DMA is out of sync */
5249                 adapter->stats.doosync++;
5250         }
5251
5252         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5253                 hw->mac.get_link_status = 1;
5254                 /* guard against interrupt when we're going down */
5255                 if (!test_bit(__IGB_DOWN, &adapter->state))
5256                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5257         }
5258
5259         napi_schedule(&q_vector->napi);
5260
5261         return IRQ_HANDLED;
5262 }
5263
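/**
 * igb_ring_irq_enable - update adaptive ITR if needed and re-arm this vector
 * @q_vector: vector whose interrupt is re-enabled after polling
 **/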
5264 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5265 {
5266         struct igb_adapter *adapter = q_vector->adapter;
5267         struct e1000_hw *hw = &adapter->hw;
5268
5269         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5270             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5271                 if (!adapter->msix_entries)
5272                         igb_set_itr(adapter);
5273                 else
5274                         igb_update_ring_itr(q_vector);
5275         }
5276
5277         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5278                 if (adapter->msix_entries)
5279                         wr32(E1000_EIMS, q_vector->eims_value);
5280                 else
5281                         igb_irq_enable(adapter);
5282         }
5283 }
5284
5285 /**
5286  * igb_poll - NAPI Rx polling callback
5287  * @napi: napi polling structure
5288  * @budget: count of how many packets we should handle
5289  **/
5290 static int igb_poll(struct napi_struct *napi, int budget)
5291 {
5292         struct igb_q_vector *q_vector = container_of(napi,
5293                                                      struct igb_q_vector,
5294                                                      napi);
5295         int tx_clean_complete = 1, work_done = 0;
5296
5297 #ifdef CONFIG_IGB_DCA
5298         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5299                 igb_update_dca(q_vector);
5300 #endif
5301         if (q_vector->tx_ring)
5302                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5303
5304         if (q_vector->rx_ring)
5305                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5306
5307         if (!tx_clean_complete)
5308                 work_done = budget;
5309
5310         /* If not enough Rx work done, exit the polling mode */
5311         if (work_done < budget) {
5312                 napi_complete(napi);
5313                 igb_ring_irq_enable(q_vector);
5314         }
5315
5316         return work_done;
5317 }
5318
5319 /**
5320  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5321  * @adapter: board private structure
5322  * @shhwtstamps: timestamp structure to update
5323  * @regval: unsigned 64bit system time value.
5324  *
5325  * We need to convert the system time value stored in the RX/TXSTMP registers
5326  * into a hwtstamp which can be used by the upper level timestamping functions
5327  */
5328 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5329                                    struct skb_shared_hwtstamps *shhwtstamps,
5330                                    u64 regval)
5331 {
5332         u64 ns;
5333
5334         /*
5335          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5336          * 24 to match clock shift we setup earlier.
5337          */
5338         if (adapter->hw.mac.type == e1000_82580)
5339                 regval <<= IGB_82580_TSYNC_SHIFT;
5340
5341         ns = timecounter_cyc2time(&adapter->clock, regval);
5342         timecompare_update(&adapter->compare, ns);
5343         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5344         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5345         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5346 }
5347
5348 /**
5349  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5350  * @q_vector: pointer to q_vector containing needed info
5351  * @buffer: pointer to igb_buffer structure
5352  *
5353  * If we were asked to do hardware stamping and such a time stamp is
5354  * available, then it must have been for this skb here because we
5355  * allow only one such packet into the queue.
5356  */
5357 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5358 {
5359         struct igb_adapter *adapter = q_vector->adapter;
5360         struct e1000_hw *hw = &adapter->hw;
5361         struct skb_shared_hwtstamps shhwtstamps;
5362         u64 regval;
5363
5364         /* if skb does not support hw timestamp or TX stamp not valid exit */
5365         if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5366             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5367                 return;
5368
5369         regval = rd32(E1000_TXSTMPL);
5370         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5371
5372         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5373         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5374 }
5375
5376 /**
5377  * igb_clean_tx_irq - Reclaim resources after transmit completes
5378  * @q_vector: pointer to q_vector containing needed info
5379  * returns true if ring is completely cleaned
5380  **/
5381 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5382 {
5383         struct igb_adapter *adapter = q_vector->adapter;
5384         struct igb_ring *tx_ring = q_vector->tx_ring;
5385         struct net_device *netdev = tx_ring->netdev;
5386         struct e1000_hw *hw = &adapter->hw;
5387         struct igb_buffer *buffer_info;
5388         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5389         unsigned int total_bytes = 0, total_packets = 0;
5390         unsigned int i, eop, count = 0;
5391         bool cleaned = false;
5392
5393         i = tx_ring->next_to_clean;
5394         eop = tx_ring->buffer_info[i].next_to_watch;
5395         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5396
5397         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5398                (count < tx_ring->count)) {
5399                 rmb();  /* read buffer_info after eop_desc status */
5400                 for (cleaned = false; !cleaned; count++) {
5401                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5402                         buffer_info = &tx_ring->buffer_info[i];
5403                         cleaned = (i == eop);
5404
5405                         if (buffer_info->skb) {
5406                                 total_bytes += buffer_info->bytecount;
5407                                 /* gso_segs is currently only valid for tcp */
5408                                 total_packets += buffer_info->gso_segs;
5409                                 igb_tx_hwtstamp(q_vector, buffer_info);
5410                         }
5411
5412                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5413                         tx_desc->wb.status = 0;
5414
5415                         i++;
5416                         if (i == tx_ring->count)
5417                                 i = 0;
5418                 }
5419                 eop = tx_ring->buffer_info[i].next_to_watch;
5420                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5421         }
5422
5423         tx_ring->next_to_clean = i;
5424
5425         if (unlikely(count &&
5426                      netif_carrier_ok(netdev) &&
5427                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5428                 /* Make sure that anybody stopping the queue after this
5429                  * sees the new next_to_clean.
5430                  */
5431                 smp_mb();
5432                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5433                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5434                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5435
5436                         u64_stats_update_begin(&tx_ring->tx_syncp);
5437                         tx_ring->tx_stats.restart_queue++;
5438                         u64_stats_update_end(&tx_ring->tx_syncp);
5439                 }
5440         }
5441
5442         if (tx_ring->detect_tx_hung) {
5443                 /* Detect a transmit hang in hardware; this serializes the
5444                  * check with the clearing of time_stamp and movement of i */
5445                 tx_ring->detect_tx_hung = false;
5446                 if (tx_ring->buffer_info[i].time_stamp &&
5447                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5448                                (adapter->tx_timeout_factor * HZ)) &&
5449                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5450
5451                         /* detected Tx unit hang */
5452                         dev_err(tx_ring->dev,
5453                                 "Detected Tx Unit Hang\n"
5454                                 "  Tx Queue             <%d>\n"
5455                                 "  TDH                  <%x>\n"
5456                                 "  TDT                  <%x>\n"
5457                                 "  next_to_use          <%x>\n"
5458                                 "  next_to_clean        <%x>\n"
5459                                 "buffer_info[next_to_clean]\n"
5460                                 "  time_stamp           <%lx>\n"
5461                                 "  next_to_watch        <%x>\n"
5462                                 "  jiffies              <%lx>\n"
5463                                 "  desc.status          <%x>\n",
5464                                 tx_ring->queue_index,
5465                                 readl(tx_ring->head),
5466                                 readl(tx_ring->tail),
5467                                 tx_ring->next_to_use,
5468                                 tx_ring->next_to_clean,
5469                                 tx_ring->buffer_info[eop].time_stamp,
5470                                 eop,
5471                                 jiffies,
5472                                 eop_desc->wb.status);
5473                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5474                 }
5475         }
5476         tx_ring->total_bytes += total_bytes;
5477         tx_ring->total_packets += total_packets;
5478         u64_stats_update_begin(&tx_ring->tx_syncp);
5479         tx_ring->tx_stats.bytes += total_bytes;
5480         tx_ring->tx_stats.packets += total_packets;
5481         u64_stats_update_end(&tx_ring->tx_syncp);
5482         return count < tx_ring->count;
5483 }
5484
5485 /**
5486  * igb_receive_skb - helper function to handle rx indications
5487  * @q_vector: structure containing interrupt and ring information
5488  * @skb: packet to send up
5489  * @vlan_tag: vlan tag for packet
5490  **/
5491 static void igb_receive_skb(struct igb_q_vector *q_vector,
5492                             struct sk_buff *skb,
5493                             u16 vlan_tag)
5494 {
5495         struct igb_adapter *adapter = q_vector->adapter;
5496
5497         if (vlan_tag && adapter->vlgrp)
5498                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5499                                  vlan_tag, skb);
5500         else
5501                 napi_gro_receive(&q_vector->napi, skb);
5502 }
5503
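/**
 * igb_rx_checksum_adv - validate hardware Rx checksum results
 * @ring: ring the descriptor was received on
 * @status_err: status/error bits from the Rx descriptor
 * @skb: buffer whose ip_summed field is updated
 **/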
5504 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5505                                        u32 status_err, struct sk_buff *skb)
5506 {
5507         skb_checksum_none_assert(skb);
5508
5509         /* skip if the IXSM bit is set or Rx checksum is disabled through ethtool */
5510         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5511              (status_err & E1000_RXD_STAT_IXSM))
5512                 return;
5513
5514         /* TCP/UDP checksum error bit is set */
5515         if (status_err &
5516             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
                /*
                 * work around an errata with SCTP packets where the TCPE
                 * (aka L4E) bit is set incorrectly on 64 byte (60 byte
                 * without CRC) packets; count the error and let the stack
                 * re-verify the crc32c checksum
                 */
5522                 if ((skb->len == 60) &&
5523                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5524                         u64_stats_update_begin(&ring->rx_syncp);
5525                         ring->rx_stats.csum_err++;
5526                         u64_stats_update_end(&ring->rx_syncp);
5527                 }
5528                 /* let the stack verify checksum errors */
5529                 return;
5530         }
5531         /* It must be a TCP or UDP packet with a valid checksum */
5532         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5533                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5534
5535         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5536 }
5537
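/**
 * igb_rx_hwtstamp - retrieve the rx hardware time stamp for a packet
 * @q_vector: structure containing interrupt and ring information
 * @staterr: status/error bits from the receive descriptor
 * @skb: packet on which to attach the time stamp
 **/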
5538 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5539                                    struct sk_buff *skb)
5540 {
5541         struct igb_adapter *adapter = q_vector->adapter;
5542         struct e1000_hw *hw = &adapter->hw;
5543         u64 regval;
5544
5545         /*
5546          * If this bit is set, then the RX registers contain the time stamp. No
5547          * other packet will be time stamped until we read these registers, so
5548          * read the registers to make them available again. Because only one
5549          * packet can be time stamped at a time, we know that the register
5550          * values must belong to this one here and therefore we don't need to
5551          * compare any of the additional attributes stored for it.
5552          *
5553          * If nothing went wrong, then it should have a shared tx_flags that we
5554          * can turn into a skb_shared_hwtstamps.
5555          */
5556         if (staterr & E1000_RXDADV_STAT_TSIP) {
5557                 u32 *stamp = (u32 *)skb->data;
5558                 regval = le32_to_cpu(*(stamp + 2));
5559                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5560                 skb_pull(skb, IGB_TS_HDR_LEN);
5561         } else {
                if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5563                         return;
5564
5565                 regval = rd32(E1000_RXSTMPL);
5566                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5567         }
5568
5569         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5570 }
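
/**
 * igb_get_hlen - determine the length of the packet header
 * @rx_ring: ring on which the packet was received
 * @rx_desc: advanced receive descriptor describing the packet
 *
 * Returns the header length reported by hardware, capped at the
 * ring's buffer length.
 **/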
5571 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5572                                union e1000_adv_rx_desc *rx_desc)
5573 {
        /* HW will not DMA in data larger than the given buffer, even if it
         * parses the header (NFS headers, for example) to be larger.  In that
         * case, it fills the header buffer and spills the rest into the page.
         */
5578         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5579                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5580         if (hlen > rx_ring->rx_buffer_len)
5581                 hlen = rx_ring->rx_buffer_len;
5582         return hlen;
5583 }
5584
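/**
 * igb_clean_rx_irq_adv - clean completed descriptors from a rx ring
 * @q_vector: structure containing interrupt and ring information
 * @work_done: incremented by the number of packets processed
 * @budget: maximum number of packets this call may process
 **/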
5585 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5586                                  int *work_done, int budget)
5587 {
5588         struct igb_ring *rx_ring = q_vector->rx_ring;
5589         struct net_device *netdev = rx_ring->netdev;
5590         struct device *dev = rx_ring->dev;
        union e1000_adv_rx_desc *rx_desc, *next_rxd;
        struct igb_buffer *buffer_info, *next_buffer;
5593         struct sk_buff *skb;
5594         bool cleaned = false;
5595         int cleaned_count = 0;
5596         int current_node = numa_node_id();
5597         unsigned int total_bytes = 0, total_packets = 0;
5598         unsigned int i;
5599         u32 staterr;
5600         u16 length;
5601         u16 vlan_tag;
5602
5603         i = rx_ring->next_to_clean;
5604         buffer_info = &rx_ring->buffer_info[i];
5605         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5606         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5607
5608         while (staterr & E1000_RXD_STAT_DD) {
5609                 if (*work_done >= budget)
5610                         break;
5611                 (*work_done)++;
5612                 rmb(); /* read descriptor and rx_buffer_info after status DD */
5613
5614                 skb = buffer_info->skb;
5615                 prefetch(skb->data - NET_IP_ALIGN);
5616                 buffer_info->skb = NULL;
5617
5618                 i++;
5619                 if (i == rx_ring->count)
5620                         i = 0;
5621
5622                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5623                 prefetch(next_rxd);
5624                 next_buffer = &rx_ring->buffer_info[i];
5625
5626                 length = le16_to_cpu(rx_desc->wb.upper.length);
5627                 cleaned = true;
5628                 cleaned_count++;
5629
5630                 if (buffer_info->dma) {
5631                         dma_unmap_single(dev, buffer_info->dma,
5632                                          rx_ring->rx_buffer_len,
5633                                          DMA_FROM_DEVICE);
5634                         buffer_info->dma = 0;
5635                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5636                                 skb_put(skb, length);
5637                                 goto send_up;
5638                         }
5639                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5640                 }
5641
5642                 if (length) {
5643                         dma_unmap_page(dev, buffer_info->page_dma,
5644                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5645                         buffer_info->page_dma = 0;
5646
5647                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5648                                                 buffer_info->page,
5649                                                 buffer_info->page_offset,
5650                                                 length);
5651
5652                         if ((page_count(buffer_info->page) != 1) ||
5653                             (page_to_nid(buffer_info->page) != current_node))
5654                                 buffer_info->page = NULL;
5655                         else
5656                                 get_page(buffer_info->page);
5657
5658                         skb->len += length;
5659                         skb->data_len += length;
5660                         skb->truesize += length;
5661                 }
5662
5663                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5664                         buffer_info->skb = next_buffer->skb;
5665                         buffer_info->dma = next_buffer->dma;
5666                         next_buffer->skb = skb;
5667                         next_buffer->dma = 0;
5668                         goto next_desc;
5669                 }
5670 send_up:
5671                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5672                         dev_kfree_skb_irq(skb);
5673                         goto next_desc;
5674                 }
5675
5676                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5677                         igb_rx_hwtstamp(q_vector, staterr, skb);
5678                 total_bytes += skb->len;
5679                 total_packets++;
5680
5681                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5682
5683                 skb->protocol = eth_type_trans(skb, netdev);
5684                 skb_record_rx_queue(skb, rx_ring->queue_index);
5685
5686                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5687                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5688
5689                 igb_receive_skb(q_vector, skb, vlan_tag);
5690
5691 next_desc:
5692                 rx_desc->wb.upper.status_error = 0;
5693
5694                 /* return some buffers to hardware, one at a time is too slow */
5695                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5696                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5697                         cleaned_count = 0;
5698                 }
5699
5700                 /* use prefetched values */
5701                 rx_desc = next_rxd;
5702                 buffer_info = next_buffer;
5703                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5704         }
5705
5706         rx_ring->next_to_clean = i;
5707         cleaned_count = igb_desc_unused(rx_ring);
5708
5709         if (cleaned_count)
5710                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5711
5712         rx_ring->total_packets += total_packets;
5713         rx_ring->total_bytes += total_bytes;
5714         u64_stats_update_begin(&rx_ring->rx_syncp);
5715         rx_ring->rx_stats.packets += total_packets;
5716         rx_ring->rx_stats.bytes += total_bytes;
5717         u64_stats_update_end(&rx_ring->rx_syncp);
5718         return cleaned;
5719 }
5720
/**
 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
 * @rx_ring: pointer to the receive ring to refill
 * @cleaned_count: number of buffers to replace
 **/
5725 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5726 {
5727         struct net_device *netdev = rx_ring->netdev;
5728         union e1000_adv_rx_desc *rx_desc;
5729         struct igb_buffer *buffer_info;
5730         struct sk_buff *skb;
5731         unsigned int i;
5732         int bufsz;
5733
5734         i = rx_ring->next_to_use;
5735         buffer_info = &rx_ring->buffer_info[i];
5736
5737         bufsz = rx_ring->rx_buffer_len;
5738
5739         while (cleaned_count--) {
5740                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5741
5742                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5743                         if (!buffer_info->page) {
5744                                 buffer_info->page = netdev_alloc_page(netdev);
5745                                 if (unlikely(!buffer_info->page)) {
5746                                         u64_stats_update_begin(&rx_ring->rx_syncp);
5747                                         rx_ring->rx_stats.alloc_failed++;
5748                                         u64_stats_update_end(&rx_ring->rx_syncp);
5749                                         goto no_buffers;
5750                                 }
5751                                 buffer_info->page_offset = 0;
5752                         } else {
5753                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5754                         }
5755                         buffer_info->page_dma =
5756                                 dma_map_page(rx_ring->dev, buffer_info->page,
5757                                              buffer_info->page_offset,
5758                                              PAGE_SIZE / 2,
5759                                              DMA_FROM_DEVICE);
5760                         if (dma_mapping_error(rx_ring->dev,
5761                                               buffer_info->page_dma)) {
5762                                 buffer_info->page_dma = 0;
5763                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5764                                 rx_ring->rx_stats.alloc_failed++;
5765                                 u64_stats_update_end(&rx_ring->rx_syncp);
5766                                 goto no_buffers;
5767                         }
5768                 }
5769
5770                 skb = buffer_info->skb;
5771                 if (!skb) {
5772                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5773                         if (unlikely(!skb)) {
5774                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5775                                 rx_ring->rx_stats.alloc_failed++;
5776                                 u64_stats_update_end(&rx_ring->rx_syncp);
5777                                 goto no_buffers;
5778                         }
5779
5780                         buffer_info->skb = skb;
5781                 }
5782                 if (!buffer_info->dma) {
5783                         buffer_info->dma = dma_map_single(rx_ring->dev,
5784                                                           skb->data,
5785                                                           bufsz,
5786                                                           DMA_FROM_DEVICE);
5787                         if (dma_mapping_error(rx_ring->dev,
5788                                               buffer_info->dma)) {
5789                                 buffer_info->dma = 0;
5790                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5791                                 rx_ring->rx_stats.alloc_failed++;
5792                                 u64_stats_update_end(&rx_ring->rx_syncp);
5793                                 goto no_buffers;
5794                         }
5795                 }
5796                 /* Refresh the desc even if buffer_addrs didn't change because
5797                  * each write-back erases this info. */
5798                 if (bufsz < IGB_RXBUFFER_1024) {
5799                         rx_desc->read.pkt_addr =
5800                              cpu_to_le64(buffer_info->page_dma);
5801                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5802                 } else {
5803                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5804                         rx_desc->read.hdr_addr = 0;
5805                 }
5806
5807                 i++;
5808                 if (i == rx_ring->count)
5809                         i = 0;
5810                 buffer_info = &rx_ring->buffer_info[i];
5811         }
5812
5813 no_buffers:
5814         if (rx_ring->next_to_use != i) {
5815                 rx_ring->next_to_use = i;
5816                 if (i == 0)
5817                         i = (rx_ring->count - 1);
5818                 else
5819                         i--;
5820
5821                 /* Force memory writes to complete before letting h/w
5822                  * know there are new descriptors to fetch.  (Only
5823                  * applicable for weak-ordered memory model archs,
5824                  * such as IA-64). */
5825                 wmb();
5826                 writel(i, rx_ring->tail);
5827         }
5828 }
5829
/**
 * igb_mii_ioctl - handle MII related ioctls
 * @netdev: network interface device structure
 * @ifr: interface request structure holding the MII data
 * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
 **/
5836 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5837 {
5838         struct igb_adapter *adapter = netdev_priv(netdev);
5839         struct mii_ioctl_data *data = if_mii(ifr);
5840
5841         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5842                 return -EOPNOTSUPP;
5843
5844         switch (cmd) {
5845         case SIOCGMIIPHY:
5846                 data->phy_id = adapter->hw.phy.addr;
5847                 break;
5848         case SIOCGMIIREG:
5849                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5850                                      &data->val_out))
5851                         return -EIO;
5852                 break;
5853         case SIOCSMIIREG:
5854         default:
5855                 return -EOPNOTSUPP;
5856         }
5857         return 0;
5858 }
5859
/**
 * igb_hwtstamp_ioctl - control hardware time stamping
 * @netdev: network interface device structure
 * @ifr: interface request structure holding the hwtstamp_config
 * @cmd: ioctl command (SIOCSHWTSTAMP)
 *
 * Outgoing time stamping can be enabled and disabled. Play nice and
 * disable it when requested, although it shouldn't cause any overhead
 * when no packet needs it. At most one packet in the queue may be
 * marked for time stamping, otherwise it would be impossible to tell
 * for sure to which packet the hardware time stamp belongs.
 *
 * Incoming time stamping has to be configured via the hardware
 * filters. Not all combinations are supported, in particular event
 * type has to be specified. Matching the kind of event packet is
 * not supported, with the exception of "all V2 events regardless of
 * layer 2 or 4".
 **/
5879 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5880                               struct ifreq *ifr, int cmd)
5881 {
5882         struct igb_adapter *adapter = netdev_priv(netdev);
5883         struct e1000_hw *hw = &adapter->hw;
5884         struct hwtstamp_config config;
5885         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5886         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5887         u32 tsync_rx_cfg = 0;
5888         bool is_l4 = false;
5889         bool is_l2 = false;
5890         u32 regval;
5891
5892         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5893                 return -EFAULT;
5894
5895         /* reserved for future extensions */
5896         if (config.flags)
5897                 return -EINVAL;
5898
5899         switch (config.tx_type) {
5900         case HWTSTAMP_TX_OFF:
5901                 tsync_tx_ctl = 0;
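                /* fall through */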
5902         case HWTSTAMP_TX_ON:
5903                 break;
5904         default:
5905                 return -ERANGE;
5906         }
5907
5908         switch (config.rx_filter) {
5909         case HWTSTAMP_FILTER_NONE:
5910                 tsync_rx_ctl = 0;
5911                 break;
5912         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5913         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5914         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5915         case HWTSTAMP_FILTER_ALL:
5916                 /*
5917                  * register TSYNCRXCFG must be set, therefore it is not
5918                  * possible to time stamp both Sync and Delay_Req messages
5919                  * => fall back to time stamping all packets
5920                  */
5921                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5922                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5923                 break;
5924         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5925                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5926                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5927                 is_l4 = true;
5928                 break;
5929         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5930                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5931                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5932                 is_l4 = true;
5933                 break;
5934         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5935         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5936                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5937                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5938                 is_l2 = true;
5939                 is_l4 = true;
5940                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5941                 break;
5942         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5943         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5944                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5945                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5946                 is_l2 = true;
5947                 is_l4 = true;
5948                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5949                 break;
5950         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5951         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5952         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5953                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5954                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5955                 is_l2 = true;
5956                 break;
5957         default:
5958                 return -ERANGE;
5959         }
5960
5961         if (hw->mac.type == e1000_82575) {
                if (tsync_rx_ctl || tsync_tx_ctl)
5963                         return -EINVAL;
5964                 return 0;
5965         }
5966
5967         /*
5968          * Per-packet timestamping only works if all packets are
5969          * timestamped, so enable timestamping in all packets as
5970          * long as one rx filter was configured.
5971          */
5972         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
5973                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5974                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5975         }
5976
5977         /* enable/disable TX */
5978         regval = rd32(E1000_TSYNCTXCTL);
5979         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5980         regval |= tsync_tx_ctl;
5981         wr32(E1000_TSYNCTXCTL, regval);
5982
5983         /* enable/disable RX */
5984         regval = rd32(E1000_TSYNCRXCTL);
5985         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5986         regval |= tsync_rx_ctl;
5987         wr32(E1000_TSYNCRXCTL, regval);
5988
5989         /* define which PTP packets are time stamped */
5990         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5991
5992         /* define ethertype filter for timestamped packets */
5993         if (is_l2)
5994                 wr32(E1000_ETQF(3),
5995                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5996                                  E1000_ETQF_1588 | /* enable timestamping */
5997                                  ETH_P_1588));     /* 1588 eth protocol type */
5998         else
5999                 wr32(E1000_ETQF(3), 0);
6000
6001 #define PTP_PORT 319
6002         /* L4 Queue Filter[3]: filter by destination port and protocol */
6003         if (is_l4) {
6004                 u32 ftqf = (IPPROTO_UDP /* UDP */
6005                         | E1000_FTQF_VF_BP /* VF not compared */
6006                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6007                         | E1000_FTQF_MASK); /* mask all inputs */
6008                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6009
6010                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6011                 wr32(E1000_IMIREXT(3),
6012                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6013                 if (hw->mac.type == e1000_82576) {
6014                         /* enable source port check */
6015                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6016                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6017                 }
6018                 wr32(E1000_FTQF(3), ftqf);
6019         } else {
6020                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6021         }
6022         wrfl();
6023
6024         adapter->hwtstamp_config = config;
6025
6026         /* clear TX/RX time stamp registers, just to be sure */
6027         regval = rd32(E1000_TXSTMPH);
6028         regval = rd32(E1000_RXSTMPH);
6029
6030         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6031                 -EFAULT : 0;
6032 }
6033
/**
 * igb_ioctl - handle device specific ioctls
 * @netdev: network interface device structure
 * @ifr: interface request structure
 * @cmd: ioctl command
 **/
6040 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6041 {
6042         switch (cmd) {
6043         case SIOCGMIIPHY:
6044         case SIOCGMIIREG:
6045         case SIOCSMIIREG:
6046                 return igb_mii_ioctl(netdev, ifr, cmd);
6047         case SIOCSHWTSTAMP:
6048                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6049         default:
6050                 return -EOPNOTSUPP;
6051         }
6052 }
6053
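/**
 * igb_read_pcie_cap_reg - read a register from the PCIe capability block
 * @hw: pointer to the HW structure
 * @reg: offset of the register within the PCIe capability
 * @value: where to store the register value
 **/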
6054 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6055 {
6056         struct igb_adapter *adapter = hw->back;
6057         u16 cap_offset;
6058
6059         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6060         if (!cap_offset)
6061                 return -E1000_ERR_CONFIG;
6062
6063         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6064
6065         return 0;
6066 }
6067
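/**
 * igb_write_pcie_cap_reg - write a register in the PCIe capability block
 * @hw: pointer to the HW structure
 * @reg: offset of the register within the PCIe capability
 * @value: value to write to the register
 **/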
6068 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6069 {
6070         struct igb_adapter *adapter = hw->back;
6071         u16 cap_offset;
6072
6073         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6074         if (!cap_offset)
6075                 return -E1000_ERR_CONFIG;
6076
6077         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6078
6079         return 0;
6080 }
6081
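/**
 * igb_vlan_rx_register - enable or disable vlan tag insert/strip
 * @netdev: network interface device structure
 * @grp: vlan group from the stack, or NULL to disable vlan support
 **/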
6082 static void igb_vlan_rx_register(struct net_device *netdev,
6083                                  struct vlan_group *grp)
6084 {
6085         struct igb_adapter *adapter = netdev_priv(netdev);
6086         struct e1000_hw *hw = &adapter->hw;
6087         u32 ctrl, rctl;
6088
6089         igb_irq_disable(adapter);
6090         adapter->vlgrp = grp;
6091
6092         if (grp) {
6093                 /* enable VLAN tag insert/strip */
6094                 ctrl = rd32(E1000_CTRL);
6095                 ctrl |= E1000_CTRL_VME;
6096                 wr32(E1000_CTRL, ctrl);
6097
6098                 /* Disable CFI check */
6099                 rctl = rd32(E1000_RCTL);
6100                 rctl &= ~E1000_RCTL_CFIEN;
6101                 wr32(E1000_RCTL, rctl);
6102         } else {
6103                 /* disable VLAN tag insert/strip */
6104                 ctrl = rd32(E1000_CTRL);
6105                 ctrl &= ~E1000_CTRL_VME;
6106                 wr32(E1000_CTRL, ctrl);
6107         }
6108
6109         igb_rlpml_set(adapter);
6110
6111         if (!test_bit(__IGB_DOWN, &adapter->state))
6112                 igb_irq_enable(adapter);
6113 }
6114
6115 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6116 {
6117         struct igb_adapter *adapter = netdev_priv(netdev);
6118         struct e1000_hw *hw = &adapter->hw;
6119         int pf_id = adapter->vfs_allocated_count;
6120
6121         /* attempt to add filter to vlvf array */
6122         igb_vlvf_set(adapter, vid, true, pf_id);
6123
6124         /* add the filter since PF can receive vlans w/o entry in vlvf */
6125         igb_vfta_set(hw, vid, true);
6126 }
6127
6128 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6129 {
6130         struct igb_adapter *adapter = netdev_priv(netdev);
6131         struct e1000_hw *hw = &adapter->hw;
6132         int pf_id = adapter->vfs_allocated_count;
6133         s32 err;
6134
6135         igb_irq_disable(adapter);
6136         vlan_group_set_device(adapter->vlgrp, vid, NULL);
6137
6138         if (!test_bit(__IGB_DOWN, &adapter->state))
6139                 igb_irq_enable(adapter);
6140
6141         /* remove vlan from VLVF table array */
6142         err = igb_vlvf_set(adapter, vid, false, pf_id);
6143
6144         /* if vid was not present in VLVF just remove it from table */
6145         if (err)
6146                 igb_vfta_set(hw, vid, false);
6147 }
6148
6149 static void igb_restore_vlan(struct igb_adapter *adapter)
6150 {
6151         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6152
6153         if (adapter->vlgrp) {
6154                 u16 vid;
6155                 for (vid = 0; vid < VLAN_N_VID; vid++) {
6156                         if (!vlan_group_get_device(adapter->vlgrp, vid))
6157                                 continue;
6158                         igb_vlan_rx_add_vid(adapter->netdev, vid);
6159                 }
6160         }
6161 }
6162
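/**
 * igb_set_spd_dplx - force a specific speed/duplex setting
 * @adapter: board private structure
 * @spddplx: sum of a SPEED_* and a DUPLEX_* value to force
 **/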
6163 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
6164 {
6165         struct pci_dev *pdev = adapter->pdev;
6166         struct e1000_mac_info *mac = &adapter->hw.mac;
6167
6168         mac->autoneg = 0;
6169
        /* Fiber NICs only allow 1000 Mbps full duplex */
6171         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6172                 spddplx != (SPEED_1000 + DUPLEX_FULL)) {
6173                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6174                 return -EINVAL;
6175         }
6176
6177         switch (spddplx) {
6178         case SPEED_10 + DUPLEX_HALF:
6179                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6180                 break;
6181         case SPEED_10 + DUPLEX_FULL:
6182                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6183                 break;
6184         case SPEED_100 + DUPLEX_HALF:
6185                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6186                 break;
6187         case SPEED_100 + DUPLEX_FULL:
6188                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6189                 break;
6190         case SPEED_1000 + DUPLEX_FULL:
6191                 mac->autoneg = 1;
6192                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6193                 break;
6194         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6195         default:
6196                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6197                 return -EINVAL;
6198         }
6199         return 0;
6200 }
6201
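/**
 * __igb_shutdown - prepare the adapter for suspend or power off
 * @pdev: PCI device information struct
 * @enable_wake: set on return if wake-up should remain enabled
 **/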
6202 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6203 {
6204         struct net_device *netdev = pci_get_drvdata(pdev);
6205         struct igb_adapter *adapter = netdev_priv(netdev);
6206         struct e1000_hw *hw = &adapter->hw;
6207         u32 ctrl, rctl, status;
6208         u32 wufc = adapter->wol;
6209 #ifdef CONFIG_PM
6210         int retval = 0;
6211 #endif
6212
6213         netif_device_detach(netdev);
6214
6215         if (netif_running(netdev))
6216                 igb_close(netdev);
6217
6218         igb_clear_interrupt_scheme(adapter);
6219
6220 #ifdef CONFIG_PM
6221         retval = pci_save_state(pdev);
6222         if (retval)
6223                 return retval;
6224 #endif
6225
6226         status = rd32(E1000_STATUS);
6227         if (status & E1000_STATUS_LU)
6228                 wufc &= ~E1000_WUFC_LNKC;
6229
6230         if (wufc) {
6231                 igb_setup_rctl(adapter);
6232                 igb_set_rx_mode(netdev);
6233
6234                 /* turn on all-multi mode if wake on multicast is enabled */
6235                 if (wufc & E1000_WUFC_MC) {
6236                         rctl = rd32(E1000_RCTL);
6237                         rctl |= E1000_RCTL_MPE;
6238                         wr32(E1000_RCTL, rctl);
6239                 }
6240
6241                 ctrl = rd32(E1000_CTRL);
6242                 /* advertise wake from D3Cold */
6243                 #define E1000_CTRL_ADVD3WUC 0x00100000
6244                 /* phy power management enable */
6245                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6246                 ctrl |= E1000_CTRL_ADVD3WUC;
6247                 wr32(E1000_CTRL, ctrl);
6248
6249                 /* Allow time for pending master requests to run */
6250                 igb_disable_pcie_master(hw);
6251
6252                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6253                 wr32(E1000_WUFC, wufc);
6254         } else {
6255                 wr32(E1000_WUC, 0);
6256                 wr32(E1000_WUFC, 0);
6257         }
6258
6259         *enable_wake = wufc || adapter->en_mng_pt;
6260         if (!*enable_wake)
6261                 igb_power_down_link(adapter);
6262         else
6263                 igb_power_up_link(adapter);
6264
6265         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6266          * would have already happened in close and is redundant. */
6267         igb_release_hw_control(adapter);
6268
6269         pci_disable_device(pdev);
6270
6271         return 0;
6272 }
6273
6274 #ifdef CONFIG_PM
6275 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6276 {
6277         int retval;
6278         bool wake;
6279
6280         retval = __igb_shutdown(pdev, &wake);
6281         if (retval)
6282                 return retval;
6283
6284         if (wake) {
6285                 pci_prepare_to_sleep(pdev);
6286         } else {
6287                 pci_wake_from_d3(pdev, false);
6288                 pci_set_power_state(pdev, PCI_D3hot);
6289         }
6290
6291         return 0;
6292 }
6293
6294 static int igb_resume(struct pci_dev *pdev)
6295 {
6296         struct net_device *netdev = pci_get_drvdata(pdev);
6297         struct igb_adapter *adapter = netdev_priv(netdev);
6298         struct e1000_hw *hw = &adapter->hw;
        int err;
6300
6301         pci_set_power_state(pdev, PCI_D0);
6302         pci_restore_state(pdev);
6303         pci_save_state(pdev);
6304
6305         err = pci_enable_device_mem(pdev);
6306         if (err) {
6307                 dev_err(&pdev->dev,
6308                         "igb: Cannot enable PCI device from suspend\n");
6309                 return err;
6310         }
6311         pci_set_master(pdev);
6312
6313         pci_enable_wake(pdev, PCI_D3hot, 0);
6314         pci_enable_wake(pdev, PCI_D3cold, 0);
6315
6316         if (igb_init_interrupt_scheme(adapter)) {
6317                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6318                 return -ENOMEM;
6319         }
6320
6321         igb_reset(adapter);
6322
6323         /* let the f/w know that the h/w is now under the control of the
6324          * driver. */
6325         igb_get_hw_control(adapter);
6326
6327         wr32(E1000_WUS, ~0);
6328
6329         if (netif_running(netdev)) {
6330                 err = igb_open(netdev);
6331                 if (err)
6332                         return err;
6333         }
6334
6335         netif_device_attach(netdev);
6336
6337         return 0;
6338 }
6339 #endif
6340
6341 static void igb_shutdown(struct pci_dev *pdev)
6342 {
6343         bool wake;
6344
6345         __igb_shutdown(pdev, &wake);
6346
6347         if (system_state == SYSTEM_POWER_OFF) {
6348                 pci_wake_from_d3(pdev, wake);
6349                 pci_set_power_state(pdev, PCI_D3hot);
6350         }
6351 }
6352
6353 #ifdef CONFIG_NET_POLL_CONTROLLER
6354 /*
6355  * Polling 'interrupt' - used by things like netconsole to send skbs
6356  * without having to re-enable interrupts. It's not called while
6357  * the interrupt routine is executing.
6358  */
6359 static void igb_netpoll(struct net_device *netdev)
6360 {
6361         struct igb_adapter *adapter = netdev_priv(netdev);
6362         struct e1000_hw *hw = &adapter->hw;
6363         int i;
6364
6365         if (!adapter->msix_entries) {
6366                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6367                 igb_irq_disable(adapter);
6368                 napi_schedule(&q_vector->napi);
6369                 return;
6370         }
6371
6372         for (i = 0; i < adapter->num_q_vectors; i++) {
6373                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6374                 wr32(E1000_EIMC, q_vector->eims_value);
6375                 napi_schedule(&q_vector->napi);
6376         }
6377 }
6378 #endif /* CONFIG_NET_POLL_CONTROLLER */
6379
6380 /**
6381  * igb_io_error_detected - called when PCI error is detected
6382  * @pdev: Pointer to PCI device
6383  * @state: The current pci connection state
6384  *
6385  * This function is called after a PCI bus error affecting
6386  * this device has been detected.
6387  */
6388 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6389                                               pci_channel_state_t state)
6390 {
6391         struct net_device *netdev = pci_get_drvdata(pdev);
6392         struct igb_adapter *adapter = netdev_priv(netdev);
6393
6394         netif_device_detach(netdev);
6395
6396         if (state == pci_channel_io_perm_failure)
6397                 return PCI_ERS_RESULT_DISCONNECT;
6398
6399         if (netif_running(netdev))
6400                 igb_down(adapter);
6401         pci_disable_device(pdev);
6402
        /* Request a slot reset. */
6404         return PCI_ERS_RESULT_NEED_RESET;
6405 }
6406
6407 /**
6408  * igb_io_slot_reset - called after the pci bus has been reset.
6409  * @pdev: Pointer to PCI device
6410  *
6411  * Restart the card from scratch, as if from a cold-boot. Implementation
6412  * resembles the first-half of the igb_resume routine.
6413  */
6414 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6415 {
6416         struct net_device *netdev = pci_get_drvdata(pdev);
6417         struct igb_adapter *adapter = netdev_priv(netdev);
6418         struct e1000_hw *hw = &adapter->hw;
6419         pci_ers_result_t result;
6420         int err;
6421
6422         if (pci_enable_device_mem(pdev)) {
6423                 dev_err(&pdev->dev,
6424                         "Cannot re-enable PCI device after reset.\n");
6425                 result = PCI_ERS_RESULT_DISCONNECT;
6426         } else {
6427                 pci_set_master(pdev);
6428                 pci_restore_state(pdev);
6429                 pci_save_state(pdev);
6430
6431                 pci_enable_wake(pdev, PCI_D3hot, 0);
6432                 pci_enable_wake(pdev, PCI_D3cold, 0);
6433
6434                 igb_reset(adapter);
6435                 wr32(E1000_WUS, ~0);
6436                 result = PCI_ERS_RESULT_RECOVERED;
6437         }
6438
6439         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6440         if (err) {
6441                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6442                         "failed 0x%0x\n", err);
6443                 /* non-fatal, continue */
6444         }
6445
6446         return result;
6447 }
6448
6449 /**
6450  * igb_io_resume - called when traffic can start flowing again.
6451  * @pdev: Pointer to PCI device
6452  *
6453  * This callback is called when the error recovery driver tells us that
 * it's OK to resume normal operation. Implementation resembles the
6455  * second-half of the igb_resume routine.
6456  */
6457 static void igb_io_resume(struct pci_dev *pdev)
6458 {
6459         struct net_device *netdev = pci_get_drvdata(pdev);
6460         struct igb_adapter *adapter = netdev_priv(netdev);
6461
6462         if (netif_running(netdev)) {
6463                 if (igb_up(adapter)) {
6464                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6465                         return;
6466                 }
6467         }
6468
6469         netif_device_attach(netdev);
6470
6471         /* let the f/w know that the h/w is now under the control of the
6472          * driver. */
6473         igb_get_hw_control(adapter);
6474 }
6475
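/**
 * igb_rar_set_qsel - program a receive address register with pool select
 * @adapter: board private structure
 * @addr: MAC address to program
 * @index: receive address register to program
 * @qsel: pool/queue to associate with the address
 **/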
6476 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6477                              u8 qsel)
6478 {
6479         u32 rar_low, rar_high;
6480         struct e1000_hw *hw = &adapter->hw;
6481
6482         /* HW expects these in little endian so we reverse the byte order
6483          * from network order (big endian) to little endian
6484          */
6485         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6486                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6487         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6488
6489         /* Indicate to hardware the Address is Valid. */
6490         rar_high |= E1000_RAH_AV;
6491
6492         if (hw->mac.type == e1000_82575)
6493                 rar_high |= E1000_RAH_POOL_1 * qsel;
6494         else
6495                 rar_high |= E1000_RAH_POOL_1 << qsel;
6496
6497         wr32(E1000_RAL(index), rar_low);
6498         wrfl();
6499         wr32(E1000_RAH(index), rar_high);
6500         wrfl();
6501 }
6502
6503 static int igb_set_vf_mac(struct igb_adapter *adapter,
6504                           int vf, unsigned char *mac_addr)
6505 {
6506         struct e1000_hw *hw = &adapter->hw;
        /* VF MAC addresses start at the end of the receive addresses and
         * move towards the first, so a collision should not be possible */
6509         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6510
6511         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6512
6513         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6514
6515         return 0;
6516 }
6517
6518 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6519 {
6520         struct igb_adapter *adapter = netdev_priv(netdev);
6521         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6522                 return -EINVAL;
6523         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6524         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
        dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
                                      " change effective.\n");
6527         if (test_bit(__IGB_DOWN, &adapter->state)) {
6528                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6529                          " but the PF device is not up.\n");
6530                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6531                          " attempting to use the VF device.\n");
6532         }
6533         return igb_set_vf_mac(adapter, vf, mac);
6534 }
6535
6536 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6537 {
6538         return -EOPNOTSUPP;
6539 }
6540
6541 static int igb_ndo_get_vf_config(struct net_device *netdev,
6542                                  int vf, struct ifla_vf_info *ivi)
6543 {
6544         struct igb_adapter *adapter = netdev_priv(netdev);
6545         if (vf >= adapter->vfs_allocated_count)
6546                 return -EINVAL;
6547         ivi->vf = vf;
6548         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6549         ivi->tx_rate = 0;
6550         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6551         ivi->qos = adapter->vf_data[vf].pf_qos;
6552         return 0;
6553 }
6554
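/**
 * igb_vmm_control - configure loopback and vlan replication for VMDq
 * @adapter: board private structure
 **/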
6555 static void igb_vmm_control(struct igb_adapter *adapter)
6556 {
6557         struct e1000_hw *hw = &adapter->hw;
6558         u32 reg;
6559
6560         switch (hw->mac.type) {
6561         case e1000_82575:
6562         default:
6563                 /* replication is not supported for 82575 */
6564                 return;
6565         case e1000_82576:
6566                 /* notify HW that the MAC is adding vlan tags */
6567                 reg = rd32(E1000_DTXCTL);
6568                 reg |= E1000_DTXCTL_VLAN_ADDED;
6569                 wr32(E1000_DTXCTL, reg);
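                /* fall through */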
6570         case e1000_82580:
6571                 /* enable replication vlan tag stripping */
6572                 reg = rd32(E1000_RPLOLR);
6573                 reg |= E1000_RPLOLR_STRVLAN;
6574                 wr32(E1000_RPLOLR, reg);
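                /* fall through */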
6575         case e1000_i350:
6576                 /* none of the above registers are supported by i350 */
6577                 break;
6578         }
6579
6580         if (adapter->vfs_allocated_count) {
6581                 igb_vmdq_set_loopback_pf(hw, true);
6582                 igb_vmdq_set_replication_pf(hw, true);
6583         } else {
6584                 igb_vmdq_set_loopback_pf(hw, false);
6585                 igb_vmdq_set_replication_pf(hw, false);
6586         }
6587 }
6588
6589 /* igb_main.c */