[pandora-kernel.git] drivers/net/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #ifdef CONFIG_IGB_DCA
49 #include <linux/dca.h>
50 #endif
51 #include "igb.h"
52
53 #define DRV_VERSION "2.1.0-k2"
54 char igb_driver_name[] = "igb";
55 char igb_driver_version[] = DRV_VERSION;
56 static const char igb_driver_string[] =
57                                 "Intel(R) Gigabit Ethernet Network Driver";
58 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
59
60 static const struct e1000_info *igb_info_tbl[] = {
61         [board_82575] = &e1000_82575_info,
62 };
63
64 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
87         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
88         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
89         /* required last entry */
90         {0, }
91 };
92
93 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
94
95 void igb_reset(struct igb_adapter *);
96 static int igb_setup_all_tx_resources(struct igb_adapter *);
97 static int igb_setup_all_rx_resources(struct igb_adapter *);
98 static void igb_free_all_tx_resources(struct igb_adapter *);
99 static void igb_free_all_rx_resources(struct igb_adapter *);
100 static void igb_setup_mrqc(struct igb_adapter *);
101 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
102 static void __devexit igb_remove(struct pci_dev *pdev);
103 static int igb_sw_init(struct igb_adapter *);
104 static int igb_open(struct net_device *);
105 static int igb_close(struct net_device *);
106 static void igb_configure_tx(struct igb_adapter *);
107 static void igb_configure_rx(struct igb_adapter *);
108 static void igb_clean_all_tx_rings(struct igb_adapter *);
109 static void igb_clean_all_rx_rings(struct igb_adapter *);
110 static void igb_clean_tx_ring(struct igb_ring *);
111 static void igb_clean_rx_ring(struct igb_ring *);
112 static void igb_set_rx_mode(struct net_device *);
113 static void igb_update_phy_info(unsigned long);
114 static void igb_watchdog(unsigned long);
115 static void igb_watchdog_task(struct work_struct *);
116 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
117 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
118                                                  struct rtnl_link_stats64 *stats);
119 static int igb_change_mtu(struct net_device *, int);
120 static int igb_set_mac(struct net_device *, void *);
121 static void igb_set_uta(struct igb_adapter *adapter);
122 static irqreturn_t igb_intr(int irq, void *);
123 static irqreturn_t igb_intr_msi(int irq, void *);
124 static irqreturn_t igb_msix_other(int irq, void *);
125 static irqreturn_t igb_msix_ring(int irq, void *);
126 #ifdef CONFIG_IGB_DCA
127 static void igb_update_dca(struct igb_q_vector *);
128 static void igb_setup_dca(struct igb_adapter *);
129 #endif /* CONFIG_IGB_DCA */
130 static bool igb_clean_tx_irq(struct igb_q_vector *);
131 static int igb_poll(struct napi_struct *, int);
132 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
133 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
134 static void igb_tx_timeout(struct net_device *);
135 static void igb_reset_task(struct work_struct *);
136 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
137 static void igb_vlan_rx_add_vid(struct net_device *, u16);
138 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
139 static void igb_restore_vlan(struct igb_adapter *);
140 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
141 static void igb_ping_all_vfs(struct igb_adapter *);
142 static void igb_msg_task(struct igb_adapter *);
143 static void igb_vmm_control(struct igb_adapter *);
144 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
145 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
146 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
147 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
148                                int vf, u16 vlan, u8 qos);
149 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
150 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
151                                  struct ifla_vf_info *ivi);
152
153 #ifdef CONFIG_PM
154 static int igb_suspend(struct pci_dev *, pm_message_t);
155 static int igb_resume(struct pci_dev *);
156 #endif
157 static void igb_shutdown(struct pci_dev *);
158 #ifdef CONFIG_IGB_DCA
159 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
160 static struct notifier_block dca_notifier = {
161         .notifier_call  = igb_notify_dca,
162         .next           = NULL,
163         .priority       = 0
164 };
165 #endif
166 #ifdef CONFIG_NET_POLL_CONTROLLER
167 /* for netdump / net console */
168 static void igb_netpoll(struct net_device *);
169 #endif
170 #ifdef CONFIG_PCI_IOV
171 static unsigned int max_vfs = 0;
172 module_param(max_vfs, uint, 0);
173 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
174                  "per physical function");
175 #endif /* CONFIG_PCI_IOV */
176
177 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
178                      pci_channel_state_t);
179 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
180 static void igb_io_resume(struct pci_dev *);
181
182 static struct pci_error_handlers igb_err_handler = {
183         .error_detected = igb_io_error_detected,
184         .slot_reset = igb_io_slot_reset,
185         .resume = igb_io_resume,
186 };
187
188
189 static struct pci_driver igb_driver = {
190         .name     = igb_driver_name,
191         .id_table = igb_pci_tbl,
192         .probe    = igb_probe,
193         .remove   = __devexit_p(igb_remove),
194 #ifdef CONFIG_PM
195         /* Power Management Hooks */
196         .suspend  = igb_suspend,
197         .resume   = igb_resume,
198 #endif
199         .shutdown = igb_shutdown,
200         .err_handler = &igb_err_handler
201 };
202
203 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
204 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
205 MODULE_LICENSE("GPL");
206 MODULE_VERSION(DRV_VERSION);
207
208 struct igb_reg_info {
209         u32 ofs;
210         char *name;
211 };
212
213 static const struct igb_reg_info igb_reg_info_tbl[] = {
214
215         /* General Registers */
216         {E1000_CTRL, "CTRL"},
217         {E1000_STATUS, "STATUS"},
218         {E1000_CTRL_EXT, "CTRL_EXT"},
219
220         /* Interrupt Registers */
221         {E1000_ICR, "ICR"},
222
223         /* RX Registers */
224         {E1000_RCTL, "RCTL"},
225         {E1000_RDLEN(0), "RDLEN"},
226         {E1000_RDH(0), "RDH"},
227         {E1000_RDT(0), "RDT"},
228         {E1000_RXDCTL(0), "RXDCTL"},
229         {E1000_RDBAL(0), "RDBAL"},
230         {E1000_RDBAH(0), "RDBAH"},
231
232         /* TX Registers */
233         {E1000_TCTL, "TCTL"},
234         {E1000_TDBAL(0), "TDBAL"},
235         {E1000_TDBAH(0), "TDBAH"},
236         {E1000_TDLEN(0), "TDLEN"},
237         {E1000_TDH(0), "TDH"},
238         {E1000_TDT(0), "TDT"},
239         {E1000_TXDCTL(0), "TXDCTL"},
240         {E1000_TDFH, "TDFH"},
241         {E1000_TDFT, "TDFT"},
242         {E1000_TDFHS, "TDFHS"},
243         {E1000_TDFPC, "TDFPC"},
244
245         /* List Terminator */
246         {}
247 };
248
249 /*
250  * igb_regdump - register printout routine
251  */
252 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
253 {
254         int n = 0;
255         char rname[16];
256         u32 regs[8];
257
258         switch (reginfo->ofs) {
259         case E1000_RDLEN(0):
260                 for (n = 0; n < 4; n++)
261                         regs[n] = rd32(E1000_RDLEN(n));
262                 break;
263         case E1000_RDH(0):
264                 for (n = 0; n < 4; n++)
265                         regs[n] = rd32(E1000_RDH(n));
266                 break;
267         case E1000_RDT(0):
268                 for (n = 0; n < 4; n++)
269                         regs[n] = rd32(E1000_RDT(n));
270                 break;
271         case E1000_RXDCTL(0):
272                 for (n = 0; n < 4; n++)
273                         regs[n] = rd32(E1000_RXDCTL(n));
274                 break;
275         case E1000_RDBAL(0):
276                 for (n = 0; n < 4; n++)
277                         regs[n] = rd32(E1000_RDBAL(n));
278                 break;
279         case E1000_RDBAH(0):
280                 for (n = 0; n < 4; n++)
281                         regs[n] = rd32(E1000_RDBAH(n));
282                 break;
283         case E1000_TDBAL(0):
284                 for (n = 0; n < 4; n++)
285                         regs[n] = rd32(E1000_TDBAL(n));
286                 break;
287         case E1000_TDBAH(0):
288                 for (n = 0; n < 4; n++)
289                         regs[n] = rd32(E1000_TDBAH(n));
290                 break;
291         case E1000_TDLEN(0):
292                 for (n = 0; n < 4; n++)
293                         regs[n] = rd32(E1000_TDLEN(n));
294                 break;
295         case E1000_TDH(0):
296                 for (n = 0; n < 4; n++)
297                         regs[n] = rd32(E1000_TDH(n));
298                 break;
299         case E1000_TDT(0):
300                 for (n = 0; n < 4; n++)
301                         regs[n] = rd32(E1000_TDT(n));
302                 break;
303         case E1000_TXDCTL(0):
304                 for (n = 0; n < 4; n++)
305                         regs[n] = rd32(E1000_TXDCTL(n));
306                 break;
307         default:
308                 printk(KERN_INFO "%-15s %08x\n",
309                         reginfo->name, rd32(reginfo->ofs));
310                 return;
311         }
312
313         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
314         printk(KERN_INFO "%-15s ", rname);
315         for (n = 0; n < 4; n++)
316                 printk(KERN_CONT "%08x ", regs[n]);
317         printk(KERN_CONT "\n");
318 }
319
320 /*
321  * igb_dump - Print registers, tx-rings and rx-rings
322  */
323 static void igb_dump(struct igb_adapter *adapter)
324 {
325         struct net_device *netdev = adapter->netdev;
326         struct e1000_hw *hw = &adapter->hw;
327         struct igb_reg_info *reginfo;
328         int n = 0;
329         struct igb_ring *tx_ring;
330         union e1000_adv_tx_desc *tx_desc;
331         struct my_u0 { u64 a; u64 b; } *u0;
332         struct igb_buffer *buffer_info;
333         struct igb_ring *rx_ring;
334         union e1000_adv_rx_desc *rx_desc;
335         u32 staterr;
336         int i = 0;
337
338         if (!netif_msg_hw(adapter))
339                 return;
340
341         /* Print netdevice Info */
342         if (netdev) {
343                 dev_info(&adapter->pdev->dev, "Net device Info\n");
344                 printk(KERN_INFO "Device Name     state            "
345                         "trans_start      last_rx\n");
346                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
347                         netdev->name,
348                         netdev->state,
349                         netdev->trans_start,
350                         netdev->last_rx);
351         }
352
353         /* Print Registers */
354         dev_info(&adapter->pdev->dev, "Register Dump\n");
355         printk(KERN_INFO " Register Name   Value\n");
356         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
357              reginfo->name; reginfo++) {
358                 igb_regdump(hw, reginfo);
359         }
360
361         /* Print TX Ring Summary */
362         if (!netdev || !netif_running(netdev))
363                 goto exit;
364
365         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
366         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
367                 " leng ntw timestamp\n");
368         for (n = 0; n < adapter->num_tx_queues; n++) {
369                 tx_ring = adapter->tx_ring[n];
370                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
371                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
372                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
373                            (u64)buffer_info->dma,
374                            buffer_info->length,
375                            buffer_info->next_to_watch,
376                            (u64)buffer_info->time_stamp);
377         }
378
379         /* Print TX Rings */
380         if (!netif_msg_tx_done(adapter))
381                 goto rx_ring_summary;
382
383         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
384
385         /* Transmit Descriptor Formats
386          *
387          * Advanced Transmit Descriptor
388          *   +--------------------------------------------------------------+
389          * 0 |         Buffer Address [63:0]                                |
390          *   +--------------------------------------------------------------+
391          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
392          *   +--------------------------------------------------------------+
393          *   63      46 45    40 39 38 36 35 32 31   24             15       0
394          */
395
396         for (n = 0; n < adapter->num_tx_queues; n++) {
397                 tx_ring = adapter->tx_ring[n];
398                 printk(KERN_INFO "------------------------------------\n");
399                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
400                 printk(KERN_INFO "------------------------------------\n");
401                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
402                         "[PlPOCIStDDM Ln] [bi->dma       ] "
403                         "leng  ntw timestamp        bi->skb\n");
404
405                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
406                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
407                         buffer_info = &tx_ring->buffer_info[i];
408                         u0 = (struct my_u0 *)tx_desc;
409                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
410                                 " %04X  %3X %016llX %p", i,
411                                 le64_to_cpu(u0->a),
412                                 le64_to_cpu(u0->b),
413                                 (u64)buffer_info->dma,
414                                 buffer_info->length,
415                                 buffer_info->next_to_watch,
416                                 (u64)buffer_info->time_stamp,
417                                 buffer_info->skb);
418                         if (i == tx_ring->next_to_use &&
419                                 i == tx_ring->next_to_clean)
420                                 printk(KERN_CONT " NTC/U\n");
421                         else if (i == tx_ring->next_to_use)
422                                 printk(KERN_CONT " NTU\n");
423                         else if (i == tx_ring->next_to_clean)
424                                 printk(KERN_CONT " NTC\n");
425                         else
426                                 printk(KERN_CONT "\n");
427
428                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
429                                 print_hex_dump(KERN_INFO, "",
430                                         DUMP_PREFIX_ADDRESS,
431                                         16, 1, phys_to_virt(buffer_info->dma),
432                                         buffer_info->length, true);
433                 }
434         }
435
436         /* Print RX Rings Summary */
437 rx_ring_summary:
438         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
439         printk(KERN_INFO "Queue [NTU] [NTC]\n");
440         for (n = 0; n < adapter->num_rx_queues; n++) {
441                 rx_ring = adapter->rx_ring[n];
442                 printk(KERN_INFO " %5d %5X %5X\n", n,
443                            rx_ring->next_to_use, rx_ring->next_to_clean);
444         }
445
446         /* Print RX Rings */
447         if (!netif_msg_rx_status(adapter))
448                 goto exit;
449
450         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
451
452         /* Advanced Receive Descriptor (Read) Format
453          *    63                                           1        0
454          *    +-----------------------------------------------------+
455          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
456          *    +----------------------------------------------+------+
457          *  8 |       Header Buffer Address [63:1]           |  DD  |
458          *    +-----------------------------------------------------+
459          *
460          *
461          * Advanced Receive Descriptor (Write-Back) Format
462          *
463          *   63       48 47    32 31  30      21 20 17 16   4 3     0
464          *   +------------------------------------------------------+
465          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
466          *   | Checksum   Ident  |   |           |    | Type | Type |
467          *   +------------------------------------------------------+
468          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
469          *   +------------------------------------------------------+
470          *   63       48 47    32 31            20 19               0
471          */
472
473         for (n = 0; n < adapter->num_rx_queues; n++) {
474                 rx_ring = adapter->rx_ring[n];
475                 printk(KERN_INFO "------------------------------------\n");
476                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
477                 printk(KERN_INFO "------------------------------------\n");
478                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
479                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
480                         "<-- Adv Rx Read format\n");
481                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
482                         "[vl er S cks ln] ---------------- [bi->skb] "
483                         "<-- Adv Rx Write-Back format\n");
484
485                 for (i = 0; i < rx_ring->count; i++) {
486                         buffer_info = &rx_ring->buffer_info[i];
487                         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
488                         u0 = (struct my_u0 *)rx_desc;
489                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
490                         if (staterr & E1000_RXD_STAT_DD) {
491                                 /* Descriptor Done */
492                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
493                                         "%016llX ---------------- %p", i,
494                                         le64_to_cpu(u0->a),
495                                         le64_to_cpu(u0->b),
496                                         buffer_info->skb);
497                         } else {
498                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
499                                         "%016llX %016llX %p", i,
500                                         le64_to_cpu(u0->a),
501                                         le64_to_cpu(u0->b),
502                                         (u64)buffer_info->dma,
503                                         buffer_info->skb);
504
505                                 if (netif_msg_pktdata(adapter)) {
506                                         print_hex_dump(KERN_INFO, "",
507                                                 DUMP_PREFIX_ADDRESS,
508                                                 16, 1,
509                                                 phys_to_virt(buffer_info->dma),
510                                                 rx_ring->rx_buffer_len, true);
511                                         if (rx_ring->rx_buffer_len
512                                                 < IGB_RXBUFFER_1024)
513                                                 print_hex_dump(KERN_INFO, "",
514                                                   DUMP_PREFIX_ADDRESS,
515                                                   16, 1,
516                                                   phys_to_virt(
517                                                     buffer_info->page_dma +
518                                                     buffer_info->page_offset),
519                                                   PAGE_SIZE/2, true);
520                                 }
521                         }
522
523                         if (i == rx_ring->next_to_use)
524                                 printk(KERN_CONT " NTU\n");
525                         else if (i == rx_ring->next_to_clean)
526                                 printk(KERN_CONT " NTC\n");
527                         else
528                                 printk(KERN_CONT "\n");
529
530                 }
531         }
532
533 exit:
534         return;
535 }
536
537
538 /**
539  * igb_read_clock - read raw cycle counter (to be used by time counter)
540  */
541 static cycle_t igb_read_clock(const struct cyclecounter *tc)
542 {
543         struct igb_adapter *adapter =
544                 container_of(tc, struct igb_adapter, cycles);
545         struct e1000_hw *hw = &adapter->hw;
546         u64 stamp = 0;
547         int shift = 0;
548
549         /*
550          * The timestamp latches on lowest register read. For the 82580
551          * the lowest register is SYSTIMR instead of SYSTIML.  However we never
552          * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
553          */
554         if (hw->mac.type == e1000_82580) {
555                 stamp = rd32(E1000_SYSTIMR) >> 8;
556                 shift = IGB_82580_TSYNC_SHIFT;
557         }
558
559         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
560         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
561         return stamp;
562 }
563
564 /**
565  * igb_get_hw_dev - return device
566  * used by hardware layer to print debugging information
567  **/
568 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
569 {
570         struct igb_adapter *adapter = hw->back;
571         return adapter->netdev;
572 }
573
574 /**
575  * igb_init_module - Driver Registration Routine
576  *
577  * igb_init_module is the first routine called when the driver is
578  * loaded. All it does is register with the PCI subsystem.
579  **/
580 static int __init igb_init_module(void)
581 {
582         int ret;
583         printk(KERN_INFO "%s - version %s\n",
584                igb_driver_string, igb_driver_version);
585
586         printk(KERN_INFO "%s\n", igb_copyright);
587
588 #ifdef CONFIG_IGB_DCA
589         dca_register_notify(&dca_notifier);
590 #endif
591         ret = pci_register_driver(&igb_driver);
592         return ret;
593 }
594
595 module_init(igb_init_module);
596
597 /**
598  * igb_exit_module - Driver Exit Cleanup Routine
599  *
600  * igb_exit_module is called just before the driver is removed
601  * from memory.
602  **/
603 static void __exit igb_exit_module(void)
604 {
605 #ifdef CONFIG_IGB_DCA
606         dca_unregister_notify(&dca_notifier);
607 #endif
608         pci_unregister_driver(&igb_driver);
609 }
610
611 module_exit(igb_exit_module);
612
613 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
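/*
 * Worked example (illustrative): Q_IDX_82576(0) = 0, Q_IDX_82576(1) = 8,
 * Q_IDX_82576(2) = 1, Q_IDX_82576(3) = 9, ... - even indices land on
 * queues 0-7 and odd indices on queues 8-15, interleaving the per-VF
 * queue pairs described below.
 */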
614 /**
615  * igb_cache_ring_register - Descriptor ring to register mapping
616  * @adapter: board private structure to initialize
617  *
618  * Once we know the feature-set enabled for the device, we'll cache
619  * the register offset the descriptor ring is assigned to.
620  **/
621 static void igb_cache_ring_register(struct igb_adapter *adapter)
622 {
623         int i = 0, j = 0;
624         u32 rbase_offset = adapter->vfs_allocated_count;
625
626         switch (adapter->hw.mac.type) {
627         case e1000_82576:
628                 /* The queues are allocated for virtualization such that VF 0
629                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
630                  * In order to avoid collision we start at the first free queue
631                  * and continue consuming queues in the same sequence
632                  */
633                 if (adapter->vfs_allocated_count) {
634                         for (; i < adapter->rss_queues; i++)
635                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
636                                                                Q_IDX_82576(i);
637                 }
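                /* Fall through - remaining queues use the default mapping */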
638         case e1000_82575:
639         case e1000_82580:
640         case e1000_i350:
641         default:
642                 for (; i < adapter->num_rx_queues; i++)
643                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
644                 for (; j < adapter->num_tx_queues; j++)
645                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
646                 break;
647         }
648 }
649
650 static void igb_free_queues(struct igb_adapter *adapter)
651 {
652         int i;
653
654         for (i = 0; i < adapter->num_tx_queues; i++) {
655                 kfree(adapter->tx_ring[i]);
656                 adapter->tx_ring[i] = NULL;
657         }
658         for (i = 0; i < adapter->num_rx_queues; i++) {
659                 kfree(adapter->rx_ring[i]);
660                 adapter->rx_ring[i] = NULL;
661         }
662         adapter->num_rx_queues = 0;
663         adapter->num_tx_queues = 0;
664 }
665
666 /**
667  * igb_alloc_queues - Allocate memory for all rings
668  * @adapter: board private structure to initialize
669  *
670  * We allocate one ring per queue at run-time since we don't know the
671  * number of queues at compile-time.
672  **/
673 static int igb_alloc_queues(struct igb_adapter *adapter)
674 {
675         struct igb_ring *ring;
676         int i;
677
678         for (i = 0; i < adapter->num_tx_queues; i++) {
679                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
680                 if (!ring)
681                         goto err;
682                 ring->count = adapter->tx_ring_count;
683                 ring->queue_index = i;
684                 ring->dev = &adapter->pdev->dev;
685                 ring->netdev = adapter->netdev;
686                 /* For 82575, context index must be unique per ring. */
687                 if (adapter->hw.mac.type == e1000_82575)
688                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
689                 adapter->tx_ring[i] = ring;
690         }
691
692         for (i = 0; i < adapter->num_rx_queues; i++) {
693                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
694                 if (!ring)
695                         goto err;
696                 ring->count = adapter->rx_ring_count;
697                 ring->queue_index = i;
698                 ring->dev = &adapter->pdev->dev;
699                 ring->netdev = adapter->netdev;
700                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
701                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
702                 /* set flag indicating ring supports SCTP checksum offload */
703                 if (adapter->hw.mac.type >= e1000_82576)
704                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
705                 adapter->rx_ring[i] = ring;
706         }
707
708         igb_cache_ring_register(adapter);
709
710         return 0;
711
712 err:
713         igb_free_queues(adapter);
714
715         return -ENOMEM;
716 }
717
718 #define IGB_N0_QUEUE -1
719 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
720 {
721         u32 msixbm = 0;
722         struct igb_adapter *adapter = q_vector->adapter;
723         struct e1000_hw *hw = &adapter->hw;
724         u32 ivar, index;
725         int rx_queue = IGB_N0_QUEUE;
726         int tx_queue = IGB_N0_QUEUE;
727
728         if (q_vector->rx_ring)
729                 rx_queue = q_vector->rx_ring->reg_idx;
730         if (q_vector->tx_ring)
731                 tx_queue = q_vector->tx_ring->reg_idx;
732
733         switch (hw->mac.type) {
734         case e1000_82575:
735                 /* The 82575 assigns vectors using a bitmask, which matches the
736                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
737                    or more queues to a vector, we write the appropriate bits
738                    into the MSIXBM register for that vector. */
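                /*
                 * Illustrative example: rx queue 2 and tx queue 2 sharing
                 * MSI-X vector 3 would set (E1000_EICR_RX_QUEUE0 << 2) and
                 * (E1000_EICR_TX_QUEUE0 << 2) in MSIXBM(3).
                 */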
739                 if (rx_queue > IGB_N0_QUEUE)
740                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
741                 if (tx_queue > IGB_N0_QUEUE)
742                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
743                 if (!adapter->msix_entries && msix_vector == 0)
744                         msixbm |= E1000_EIMS_OTHER;
745                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
746                 q_vector->eims_value = msixbm;
747                 break;
748         case e1000_82576:
749                 /* 82576 uses a table-based method for assigning vectors.
750                    Each queue has a single entry in the table to which we write
751                    a vector number along with a "valid" bit.  Sadly, the layout
752                    of the table is somewhat counterintuitive. */
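                /*
                 * Illustrative layout: IVAR0[queue & 0x7] packs one queue per
                 * byte - RX queues 0-7 in byte 0, TX queues 0-7 in byte 1,
                 * RX queues 8-15 in byte 2 and TX queues 8-15 in byte 3, so
                 * RX queue 10, for example, lands in byte 2 of IVAR0[2].
                 */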
753                 if (rx_queue > IGB_N0_QUEUE) {
754                         index = (rx_queue & 0x7);
755                         ivar = array_rd32(E1000_IVAR0, index);
756                         if (rx_queue < 8) {
757                                 /* vector goes into low byte of register */
758                                 ivar = ivar & 0xFFFFFF00;
759                                 ivar |= msix_vector | E1000_IVAR_VALID;
760                         } else {
761                                 /* vector goes into third byte of register */
762                                 ivar = ivar & 0xFF00FFFF;
763                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
764                         }
765                         array_wr32(E1000_IVAR0, index, ivar);
766                 }
767                 if (tx_queue > IGB_N0_QUEUE) {
768                         index = (tx_queue & 0x7);
769                         ivar = array_rd32(E1000_IVAR0, index);
770                         if (tx_queue < 8) {
771                                 /* vector goes into second byte of register */
772                                 ivar = ivar & 0xFFFF00FF;
773                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
774                         } else {
775                                 /* vector goes into high byte of register */
776                                 ivar = ivar & 0x00FFFFFF;
777                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
778                         }
779                         array_wr32(E1000_IVAR0, index, ivar);
780                 }
781                 q_vector->eims_value = 1 << msix_vector;
782                 break;
783         case e1000_82580:
784         case e1000_i350:
785                 /* 82580 uses the same table-based approach as 82576 but has fewer
786                    entries; as a result we carry over for queues greater than 4. */
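                /*
                 * Illustrative layout: IVAR0[queue >> 1] holds two queues -
                 * even RX queues in byte 0, odd RX queues in byte 2, even TX
                 * queues in byte 1 and odd TX queues in byte 3.
                 */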
787                 if (rx_queue > IGB_N0_QUEUE) {
788                         index = (rx_queue >> 1);
789                         ivar = array_rd32(E1000_IVAR0, index);
790                         if (rx_queue & 0x1) {
791                                 /* vector goes into third byte of register */
792                                 ivar = ivar & 0xFF00FFFF;
793                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
794                         } else {
795                                 /* vector goes into low byte of register */
796                                 ivar = ivar & 0xFFFFFF00;
797                                 ivar |= msix_vector | E1000_IVAR_VALID;
798                         }
799                         array_wr32(E1000_IVAR0, index, ivar);
800                 }
801                 if (tx_queue > IGB_N0_QUEUE) {
802                         index = (tx_queue >> 1);
803                         ivar = array_rd32(E1000_IVAR0, index);
804                         if (tx_queue & 0x1) {
805                                 /* vector goes into high byte of register */
806                                 ivar = ivar & 0x00FFFFFF;
807                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
808                         } else {
809                                 /* vector goes into second byte of register */
810                                 ivar = ivar & 0xFFFF00FF;
811                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
812                         }
813                         array_wr32(E1000_IVAR0, index, ivar);
814                 }
815                 q_vector->eims_value = 1 << msix_vector;
816                 break;
817         default:
818                 BUG();
819                 break;
820         }
821
822         /* add q_vector eims value to global eims_enable_mask */
823         adapter->eims_enable_mask |= q_vector->eims_value;
824
825         /* configure q_vector to set itr on first interrupt */
826         q_vector->set_itr = 1;
827 }
828
829 /**
830  * igb_configure_msix - Configure MSI-X hardware
831  *
832  * igb_configure_msix sets up the hardware to properly
833  * generate MSI-X interrupts.
834  **/
835 static void igb_configure_msix(struct igb_adapter *adapter)
836 {
837         u32 tmp;
838         int i, vector = 0;
839         struct e1000_hw *hw = &adapter->hw;
840
841         adapter->eims_enable_mask = 0;
842
843         /* set vector for other causes, i.e. link changes */
844         switch (hw->mac.type) {
845         case e1000_82575:
846                 tmp = rd32(E1000_CTRL_EXT);
847                 /* enable MSI-X PBA support*/
848                 tmp |= E1000_CTRL_EXT_PBA_CLR;
849
850                 /* Auto-Mask interrupts upon ICR read. */
851                 tmp |= E1000_CTRL_EXT_EIAME;
852                 tmp |= E1000_CTRL_EXT_IRCA;
853
854                 wr32(E1000_CTRL_EXT, tmp);
855
856                 /* enable msix_other interrupt */
857                 array_wr32(E1000_MSIXBM(0), vector++,
858                                       E1000_EIMS_OTHER);
859                 adapter->eims_other = E1000_EIMS_OTHER;
860
861                 break;
862
863         case e1000_82576:
864         case e1000_82580:
865         case e1000_i350:
866                 /* Turn on MSI-X capability first, or our settings
867                  * won't stick.  And it will take days to debug. */
868                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
869                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
870                                 E1000_GPIE_NSICR);
871
872                 /* enable msix_other interrupt */
873                 adapter->eims_other = 1 << vector;
874                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
875
876                 wr32(E1000_IVAR_MISC, tmp);
877                 break;
878         default:
879                 /* do nothing, since nothing else supports MSI-X */
880                 break;
881         } /* switch (hw->mac.type) */
882
883         adapter->eims_enable_mask |= adapter->eims_other;
884
885         for (i = 0; i < adapter->num_q_vectors; i++)
886                 igb_assign_vector(adapter->q_vector[i], vector++);
887
888         wrfl();
889 }
890
891 /**
892  * igb_request_msix - Initialize MSI-X interrupts
893  *
894  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
895  * kernel.
896  **/
897 static int igb_request_msix(struct igb_adapter *adapter)
898 {
899         struct net_device *netdev = adapter->netdev;
900         struct e1000_hw *hw = &adapter->hw;
901         int i, err = 0, vector = 0;
902
903         err = request_irq(adapter->msix_entries[vector].vector,
904                           igb_msix_other, 0, netdev->name, adapter);
905         if (err)
906                 goto out;
907         vector++;
908
909         for (i = 0; i < adapter->num_q_vectors; i++) {
910                 struct igb_q_vector *q_vector = adapter->q_vector[i];
911
912                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
913
914                 if (q_vector->rx_ring && q_vector->tx_ring)
915                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
916                                 q_vector->rx_ring->queue_index);
917                 else if (q_vector->tx_ring)
918                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
919                                 q_vector->tx_ring->queue_index);
920                 else if (q_vector->rx_ring)
921                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
922                                 q_vector->rx_ring->queue_index);
923                 else
924                         sprintf(q_vector->name, "%s-unused", netdev->name);
925
926                 err = request_irq(adapter->msix_entries[vector].vector,
927                                   igb_msix_ring, 0, q_vector->name,
928                                   q_vector);
929                 if (err)
930                         goto out;
931                 vector++;
932         }
933
934         igb_configure_msix(adapter);
935         return 0;
936 out:
937         return err;
938 }
939
940 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
941 {
942         if (adapter->msix_entries) {
943                 pci_disable_msix(adapter->pdev);
944                 kfree(adapter->msix_entries);
945                 adapter->msix_entries = NULL;
946         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
947                 pci_disable_msi(adapter->pdev);
948         }
949 }
950
951 /**
952  * igb_free_q_vectors - Free memory allocated for interrupt vectors
953  * @adapter: board private structure to initialize
954  *
955  * This function frees the memory allocated to the q_vectors.  In addition if
956  * NAPI is enabled it will delete any references to the NAPI struct prior
957  * to freeing the q_vector.
958  **/
959 static void igb_free_q_vectors(struct igb_adapter *adapter)
960 {
961         int v_idx;
962
963         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
964                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
965                 adapter->q_vector[v_idx] = NULL;
966                 if (!q_vector)
967                         continue;
968                 netif_napi_del(&q_vector->napi);
969                 kfree(q_vector);
970         }
971         adapter->num_q_vectors = 0;
972 }
973
974 /**
975  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
976  *
977  * This function resets the device so that it has 0 rx queues, tx queues, and
978  * MSI-X interrupts allocated.
979  */
980 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
981 {
982         igb_free_queues(adapter);
983         igb_free_q_vectors(adapter);
984         igb_reset_interrupt_capability(adapter);
985 }
986
987 /**
988  * igb_set_interrupt_capability - set MSI or MSI-X if supported
989  *
990  * Attempt to configure interrupts using the best available
991  * capabilities of the hardware and kernel.
992  **/
993 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
994 {
995         int err;
996         int numvecs, i;
997
998         /* Number of supported queues. */
999         adapter->num_rx_queues = adapter->rss_queues;
1000         if (adapter->vfs_allocated_count)
1001                 adapter->num_tx_queues = 1;
1002         else
1003                 adapter->num_tx_queues = adapter->rss_queues;
1004
1005         /* start with one vector for every rx queue */
1006         numvecs = adapter->num_rx_queues;
1007
1008         /* if tx handler is separate add 1 for every tx queue */
1009         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1010                 numvecs += adapter->num_tx_queues;
1011
1012         /* store the number of vectors reserved for queues */
1013         adapter->num_q_vectors = numvecs;
1014
1015         /* add 1 vector for link status interrupts */
1016         numvecs++;
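        /*
         * Illustrative count: with 4 RSS queues and queue pairing disabled
         * this works out to 4 rx + 4 tx + 1 link = 9 vectors; with pairing
         * enabled it is 4 shared rx/tx vectors + 1 link = 5.
         */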
1017         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1018                                         GFP_KERNEL);
1019         if (!adapter->msix_entries)
1020                 goto msi_only;
1021
1022         for (i = 0; i < numvecs; i++)
1023                 adapter->msix_entries[i].entry = i;
1024
1025         err = pci_enable_msix(adapter->pdev,
1026                               adapter->msix_entries,
1027                               numvecs);
1028         if (err == 0)
1029                 goto out;
1030
1031         igb_reset_interrupt_capability(adapter);
1032
1033         /* If we can't do MSI-X, try MSI */
1034 msi_only:
1035 #ifdef CONFIG_PCI_IOV
1036         /* disable SR-IOV for non MSI-X configurations */
1037         if (adapter->vf_data) {
1038                 struct e1000_hw *hw = &adapter->hw;
1039                 /* disable iov and allow time for transactions to clear */
1040                 pci_disable_sriov(adapter->pdev);
1041                 msleep(500);
1042
1043                 kfree(adapter->vf_data);
1044                 adapter->vf_data = NULL;
1045                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1046                 msleep(100);
1047                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1048         }
1049 #endif
1050         adapter->vfs_allocated_count = 0;
1051         adapter->rss_queues = 1;
1052         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1053         adapter->num_rx_queues = 1;
1054         adapter->num_tx_queues = 1;
1055         adapter->num_q_vectors = 1;
1056         if (!pci_enable_msi(adapter->pdev))
1057                 adapter->flags |= IGB_FLAG_HAS_MSI;
1058 out:
1059         /* Notify the stack of the (possibly) reduced queue counts. */
1060         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1061         return netif_set_real_num_rx_queues(adapter->netdev,
1062                                             adapter->num_rx_queues);
1063 }
1064
1065 /**
1066  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1067  * @adapter: board private structure to initialize
1068  *
1069  * We allocate one q_vector per queue interrupt.  If allocation fails we
1070  * return -ENOMEM.
1071  **/
1072 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1073 {
1074         struct igb_q_vector *q_vector;
1075         struct e1000_hw *hw = &adapter->hw;
1076         int v_idx;
1077
1078         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1079                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1080                 if (!q_vector)
1081                         goto err_out;
1082                 q_vector->adapter = adapter;
1083                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1084                 q_vector->itr_val = IGB_START_ITR;
1085                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1086                 adapter->q_vector[v_idx] = q_vector;
1087         }
1088         return 0;
1089
1090 err_out:
1091         igb_free_q_vectors(adapter);
1092         return -ENOMEM;
1093 }
1094
1095 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1096                                       int ring_idx, int v_idx)
1097 {
1098         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1099
1100         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1101         q_vector->rx_ring->q_vector = q_vector;
1102         q_vector->itr_val = adapter->rx_itr_setting;
1103         if (q_vector->itr_val && q_vector->itr_val <= 3)
1104                 q_vector->itr_val = IGB_START_ITR;
1105 }
1106
1107 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1108                                       int ring_idx, int v_idx)
1109 {
1110         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1111
1112         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1113         q_vector->tx_ring->q_vector = q_vector;
1114         q_vector->itr_val = adapter->tx_itr_setting;
1115         if (q_vector->itr_val && q_vector->itr_val <= 3)
1116                 q_vector->itr_val = IGB_START_ITR;
1117 }
1118
1119 /**
1120  * igb_map_ring_to_vector - maps allocated queues to vectors
1121  *
1122  * This function maps the recently allocated queues to vectors.
1123  **/
1124 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1125 {
1126         int i;
1127         int v_idx = 0;
1128
1129         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1130             (adapter->num_q_vectors < adapter->num_tx_queues))
1131                 return -ENOMEM;
1132
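        /*
         * Illustrative mapping: with 4 rx and 4 tx queues and 8 q_vectors,
         * rx rings take vectors 0-3 and tx rings take vectors 4-7; with
         * only 4 q_vectors, vector i services both rx_ring[i] and
         * tx_ring[i].
         */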
1133         if (adapter->num_q_vectors >=
1134             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1135                 for (i = 0; i < adapter->num_rx_queues; i++)
1136                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1137                 for (i = 0; i < adapter->num_tx_queues; i++)
1138                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1139         } else {
1140                 for (i = 0; i < adapter->num_rx_queues; i++) {
1141                         if (i < adapter->num_tx_queues)
1142                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1143                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1144                 }
1145                 for (; i < adapter->num_tx_queues; i++)
1146                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1147         }
1148         return 0;
1149 }
1150
1151 /**
1152  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1153  *
1154  * This function initializes the interrupts and allocates all of the queues.
1155  **/
1156 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1157 {
1158         struct pci_dev *pdev = adapter->pdev;
1159         int err;
1160
1161         err = igb_set_interrupt_capability(adapter);
1162         if (err)
1163                 return err;
1164
1165         err = igb_alloc_q_vectors(adapter);
1166         if (err) {
1167                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1168                 goto err_alloc_q_vectors;
1169         }
1170
1171         err = igb_alloc_queues(adapter);
1172         if (err) {
1173                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1174                 goto err_alloc_queues;
1175         }
1176
1177         err = igb_map_ring_to_vector(adapter);
1178         if (err) {
1179                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1180                 goto err_map_queues;
1181         }
1182
1183
1184         return 0;
1185 err_map_queues:
1186         igb_free_queues(adapter);
1187 err_alloc_queues:
1188         igb_free_q_vectors(adapter);
1189 err_alloc_q_vectors:
1190         igb_reset_interrupt_capability(adapter);
1191         return err;
1192 }
1193
1194 /**
1195  * igb_request_irq - initialize interrupts
1196  *
1197  * Attempts to configure interrupts using the best available
1198  * capabilities of the hardware and kernel.
1199  **/
1200 static int igb_request_irq(struct igb_adapter *adapter)
1201 {
1202         struct net_device *netdev = adapter->netdev;
1203         struct pci_dev *pdev = adapter->pdev;
1204         int err = 0;
1205
1206         if (adapter->msix_entries) {
1207                 err = igb_request_msix(adapter);
1208                 if (!err)
1209                         goto request_done;
1210                 /* fall back to MSI */
1211                 igb_clear_interrupt_scheme(adapter);
1212                 if (!pci_enable_msi(adapter->pdev))
1213                         adapter->flags |= IGB_FLAG_HAS_MSI;
1214                 igb_free_all_tx_resources(adapter);
1215                 igb_free_all_rx_resources(adapter);
1216                 adapter->num_tx_queues = 1;
1217                 adapter->num_rx_queues = 1;
1218                 adapter->num_q_vectors = 1;
1219                 err = igb_alloc_q_vectors(adapter);
1220                 if (err) {
1221                         dev_err(&pdev->dev,
1222                                 "Unable to allocate memory for vectors\n");
1223                         goto request_done;
1224                 }
1225                 err = igb_alloc_queues(adapter);
1226                 if (err) {
1227                         dev_err(&pdev->dev,
1228                                 "Unable to allocate memory for queues\n");
1229                         igb_free_q_vectors(adapter);
1230                         goto request_done;
1231                 }
1232                 igb_setup_all_tx_resources(adapter);
1233                 igb_setup_all_rx_resources(adapter);
1234         } else {
1235                 igb_assign_vector(adapter->q_vector[0], 0);
1236         }
1237
1238         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1239                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1240                                   netdev->name, adapter);
1241                 if (!err)
1242                         goto request_done;
1243
1244                 /* fall back to legacy interrupts */
1245                 igb_reset_interrupt_capability(adapter);
1246                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1247         }
1248
1249         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1250                           netdev->name, adapter);
1251
1252         if (err)
1253                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1254                         err);
1255
1256 request_done:
1257         return err;
1258 }
1259
1260 static void igb_free_irq(struct igb_adapter *adapter)
1261 {
1262         if (adapter->msix_entries) {
1263                 int vector = 0, i;
1264
1265                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1266
1267                 for (i = 0; i < adapter->num_q_vectors; i++) {
1268                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1269                         free_irq(adapter->msix_entries[vector++].vector,
1270                                  q_vector);
1271                 }
1272         } else {
1273                 free_irq(adapter->pdev->irq, adapter);
1274         }
1275 }
1276
1277 /**
1278  * igb_irq_disable - Mask off interrupt generation on the NIC
1279  * @adapter: board private structure
1280  **/
1281 static void igb_irq_disable(struct igb_adapter *adapter)
1282 {
1283         struct e1000_hw *hw = &adapter->hw;
1284
1285         /*
1286          * we need to be careful when disabling interrupts.  The VFs are also
1287          * mapped into these registers and so clearing the bits can cause
1288          * issues on the VF drivers so we only need to clear what we set
1289          */
1290         if (adapter->msix_entries) {
1291                 u32 regval = rd32(E1000_EIAM);
1292                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1293                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1294                 regval = rd32(E1000_EIAC);
1295                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1296         }
1297
1298         wr32(E1000_IAM, 0);
1299         wr32(E1000_IMC, ~0);
1300         wrfl();
1301         if (adapter->msix_entries) {
1302                 int i;
1303                 for (i = 0; i < adapter->num_q_vectors; i++)
1304                         synchronize_irq(adapter->msix_entries[i].vector);
1305         } else {
1306                 synchronize_irq(adapter->pdev->irq);
1307         }
1308 }
1309
1310 /**
1311  * igb_irq_enable - Enable default interrupt generation settings
1312  * @adapter: board private structure
1313  **/
1314 static void igb_irq_enable(struct igb_adapter *adapter)
1315 {
1316         struct e1000_hw *hw = &adapter->hw;
1317
1318         if (adapter->msix_entries) {
1319                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1320                 u32 regval = rd32(E1000_EIAC);
1321                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1322                 regval = rd32(E1000_EIAM);
1323                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1324                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1325                 if (adapter->vfs_allocated_count) {
1326                         wr32(E1000_MBVFIMR, 0xFF);
1327                         ims |= E1000_IMS_VMMB;
1328                 }
1329                 if (adapter->hw.mac.type == e1000_82580)
1330                         ims |= E1000_IMS_DRSTA;
1331
1332                 wr32(E1000_IMS, ims);
1333         } else {
1334                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1335                                 E1000_IMS_DRSTA);
1336                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1337                                 E1000_IMS_DRSTA);
1338         }
1339 }
1340
1341 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1342 {
1343         struct e1000_hw *hw = &adapter->hw;
1344         u16 vid = adapter->hw.mng_cookie.vlan_id;
1345         u16 old_vid = adapter->mng_vlan_id;
1346
1347         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1348                 /* add VID to filter table */
1349                 igb_vfta_set(hw, vid, true);
1350                 adapter->mng_vlan_id = vid;
1351         } else {
1352                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1353         }
1354
1355         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1356             (vid != old_vid) &&
1357             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1358                 /* remove VID from filter table */
1359                 igb_vfta_set(hw, old_vid, false);
1360         }
1361 }
1362
1363 /**
1364  * igb_release_hw_control - release control of the h/w to f/w
1365  * @adapter: address of board private structure
1366  *
1367  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1368  * For ASF and Pass Through versions of f/w this means that the
1369  * driver is no longer loaded.
1370  *
1371  **/
1372 static void igb_release_hw_control(struct igb_adapter *adapter)
1373 {
1374         struct e1000_hw *hw = &adapter->hw;
1375         u32 ctrl_ext;
1376
1377         /* Let firmware take over control of h/w */
1378         ctrl_ext = rd32(E1000_CTRL_EXT);
1379         wr32(E1000_CTRL_EXT,
1380                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1381 }
1382
1383 /**
1384  * igb_get_hw_control - get control of the h/w from f/w
1385  * @adapter: address of board private structure
1386  *
1387  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1388  * For ASF and Pass Through versions of f/w this means that
1389  * the driver is loaded.
1390  *
1391  **/
1392 static void igb_get_hw_control(struct igb_adapter *adapter)
1393 {
1394         struct e1000_hw *hw = &adapter->hw;
1395         u32 ctrl_ext;
1396
1397         /* Let firmware know the driver has taken over */
1398         ctrl_ext = rd32(E1000_CTRL_EXT);
1399         wr32(E1000_CTRL_EXT,
1400                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1401 }
1402
1403 /**
1404  * igb_configure - configure the hardware for RX and TX
1405  * @adapter: private board structure
1406  **/
1407 static void igb_configure(struct igb_adapter *adapter)
1408 {
1409         struct net_device *netdev = adapter->netdev;
1410         int i;
1411
1412         igb_get_hw_control(adapter);
1413         igb_set_rx_mode(netdev);
1414
1415         igb_restore_vlan(adapter);
1416
1417         igb_setup_tctl(adapter);
1418         igb_setup_mrqc(adapter);
1419         igb_setup_rctl(adapter);
1420
1421         igb_configure_tx(adapter);
1422         igb_configure_rx(adapter);
1423
1424         igb_rx_fifo_flush_82575(&adapter->hw);
1425
1426         /* call igb_desc_unused which always leaves
1427          * at least 1 descriptor unused to make sure
1428          * next_to_use != next_to_clean */
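        /* Illustration (assuming a 256-entry ring): when the ring is completely
         * clean (next_to_clean == next_to_use), igb_desc_unused() reports 255,
         * so one slot always stays empty. */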
1429         for (i = 0; i < adapter->num_rx_queues; i++) {
1430                 struct igb_ring *ring = adapter->rx_ring[i];
1431                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1432         }
1433 }
1434
1435 /**
1436  * igb_power_up_link - Power up the phy/serdes link
1437  * @adapter: address of board private structure
1438  **/
1439 void igb_power_up_link(struct igb_adapter *adapter)
1440 {
1441         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1442                 igb_power_up_phy_copper(&adapter->hw);
1443         else
1444                 igb_power_up_serdes_link_82575(&adapter->hw);
1445 }
1446
1447 /**
1448  * igb_power_down_link - Power down the phy/serdes link
1449  * @adapter: address of board private structure
1450  */
1451 static void igb_power_down_link(struct igb_adapter *adapter)
1452 {
1453         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1454                 igb_power_down_phy_copper_82575(&adapter->hw);
1455         else
1456                 igb_shutdown_serdes_link_82575(&adapter->hw);
1457 }
1458
1459 /**
1460  * igb_up - Open the interface and prepare it to handle traffic
1461  * @adapter: board private structure
1462  **/
1463 int igb_up(struct igb_adapter *adapter)
1464 {
1465         struct e1000_hw *hw = &adapter->hw;
1466         int i;
1467
1468         /* hardware has been reset, we need to reload some things */
1469         igb_configure(adapter);
1470
1471         clear_bit(__IGB_DOWN, &adapter->state);
1472
1473         for (i = 0; i < adapter->num_q_vectors; i++) {
1474                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1475                 napi_enable(&q_vector->napi);
1476         }
1477         if (adapter->msix_entries)
1478                 igb_configure_msix(adapter);
1479         else
1480                 igb_assign_vector(adapter->q_vector[0], 0);
1481
1482         /* Clear any pending interrupts. */
1483         rd32(E1000_ICR);
1484         igb_irq_enable(adapter);
1485
1486         /* notify VFs that reset has been completed */
1487         if (adapter->vfs_allocated_count) {
1488                 u32 reg_data = rd32(E1000_CTRL_EXT);
1489                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1490                 wr32(E1000_CTRL_EXT, reg_data);
1491         }
1492
1493         netif_tx_start_all_queues(adapter->netdev);
1494
1495         /* start the watchdog. */
1496         hw->mac.get_link_status = 1;
1497         schedule_work(&adapter->watchdog_task);
1498
1499         return 0;
1500 }
1501
1502 void igb_down(struct igb_adapter *adapter)
1503 {
1504         struct net_device *netdev = adapter->netdev;
1505         struct e1000_hw *hw = &adapter->hw;
1506         u32 tctl, rctl;
1507         int i;
1508
1509         /* signal that we're down so the interrupt handler does not
1510          * reschedule our watchdog timer */
1511         set_bit(__IGB_DOWN, &adapter->state);
1512
1513         /* disable receives in the hardware */
1514         rctl = rd32(E1000_RCTL);
1515         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1516         /* flush and sleep below */
1517
1518         netif_tx_stop_all_queues(netdev);
1519
1520         /* disable transmits in the hardware */
1521         tctl = rd32(E1000_TCTL);
1522         tctl &= ~E1000_TCTL_EN;
1523         wr32(E1000_TCTL, tctl);
1524         /* flush both disables and wait for them to finish */
1525         wrfl();
1526         msleep(10);
1527
1528         for (i = 0; i < adapter->num_q_vectors; i++) {
1529                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1530                 napi_disable(&q_vector->napi);
1531         }
1532
1533         igb_irq_disable(adapter);
1534
1535         del_timer_sync(&adapter->watchdog_timer);
1536         del_timer_sync(&adapter->phy_info_timer);
1537
1538         netif_carrier_off(netdev);
1539
1540         /* record the stats before reset */
1541         spin_lock(&adapter->stats64_lock);
1542         igb_update_stats(adapter, &adapter->stats64);
1543         spin_unlock(&adapter->stats64_lock);
1544
1545         adapter->link_speed = 0;
1546         adapter->link_duplex = 0;
1547
1548         if (!pci_channel_offline(adapter->pdev))
1549                 igb_reset(adapter);
1550         igb_clean_all_tx_rings(adapter);
1551         igb_clean_all_rx_rings(adapter);
1552 #ifdef CONFIG_IGB_DCA
1553
1554         /* since we reset the hardware, DCA settings were cleared */
1555         igb_setup_dca(adapter);
1556 #endif
1557 }
1558
1559 void igb_reinit_locked(struct igb_adapter *adapter)
1560 {
1561         WARN_ON(in_interrupt());
1562         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1563                 msleep(1);
1564         igb_down(adapter);
1565         igb_up(adapter);
1566         clear_bit(__IGB_RESETTING, &adapter->state);
1567 }
1568
1569 void igb_reset(struct igb_adapter *adapter)
1570 {
1571         struct pci_dev *pdev = adapter->pdev;
1572         struct e1000_hw *hw = &adapter->hw;
1573         struct e1000_mac_info *mac = &hw->mac;
1574         struct e1000_fc_info *fc = &hw->fc;
1575         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1576         u16 hwm;
1577
1578         /* Repartition PBA for MTUs greater than 9k.
1579          * CTRL.RST is required for the change to take effect.
1580          */
1581         switch (mac->type) {
1582         case e1000_i350:
1583         case e1000_82580:
1584                 pba = rd32(E1000_RXPBS);
1585                 pba = igb_rxpbs_adjust_82580(pba);
1586                 break;
1587         case e1000_82576:
1588                 pba = rd32(E1000_RXPBS);
1589                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1590                 break;
1591         case e1000_82575:
1592         default:
1593                 pba = E1000_PBA_34K;
1594                 break;
1595         }
1596
1597         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1598             (mac->type < e1000_82576)) {
1599                 /* adjust PBA for jumbo frames */
1600                 wr32(E1000_PBA, pba);
1601
1602                 /* To maintain wire speed transmits, the Tx FIFO should be
1603                  * large enough to accommodate two full transmit packets,
1604                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1605                  * the Rx FIFO should be large enough to accommodate at least
1606                  * one full receive packet and is similarly rounded up and
1607                  * expressed in KB. */
1608                 pba = rd32(E1000_PBA);
1609                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1610                 tx_space = pba >> 16;
1611                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1612                 pba &= 0xffff;
1613                 /* the Tx FIFO also stores 16 bytes of information about the Tx,
1614                  * but the Ethernet FCS is not counted because hardware appends it */
1615                 min_tx_space = (adapter->max_frame_size +
1616                                 sizeof(union e1000_adv_tx_desc) -
1617                                 ETH_FCS_LEN) * 2;
1618                 min_tx_space = ALIGN(min_tx_space, 1024);
1619                 min_tx_space >>= 10;
1620                 /* software strips receive CRC, so leave room for it */
1621                 min_rx_space = adapter->max_frame_size;
1622                 min_rx_space = ALIGN(min_rx_space, 1024);
1623                 min_rx_space >>= 10;
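                /* Worked example (assuming a 9000-byte MTU, so max_frame_size is
                 * 9018, and 16-byte advanced descriptors): min_tx_space =
                 * (9018 + 16 - 4) * 2 = 18060 -> ALIGN(18060, 1024) = 18432 ->
                 * 18 KB; min_rx_space = ALIGN(9018, 1024) = 9216 -> 9 KB. */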
1624
1625                 /* If current Tx allocation is less than the min Tx FIFO size,
1626                  * and the min Tx FIFO size is less than the current Rx FIFO
1627                  * allocation, take space away from current Rx allocation */
1628                 if (tx_space < min_tx_space &&
1629                     ((min_tx_space - tx_space) < pba)) {
1630                         pba = pba - (min_tx_space - tx_space);
1631
1632                         /* if short on rx space, rx wins and must trump tx
1633                          * adjustment */
1634                         if (pba < min_rx_space)
1635                                 pba = min_rx_space;
1636                 }
1637                 wr32(E1000_PBA, pba);
1638         }
1639
1640         /* flow control settings */
1641         /* The high water mark must be low enough to fit one full frame
1642          * (or the size used for early receive) above it in the Rx FIFO.
1643          * Set it to the lower of:
1644          * - 90% of the Rx FIFO size, or
1645          * - the full Rx FIFO size minus one full frame */
1646         hwm = min(((pba << 10) * 9 / 10),
1647                         ((pba << 10) - 2 * adapter->max_frame_size));
1648
1649         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1650         fc->low_water = fc->high_water - 16;
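        /* Worked example (assuming pba = 34, the 82575 default in KB, and
         * max_frame_size = 1518): hwm = min(34816 * 9 / 10, 34816 - 3036) =
         * min(31334, 31780) = 31334; high_water = 31334 & 0xFFF0 = 31328 and
         * low_water = 31312 bytes. */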
1651         fc->pause_time = 0xFFFF;
1652         fc->send_xon = 1;
1653         fc->current_mode = fc->requested_mode;
1654
1655         /* disable receive for all VFs and wait one second */
1656         if (adapter->vfs_allocated_count) {
1657                 int i;
1658                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1659                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1660
1661                 /* ping all the active vfs to let them know we are going down */
1662                 igb_ping_all_vfs(adapter);
1663
1664                 /* disable transmits and receives */
1665                 wr32(E1000_VFRE, 0);
1666                 wr32(E1000_VFTE, 0);
1667         }
1668
1669         /* Allow time for pending master requests to run */
1670         hw->mac.ops.reset_hw(hw);
1671         wr32(E1000_WUC, 0);
1672
1673         if (hw->mac.ops.init_hw(hw))
1674                 dev_err(&pdev->dev, "Hardware Error\n");
1675
1676         if (hw->mac.type == e1000_82580) {
1677                 u32 reg = rd32(E1000_PCIEMISC);
1678                 wr32(E1000_PCIEMISC,
1679                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1680         }
1681         if (!netif_running(adapter->netdev))
1682                 igb_power_down_link(adapter);
1683
1684         igb_update_mng_vlan(adapter);
1685
1686         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1687         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1688
1689         igb_get_phy_info(hw);
1690 }
1691
1692 static const struct net_device_ops igb_netdev_ops = {
1693         .ndo_open               = igb_open,
1694         .ndo_stop               = igb_close,
1695         .ndo_start_xmit         = igb_xmit_frame_adv,
1696         .ndo_get_stats64        = igb_get_stats64,
1697         .ndo_set_rx_mode        = igb_set_rx_mode,
1698         .ndo_set_multicast_list = igb_set_rx_mode,
1699         .ndo_set_mac_address    = igb_set_mac,
1700         .ndo_change_mtu         = igb_change_mtu,
1701         .ndo_do_ioctl           = igb_ioctl,
1702         .ndo_tx_timeout         = igb_tx_timeout,
1703         .ndo_validate_addr      = eth_validate_addr,
1704         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1705         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1706         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1707         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1708         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1709         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1710         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1711 #ifdef CONFIG_NET_POLL_CONTROLLER
1712         .ndo_poll_controller    = igb_netpoll,
1713 #endif
1714 };
1715
1716 /**
1717  * igb_probe - Device Initialization Routine
1718  * @pdev: PCI device information struct
1719  * @ent: entry in igb_pci_tbl
1720  *
1721  * Returns 0 on success, negative on failure
1722  *
1723  * igb_probe initializes an adapter identified by a pci_dev structure.
1724  * The OS initialization, configuring of the adapter private structure,
1725  * and a hardware reset occur.
1726  **/
1727 static int __devinit igb_probe(struct pci_dev *pdev,
1728                                const struct pci_device_id *ent)
1729 {
1730         struct net_device *netdev;
1731         struct igb_adapter *adapter;
1732         struct e1000_hw *hw;
1733         u16 eeprom_data = 0;
1734         s32 ret_val;
1735         static int global_quad_port_a; /* global quad port a indication */
1736         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1737         unsigned long mmio_start, mmio_len;
1738         int err, pci_using_dac;
1739         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1740         u8 part_str[E1000_PBANUM_LENGTH];
1741
1742         /* Catch broken hardware that put the wrong VF device ID in
1743          * the PCIe SR-IOV capability.
1744          */
1745         if (pdev->is_virtfn) {
1746                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1747                      pci_name(pdev), pdev->vendor, pdev->device);
1748                 return -EINVAL;
1749         }
1750
1751         err = pci_enable_device_mem(pdev);
1752         if (err)
1753                 return err;
1754
1755         pci_using_dac = 0;
1756         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1757         if (!err) {
1758                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1759                 if (!err)
1760                         pci_using_dac = 1;
1761         } else {
1762                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1763                 if (err) {
1764                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1765                         if (err) {
1766                                 dev_err(&pdev->dev, "No usable DMA "
1767                                         "configuration, aborting\n");
1768                                 goto err_dma;
1769                         }
1770                 }
1771         }
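        /* pci_using_dac records whether the 64-bit DMA mask was accepted; it is
         * checked further down to decide whether NETIF_F_HIGHDMA can be set. */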
1772
1773         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1774                                            IORESOURCE_MEM),
1775                                            igb_driver_name);
1776         if (err)
1777                 goto err_pci_reg;
1778
1779         pci_enable_pcie_error_reporting(pdev);
1780
1781         pci_set_master(pdev);
1782         pci_save_state(pdev);
1783
1784         err = -ENOMEM;
1785         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1786                                    IGB_ABS_MAX_TX_QUEUES);
1787         if (!netdev)
1788                 goto err_alloc_etherdev;
1789
1790         SET_NETDEV_DEV(netdev, &pdev->dev);
1791
1792         pci_set_drvdata(pdev, netdev);
1793         adapter = netdev_priv(netdev);
1794         adapter->netdev = netdev;
1795         adapter->pdev = pdev;
1796         hw = &adapter->hw;
1797         hw->back = adapter;
1798         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1799
1800         mmio_start = pci_resource_start(pdev, 0);
1801         mmio_len = pci_resource_len(pdev, 0);
1802
1803         err = -EIO;
1804         hw->hw_addr = ioremap(mmio_start, mmio_len);
1805         if (!hw->hw_addr)
1806                 goto err_ioremap;
1807
1808         netdev->netdev_ops = &igb_netdev_ops;
1809         igb_set_ethtool_ops(netdev);
1810         netdev->watchdog_timeo = 5 * HZ;
1811
1812         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1813
1814         netdev->mem_start = mmio_start;
1815         netdev->mem_end = mmio_start + mmio_len;
1816
1817         /* PCI config space info */
1818         hw->vendor_id = pdev->vendor;
1819         hw->device_id = pdev->device;
1820         hw->revision_id = pdev->revision;
1821         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1822         hw->subsystem_device_id = pdev->subsystem_device;
1823
1824         /* Copy the default MAC, PHY and NVM function pointers */
1825         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1826         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1827         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1828         /* Initialize skew-specific constants */
1829         err = ei->get_invariants(hw);
1830         if (err)
1831                 goto err_sw_init;
1832
1833         /* setup the private structure */
1834         err = igb_sw_init(adapter);
1835         if (err)
1836                 goto err_sw_init;
1837
1838         igb_get_bus_info_pcie(hw);
1839
1840         hw->phy.autoneg_wait_to_complete = false;
1841
1842         /* Copper options */
1843         if (hw->phy.media_type == e1000_media_type_copper) {
1844                 hw->phy.mdix = AUTO_ALL_MODES;
1845                 hw->phy.disable_polarity_correction = false;
1846                 hw->phy.ms_type = e1000_ms_hw_default;
1847         }
1848
1849         if (igb_check_reset_block(hw))
1850                 dev_info(&pdev->dev,
1851                         "PHY reset is blocked due to SOL/IDER session.\n");
1852
1853         netdev->features = NETIF_F_SG |
1854                            NETIF_F_IP_CSUM |
1855                            NETIF_F_HW_VLAN_TX |
1856                            NETIF_F_HW_VLAN_RX |
1857                            NETIF_F_HW_VLAN_FILTER;
1858
1859         netdev->features |= NETIF_F_IPV6_CSUM;
1860         netdev->features |= NETIF_F_TSO;
1861         netdev->features |= NETIF_F_TSO6;
1862         netdev->features |= NETIF_F_GRO;
1863
1864         netdev->vlan_features |= NETIF_F_TSO;
1865         netdev->vlan_features |= NETIF_F_TSO6;
1866         netdev->vlan_features |= NETIF_F_IP_CSUM;
1867         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1868         netdev->vlan_features |= NETIF_F_SG;
1869
1870         if (pci_using_dac) {
1871                 netdev->features |= NETIF_F_HIGHDMA;
1872                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1873         }
1874
1875         if (hw->mac.type >= e1000_82576)
1876                 netdev->features |= NETIF_F_SCTP_CSUM;
1877
1878         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1879
1880         /* before reading the NVM, reset the controller to put the device in a
1881          * known good starting state */
1882         hw->mac.ops.reset_hw(hw);
1883
1884         /* make sure the NVM is good */
1885         if (igb_validate_nvm_checksum(hw) < 0) {
1886                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1887                 err = -EIO;
1888                 goto err_eeprom;
1889         }
1890
1891         /* copy the MAC address out of the NVM */
1892         if (hw->mac.ops.read_mac_addr(hw))
1893                 dev_err(&pdev->dev, "NVM Read Error\n");
1894
1895         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1896         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1897
1898         if (!is_valid_ether_addr(netdev->perm_addr)) {
1899                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1900                 err = -EIO;
1901                 goto err_eeprom;
1902         }
1903
1904         setup_timer(&adapter->watchdog_timer, igb_watchdog,
1905                     (unsigned long) adapter);
1906         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1907                     (unsigned long) adapter);
1908
1909         INIT_WORK(&adapter->reset_task, igb_reset_task);
1910         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1911
1912         /* Initialize link properties that are user-changeable */
1913         adapter->fc_autoneg = true;
1914         hw->mac.autoneg = true;
1915         hw->phy.autoneg_advertised = 0x2f;
1916
1917         hw->fc.requested_mode = e1000_fc_default;
1918         hw->fc.current_mode = e1000_fc_default;
1919
1920         igb_validate_mdi_setting(hw);
1921
1922         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1923          * enable the ACPI Magic Packet filter
1924          */
1925
1926         if (hw->bus.func == 0)
1927                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1928         else if (hw->mac.type == e1000_82580)
1929                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1930                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1931                                  &eeprom_data);
1932         else if (hw->bus.func == 1)
1933                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1934
1935         if (eeprom_data & eeprom_apme_mask)
1936                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1937
1938         /* now that we have the eeprom settings, apply the special cases where
1939          * the eeprom may be wrong or the board simply won't support wake on
1940          * lan on a particular port */
1941         switch (pdev->device) {
1942         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1943                 adapter->eeprom_wol = 0;
1944                 break;
1945         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1946         case E1000_DEV_ID_82576_FIBER:
1947         case E1000_DEV_ID_82576_SERDES:
1948                 /* Wake events only supported on port A for dual fiber
1949                  * regardless of eeprom setting */
1950                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1951                         adapter->eeprom_wol = 0;
1952                 break;
1953         case E1000_DEV_ID_82576_QUAD_COPPER:
1954         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
1955                 /* if quad port adapter, disable WoL on all but port A */
1956                 if (global_quad_port_a != 0)
1957                         adapter->eeprom_wol = 0;
1958                 else
1959                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1960                 /* Reset for multiple quad port adapters */
1961                 if (++global_quad_port_a == 4)
1962                         global_quad_port_a = 0;
1963                 break;
1964         }
1965
1966         /* initialize the wol settings based on the eeprom settings */
1967         adapter->wol = adapter->eeprom_wol;
1968         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1969
1970         /* reset the hardware with the new settings */
1971         igb_reset(adapter);
1972
1973         /* let the f/w know that the h/w is now under the control of the
1974          * driver. */
1975         igb_get_hw_control(adapter);
1976
1977         strcpy(netdev->name, "eth%d");
1978         err = register_netdev(netdev);
1979         if (err)
1980                 goto err_register;
1981
1982         /* carrier off reporting is important to ethtool even BEFORE open */
1983         netif_carrier_off(netdev);
1984
1985 #ifdef CONFIG_IGB_DCA
1986         if (dca_add_requester(&pdev->dev) == 0) {
1987                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1988                 dev_info(&pdev->dev, "DCA enabled\n");
1989                 igb_setup_dca(adapter);
1990         }
1991
1992 #endif
1993         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1994         /* print bus type/speed/width info */
1995         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1996                  netdev->name,
1997                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1998                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
1999                                                             "unknown"),
2000                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2001                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2002                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2003                    "unknown"),
2004                  netdev->dev_addr);
2005
2006         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2007         if (ret_val)
2008                 strcpy(part_str, "Unknown");
2009         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2010         dev_info(&pdev->dev,
2011                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2012                 adapter->msix_entries ? "MSI-X" :
2013                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2014                 adapter->num_rx_queues, adapter->num_tx_queues);
2015
2016         return 0;
2017
2018 err_register:
2019         igb_release_hw_control(adapter);
2020 err_eeprom:
2021         if (!igb_check_reset_block(hw))
2022                 igb_reset_phy(hw);
2023
2024         if (hw->flash_address)
2025                 iounmap(hw->flash_address);
2026 err_sw_init:
2027         igb_clear_interrupt_scheme(adapter);
2028         iounmap(hw->hw_addr);
2029 err_ioremap:
2030         free_netdev(netdev);
2031 err_alloc_etherdev:
2032         pci_release_selected_regions(pdev,
2033                                      pci_select_bars(pdev, IORESOURCE_MEM));
2034 err_pci_reg:
2035 err_dma:
2036         pci_disable_device(pdev);
2037         return err;
2038 }
2039
2040 /**
2041  * igb_remove - Device Removal Routine
2042  * @pdev: PCI device information struct
2043  *
2044  * igb_remove is called by the PCI subsystem to alert the driver
2045  * that it should release a PCI device.  This could be caused by a
2046  * Hot-Plug event, or because the driver is going to be removed from
2047  * memory.
2048  **/
2049 static void __devexit igb_remove(struct pci_dev *pdev)
2050 {
2051         struct net_device *netdev = pci_get_drvdata(pdev);
2052         struct igb_adapter *adapter = netdev_priv(netdev);
2053         struct e1000_hw *hw = &adapter->hw;
2054
2055         /*
2056          * The watchdog timer may be rescheduled, so explicitly
2057          * disable it from being rescheduled.
2058          */
2059         set_bit(__IGB_DOWN, &adapter->state);
2060         del_timer_sync(&adapter->watchdog_timer);
2061         del_timer_sync(&adapter->phy_info_timer);
2062
2063         cancel_work_sync(&adapter->reset_task);
2064         cancel_work_sync(&adapter->watchdog_task);
2065
2066 #ifdef CONFIG_IGB_DCA
2067         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2068                 dev_info(&pdev->dev, "DCA disabled\n");
2069                 dca_remove_requester(&pdev->dev);
2070                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2071                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2072         }
2073 #endif
2074
2075         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2076          * would have already happened in close and is redundant. */
2077         igb_release_hw_control(adapter);
2078
2079         unregister_netdev(netdev);
2080
2081         igb_clear_interrupt_scheme(adapter);
2082
2083 #ifdef CONFIG_PCI_IOV
2084         /* reclaim resources allocated to VFs */
2085         if (adapter->vf_data) {
2086                 /* disable iov and allow time for transactions to clear */
2087                 pci_disable_sriov(pdev);
2088                 msleep(500);
2089
2090                 kfree(adapter->vf_data);
2091                 adapter->vf_data = NULL;
2092                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2093                 msleep(100);
2094                 dev_info(&pdev->dev, "IOV Disabled\n");
2095         }
2096 #endif
2097
2098         iounmap(hw->hw_addr);
2099         if (hw->flash_address)
2100                 iounmap(hw->flash_address);
2101         pci_release_selected_regions(pdev,
2102                                      pci_select_bars(pdev, IORESOURCE_MEM));
2103
2104         free_netdev(netdev);
2105
2106         pci_disable_pcie_error_reporting(pdev);
2107
2108         pci_disable_device(pdev);
2109 }
2110
2111 /**
2112  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2113  * @adapter: board private structure to initialize
2114  *
2115  * This function initializes the vf specific data storage and then attempts to
2116  * allocate the VFs.  The reason for ordering it this way is because it is much
2117  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2118  * the memory for the VFs.
2119  **/
2120 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2121 {
2122 #ifdef CONFIG_PCI_IOV
2123         struct pci_dev *pdev = adapter->pdev;
2124
2125         if (adapter->vfs_allocated_count) {
2126                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2127                                            sizeof(struct vf_data_storage),
2128                                            GFP_KERNEL);
2129                 /* if allocation failed then we do not support SR-IOV */
2130                 if (!adapter->vf_data) {
2131                         adapter->vfs_allocated_count = 0;
2132                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2133                                 "Data Storage\n");
2134                 }
2135         }
2136
2137         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2138                 kfree(adapter->vf_data);
2139                 adapter->vf_data = NULL;
2140 #endif /* CONFIG_PCI_IOV */
2141                 adapter->vfs_allocated_count = 0;
2142 #ifdef CONFIG_PCI_IOV
2143         } else {
2144                 unsigned char mac_addr[ETH_ALEN];
2145                 int i;
2146                 dev_info(&pdev->dev, "%d vfs allocated\n",
2147                          adapter->vfs_allocated_count);
2148                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2149                         random_ether_addr(mac_addr);
2150                         igb_set_vf_mac(adapter, i, mac_addr);
2151                 }
2152         }
2153 #endif /* CONFIG_PCI_IOV */
2154 }
2155
2156
2157 /**
2158  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2159  * @adapter: board private structure to initialize
2160  *
2161  * igb_init_hw_timer initializes the function pointer and values for the
2162  * hardware timer.
2163  **/
2164 static void igb_init_hw_timer(struct igb_adapter *adapter)
2165 {
2166         struct e1000_hw *hw = &adapter->hw;
2167
2168         switch (hw->mac.type) {
2169         case e1000_i350:
2170         case e1000_82580:
2171                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2172                 adapter->cycles.read = igb_read_clock;
2173                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2174                 adapter->cycles.mult = 1;
2175                 /*
2176                  * The 82580 timesync updates the system timer by 8ns every 8ns,
2177                  * and the value cannot be shifted.  Instead we need to shift
2178                  * the registers to generate a 64bit timer value.  As a result
2179                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2180                  * 24 in order to generate a larger value for synchronization.
2181                  */
2182                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
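                /* Note: with mult = 1 the timecounter converts the (shifted)
                 * cycle values back to nanoseconds as ns = cycles >> shift,
                 * undoing the register shift described above. */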
2183                 /* disable system timer temporarily by setting bit 31 */
2184                 wr32(E1000_TSAUXC, 0x80000000);
2185                 wrfl();
2186
2187                 /* Set registers so that rollover occurs soon to test this. */
2188                 wr32(E1000_SYSTIMR, 0x00000000);
2189                 wr32(E1000_SYSTIML, 0x80000000);
2190                 wr32(E1000_SYSTIMH, 0x000000FF);
2191                 wrfl();
2192
2193                 /* enable system timer by clearing bit 31 */
2194                 wr32(E1000_TSAUXC, 0x0);
2195                 wrfl();
2196
2197                 timecounter_init(&adapter->clock,
2198                                  &adapter->cycles,
2199                                  ktime_to_ns(ktime_get_real()));
2200                 /*
2201                  * Synchronize our NIC clock against system wall clock. NIC
2202                  * time stamp reading requires ~3us per sample, and each sample
2203                  * was pretty stable even under load, so only 10 samples are
2204                  * required for each offset comparison.
2205                  */
2206                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2207                 adapter->compare.source = &adapter->clock;
2208                 adapter->compare.target = ktime_get_real;
2209                 adapter->compare.num_samples = 10;
2210                 timecompare_update(&adapter->compare, 0);
2211                 break;
2212         case e1000_82576:
2213                 /*
2214                  * Initialize hardware timer: we keep it running just in case
2215                  * some program needs it later on.
2216                  */
2217                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2218                 adapter->cycles.read = igb_read_clock;
2219                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2220                 adapter->cycles.mult = 1;
2221                 /**
2222                  * Scale the NIC clock cycle by a large factor so that
2223                  * relatively small clock corrections can be added or
2224                  * subtracted at each clock tick. The drawbacks of a large
2225                  * factor are a) that the clock register overflows more quickly
2226                  * (not such a big deal) and b) that the increment per tick has
2227                  * to fit into 24 bits.  As a result we need to use a shift of
2228                  * 19 so we can fit a value of 16 into the TIMINCA register.
2229                  */
2230                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2231                 wr32(E1000_TIMINCA,
2232                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2233                                 (16 << IGB_82576_TSYNC_SHIFT));
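                /* Quick arithmetic check (assuming IGB_82576_TSYNC_SHIFT is 19,
                 * as the comment above implies): SYSTIM grows by 16 << 19 =
                 * 0x800000 per 16ns period, and the timecounter (mult = 1,
                 * shift = 19) converts that back as (16 << 19) >> 19 = 16ns,
                 * leaving 19 fractional bits for fine clock corrections. */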
2234
2235                 /* Set registers so that rollover occurs soon to test this. */
2236                 wr32(E1000_SYSTIML, 0x00000000);
2237                 wr32(E1000_SYSTIMH, 0xFF800000);
2238                 wrfl();
2239
2240                 timecounter_init(&adapter->clock,
2241                                  &adapter->cycles,
2242                                  ktime_to_ns(ktime_get_real()));
2243                 /*
2244                  * Synchronize our NIC clock against system wall clock. NIC
2245                  * time stamp reading requires ~3us per sample, and each sample
2246                  * was pretty stable even under load, so only 10 samples are
2247                  * required for each offset comparison.
2248                  */
2249                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2250                 adapter->compare.source = &adapter->clock;
2251                 adapter->compare.target = ktime_get_real;
2252                 adapter->compare.num_samples = 10;
2253                 timecompare_update(&adapter->compare, 0);
2254                 break;
2255         case e1000_82575:
2256                 /* 82575 does not support timesync */
2257         default:
2258                 break;
2259         }
2260
2261 }
2262
2263 /**
2264  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2265  * @adapter: board private structure to initialize
2266  *
2267  * igb_sw_init initializes the Adapter private data structure.
2268  * Fields are initialized based on PCI device information and
2269  * OS network device settings (MTU size).
2270  **/
2271 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2272 {
2273         struct e1000_hw *hw = &adapter->hw;
2274         struct net_device *netdev = adapter->netdev;
2275         struct pci_dev *pdev = adapter->pdev;
2276
2277         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2278
2279         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2280         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2281         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2282         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2283
2284         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2285         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2286
2287         spin_lock_init(&adapter->stats64_lock);
2288 #ifdef CONFIG_PCI_IOV
2289         if (hw->mac.type == e1000_82576)
2290                 adapter->vfs_allocated_count = (max_vfs > 7) ? 7 : max_vfs;
2291
2292 #endif /* CONFIG_PCI_IOV */
2293         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2294
2295         /*
2296          * if rss_queues > 4, or if VFs are going to be allocated while
2297          * multiple RSS queues are in use, combine the queues into queue
2298          * pairs in order to conserve interrupts due to limited supply
2299          */
2300         if ((adapter->rss_queues > 4) ||
2301             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2302                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
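        /* With IGB_FLAG_QUEUE_PAIRS set, each Tx ring later shares a q_vector
         * (and therefore an MSI-X vector) with the Rx ring of the same index
         * instead of getting a vector of its own. */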
2303
2304         /* This call may decrease the number of queues */
2305         if (igb_init_interrupt_scheme(adapter)) {
2306                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2307                 return -ENOMEM;
2308         }
2309
2310         igb_init_hw_timer(adapter);
2311         igb_probe_vfs(adapter);
2312
2313         /* Explicitly disable IRQ since the NIC can be in any state. */
2314         igb_irq_disable(adapter);
2315
2316         set_bit(__IGB_DOWN, &adapter->state);
2317         return 0;
2318 }
2319
2320 /**
2321  * igb_open - Called when a network interface is made active
2322  * @netdev: network interface device structure
2323  *
2324  * Returns 0 on success, negative value on failure
2325  *
2326  * The open entry point is called when a network interface is made
2327  * active by the system (IFF_UP).  At this point all resources needed
2328  * for transmit and receive operations are allocated, the interrupt
2329  * handler is registered with the OS, the watchdog timer is started,
2330  * and the stack is notified that the interface is ready.
2331  **/
2332 static int igb_open(struct net_device *netdev)
2333 {
2334         struct igb_adapter *adapter = netdev_priv(netdev);
2335         struct e1000_hw *hw = &adapter->hw;
2336         int err;
2337         int i;
2338
2339         /* disallow open during test */
2340         if (test_bit(__IGB_TESTING, &adapter->state))
2341                 return -EBUSY;
2342
2343         netif_carrier_off(netdev);
2344
2345         /* allocate transmit descriptors */
2346         err = igb_setup_all_tx_resources(adapter);
2347         if (err)
2348                 goto err_setup_tx;
2349
2350         /* allocate receive descriptors */
2351         err = igb_setup_all_rx_resources(adapter);
2352         if (err)
2353                 goto err_setup_rx;
2354
2355         igb_power_up_link(adapter);
2356
2357         /* before we allocate an interrupt, we must be ready to handle it.
2358          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2359          * as soon as we call pci_request_irq, so we have to set up our
2360          * clean_rx handler before we do so.  */
2361         igb_configure(adapter);
2362
2363         err = igb_request_irq(adapter);
2364         if (err)
2365                 goto err_req_irq;
2366
2367         /* From here on the code is the same as igb_up() */
2368         clear_bit(__IGB_DOWN, &adapter->state);
2369
2370         for (i = 0; i < adapter->num_q_vectors; i++) {
2371                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2372                 napi_enable(&q_vector->napi);
2373         }
2374
2375         /* Clear any pending interrupts. */
2376         rd32(E1000_ICR);
2377
2378         igb_irq_enable(adapter);
2379
2380         /* notify VFs that reset has been completed */
2381         if (adapter->vfs_allocated_count) {
2382                 u32 reg_data = rd32(E1000_CTRL_EXT);
2383                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2384                 wr32(E1000_CTRL_EXT, reg_data);
2385         }
2386
2387         netif_tx_start_all_queues(netdev);
2388
2389         /* start the watchdog. */
2390         hw->mac.get_link_status = 1;
2391         schedule_work(&adapter->watchdog_task);
2392
2393         return 0;
2394
2395 err_req_irq:
2396         igb_release_hw_control(adapter);
2397         igb_power_down_link(adapter);
2398         igb_free_all_rx_resources(adapter);
2399 err_setup_rx:
2400         igb_free_all_tx_resources(adapter);
2401 err_setup_tx:
2402         igb_reset(adapter);
2403
2404         return err;
2405 }
2406
2407 /**
2408  * igb_close - Disables a network interface
2409  * @netdev: network interface device structure
2410  *
2411  * Returns 0, this is not allowed to fail
2412  *
2413  * The close entry point is called when an interface is de-activated
2414  * by the OS.  The hardware is still under the driver's control, but
2415  * needs to be disabled.  A global MAC reset is issued to stop the
2416  * hardware, and all transmit and receive resources are freed.
2417  **/
2418 static int igb_close(struct net_device *netdev)
2419 {
2420         struct igb_adapter *adapter = netdev_priv(netdev);
2421
2422         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2423         igb_down(adapter);
2424
2425         igb_free_irq(adapter);
2426
2427         igb_free_all_tx_resources(adapter);
2428         igb_free_all_rx_resources(adapter);
2429
2430         return 0;
2431 }
2432
2433 /**
2434  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2435  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2436  *
2437  * Return 0 on success, negative on failure
2438  **/
2439 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2440 {
2441         struct device *dev = tx_ring->dev;
2442         int size;
2443
2444         size = sizeof(struct igb_buffer) * tx_ring->count;
2445         tx_ring->buffer_info = vzalloc(size);
2446         if (!tx_ring->buffer_info)
2447                 goto err;
2448
2449         /* round up to nearest 4K */
2450         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2451         tx_ring->size = ALIGN(tx_ring->size, 4096);
2452
2453         tx_ring->desc = dma_alloc_coherent(dev,
2454                                            tx_ring->size,
2455                                            &tx_ring->dma,
2456                                            GFP_KERNEL);
2457
2458         if (!tx_ring->desc)
2459                 goto err;
2460
2461         tx_ring->next_to_use = 0;
2462         tx_ring->next_to_clean = 0;
2463         return 0;
2464
2465 err:
2466         vfree(tx_ring->buffer_info);
2467         dev_err(dev,
2468                 "Unable to allocate memory for the transmit descriptor ring\n");
2469         return -ENOMEM;
2470 }
2471
2472 /**
2473  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2474  *                                (Descriptors) for all queues
2475  * @adapter: board private structure
2476  *
2477  * Return 0 on success, negative on failure
2478  **/
2479 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2480 {
2481         struct pci_dev *pdev = adapter->pdev;
2482         int i, err = 0;
2483
2484         for (i = 0; i < adapter->num_tx_queues; i++) {
2485                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2486                 if (err) {
2487                         dev_err(&pdev->dev,
2488                                 "Allocation for Tx Queue %u failed\n", i);
2489                         for (i--; i >= 0; i--)
2490                                 igb_free_tx_resources(adapter->tx_ring[i]);
2491                         break;
2492                 }
2493         }
2494
2495         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2496                 int r_idx = i % adapter->num_tx_queues;
2497                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2498         }
2499         return err;
2500 }
2501
2502 /**
2503  * igb_setup_tctl - configure the transmit control registers
2504  * @adapter: Board private structure
2505  **/
2506 void igb_setup_tctl(struct igb_adapter *adapter)
2507 {
2508         struct e1000_hw *hw = &adapter->hw;
2509         u32 tctl;
2510
2511         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2512         wr32(E1000_TXDCTL(0), 0);
2513
2514         /* Program the Transmit Control Register */
2515         tctl = rd32(E1000_TCTL);
2516         tctl &= ~E1000_TCTL_CT;
2517         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2518                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2519
2520         igb_config_collision_dist(hw);
2521
2522         /* Enable transmits */
2523         tctl |= E1000_TCTL_EN;
2524
2525         wr32(E1000_TCTL, tctl);
2526 }
2527
2528 /**
2529  * igb_configure_tx_ring - Configure transmit ring after Reset
2530  * @adapter: board private structure
2531  * @ring: tx ring to configure
2532  *
2533  * Configure a transmit ring after a reset.
2534  **/
2535 void igb_configure_tx_ring(struct igb_adapter *adapter,
2536                            struct igb_ring *ring)
2537 {
2538         struct e1000_hw *hw = &adapter->hw;
2539         u32 txdctl;
2540         u64 tdba = ring->dma;
2541         int reg_idx = ring->reg_idx;
2542
2543         /* disable the queue */
2544         txdctl = rd32(E1000_TXDCTL(reg_idx));
2545         wr32(E1000_TXDCTL(reg_idx),
2546                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2547         wrfl();
2548         mdelay(10);
2549
2550         wr32(E1000_TDLEN(reg_idx),
2551                         ring->count * sizeof(union e1000_adv_tx_desc));
2552         wr32(E1000_TDBAL(reg_idx),
2553                         tdba & 0x00000000ffffffffULL);
2554         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2555
2556         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2557         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2558         writel(0, ring->head);
2559         writel(0, ring->tail);
2560
2561         txdctl |= IGB_TX_PTHRESH;
2562         txdctl |= IGB_TX_HTHRESH << 8;
2563         txdctl |= IGB_TX_WTHRESH << 16;
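        /* TXDCTL layout: prefetch threshold in bits 5:0, host threshold in
         * bits 13:8 and write-back threshold in bits 21:16, matching the
         * shifts above. */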
2564
2565         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2566         wr32(E1000_TXDCTL(reg_idx), txdctl);
2567 }
2568
2569 /**
2570  * igb_configure_tx - Configure transmit Unit after Reset
2571  * @adapter: board private structure
2572  *
2573  * Configure the Tx unit of the MAC after a reset.
2574  **/
2575 static void igb_configure_tx(struct igb_adapter *adapter)
2576 {
2577         int i;
2578
2579         for (i = 0; i < adapter->num_tx_queues; i++)
2580                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2581 }
2582
2583 /**
2584  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2585  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2586  *
2587  * Returns 0 on success, negative on failure
2588  **/
2589 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2590 {
2591         struct device *dev = rx_ring->dev;
2592         int size, desc_len;
2593
2594         size = sizeof(struct igb_buffer) * rx_ring->count;
2595         rx_ring->buffer_info = vzalloc(size);
2596         if (!rx_ring->buffer_info)
2597                 goto err;
2598
2599         desc_len = sizeof(union e1000_adv_rx_desc);
2600
2601         /* Round up to nearest 4K */
2602         rx_ring->size = rx_ring->count * desc_len;
2603         rx_ring->size = ALIGN(rx_ring->size, 4096);
2604
2605         rx_ring->desc = dma_alloc_coherent(dev,
2606                                            rx_ring->size,
2607                                            &rx_ring->dma,
2608                                            GFP_KERNEL);
2609
2610         if (!rx_ring->desc)
2611                 goto err;
2612
2613         rx_ring->next_to_clean = 0;
2614         rx_ring->next_to_use = 0;
2615
2616         return 0;
2617
2618 err:
2619         vfree(rx_ring->buffer_info);
2620         rx_ring->buffer_info = NULL;
2621         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2622                 " ring\n");
2623         return -ENOMEM;
2624 }
2625
2626 /**
2627  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2628  *                                (Descriptors) for all queues
2629  * @adapter: board private structure
2630  *
2631  * Return 0 on success, negative on failure
2632  **/
2633 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2634 {
2635         struct pci_dev *pdev = adapter->pdev;
2636         int i, err = 0;
2637
2638         for (i = 0; i < adapter->num_rx_queues; i++) {
2639                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2640                 if (err) {
2641                         dev_err(&pdev->dev,
2642                                 "Allocation for Rx Queue %u failed\n", i);
2643                         for (i--; i >= 0; i--)
2644                                 igb_free_rx_resources(adapter->rx_ring[i]);
2645                         break;
2646                 }
2647         }
2648
2649         return err;
2650 }
2651
2652 /**
2653  * igb_setup_mrqc - configure the multiple receive queue control registers
2654  * @adapter: Board private structure
2655  **/
2656 static void igb_setup_mrqc(struct igb_adapter *adapter)
2657 {
2658         struct e1000_hw *hw = &adapter->hw;
2659         u32 mrqc, rxcsum;
2660         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2661         union e1000_reta {
2662                 u32 dword;
2663                 u8  bytes[4];
2664         } reta;
2665         static const u8 rsshash[40] = {
2666                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2667                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2668                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2669                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2670
2671         /* Fill out hash function seeds */
2672         for (j = 0; j < 10; j++) {
2673                 u32 rsskey = rsshash[(j * 4)];
2674                 rsskey |= rsshash[(j * 4) + 1] << 8;
2675                 rsskey |= rsshash[(j * 4) + 2] << 16;
2676                 rsskey |= rsshash[(j * 4) + 3] << 24;
2677                 array_wr32(E1000_RSSRK(0), j, rsskey);
2678         }
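        /* e.g. the first word written to RSSRK(0) is 0xda565a6d: the first
         * four key bytes assembled least-significant byte first. */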
2679
2680         num_rx_queues = adapter->rss_queues;
2681
2682         if (adapter->vfs_allocated_count) {
2683                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2684                 switch (hw->mac.type) {
2685                 case e1000_i350:
2686                 case e1000_82580:
2687                         num_rx_queues = 1;
2688                         shift = 0;
2689                         break;
2690                 case e1000_82576:
2691                         shift = 3;
2692                         num_rx_queues = 2;
2693                         break;
2694                 case e1000_82575:
2695                         shift = 2;
2696                         shift2 = 6;
2697                 default:
2698                         break;
2699                 }
2700         } else {
2701                 if (hw->mac.type == e1000_82575)
2702                         shift = 6;
2703         }
2704
2705         for (j = 0; j < (32 * 4); j++) {
2706                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2707                 if (shift2)
2708                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2709                 if ((j & 3) == 3)
2710                         wr32(E1000_RETA(j >> 2), reta.dword);
2711         }
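        /* Example (assuming no VFs and 4 RSS queues, so shift = shift2 = 0):
         * the 128-entry redirection table becomes 0,1,2,3,0,1,2,3,... and is
         * written one dword (four entries) at a time through E1000_RETA. */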
2712
2713         /*
2714          * Disable raw packet checksumming so that RSS hash is placed in
2715          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2716          * offloads as they are enabled by default
2717          */
2718         rxcsum = rd32(E1000_RXCSUM);
2719         rxcsum |= E1000_RXCSUM_PCSD;
2720
2721         if (adapter->hw.mac.type >= e1000_82576)
2722                 /* Enable Receive Checksum Offload for SCTP */
2723                 rxcsum |= E1000_RXCSUM_CRCOFL;
2724
2725         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2726         wr32(E1000_RXCSUM, rxcsum);
2727
2728         /* If VMDq is enabled then we set the appropriate mode for that, else
2729          * we default to RSS so that an RSS hash is calculated per packet even
2730          * if we are only using one queue */
2731         if (adapter->vfs_allocated_count) {
2732                 if (hw->mac.type > e1000_82575) {
2733                         /* Set the default pool for the PF's first queue */
2734                         u32 vtctl = rd32(E1000_VT_CTL);
2735                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2736                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2737                         vtctl |= adapter->vfs_allocated_count <<
2738                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2739                         wr32(E1000_VT_CTL, vtctl);
2740                 }
2741                 if (adapter->rss_queues > 1)
2742                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2743                 else
2744                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2745         } else {
2746                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2747         }
2748         igb_vmm_control(adapter);
2749
2750         /*
2751          * Generate RSS hash based on TCP port numbers and/or
2752          * IPv4/v6 src and dst addresses since UDP cannot be
2753          * hashed reliably due to IP fragmentation
2754          */
2755         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2756                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2757                 E1000_MRQC_RSS_FIELD_IPV6 |
2758                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2759                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2760
2761         wr32(E1000_MRQC, mrqc);
2762 }
2763
2764 /**
2765  * igb_setup_rctl - configure the receive control registers
2766  * @adapter: Board private structure
2767  **/
2768 void igb_setup_rctl(struct igb_adapter *adapter)
2769 {
2770         struct e1000_hw *hw = &adapter->hw;
2771         u32 rctl;
2772
2773         rctl = rd32(E1000_RCTL);
2774
2775         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2776         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2777
2778         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2779                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2780
2781         /*
2782          * enable stripping of CRC. It's unlikely this will break BMC
2783          * redirection as it did with e1000. Newer features require
2784          * that the HW strips the CRC.
2785          */
2786         rctl |= E1000_RCTL_SECRC;
2787
2788         /* disable store bad packets and clear size bits. */
2789         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2790
2791         /* enable LPE to prevent packets larger than max_frame_size */
2792         /* enable LPE; frames larger than max_frame_size are still dropped via RLPML */
2793
2794         /* disable queue 0 to prevent tail write w/o re-config */
2795         wr32(E1000_RXDCTL(0), 0);
2796
2797         /* Attention!!!  For SR-IOV PF driver operations you must enable
2798          * queue drop for all VF and PF queues to prevent head of line blocking
2799          * if an un-trusted VF does not provide descriptors to hardware.
2800          */
2801         if (adapter->vfs_allocated_count) {
2802                 /* set all queue drop enable bits */
2803                 wr32(E1000_QDE, ALL_QUEUES);
2804         }
2805
2806         wr32(E1000_RCTL, rctl);
2807 }
2808
2809 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2810                                    int vfn)
2811 {
2812         struct e1000_hw *hw = &adapter->hw;
2813         u32 vmolr;
2814
2815         /* if it isn't the PF, check to see if VFs are enabled and
2816          * increase the size to support VLAN tags */
2817         if (vfn < adapter->vfs_allocated_count &&
2818             adapter->vf_data[vfn].vlans_enabled)
2819                 size += VLAN_TAG_SIZE;
2820
2821         vmolr = rd32(E1000_VMOLR(vfn));
2822         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2823         vmolr |= size | E1000_VMOLR_LPE;
2824         wr32(E1000_VMOLR(vfn), vmolr);
2825
2826         return 0;
2827 }
2828
2829 /**
2830  * igb_rlpml_set - set maximum receive packet size
2831  * @adapter: board private structure
2832  *
2833  * Configure maximum receivable packet size.
2834  **/
2835 static void igb_rlpml_set(struct igb_adapter *adapter)
2836 {
2837         u32 max_frame_size = adapter->max_frame_size;
2838         struct e1000_hw *hw = &adapter->hw;
2839         u16 pf_id = adapter->vfs_allocated_count;
2840
2841         if (adapter->vlgrp)
2842                 max_frame_size += VLAN_TAG_SIZE;
2843
2844         /* if VFs are enabled we set RLPML to the largest possible request
2845          * size and set the VMOLR RLPML to the size we need */
2846         if (pf_id) {
2847                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2848                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2849         }
2850
2851         wr32(E1000_RLPML, max_frame_size);
2852 }
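/*
 * Rough example, assuming max_frame_size is MTU + Ethernet header + FCS as
 * computed elsewhere in this driver: with a standard 1500-byte MTU and no
 * VFs, igb_rlpml_set() writes 1518 to RLPML, or 1522 once a VLAN group is
 * registered.
 */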
2853
2854 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2855                                  int vfn, bool aupe)
2856 {
2857         struct e1000_hw *hw = &adapter->hw;
2858         u32 vmolr;
2859
2860         /*
2861          * This register exists only on 82576 and newer, so on older
2862          * hardware we should exit and do nothing
2863          */
2864         if (hw->mac.type < e1000_82576)
2865                 return;
2866
2867         vmolr = rd32(E1000_VMOLR(vfn));
2868         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2869         if (aupe)
2870                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2871         else
2872                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2873
2874         /* clear all bits that might not be set */
2875         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2876
2877         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2878                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2879         /*
2880          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2881          * multicast packets
2882          */
2883         if (vfn <= adapter->vfs_allocated_count)
2884                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2885
2886         wr32(E1000_VMOLR(vfn), vmolr);
2887 }
2888
2889 /**
2890  * igb_configure_rx_ring - Configure a receive ring after Reset
2891  * @adapter: board private structure
2892  * @ring: receive ring to be configured
2893  *
2894  * Configure the Rx unit of the MAC after a reset.
2895  **/
2896 void igb_configure_rx_ring(struct igb_adapter *adapter,
2897                            struct igb_ring *ring)
2898 {
2899         struct e1000_hw *hw = &adapter->hw;
2900         u64 rdba = ring->dma;
2901         int reg_idx = ring->reg_idx;
2902         u32 srrctl, rxdctl;
2903
2904         /* disable the queue */
2905         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2906         wr32(E1000_RXDCTL(reg_idx),
2907                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2908
2909         /* Set DMA base address registers */
2910         wr32(E1000_RDBAL(reg_idx),
2911              rdba & 0x00000000ffffffffULL);
2912         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2913         wr32(E1000_RDLEN(reg_idx),
2914                        ring->count * sizeof(union e1000_adv_rx_desc));
2915
2916         /* initialize head and tail */
2917         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2918         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2919         writel(0, ring->head);
2920         writel(0, ring->tail);
2921
2922         /* set descriptor configuration */
2923         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2924                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2925                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2926 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2927                 srrctl |= IGB_RXBUFFER_16384 >>
2928                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2929 #else
2930                 srrctl |= (PAGE_SIZE / 2) >>
2931                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2932 #endif
2933                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2934         } else {
2935                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2936                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2937                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2938         }
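        /*
         * Example, assuming the usual 1 KB granularity of the BSIZEPKT
         * field: with a typical 2048-byte rx_buffer_len the one-buffer
         * branch above is taken and the packet buffer size becomes
         * ALIGN(2048, 1024) >> E1000_SRRCTL_BSIZEPKT_SHIFT = 2, i.e. 2 KB.
         */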
2939         if (hw->mac.type == e1000_82580)
2940                 srrctl |= E1000_SRRCTL_TIMESTAMP;
2941         /* Only set Drop Enable if we are supporting multiple queues */
2942         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2943                 srrctl |= E1000_SRRCTL_DROP_EN;
2944
2945         wr32(E1000_SRRCTL(reg_idx), srrctl);
2946
2947         /* set filtering for VMDQ pools */
2948         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2949
2950         /* enable receive descriptor fetching */
2951         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2952         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2953         rxdctl &= 0xFFF00000;
2954         rxdctl |= IGB_RX_PTHRESH;
2955         rxdctl |= IGB_RX_HTHRESH << 8;
2956         rxdctl |= IGB_RX_WTHRESH << 16;
2957         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2958 }
2959
2960 /**
2961  * igb_configure_rx - Configure receive Unit after Reset
2962  * @adapter: board private structure
2963  *
2964  * Configure the Rx unit of the MAC after a reset.
2965  **/
2966 static void igb_configure_rx(struct igb_adapter *adapter)
2967 {
2968         int i;
2969
2970         /* set UTA to appropriate mode */
2971         igb_set_uta(adapter);
2972
2973         /* set the correct pool for the PF default MAC address in entry 0 */
2974         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2975                          adapter->vfs_allocated_count);
2976
2977         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2978          * the Base and Length of the Rx Descriptor Ring */
2979         for (i = 0; i < adapter->num_rx_queues; i++)
2980                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2981 }
2982
2983 /**
2984  * igb_free_tx_resources - Free Tx Resources per Queue
2985  * @tx_ring: Tx descriptor ring for a specific queue
2986  *
2987  * Free all transmit software resources
2988  **/
2989 void igb_free_tx_resources(struct igb_ring *tx_ring)
2990 {
2991         igb_clean_tx_ring(tx_ring);
2992
2993         vfree(tx_ring->buffer_info);
2994         tx_ring->buffer_info = NULL;
2995
2996         /* if not set, then don't free */
2997         if (!tx_ring->desc)
2998                 return;
2999
3000         dma_free_coherent(tx_ring->dev, tx_ring->size,
3001                           tx_ring->desc, tx_ring->dma);
3002
3003         tx_ring->desc = NULL;
3004 }
3005
3006 /**
3007  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3008  * @adapter: board private structure
3009  *
3010  * Free all transmit software resources
3011  **/
3012 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3013 {
3014         int i;
3015
3016         for (i = 0; i < adapter->num_tx_queues; i++)
3017                 igb_free_tx_resources(adapter->tx_ring[i]);
3018 }
3019
3020 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3021                                     struct igb_buffer *buffer_info)
3022 {
3023         if (buffer_info->dma) {
3024                 if (buffer_info->mapped_as_page)
3025                         dma_unmap_page(tx_ring->dev,
3026                                         buffer_info->dma,
3027                                         buffer_info->length,
3028                                         DMA_TO_DEVICE);
3029                 else
3030                         dma_unmap_single(tx_ring->dev,
3031                                         buffer_info->dma,
3032                                         buffer_info->length,
3033                                         DMA_TO_DEVICE);
3034                 buffer_info->dma = 0;
3035         }
3036         if (buffer_info->skb) {
3037                 dev_kfree_skb_any(buffer_info->skb);
3038                 buffer_info->skb = NULL;
3039         }
3040         buffer_info->time_stamp = 0;
3041         buffer_info->length = 0;
3042         buffer_info->next_to_watch = 0;
3043         buffer_info->mapped_as_page = false;
3044 }
3045
3046 /**
3047  * igb_clean_tx_ring - Free Tx Buffers
3048  * @tx_ring: ring to be cleaned
3049  **/
3050 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3051 {
3052         struct igb_buffer *buffer_info;
3053         unsigned long size;
3054         unsigned int i;
3055
3056         if (!tx_ring->buffer_info)
3057                 return;
3058         /* Free all the Tx ring sk_buffs */
3059
3060         for (i = 0; i < tx_ring->count; i++) {
3061                 buffer_info = &tx_ring->buffer_info[i];
3062                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3063         }
3064
3065         size = sizeof(struct igb_buffer) * tx_ring->count;
3066         memset(tx_ring->buffer_info, 0, size);
3067
3068         /* Zero out the descriptor ring */
3069         memset(tx_ring->desc, 0, tx_ring->size);
3070
3071         tx_ring->next_to_use = 0;
3072         tx_ring->next_to_clean = 0;
3073 }
3074
3075 /**
3076  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3077  * @adapter: board private structure
3078  **/
3079 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3080 {
3081         int i;
3082
3083         for (i = 0; i < adapter->num_tx_queues; i++)
3084                 igb_clean_tx_ring(adapter->tx_ring[i]);
3085 }
3086
3087 /**
3088  * igb_free_rx_resources - Free Rx Resources
3089  * @rx_ring: ring to clean the resources from
3090  *
3091  * Free all receive software resources
3092  **/
3093 void igb_free_rx_resources(struct igb_ring *rx_ring)
3094 {
3095         igb_clean_rx_ring(rx_ring);
3096
3097         vfree(rx_ring->buffer_info);
3098         rx_ring->buffer_info = NULL;
3099
3100         /* if not set, then don't free */
3101         if (!rx_ring->desc)
3102                 return;
3103
3104         dma_free_coherent(rx_ring->dev, rx_ring->size,
3105                           rx_ring->desc, rx_ring->dma);
3106
3107         rx_ring->desc = NULL;
3108 }
3109
3110 /**
3111  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3112  * @adapter: board private structure
3113  *
3114  * Free all receive software resources
3115  **/
3116 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3117 {
3118         int i;
3119
3120         for (i = 0; i < adapter->num_rx_queues; i++)
3121                 igb_free_rx_resources(adapter->rx_ring[i]);
3122 }
3123
3124 /**
3125  * igb_clean_rx_ring - Free Rx Buffers per Queue
3126  * @rx_ring: ring to free buffers from
3127  **/
3128 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3129 {
3130         struct igb_buffer *buffer_info;
3131         unsigned long size;
3132         unsigned int i;
3133
3134         if (!rx_ring->buffer_info)
3135                 return;
3136
3137         /* Free all the Rx ring sk_buffs */
3138         for (i = 0; i < rx_ring->count; i++) {
3139                 buffer_info = &rx_ring->buffer_info[i];
3140                 if (buffer_info->dma) {
3141                         dma_unmap_single(rx_ring->dev,
3142                                          buffer_info->dma,
3143                                          rx_ring->rx_buffer_len,
3144                                          DMA_FROM_DEVICE);
3145                         buffer_info->dma = 0;
3146                 }
3147
3148                 if (buffer_info->skb) {
3149                         dev_kfree_skb(buffer_info->skb);
3150                         buffer_info->skb = NULL;
3151                 }
3152                 if (buffer_info->page_dma) {
3153                         dma_unmap_page(rx_ring->dev,
3154                                        buffer_info->page_dma,
3155                                        PAGE_SIZE / 2,
3156                                        DMA_FROM_DEVICE);
3157                         buffer_info->page_dma = 0;
3158                 }
3159                 if (buffer_info->page) {
3160                         put_page(buffer_info->page);
3161                         buffer_info->page = NULL;
3162                         buffer_info->page_offset = 0;
3163                 }
3164         }
3165
3166         size = sizeof(struct igb_buffer) * rx_ring->count;
3167         memset(rx_ring->buffer_info, 0, size);
3168
3169         /* Zero out the descriptor ring */
3170         memset(rx_ring->desc, 0, rx_ring->size);
3171
3172         rx_ring->next_to_clean = 0;
3173         rx_ring->next_to_use = 0;
3174 }
3175
3176 /**
3177  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3178  * @adapter: board private structure
3179  **/
3180 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3181 {
3182         int i;
3183
3184         for (i = 0; i < adapter->num_rx_queues; i++)
3185                 igb_clean_rx_ring(adapter->rx_ring[i]);
3186 }
3187
3188 /**
3189  * igb_set_mac - Change the Ethernet Address of the NIC
3190  * @netdev: network interface device structure
3191  * @p: pointer to an address structure
3192  *
3193  * Returns 0 on success, negative on failure
3194  **/
3195 static int igb_set_mac(struct net_device *netdev, void *p)
3196 {
3197         struct igb_adapter *adapter = netdev_priv(netdev);
3198         struct e1000_hw *hw = &adapter->hw;
3199         struct sockaddr *addr = p;
3200
3201         if (!is_valid_ether_addr(addr->sa_data))
3202                 return -EADDRNOTAVAIL;
3203
3204         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3205         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3206
3207         /* set the correct pool for the new PF MAC address in entry 0 */
3208         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3209                          adapter->vfs_allocated_count);
3210
3211         return 0;
3212 }
3213
3214 /**
3215  * igb_write_mc_addr_list - write multicast addresses to MTA
3216  * @netdev: network interface device structure
3217  *
3218  * Writes multicast address list to the MTA hash table.
3219  * Returns: -ENOMEM on failure
3220  *                0 on no addresses written
3221  *                X on writing X addresses to MTA
3222  **/
3223 static int igb_write_mc_addr_list(struct net_device *netdev)
3224 {
3225         struct igb_adapter *adapter = netdev_priv(netdev);
3226         struct e1000_hw *hw = &adapter->hw;
3227         struct netdev_hw_addr *ha;
3228         u8  *mta_list;
3229         int i;
3230
3231         if (netdev_mc_empty(netdev)) {
3232                 /* nothing to program, so clear mc list */
3233                 igb_update_mc_addr_list(hw, NULL, 0);
3234                 igb_restore_vf_multicasts(adapter);
3235                 return 0;
3236         }
3237
3238         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3239         if (!mta_list)
3240                 return -ENOMEM;
3241
3242         /* The shared function expects a packed array of only addresses. */
3243         i = 0;
3244         netdev_for_each_mc_addr(ha, netdev)
3245                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3246
3247         igb_update_mc_addr_list(hw, mta_list, i);
3248         kfree(mta_list);
3249
3250         return netdev_mc_count(netdev);
3251 }
3252
3253 /**
3254  * igb_write_uc_addr_list - write unicast addresses to RAR table
3255  * @netdev: network interface device structure
3256  *
3257  * Writes unicast address list to the RAR table.
3258  * Returns: -ENOMEM on failure/insufficient address space
3259  *                0 on no addresses written
3260  *                X on writing X addresses to the RAR table
3261  **/
3262 static int igb_write_uc_addr_list(struct net_device *netdev)
3263 {
3264         struct igb_adapter *adapter = netdev_priv(netdev);
3265         struct e1000_hw *hw = &adapter->hw;
3266         unsigned int vfn = adapter->vfs_allocated_count;
3267         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3268         int count = 0;
3269
3270         /* return ENOMEM indicating insufficient memory for addresses */
3271         if (netdev_uc_count(netdev) > rar_entries)
3272                 return -ENOMEM;
3273
3274         if (!netdev_uc_empty(netdev) && rar_entries) {
3275                 struct netdev_hw_addr *ha;
3276
3277                 netdev_for_each_uc_addr(ha, netdev) {
3278                         if (!rar_entries)
3279                                 break;
3280                         igb_rar_set_qsel(adapter, ha->addr,
3281                                          rar_entries--,
3282                                          vfn);
3283                         count++;
3284                 }
3285         }
3286         /* clear the remaining unused RAR entries, in reverse order to avoid write combining */
3287         for (; rar_entries > 0 ; rar_entries--) {
3288                 wr32(E1000_RAH(rar_entries), 0);
3289                 wr32(E1000_RAL(rar_entries), 0);
3290         }
3291         wrfl();
3292
3293         return count;
3294 }
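/*
 * Note on the RAR budget above: rar_entry_count - (vfn + 1) leaves one
 * receive address register per VF (their MAC filters are assumed to occupy
 * the top of the table, as set up elsewhere in the driver) plus entry 0,
 * which holds the PF's own MAC address, so only the remaining entries are
 * available for secondary unicast addresses.
 */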
3295
3296 /**
3297  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3298  * @netdev: network interface device structure
3299  *
3300  * The set_rx_mode entry point is called whenever the unicast or multicast
3301  * address lists or the network interface flags are updated.  This routine is
3302  * responsible for configuring the hardware for proper unicast, multicast,
3303  * promiscuous mode, and all-multi behavior.
3304  **/
3305 static void igb_set_rx_mode(struct net_device *netdev)
3306 {
3307         struct igb_adapter *adapter = netdev_priv(netdev);
3308         struct e1000_hw *hw = &adapter->hw;
3309         unsigned int vfn = adapter->vfs_allocated_count;
3310         u32 rctl, vmolr = 0;
3311         int count;
3312
3313         /* Check for Promiscuous and All Multicast modes */
3314         rctl = rd32(E1000_RCTL);
3315
3316         /* clear the affected bits */
3317         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3318
3319         if (netdev->flags & IFF_PROMISC) {
3320                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3321                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3322         } else {
3323                 if (netdev->flags & IFF_ALLMULTI) {
3324                         rctl |= E1000_RCTL_MPE;
3325                         vmolr |= E1000_VMOLR_MPME;
3326                 } else {
3327                         /*
3328                          * Write addresses to the MTA; if the attempt fails
3329                          * then we should just turn on promiscuous mode so
3330                          * that we can at least receive multicast traffic
3331                          */
3332                         count = igb_write_mc_addr_list(netdev);
3333                         if (count < 0) {
3334                                 rctl |= E1000_RCTL_MPE;
3335                                 vmolr |= E1000_VMOLR_MPME;
3336                         } else if (count) {
3337                                 vmolr |= E1000_VMOLR_ROMPE;
3338                         }
3339                 }
3340                 /*
3341                  * Write addresses to available RAR registers; if there is not
3342                  * sufficient space to store all the addresses then enable
3343                  * unicast promiscuous mode
3344                  */
3345                 count = igb_write_uc_addr_list(netdev);
3346                 if (count < 0) {
3347                         rctl |= E1000_RCTL_UPE;
3348                         vmolr |= E1000_VMOLR_ROPE;
3349                 }
3350                 rctl |= E1000_RCTL_VFE;
3351         }
3352         wr32(E1000_RCTL, rctl);
3353
3354         /*
3355          * In order to support SR-IOV and eventually VMDq it is necessary to set
3356          * the VMOLR to enable the appropriate modes.  Without this workaround
3357          * we will have issues with VLAN tag stripping not being done for frames
3358          * that are only arriving because we are the default pool
3359          */
3360         if (hw->mac.type < e1000_82576)
3361                 return;
3362
3363         vmolr |= rd32(E1000_VMOLR(vfn)) &
3364                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3365         wr32(E1000_VMOLR(vfn), vmolr);
3366         igb_restore_vf_multicasts(adapter);
3367 }
3368
3369 static void igb_check_wvbr(struct igb_adapter *adapter)
3370 {
3371         struct e1000_hw *hw = &adapter->hw;
3372         u32 wvbr = 0;
3373
3374         switch (hw->mac.type) {
3375         case e1000_82576:
3376         case e1000_i350:
3377                 if (!(wvbr = rd32(E1000_WVBR)))
3378                         return;
3379                 break;
3380         default:
3381                 break;
3382         }
3383
3384         adapter->wvbr |= wvbr;
3385 }
3386
3387 #define IGB_STAGGERED_QUEUE_OFFSET 8
3388
3389 static void igb_spoof_check(struct igb_adapter *adapter)
3390 {
3391         int j;
3392
3393         if (!adapter->wvbr)
3394                 return;
3395
3396         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3397                 if (adapter->wvbr & (1 << j) ||
3398                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3399                         dev_warn(&adapter->pdev->dev,
3400                                 "Spoof event(s) detected on VF %d\n", j);
3401                         adapter->wvbr &=
3402                                 ~((1 << j) |
3403                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3404                 }
3405         }
3406 }
3407
3408 /* Need to wait a few seconds after link up to get diagnostic information from
3409  * the phy */
3410 static void igb_update_phy_info(unsigned long data)
3411 {
3412         struct igb_adapter *adapter = (struct igb_adapter *) data;
3413         igb_get_phy_info(&adapter->hw);
3414 }
3415
3416 /**
3417  * igb_has_link - check shared code for link and determine up/down
3418  * @adapter: pointer to driver private info
3419  **/
3420 bool igb_has_link(struct igb_adapter *adapter)
3421 {
3422         struct e1000_hw *hw = &adapter->hw;
3423         bool link_active = false;
3424         s32 ret_val = 0;
3425
3426         /* get_link_status is set on LSC (link status) interrupt or
3427          * rx sequence error interrupt.  get_link_status will stay
3428          * false until the e1000_check_for_link establishes link
3429          * for copper adapters ONLY
3430          */
3431         switch (hw->phy.media_type) {
3432         case e1000_media_type_copper:
3433                 if (hw->mac.get_link_status) {
3434                         ret_val = hw->mac.ops.check_for_link(hw);
3435                         link_active = !hw->mac.get_link_status;
3436                 } else {
3437                         link_active = true;
3438                 }
3439                 break;
3440         case e1000_media_type_internal_serdes:
3441                 ret_val = hw->mac.ops.check_for_link(hw);
3442                 link_active = hw->mac.serdes_has_link;
3443                 break;
3444         default:
3445         case e1000_media_type_unknown:
3446                 break;
3447         }
3448
3449         return link_active;
3450 }
3451
3452 /**
3453  * igb_watchdog - Timer Call-back
3454  * @data: pointer to adapter cast into an unsigned long
3455  **/
3456 static void igb_watchdog(unsigned long data)
3457 {
3458         struct igb_adapter *adapter = (struct igb_adapter *)data;
3459         /* Do the rest outside of interrupt context */
3460         schedule_work(&adapter->watchdog_task);
3461 }
3462
3463 static void igb_watchdog_task(struct work_struct *work)
3464 {
3465         struct igb_adapter *adapter = container_of(work,
3466                                                    struct igb_adapter,
3467                                                    watchdog_task);
3468         struct e1000_hw *hw = &adapter->hw;
3469         struct net_device *netdev = adapter->netdev;
3470         u32 link;
3471         int i;
3472
3473         link = igb_has_link(adapter);
3474         if (link) {
3475                 if (!netif_carrier_ok(netdev)) {
3476                         u32 ctrl;
3477                         hw->mac.ops.get_speed_and_duplex(hw,
3478                                                          &adapter->link_speed,
3479                                                          &adapter->link_duplex);
3480
3481                         ctrl = rd32(E1000_CTRL);
3482                         /* Link status message must follow this format */
3483                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3484                                  "Flow Control: %s\n",
3485                                netdev->name,
3486                                adapter->link_speed,
3487                                adapter->link_duplex == FULL_DUPLEX ?
3488                                  "Full Duplex" : "Half Duplex",
3489                                ((ctrl & E1000_CTRL_TFCE) &&
3490                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3491                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3492                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3493
3494                         /* adjust timeout factor according to speed/duplex */
3495                         adapter->tx_timeout_factor = 1;
3496                         switch (adapter->link_speed) {
3497                         case SPEED_10:
3498                                 adapter->tx_timeout_factor = 14;
3499                                 break;
3500                         case SPEED_100:
3501                                 /* maybe add some timeout factor ? */
3502                                 break;
3503                         }
3504
3505                         netif_carrier_on(netdev);
3506
3507                         igb_ping_all_vfs(adapter);
3508
3509                         /* link state has changed, schedule phy info update */
3510                         if (!test_bit(__IGB_DOWN, &adapter->state))
3511                                 mod_timer(&adapter->phy_info_timer,
3512                                           round_jiffies(jiffies + 2 * HZ));
3513                 }
3514         } else {
3515                 if (netif_carrier_ok(netdev)) {
3516                         adapter->link_speed = 0;
3517                         adapter->link_duplex = 0;
3518                         /* Link status message must follow this format */
3519                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3520                                netdev->name);
3521                         netif_carrier_off(netdev);
3522
3523                         igb_ping_all_vfs(adapter);
3524
3525                         /* link state has changed, schedule phy info update */
3526                         if (!test_bit(__IGB_DOWN, &adapter->state))
3527                                 mod_timer(&adapter->phy_info_timer,
3528                                           round_jiffies(jiffies + 2 * HZ));
3529                 }
3530         }
3531
3532         spin_lock(&adapter->stats64_lock);
3533         igb_update_stats(adapter, &adapter->stats64);
3534         spin_unlock(&adapter->stats64_lock);
3535
3536         for (i = 0; i < adapter->num_tx_queues; i++) {
3537                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3538                 if (!netif_carrier_ok(netdev)) {
3539                         /* We've lost link, so the controller stops DMA,
3540                          * but we've got queued Tx work that's never going
3541                          * to get done, so reset controller to flush Tx.
3542                          * (Do the reset outside of interrupt context). */
3543                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3544                                 adapter->tx_timeout_count++;
3545                                 schedule_work(&adapter->reset_task);
3546                                 /* return immediately since reset is imminent */
3547                                 return;
3548                         }
3549                 }
3550
3551                 /* Force detection of hung controller every watchdog period */
3552                 tx_ring->detect_tx_hung = true;
3553         }
3554
3555         /* Cause software interrupt to ensure rx ring is cleaned */
3556         if (adapter->msix_entries) {
3557                 u32 eics = 0;
3558                 for (i = 0; i < adapter->num_q_vectors; i++) {
3559                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3560                         eics |= q_vector->eims_value;
3561                 }
3562                 wr32(E1000_EICS, eics);
3563         } else {
3564                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3565         }
3566
3567         igb_spoof_check(adapter);
3568
3569         /* Reset the timer */
3570         if (!test_bit(__IGB_DOWN, &adapter->state))
3571                 mod_timer(&adapter->watchdog_timer,
3572                           round_jiffies(jiffies + 2 * HZ));
3573 }
3574
3575 enum latency_range {
3576         lowest_latency = 0,
3577         low_latency = 1,
3578         bulk_latency = 2,
3579         latency_invalid = 255
3580 };
3581
3582 /**
3583  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3584  *
3585  *      Stores a new ITR value based strictly on packet size.  This
3586  *      algorithm is less sophisticated than that used in igb_update_itr,
3587  *      due to the difficulty of synchronizing statistics across multiple
3588  *      receive rings.  The divisors and thresholds used by this function
3589  *      were determined based on theoretical maximum wire speed and testing
3590  *      data, in order to minimize response time while increasing bulk
3591  *      throughput.
3592  *      This functionality is controlled by the InterruptThrottleRate module
3593  *      parameter (see igb_param.c)
3594  *      NOTE:  This function is called only when operating in a multiqueue
3595  *             receive environment.
3596  * @q_vector: pointer to q_vector
3597  **/
3598 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3599 {
3600         int new_val = q_vector->itr_val;
3601         int avg_wire_size = 0;
3602         struct igb_adapter *adapter = q_vector->adapter;
3603         struct igb_ring *ring;
3604         unsigned int packets;
3605
3606         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3607          * ints/sec - ITR timer value of 976 ticks.
3608          */
3609         if (adapter->link_speed != SPEED_1000) {
3610                 new_val = 976;
3611                 goto set_itr_val;
3612         }
3613
3614         ring = q_vector->rx_ring;
3615         if (ring) {
3616                 packets = ACCESS_ONCE(ring->total_packets);
3617
3618                 if (packets)
3619                         avg_wire_size = ring->total_bytes / packets;
3620         }
3621
3622         ring = q_vector->tx_ring;
3623         if (ring) {
3624                 packets = ACCESS_ONCE(ring->total_packets);
3625
3626                 if (packets)
3627                         avg_wire_size = max_t(u32, avg_wire_size,
3628                                               ring->total_bytes / packets);
3629         }
3630
3631         /* if avg_wire_size isn't set no work was done */
3632         if (!avg_wire_size)
3633                 goto clear_counts;
3634
3635         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3636         avg_wire_size += 24;
3637
3638         /* Don't starve jumbo frames */
3639         avg_wire_size = min(avg_wire_size, 3000);
3640
3641         /* Give a little boost to mid-size frames */
3642         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3643                 new_val = avg_wire_size / 3;
3644         else
3645                 new_val = avg_wire_size / 2;
3646
3647         /* when in itr mode 3 do not exceed 20K ints/sec */
3648         if (adapter->rx_itr_setting == 3 && new_val < 196)
3649                 new_val = 196;
3650
3651 set_itr_val:
3652         if (new_val != q_vector->itr_val) {
3653                 q_vector->itr_val = new_val;
3654                 q_vector->set_itr = 1;
3655         }
3656 clear_counts:
3657         if (q_vector->rx_ring) {
3658                 q_vector->rx_ring->total_bytes = 0;
3659                 q_vector->rx_ring->total_packets = 0;
3660         }
3661         if (q_vector->tx_ring) {
3662                 q_vector->tx_ring->total_bytes = 0;
3663                 q_vector->tx_ring->total_packets = 0;
3664         }
3665 }
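/*
 * Worked example: an average wire size of 1500 bytes becomes 1524 after the
 * 24-byte overhead, which falls outside the 300-1200 "mid-size" window, so
 * new_val = 1524 / 2 = 762.  Using the roughly 256 ns per ITR tick implied
 * by the constants above (976 ticks ~ 4000 ints/sec), that is about 195 us
 * between interrupts, or on the order of 5000 ints/sec.
 */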
3666
3667 /**
3668  * igb_update_itr - update the dynamic ITR value based on statistics
3669  *      Stores a new ITR value based on packets and byte
3670  *      counts during the last interrupt.  The advantage of per interrupt
3671  *      computation is faster updates and more accurate ITR for the current
3672  *      traffic pattern.  Constants in this function were computed
3673  *      based on theoretical maximum wire speed and thresholds were set based
3674  *      on testing data as well as attempting to minimize response time
3675  *      while increasing bulk throughput.
3676  *      this functionality is controlled by the InterruptThrottleRate module
3677  *      parameter (see igb_param.c)
3678  *      NOTE:  These calculations are only valid when operating in a single-
3679  *             queue environment.
3680  * @adapter: pointer to adapter
3681  * @itr_setting: current q_vector->itr_val
3682  * @packets: the number of packets during this measurement interval
3683  * @bytes: the number of bytes during this measurement interval
3684  **/
3685 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3686                                    int packets, int bytes)
3687 {
3688         unsigned int retval = itr_setting;
3689
3690         if (packets == 0)
3691                 goto update_itr_done;
3692
3693         switch (itr_setting) {
3694         case lowest_latency:
3695                 /* handle TSO and jumbo frames */
3696                 if (bytes/packets > 8000)
3697                         retval = bulk_latency;
3698                 else if ((packets < 5) && (bytes > 512))
3699                         retval = low_latency;
3700                 break;
3701         case low_latency:  /* 50 usec aka 20000 ints/s */
3702                 if (bytes > 10000) {
3703                         /* this if handles the TSO accounting */
3704                         if (bytes/packets > 8000) {
3705                                 retval = bulk_latency;
3706                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3707                                 retval = bulk_latency;
3708                         } else if ((packets > 35)) {
3709                                 retval = lowest_latency;
3710                         }
3711                 } else if (bytes/packets > 2000) {
3712                         retval = bulk_latency;
3713                 } else if (packets <= 2 && bytes < 512) {
3714                         retval = lowest_latency;
3715                 }
3716                 break;
3717         case bulk_latency: /* 250 usec aka 4000 ints/s */
3718                 if (bytes > 25000) {
3719                         if (packets > 35)
3720                                 retval = low_latency;
3721                 } else if (bytes < 1500) {
3722                         retval = low_latency;
3723                 }
3724                 break;
3725         }
3726
3727 update_itr_done:
3728         return retval;
3729 }
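/*
 * Worked example: starting from low_latency, an interval with 40 packets
 * and 20000 bytes has bytes > 10000 and bytes/packets = 500, so neither the
 * TSO nor the bulk checks fire; packets > 35 does, and the range steps down
 * to lowest_latency (a higher interrupt rate).
 */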
3730
3731 static void igb_set_itr(struct igb_adapter *adapter)
3732 {
3733         struct igb_q_vector *q_vector = adapter->q_vector[0];
3734         u16 current_itr;
3735         u32 new_itr = q_vector->itr_val;
3736
3737         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3738         if (adapter->link_speed != SPEED_1000) {
3739                 current_itr = 0;
3740                 new_itr = 4000;
3741                 goto set_itr_now;
3742         }
3743
3744         adapter->rx_itr = igb_update_itr(adapter,
3745                                     adapter->rx_itr,
3746                                     q_vector->rx_ring->total_packets,
3747                                     q_vector->rx_ring->total_bytes);
3748
3749         adapter->tx_itr = igb_update_itr(adapter,
3750                                     adapter->tx_itr,
3751                                     q_vector->tx_ring->total_packets,
3752                                     q_vector->tx_ring->total_bytes);
3753         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3754
3755         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3756         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3757                 current_itr = low_latency;
3758
3759         switch (current_itr) {
3760         /* counts and packets in update_itr are dependent on these numbers */
3761         case lowest_latency:
3762                 new_itr = 56;  /* aka 70,000 ints/sec */
3763                 break;
3764         case low_latency:
3765                 new_itr = 196; /* aka 20,000 ints/sec */
3766                 break;
3767         case bulk_latency:
3768                 new_itr = 980; /* aka 4,000 ints/sec */
3769                 break;
3770         default:
3771                 break;
3772         }
3773
3774 set_itr_now:
3775         q_vector->rx_ring->total_bytes = 0;
3776         q_vector->rx_ring->total_packets = 0;
3777         q_vector->tx_ring->total_bytes = 0;
3778         q_vector->tx_ring->total_packets = 0;
3779
3780         if (new_itr != q_vector->itr_val) {
3781                 /* this attempts to bias the interrupt rate towards Bulk
3782                  * by adding intermediate steps when interrupt rate is
3783                  * increasing */
3784                 new_itr = new_itr > q_vector->itr_val ?
3785                              max((new_itr * q_vector->itr_val) /
3786                                  (new_itr + (q_vector->itr_val >> 2)),
3787                                  new_itr) :
3788                              new_itr;
3789                 /* Don't write the value here; it resets the adapter's
3790                  * internal timer, and causes us to delay far longer than
3791                  * we should between interrupts.  Instead, we write the ITR
3792                  * value at the beginning of the next interrupt so the timing
3793                  * ends up being correct.
3794                  */
3795                 q_vector->itr_val = new_itr;
3796                 q_vector->set_itr = 1;
3797         }
3798 }
3799
3800 #define IGB_TX_FLAGS_CSUM               0x00000001
3801 #define IGB_TX_FLAGS_VLAN               0x00000002
3802 #define IGB_TX_FLAGS_TSO                0x00000004
3803 #define IGB_TX_FLAGS_IPV4               0x00000008
3804 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3805 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3806 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3807
3808 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3809                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3810 {
3811         struct e1000_adv_tx_context_desc *context_desc;
3812         unsigned int i;
3813         int err;
3814         struct igb_buffer *buffer_info;
3815         u32 info = 0, tu_cmd = 0;
3816         u32 mss_l4len_idx;
3817         u8 l4len;
3818
3819         if (skb_header_cloned(skb)) {
3820                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3821                 if (err)
3822                         return err;
3823         }
3824
3825         l4len = tcp_hdrlen(skb);
3826         *hdr_len += l4len;
3827
3828         if (skb->protocol == htons(ETH_P_IP)) {
3829                 struct iphdr *iph = ip_hdr(skb);
3830                 iph->tot_len = 0;
3831                 iph->check = 0;
3832                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3833                                                          iph->daddr, 0,
3834                                                          IPPROTO_TCP,
3835                                                          0);
3836         } else if (skb_is_gso_v6(skb)) {
3837                 ipv6_hdr(skb)->payload_len = 0;
3838                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3839                                                        &ipv6_hdr(skb)->daddr,
3840                                                        0, IPPROTO_TCP, 0);
3841         }
3842
3843         i = tx_ring->next_to_use;
3844
3845         buffer_info = &tx_ring->buffer_info[i];
3846         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3847         /* VLAN MACLEN IPLEN */
3848         if (tx_flags & IGB_TX_FLAGS_VLAN)
3849                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3850         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3851         *hdr_len += skb_network_offset(skb);
3852         info |= skb_network_header_len(skb);
3853         *hdr_len += skb_network_header_len(skb);
3854         context_desc->vlan_macip_lens = cpu_to_le32(info);
3855
3856         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3857         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3858
3859         if (skb->protocol == htons(ETH_P_IP))
3860                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3861         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3862
3863         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3864
3865         /* MSS L4LEN IDX */
3866         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3867         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3868
3869         /* For 82575, context index must be unique per ring. */
3870         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3871                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3872
3873         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3874         context_desc->seqnum_seed = 0;
3875
3876         buffer_info->time_stamp = jiffies;
3877         buffer_info->next_to_watch = i;
3878         buffer_info->dma = 0;
3879         i++;
3880         if (i == tx_ring->count)
3881                 i = 0;
3882
3883         tx_ring->next_to_use = i;
3884
3885         return true;
3886 }
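/*
 * Header-length example for igb_tso_adv(): a plain TCP/IPv4 TSO frame with
 * a 14-byte Ethernet header, 20-byte IP header and 20-byte TCP header
 * yields *hdr_len = 20 (l4len) + 14 (network offset) + 20 (network header
 * length) = 54 bytes.
 */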
3887
3888 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3889                                    struct sk_buff *skb, u32 tx_flags)
3890 {
3891         struct e1000_adv_tx_context_desc *context_desc;
3892         struct device *dev = tx_ring->dev;
3893         struct igb_buffer *buffer_info;
3894         u32 info = 0, tu_cmd = 0;
3895         unsigned int i;
3896
3897         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3898             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3899                 i = tx_ring->next_to_use;
3900                 buffer_info = &tx_ring->buffer_info[i];
3901                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3902
3903                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3904                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3905
3906                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3907                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3908                         info |= skb_network_header_len(skb);
3909
3910                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3911
3912                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3913
3914                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3915                         __be16 protocol;
3916
3917                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3918                                 const struct vlan_ethhdr *vhdr =
3919                                           (const struct vlan_ethhdr*)skb->data;
3920
3921                                 protocol = vhdr->h_vlan_encapsulated_proto;
3922                         } else {
3923                                 protocol = skb->protocol;
3924                         }
3925
3926                         switch (protocol) {
3927                         case cpu_to_be16(ETH_P_IP):
3928                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3929                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3930                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3931                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3932                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3933                                 break;
3934                         case cpu_to_be16(ETH_P_IPV6):
3935                                 /* XXX what about other V6 headers?? */
3936                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3937                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3938                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3939                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3940                                 break;
3941                         default:
3942                                 if (unlikely(net_ratelimit()))
3943                                         dev_warn(dev,
3944                                             "partial checksum but proto=%x!\n",
3945                                             skb->protocol);
3946                                 break;
3947                         }
3948                 }
3949
3950                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3951                 context_desc->seqnum_seed = 0;
3952                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3953                         context_desc->mss_l4len_idx =
3954                                 cpu_to_le32(tx_ring->reg_idx << 4);
3955
3956                 buffer_info->time_stamp = jiffies;
3957                 buffer_info->next_to_watch = i;
3958                 buffer_info->dma = 0;
3959
3960                 i++;
3961                 if (i == tx_ring->count)
3962                         i = 0;
3963                 tx_ring->next_to_use = i;
3964
3965                 return true;
3966         }
3967         return false;
3968 }
3969
3970 #define IGB_MAX_TXD_PWR 16
3971 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3972
3973 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3974                                  unsigned int first)
3975 {
3976         struct igb_buffer *buffer_info;
3977         struct device *dev = tx_ring->dev;
3978         unsigned int hlen = skb_headlen(skb);
3979         unsigned int count = 0, i;
3980         unsigned int f;
3981         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3982
3983         i = tx_ring->next_to_use;
3984
3985         buffer_info = &tx_ring->buffer_info[i];
3986         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
3987         buffer_info->length = hlen;
3988         /* set time_stamp *before* dma to help avoid a possible race */
3989         buffer_info->time_stamp = jiffies;
3990         buffer_info->next_to_watch = i;
3991         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
3992                                           DMA_TO_DEVICE);
3993         if (dma_mapping_error(dev, buffer_info->dma))
3994                 goto dma_error;
3995
3996         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3997                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
3998                 unsigned int len = frag->size;
3999
4000                 count++;
4001                 i++;
4002                 if (i == tx_ring->count)
4003                         i = 0;
4004
4005                 buffer_info = &tx_ring->buffer_info[i];
4006                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4007                 buffer_info->length = len;
4008                 buffer_info->time_stamp = jiffies;
4009                 buffer_info->next_to_watch = i;
4010                 buffer_info->mapped_as_page = true;
4011                 buffer_info->dma = dma_map_page(dev,
4012                                                 frag->page,
4013                                                 frag->page_offset,
4014                                                 len,
4015                                                 DMA_TO_DEVICE);
4016                 if (dma_mapping_error(dev, buffer_info->dma))
4017                         goto dma_error;
4018
4019         }
4020
4021         tx_ring->buffer_info[i].skb = skb;
4022         tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4023         /* multiply data chunks by size of headers */
4024         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4025         tx_ring->buffer_info[i].gso_segs = gso_segs;
4026         tx_ring->buffer_info[first].next_to_watch = i;
4027
4028         return ++count;
4029
4030 dma_error:
4031         dev_err(dev, "TX DMA map failed\n");
4032
4033         /* clear timestamp and dma mappings for failed buffer_info mapping */
4034         buffer_info->dma = 0;
4035         buffer_info->time_stamp = 0;
4036         buffer_info->length = 0;
4037         buffer_info->next_to_watch = 0;
4038         buffer_info->mapped_as_page = false;
4039
4040         /* clear timestamp and dma mappings for remaining portion of packet */
4041         while (count--) {
4042                 if (i == 0)
4043                         i = tx_ring->count;
4044                 i--;
4045                 buffer_info = &tx_ring->buffer_info[i];
4046                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4047         }
4048
4049         return 0;
4050 }
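/*
 * Note: on success igb_tx_map_adv() returns the number of descriptors
 * consumed, i.e. one for the linear skb data plus one per page fragment; a
 * return of 0 means a DMA mapping failed and everything was unwound above.
 */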
4051
4052 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4053                                     u32 tx_flags, int count, u32 paylen,
4054                                     u8 hdr_len)
4055 {
4056         union e1000_adv_tx_desc *tx_desc;
4057         struct igb_buffer *buffer_info;
4058         u32 olinfo_status = 0, cmd_type_len;
4059         unsigned int i = tx_ring->next_to_use;
4060
4061         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4062                         E1000_ADVTXD_DCMD_DEXT);
4063
4064         if (tx_flags & IGB_TX_FLAGS_VLAN)
4065                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4066
4067         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4068                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4069
4070         if (tx_flags & IGB_TX_FLAGS_TSO) {
4071                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4072
4073                 /* insert tcp checksum */
4074                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4075
4076                 /* insert ip checksum */
4077                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4078                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4079
4080         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4081                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4082         }
4083
4084         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4085             (tx_flags & (IGB_TX_FLAGS_CSUM |
4086                          IGB_TX_FLAGS_TSO |
4087                          IGB_TX_FLAGS_VLAN)))
4088                 olinfo_status |= tx_ring->reg_idx << 4;
4089
4090         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4091
4092         do {
4093                 buffer_info = &tx_ring->buffer_info[i];
4094                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4095                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4096                 tx_desc->read.cmd_type_len =
4097                         cpu_to_le32(cmd_type_len | buffer_info->length);
4098                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4099                 count--;
4100                 i++;
4101                 if (i == tx_ring->count)
4102                         i = 0;
4103         } while (count > 0);
4104
4105         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4106         /* Force memory writes to complete before letting h/w
4107          * know there are new descriptors to fetch.  (Only
4108          * applicable for weak-ordered memory model archs,
4109          * such as IA-64). */
4110         wmb();
4111
4112         tx_ring->next_to_use = i;
4113         writel(i, tx_ring->tail);
4114         /* we need this if more than one processor can write to our tail
4115          * at a time; it synchronizes IO on IA64/Altix systems */
4116         mmiowb();
4117 }
4118
4119 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4120 {
4121         struct net_device *netdev = tx_ring->netdev;
4122
4123         netif_stop_subqueue(netdev, tx_ring->queue_index);
4124
4125         /* Herbert's original patch had:
4126          *  smp_mb__after_netif_stop_queue();
4127          * but since that doesn't exist yet, just open code it. */
4128         smp_mb();
4129
4130         /* We need to check again in case another CPU has just
4131          * made room available. */
4132         if (igb_desc_unused(tx_ring) < size)
4133                 return -EBUSY;
4134
4135         /* A reprieve! */
4136         netif_wake_subqueue(netdev, tx_ring->queue_index);
4137
4138         u64_stats_update_begin(&tx_ring->tx_syncp2);
4139         tx_ring->tx_stats.restart_queue2++;
4140         u64_stats_update_end(&tx_ring->tx_syncp2);
4141
4142         return 0;
4143 }
4144
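/* fast path: when plenty of descriptors are free, skip the stop/wake
 * handshake in __igb_maybe_stop_tx() entirely */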
4145 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4146 {
4147         if (igb_desc_unused(tx_ring) >= size)
4148                 return 0;
4149         return __igb_maybe_stop_tx(tx_ring, size);
4150 }
4151
4152 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4153                                     struct igb_ring *tx_ring)
4154 {
4155         int tso = 0, count;
4156         u32 tx_flags = 0;
4157         u16 first;
4158         u8 hdr_len = 0;
4159
4160         /* need: 1 descriptor per page,
4161          *       + 2 desc gap to keep tail from touching head,
4162          *       + 1 desc for skb->data,
4163          *       + 1 desc for context descriptor,
4164          * otherwise try next time */
4165         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4166                 /* this is a hard error */
4167                 return NETDEV_TX_BUSY;
4168         }
4169
4170         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4171                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4172                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4173         }
4174
4175         if (vlan_tx_tag_present(skb)) {
4176                 tx_flags |= IGB_TX_FLAGS_VLAN;
4177                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4178         }
4179
4180         if (skb->protocol == htons(ETH_P_IP))
4181                 tx_flags |= IGB_TX_FLAGS_IPV4;
4182
4183         first = tx_ring->next_to_use;
4184         if (skb_is_gso(skb)) {
4185                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4186
4187                 if (tso < 0) {
4188                         dev_kfree_skb_any(skb);
4189                         return NETDEV_TX_OK;
4190                 }
4191         }
4192
4193         if (tso)
4194                 tx_flags |= IGB_TX_FLAGS_TSO;
4195         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4196                  (skb->ip_summed == CHECKSUM_PARTIAL))
4197                 tx_flags |= IGB_TX_FLAGS_CSUM;
4198
4199         /*
4200          * count reflects descriptors mapped; if 0 or less, a mapping error
4201          * has occurred and we need to rewind the descriptor queue
4202          */
4203         count = igb_tx_map_adv(tx_ring, skb, first);
4204         if (!count) {
4205                 dev_kfree_skb_any(skb);
4206                 tx_ring->buffer_info[first].time_stamp = 0;
4207                 tx_ring->next_to_use = first;
4208                 return NETDEV_TX_OK;
4209         }
4210
4211         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4212
4213         /* Make sure there is space in the ring for the next send. */
4214         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4215
4216         return NETDEV_TX_OK;
4217 }
4218
4219 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4220                                       struct net_device *netdev)
4221 {
4222         struct igb_adapter *adapter = netdev_priv(netdev);
4223         struct igb_ring *tx_ring;
4224         int r_idx = 0;
4225
4226         if (test_bit(__IGB_DOWN, &adapter->state)) {
4227                 dev_kfree_skb_any(skb);
4228                 return NETDEV_TX_OK;
4229         }
4230
4231         if (skb->len <= 0) {
4232                 dev_kfree_skb_any(skb);
4233                 return NETDEV_TX_OK;
4234         }
4235
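        /* mask the queue index chosen by the stack down to a valid entry in
         * the multi_tx_table ring lookup */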
4236         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4237         tx_ring = adapter->multi_tx_table[r_idx];
4238
4239         /* This goes back to the question of how to logically map a tx queue
4240          * to a flow.  Right now, performance is slightly degraded when
4241          * using multiple tx queues.  If the stack breaks away from a
4242          * single qdisc implementation, we can look at this again. */
4243         return igb_xmit_frame_ring_adv(skb, tx_ring);
4244 }
4245
4246 /**
4247  * igb_tx_timeout - Respond to a Tx Hang
4248  * @netdev: network interface device structure
4249  **/
4250 static void igb_tx_timeout(struct net_device *netdev)
4251 {
4252         struct igb_adapter *adapter = netdev_priv(netdev);
4253         struct e1000_hw *hw = &adapter->hw;
4254
4255         /* Do the reset outside of interrupt context */
4256         adapter->tx_timeout_count++;
4257
4258         if (hw->mac.type == e1000_82580)
4259                 hw->dev_spec._82575.global_device_reset = true;
4260
4261         schedule_work(&adapter->reset_task);
4262         wr32(E1000_EICS,
4263              (adapter->eims_enable_mask & ~adapter->eims_other));
4264 }
4265
4266 static void igb_reset_task(struct work_struct *work)
4267 {
4268         struct igb_adapter *adapter;
4269         adapter = container_of(work, struct igb_adapter, reset_task);
4270
4271         igb_dump(adapter);
4272         netdev_err(adapter->netdev, "Reset adapter\n");
4273         igb_reinit_locked(adapter);
4274 }
4275
4276 /**
4277  * igb_get_stats64 - Get System Network Statistics
4278  * @netdev: network interface device structure
4279  * @stats: rtnl_link_stats64 pointer
4280  *
4281  **/
4282 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4283                                                  struct rtnl_link_stats64 *stats)
4284 {
4285         struct igb_adapter *adapter = netdev_priv(netdev);
4286
4287         spin_lock(&adapter->stats64_lock);
4288         igb_update_stats(adapter, &adapter->stats64);
4289         memcpy(stats, &adapter->stats64, sizeof(*stats));
4290         spin_unlock(&adapter->stats64_lock);
4291
4292         return stats;
4293 }
4294
4295 /**
4296  * igb_change_mtu - Change the Maximum Transfer Unit
4297  * @netdev: network interface device structure
4298  * @new_mtu: new value for maximum frame size
4299  *
4300  * Returns 0 on success, negative on failure
4301  **/
4302 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4303 {
4304         struct igb_adapter *adapter = netdev_priv(netdev);
4305         struct pci_dev *pdev = adapter->pdev;
4306         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4307         u32 rx_buffer_len, i;
4308
4309         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4310                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4311                 return -EINVAL;
4312         }
4313
4314         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4315                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4316                 return -EINVAL;
4317         }
4318
4319         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4320                 msleep(1);
4321
4322         /* igb_down has a dependency on max_frame_size */
4323         adapter->max_frame_size = max_frame;
4324
4325         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4326          * means we reserve 2 more; this pushes us to allocate from the next
4327          * larger slab size.
4328          * i.e. RXBUFFER_2048 --> size-4096 slab
4329          */
4330
4331         if (adapter->hw.mac.type == e1000_82580)
4332                 max_frame += IGB_TS_HDR_LEN;
4333
4334         if (max_frame <= IGB_RXBUFFER_1024)
4335                 rx_buffer_len = IGB_RXBUFFER_1024;
4336         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4337                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4338         else
4339                 rx_buffer_len = IGB_RXBUFFER_128;
4340
4341         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4342              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4343                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4344
4345         if ((adapter->hw.mac.type == e1000_82580) &&
4346             (rx_buffer_len == IGB_RXBUFFER_128))
4347                 rx_buffer_len += IGB_RXBUFFER_64;
4348
4349         if (netif_running(netdev))
4350                 igb_down(adapter);
4351
4352         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4353                  netdev->mtu, new_mtu);
4354         netdev->mtu = new_mtu;
4355
4356         for (i = 0; i < adapter->num_rx_queues; i++)
4357                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4358
4359         if (netif_running(netdev))
4360                 igb_up(adapter);
4361         else
4362                 igb_reset(adapter);
4363
4364         clear_bit(__IGB_RESETTING, &adapter->state);
4365
4366         return 0;
4367 }
4368
4369 /**
4370  * igb_update_stats - Update the board statistics counters
4371  * @adapter: board private structure
4372  **/
4373
4374 void igb_update_stats(struct igb_adapter *adapter,
4375                       struct rtnl_link_stats64 *net_stats)
4376 {
4377         struct e1000_hw *hw = &adapter->hw;
4378         struct pci_dev *pdev = adapter->pdev;
4379         u32 reg, mpc;
4380         u16 phy_tmp;
4381         int i;
4382         u64 bytes, packets;
4383         unsigned int start;
4384         u64 _bytes, _packets;
4385
4386 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4387
4388         /*
4389          * Prevent stats update while adapter is being reset, or if the pci
4390          * connection is down.
4391          */
4392         if (adapter->link_speed == 0)
4393                 return;
4394         if (pci_channel_offline(pdev))
4395                 return;
4396
4397         bytes = 0;
4398         packets = 0;
4399         for (i = 0; i < adapter->num_rx_queues; i++) {
4400                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4401                 struct igb_ring *ring = adapter->rx_ring[i];
4402
4403                 ring->rx_stats.drops += rqdpc_tmp;
4404                 net_stats->rx_fifo_errors += rqdpc_tmp;
4405
4406                 do {
4407                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4408                         _bytes = ring->rx_stats.bytes;
4409                         _packets = ring->rx_stats.packets;
4410                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4411                 bytes += _bytes;
4412                 packets += _packets;
4413         }
4414
4415         net_stats->rx_bytes = bytes;
4416         net_stats->rx_packets = packets;
4417
4418         bytes = 0;
4419         packets = 0;
4420         for (i = 0; i < adapter->num_tx_queues; i++) {
4421                 struct igb_ring *ring = adapter->tx_ring[i];
4422                 do {
4423                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4424                         _bytes = ring->tx_stats.bytes;
4425                         _packets = ring->tx_stats.packets;
4426                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4427                 bytes += _bytes;
4428                 packets += _packets;
4429         }
4430         net_stats->tx_bytes = bytes;
4431         net_stats->tx_packets = packets;
4432
4433         /* read stats registers */
4434         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4435         adapter->stats.gprc += rd32(E1000_GPRC);
4436         adapter->stats.gorc += rd32(E1000_GORCL);
4437         rd32(E1000_GORCH); /* clear GORCL */
4438         adapter->stats.bprc += rd32(E1000_BPRC);
4439         adapter->stats.mprc += rd32(E1000_MPRC);
4440         adapter->stats.roc += rd32(E1000_ROC);
4441
4442         adapter->stats.prc64 += rd32(E1000_PRC64);
4443         adapter->stats.prc127 += rd32(E1000_PRC127);
4444         adapter->stats.prc255 += rd32(E1000_PRC255);
4445         adapter->stats.prc511 += rd32(E1000_PRC511);
4446         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4447         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4448         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4449         adapter->stats.sec += rd32(E1000_SEC);
4450
4451         mpc = rd32(E1000_MPC);
4452         adapter->stats.mpc += mpc;
4453         net_stats->rx_fifo_errors += mpc;
4454         adapter->stats.scc += rd32(E1000_SCC);
4455         adapter->stats.ecol += rd32(E1000_ECOL);
4456         adapter->stats.mcc += rd32(E1000_MCC);
4457         adapter->stats.latecol += rd32(E1000_LATECOL);
4458         adapter->stats.dc += rd32(E1000_DC);
4459         adapter->stats.rlec += rd32(E1000_RLEC);
4460         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4461         adapter->stats.xontxc += rd32(E1000_XONTXC);
4462         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4463         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4464         adapter->stats.fcruc += rd32(E1000_FCRUC);
4465         adapter->stats.gptc += rd32(E1000_GPTC);
4466         adapter->stats.gotc += rd32(E1000_GOTCL);
4467         rd32(E1000_GOTCH); /* clear GOTCL */
4468         adapter->stats.rnbc += rd32(E1000_RNBC);
4469         adapter->stats.ruc += rd32(E1000_RUC);
4470         adapter->stats.rfc += rd32(E1000_RFC);
4471         adapter->stats.rjc += rd32(E1000_RJC);
4472         adapter->stats.tor += rd32(E1000_TORH);
4473         adapter->stats.tot += rd32(E1000_TOTH);
4474         adapter->stats.tpr += rd32(E1000_TPR);
4475
4476         adapter->stats.ptc64 += rd32(E1000_PTC64);
4477         adapter->stats.ptc127 += rd32(E1000_PTC127);
4478         adapter->stats.ptc255 += rd32(E1000_PTC255);
4479         adapter->stats.ptc511 += rd32(E1000_PTC511);
4480         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4481         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4482
4483         adapter->stats.mptc += rd32(E1000_MPTC);
4484         adapter->stats.bptc += rd32(E1000_BPTC);
4485
4486         adapter->stats.tpt += rd32(E1000_TPT);
4487         adapter->stats.colc += rd32(E1000_COLC);
4488
4489         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4490         /* read internal phy specific stats */
4491         reg = rd32(E1000_CTRL_EXT);
4492         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4493                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4494                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4495         }
4496
4497         adapter->stats.tsctc += rd32(E1000_TSCTC);
4498         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4499
4500         adapter->stats.iac += rd32(E1000_IAC);
4501         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4502         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4503         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4504         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4505         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4506         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4507         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4508         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4509
4510         /* Fill out the OS statistics structure */
4511         net_stats->multicast = adapter->stats.mprc;
4512         net_stats->collisions = adapter->stats.colc;
4513
4514         /* Rx Errors */
4515
4516         /* RLEC on some newer hardware can be incorrect so build
4517          * our own version based on RUC and ROC */
4518         net_stats->rx_errors = adapter->stats.rxerrc +
4519                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4520                 adapter->stats.ruc + adapter->stats.roc +
4521                 adapter->stats.cexterr;
4522         net_stats->rx_length_errors = adapter->stats.ruc +
4523                                       adapter->stats.roc;
4524         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4525         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4526         net_stats->rx_missed_errors = adapter->stats.mpc;
4527
4528         /* Tx Errors */
4529         net_stats->tx_errors = adapter->stats.ecol +
4530                                adapter->stats.latecol;
4531         net_stats->tx_aborted_errors = adapter->stats.ecol;
4532         net_stats->tx_window_errors = adapter->stats.latecol;
4533         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4534
4535         /* Tx Dropped needs to be maintained elsewhere */
4536
4537         /* Phy Stats */
4538         if (hw->phy.media_type == e1000_media_type_copper) {
4539                 if ((adapter->link_speed == SPEED_1000) &&
4540                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4541                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4542                         adapter->phy_stats.idle_errors += phy_tmp;
4543                 }
4544         }
4545
4546         /* Management Stats */
4547         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4548         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4549         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4550 }
4551
4552 static irqreturn_t igb_msix_other(int irq, void *data)
4553 {
4554         struct igb_adapter *adapter = data;
4555         struct e1000_hw *hw = &adapter->hw;
4556         u32 icr = rd32(E1000_ICR);
4557         /* reading ICR causes bit 31 of EICR to be cleared */
4558
4559         if (icr & E1000_ICR_DRSTA)
4560                 schedule_work(&adapter->reset_task);
4561
4562         if (icr & E1000_ICR_DOUTSYNC) {
4563                 /* HW is reporting DMA is out of sync */
4564                 adapter->stats.doosync++;
4565                 /* The DMA Out of Sync is also an indication of a spoof event
4566                  * in IOV mode. Check the Wrong VM Behavior register to
4567                  * see if it is really a spoof event. */
4568                 igb_check_wvbr(adapter);
4569         }
4570
4571         /* Check for a mailbox event */
4572         if (icr & E1000_ICR_VMMB)
4573                 igb_msg_task(adapter);
4574
4575         if (icr & E1000_ICR_LSC) {
4576                 hw->mac.get_link_status = 1;
4577                 /* guard against interrupt when we're going down */
4578                 if (!test_bit(__IGB_DOWN, &adapter->state))
4579                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4580         }
4581
4582         if (adapter->vfs_allocated_count)
4583                 wr32(E1000_IMS, E1000_IMS_LSC |
4584                                 E1000_IMS_VMMB |
4585                                 E1000_IMS_DOUTSYNC);
4586         else
4587                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4588         wr32(E1000_EIMS, adapter->eims_other);
4589
4590         return IRQ_HANDLED;
4591 }
4592
4593 static void igb_write_itr(struct igb_q_vector *q_vector)
4594 {
4595         struct igb_adapter *adapter = q_vector->adapter;
4596         u32 itr_val = q_vector->itr_val & 0x7FFC;
4597
4598         if (!q_vector->set_itr)
4599                 return;
4600
4601         if (!itr_val)
4602                 itr_val = 0x4;
4603
4604         if (adapter->hw.mac.type == e1000_82575)
4605                 itr_val |= itr_val << 16;
4606         else
4607                 itr_val |= 0x8000000;
4608
4609         writel(itr_val, q_vector->itr_register);
4610         q_vector->set_itr = 0;
4611 }
4612
4613 static irqreturn_t igb_msix_ring(int irq, void *data)
4614 {
4615         struct igb_q_vector *q_vector = data;
4616
4617         /* Write the ITR value calculated from the previous interrupt. */
4618         igb_write_itr(q_vector);
4619
4620         napi_schedule(&q_vector->napi);
4621
4622         return IRQ_HANDLED;
4623 }
4624
4625 #ifdef CONFIG_IGB_DCA
4626 static void igb_update_dca(struct igb_q_vector *q_vector)
4627 {
4628         struct igb_adapter *adapter = q_vector->adapter;
4629         struct e1000_hw *hw = &adapter->hw;
4630         int cpu = get_cpu();
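        /* get_cpu() disables preemption, so the CPU id stays stable until the
         * matching put_cpu() below */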
4631
4632         if (q_vector->cpu == cpu)
4633                 goto out_no_update;
4634
4635         if (q_vector->tx_ring) {
4636                 int q = q_vector->tx_ring->reg_idx;
4637                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4638                 if (hw->mac.type == e1000_82575) {
4639                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4640                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4641                 } else {
4642                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4643                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4644                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4645                 }
4646                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4647                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4648         }
4649         if (q_vector->rx_ring) {
4650                 int q = q_vector->rx_ring->reg_idx;
4651                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4652                 if (hw->mac.type == e1000_82575) {
4653                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4654                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4655                 } else {
4656                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4657                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4658                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4659                 }
4660                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4661                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4662                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4663                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4664         }
4665         q_vector->cpu = cpu;
4666 out_no_update:
4667         put_cpu();
4668 }
4669
4670 static void igb_setup_dca(struct igb_adapter *adapter)
4671 {
4672         struct e1000_hw *hw = &adapter->hw;
4673         int i;
4674
4675         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4676                 return;
4677
4678         /* Always use CB2 mode; the difference is masked in the CB driver. */
4679         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4680
4681         for (i = 0; i < adapter->num_q_vectors; i++) {
4682                 adapter->q_vector[i]->cpu = -1;
4683                 igb_update_dca(adapter->q_vector[i]);
4684         }
4685 }
4686
4687 static int __igb_notify_dca(struct device *dev, void *data)
4688 {
4689         struct net_device *netdev = dev_get_drvdata(dev);
4690         struct igb_adapter *adapter = netdev_priv(netdev);
4691         struct pci_dev *pdev = adapter->pdev;
4692         struct e1000_hw *hw = &adapter->hw;
4693         unsigned long event = *(unsigned long *)data;
4694
4695         switch (event) {
4696         case DCA_PROVIDER_ADD:
4697                 /* if already enabled, don't do it again */
4698                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4699                         break;
4700                 if (dca_add_requester(dev) == 0) {
4701                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4702                         dev_info(&pdev->dev, "DCA enabled\n");
4703                         igb_setup_dca(adapter);
4704                         break;
4705                 }
4706                 /* Fall Through since DCA is disabled. */
4707         case DCA_PROVIDER_REMOVE:
4708                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4709                         /* without this a class_device is left
4710                          * hanging around in the sysfs model */
4711                         dca_remove_requester(dev);
4712                         dev_info(&pdev->dev, "DCA disabled\n");
4713                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4714                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4715                 }
4716                 break;
4717         }
4718
4719         return 0;
4720 }
4721
4722 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4723                           void *p)
4724 {
4725         int ret_val;
4726
4727         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4728                                          __igb_notify_dca);
4729
4730         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4731 }
4732 #endif /* CONFIG_IGB_DCA */
4733
4734 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4735 {
4736         struct e1000_hw *hw = &adapter->hw;
4737         u32 ping;
4738         int i;
4739
4740         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4741                 ping = E1000_PF_CONTROL_MSG;
4742                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4743                         ping |= E1000_VT_MSGTYPE_CTS;
4744                 igb_write_mbx(hw, &ping, 1, i);
4745         }
4746 }
4747
4748 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4749 {
4750         struct e1000_hw *hw = &adapter->hw;
4751         u32 vmolr = rd32(E1000_VMOLR(vf));
4752         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4753
4754         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4755                             IGB_VF_FLAG_MULTI_PROMISC);
4756         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4757
4758         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4759                 vmolr |= E1000_VMOLR_MPME;
4760                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4761                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4762         } else {
4763                 /*
4764                  * if we have hashes and we are clearing a multicast promisc
4765                  * flag we need to write the hashes to the MTA as this step
4766                  * was previously skipped
4767                  */
4768                 if (vf_data->num_vf_mc_hashes > 30) {
4769                         vmolr |= E1000_VMOLR_MPME;
4770                 } else if (vf_data->num_vf_mc_hashes) {
4771                         int j;
4772                         vmolr |= E1000_VMOLR_ROMPE;
4773                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4774                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4775                 }
4776         }
4777
4778         wr32(E1000_VMOLR(vf), vmolr);
4779
4780         /* there are flags left unprocessed, likely not supported */
4781         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4782                 return -EINVAL;
4783
4784         return 0;
4785
4786 }
4787
4788 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4789                                   u32 *msgbuf, u32 vf)
4790 {
4791         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4792         u16 *hash_list = (u16 *)&msgbuf[1];
4793         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4794         int i;
4795
4796         /* salt away the number of multicast addresses assigned
4797          * to this VF for later use to restore when the PF multicast
4798          * list changes
4799          */
4800         vf_data->num_vf_mc_hashes = n;
4801
4802         /* only up to 30 hash values supported */
4803         if (n > 30)
4804                 n = 30;
4805
4806         /* store the hashes for later use */
4807         for (i = 0; i < n; i++)
4808                 vf_data->vf_mc_hashes[i] = hash_list[i];
4809
4810         /* Flush and reset the mta with the new values */
4811         igb_set_rx_mode(adapter->netdev);
4812
4813         return 0;
4814 }
4815
4816 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4817 {
4818         struct e1000_hw *hw = &adapter->hw;
4819         struct vf_data_storage *vf_data;
4820         int i, j;
4821
4822         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4823                 u32 vmolr = rd32(E1000_VMOLR(i));
4824                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4825
4826                 vf_data = &adapter->vf_data[i];
4827
4828                 if ((vf_data->num_vf_mc_hashes > 30) ||
4829                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4830                         vmolr |= E1000_VMOLR_MPME;
4831                 } else if (vf_data->num_vf_mc_hashes) {
4832                         vmolr |= E1000_VMOLR_ROMPE;
4833                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4834                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4835                 }
4836                 wr32(E1000_VMOLR(i), vmolr);
4837         }
4838 }
4839
4840 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4841 {
4842         struct e1000_hw *hw = &adapter->hw;
4843         u32 pool_mask, reg, vid;
4844         int i;
4845
4846         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4847
4848         /* Find the vlan filter for this id */
4849         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4850                 reg = rd32(E1000_VLVF(i));
4851
4852                 /* remove the vf from the pool */
4853                 reg &= ~pool_mask;
4854
4855                 /* if pool is empty then remove entry from vfta */
4856                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4857                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4858                         vid = reg & E1000_VLVF_VLANID_MASK;
4859                         igb_vfta_set(hw, vid, false);
4860                         reg = 0;
4861                 }
4862
4863                 wr32(E1000_VLVF(i), reg);
4864         }
4865
4866         adapter->vf_data[vf].vlans_enabled = 0;
4867 }
4868
4869 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4870 {
4871         struct e1000_hw *hw = &adapter->hw;
4872         u32 reg, i;
4873
4874         /* The vlvf table only exists on 82576 hardware and newer */
4875         if (hw->mac.type < e1000_82576)
4876                 return -1;
4877
4878         /* we only need to do this if VMDq is enabled */
4879         if (!adapter->vfs_allocated_count)
4880                 return -1;
4881
4882         /* Find the vlan filter for this id */
4883         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4884                 reg = rd32(E1000_VLVF(i));
4885                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4886                     vid == (reg & E1000_VLVF_VLANID_MASK))
4887                         break;
4888         }
4889
4890         if (add) {
4891                 if (i == E1000_VLVF_ARRAY_SIZE) {
4892                         /* Did not find a matching VLAN ID entry that was
4893                          * enabled.  Search for a free filter entry, i.e.
4894                          * one without the enable bit set
4895                          */
4896                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4897                                 reg = rd32(E1000_VLVF(i));
4898                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4899                                         break;
4900                         }
4901                 }
4902                 if (i < E1000_VLVF_ARRAY_SIZE) {
4903                         /* Found an enabled/available entry */
4904                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4905
4906                         /* if !enabled we need to set this up in vfta */
4907                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4908                                 /* add VID to filter table */
4909                                 igb_vfta_set(hw, vid, true);
4910                                 reg |= E1000_VLVF_VLANID_ENABLE;
4911                         }
4912                         reg &= ~E1000_VLVF_VLANID_MASK;
4913                         reg |= vid;
4914                         wr32(E1000_VLVF(i), reg);
4915
4916                         /* do not modify RLPML for PF devices */
4917                         if (vf >= adapter->vfs_allocated_count)
4918                                 return 0;
4919
4920                         if (!adapter->vf_data[vf].vlans_enabled) {
4921                                 u32 size;
4922                                 reg = rd32(E1000_VMOLR(vf));
4923                                 size = reg & E1000_VMOLR_RLPML_MASK;
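                                /* first VLAN added: grow the VF's max receive
                                 * packet size by the 4-byte VLAN tag */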
4924                                 size += 4;
4925                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4926                                 reg |= size;
4927                                 wr32(E1000_VMOLR(vf), reg);
4928                         }
4929
4930                         adapter->vf_data[vf].vlans_enabled++;
4931                         return 0;
4932                 }
4933         } else {
4934                 if (i < E1000_VLVF_ARRAY_SIZE) {
4935                         /* remove vf from the pool */
4936                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4937                         /* if pool is empty then remove entry from vfta */
4938                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4939                                 reg = 0;
4940                                 igb_vfta_set(hw, vid, false);
4941                         }
4942                         wr32(E1000_VLVF(i), reg);
4943
4944                         /* do not modify RLPML for PF devices */
4945                         if (vf >= adapter->vfs_allocated_count)
4946                                 return 0;
4947
4948                         adapter->vf_data[vf].vlans_enabled--;
4949                         if (!adapter->vf_data[vf].vlans_enabled) {
4950                                 u32 size;
4951                                 reg = rd32(E1000_VMOLR(vf));
4952                                 size = reg & E1000_VMOLR_RLPML_MASK;
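                                /* last VLAN removed: shrink the VF's max
                                 * receive packet size back by the 4-byte tag */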
4953                                 size -= 4;
4954                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4955                                 reg |= size;
4956                                 wr32(E1000_VMOLR(vf), reg);
4957                         }
4958                 }
4959         }
4960         return 0;
4961 }
4962
4963 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4964 {
4965         struct e1000_hw *hw = &adapter->hw;
4966
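        /* a non-zero vid programs this VF's port VLAN as the default tag to
         * insert; zero disables it */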
4967         if (vid)
4968                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4969         else
4970                 wr32(E1000_VMVIR(vf), 0);
4971 }
4972
4973 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4974                                int vf, u16 vlan, u8 qos)
4975 {
4976         int err = 0;
4977         struct igb_adapter *adapter = netdev_priv(netdev);
4978
4979         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4980                 return -EINVAL;
4981         if (vlan || qos) {
4982                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4983                 if (err)
4984                         goto out;
4985                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4986                 igb_set_vmolr(adapter, vf, !vlan);
4987                 adapter->vf_data[vf].pf_vlan = vlan;
4988                 adapter->vf_data[vf].pf_qos = qos;
4989                 dev_info(&adapter->pdev->dev,
4990                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4991                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4992                         dev_warn(&adapter->pdev->dev,
4993                                  "The VF VLAN has been set,"
4994                                  " but the PF device is not up.\n");
4995                         dev_warn(&adapter->pdev->dev,
4996                                  "Bring the PF device up before"
4997                                  " attempting to use the VF device.\n");
4998                 }
4999         } else {
5000                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5001                                    false, vf);
5002                 igb_set_vmvir(adapter, vlan, vf);
5003                 igb_set_vmolr(adapter, vf, true);
5004                 adapter->vf_data[vf].pf_vlan = 0;
5005                 adapter->vf_data[vf].pf_qos = 0;
5006         }
5007 out:
5008         return err;
5009 }
5010
5011 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5012 {
5013         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5014         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5015
5016         return igb_vlvf_set(adapter, vid, add, vf);
5017 }
5018
5019 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5020 {
5021         /* clear flags - except flag that indicates PF has set the MAC */
5022         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5023         adapter->vf_data[vf].last_nack = jiffies;
5024
5025         /* reset offloads to defaults */
5026         igb_set_vmolr(adapter, vf, true);
5027
5028         /* reset vlans for device */
5029         igb_clear_vf_vfta(adapter, vf);
5030         if (adapter->vf_data[vf].pf_vlan)
5031                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5032                                     adapter->vf_data[vf].pf_vlan,
5033                                     adapter->vf_data[vf].pf_qos);
5034         else
5035                 igb_clear_vf_vfta(adapter, vf);
5036
5037         /* reset multicast table array for vf */
5038         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5039
5040         /* Flush and reset the mta with the new values */
5041         igb_set_rx_mode(adapter->netdev);
5042 }
5043
5044 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5045 {
5046         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5047
5048         /* generate a new mac address as we were hotplug removed/added */
5049         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5050                 random_ether_addr(vf_mac);
5051
5052         /* process remaining reset events */
5053         igb_vf_reset(adapter, vf);
5054 }
5055
5056 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5057 {
5058         struct e1000_hw *hw = &adapter->hw;
5059         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5060         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5061         u32 reg, msgbuf[3];
5062         u8 *addr = (u8 *)(&msgbuf[1]);
5063
5064         /* process all the same items cleared in a function level reset */
5065         igb_vf_reset(adapter, vf);
5066
5067         /* set vf mac address */
5068         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5069
5070         /* enable transmit and receive for vf */
5071         reg = rd32(E1000_VFTE);
5072         wr32(E1000_VFTE, reg | (1 << vf));
5073         reg = rd32(E1000_VFRE);
5074         wr32(E1000_VFRE, reg | (1 << vf));
5075
5076         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5077
5078         /* reply to reset with ack and vf mac address */
5079         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5080         memcpy(addr, vf_mac, 6);
5081         igb_write_mbx(hw, msgbuf, 3, vf);
5082 }
5083
5084 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5085 {
5086         /*
5087          * The VF MAC Address is stored in a packed array of bytes
5088          * starting at the second 32 bit word of the msg array
5089          */
5090         unsigned char *addr = (char *)&msg[1];
5091         int err = -1;
5092
5093         if (is_valid_ether_addr(addr))
5094                 err = igb_set_vf_mac(adapter, vf, addr);
5095
5096         return err;
5097 }
5098
5099 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5100 {
5101         struct e1000_hw *hw = &adapter->hw;
5102         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5103         u32 msg = E1000_VT_MSGTYPE_NACK;
5104
5105         /* if device isn't clear to send it shouldn't be reading either */
5106         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5107             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5108                 igb_write_mbx(hw, &msg, 1, vf);
5109                 vf_data->last_nack = jiffies;
5110         }
5111 }
5112
5113 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5114 {
5115         struct pci_dev *pdev = adapter->pdev;
5116         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5117         struct e1000_hw *hw = &adapter->hw;
5118         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5119         s32 retval;
5120
5121         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5122
5123         if (retval) {
5124                 /* if receive failed revoke VF CTS status and restart init */
5125                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5126                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5127                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5128                         return;
5129                 goto out;
5130         }
5131
5132         /* this is a message we already processed, do nothing */
5133         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5134                 return;
5135
5136         /*
5137          * until the vf completes a reset it should not be
5138          * allowed to start any configuration.
5139          */
5140
5141         if (msgbuf[0] == E1000_VF_RESET) {
5142                 igb_vf_reset_msg(adapter, vf);
5143                 return;
5144         }
5145
5146         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5147                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5148                         return;
5149                 retval = -1;
5150                 goto out;
5151         }
5152
5153         switch ((msgbuf[0] & 0xFFFF)) {
5154         case E1000_VF_SET_MAC_ADDR:
5155                 retval = -EINVAL;
5156                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5157                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5158                 else
5159                         dev_warn(&pdev->dev,
5160                                  "VF %d attempted to override administratively "
5161                                  "set MAC address\nReload the VF driver to "
5162                                  "resume operations\n", vf);
5163                 break;
5164         case E1000_VF_SET_PROMISC:
5165                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5166                 break;
5167         case E1000_VF_SET_MULTICAST:
5168                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5169                 break;
5170         case E1000_VF_SET_LPE:
5171                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5172                 break;
5173         case E1000_VF_SET_VLAN:
5174                 retval = -1;
5175                 if (vf_data->pf_vlan)
5176                         dev_warn(&pdev->dev,
5177                                  "VF %d attempted to override administratively "
5178                                  "set VLAN tag\nReload the VF driver to "
5179                                  "resume operations\n", vf);
5180                 else
5181                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5182                 break;
5183         default:
5184                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5185                 retval = -1;
5186                 break;
5187         }
5188
5189         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5190 out:
5191         /* notify the VF of the results of what it sent us */
5192         if (retval)
5193                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5194         else
5195                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5196
5197         igb_write_mbx(hw, msgbuf, 1, vf);
5198 }
5199
5200 static void igb_msg_task(struct igb_adapter *adapter)
5201 {
5202         struct e1000_hw *hw = &adapter->hw;
5203         u32 vf;
5204
5205         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5206                 /* process any reset requests */
5207                 if (!igb_check_for_rst(hw, vf))
5208                         igb_vf_reset_event(adapter, vf);
5209
5210                 /* process any messages pending */
5211                 if (!igb_check_for_msg(hw, vf))
5212                         igb_rcv_msg_from_vf(adapter, vf);
5213
5214                 /* process any acks */
5215                 if (!igb_check_for_ack(hw, vf))
5216                         igb_rcv_ack_from_vf(adapter, vf);
5217         }
5218 }
5219
5220 /**
5221  *  igb_set_uta - Set unicast filter table address
5222  *  @adapter: board private structure
5223  *
5224  *  The unicast table address is a register array of 32-bit registers.
5225  *  The table is meant to be used in a way similar to how the MTA is used;
5226  *  however, due to certain limitations in the hardware, it is necessary to
5227  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5228  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5229  **/
5230 static void igb_set_uta(struct igb_adapter *adapter)
5231 {
5232         struct e1000_hw *hw = &adapter->hw;
5233         int i;
5234
5235         /* The UTA table only exists on 82576 hardware and newer */
5236         if (hw->mac.type < e1000_82576)
5237                 return;
5238
5239         /* we only need to do this if VMDq is enabled */
5240         if (!adapter->vfs_allocated_count)
5241                 return;
5242
5243         for (i = 0; i < hw->mac.uta_reg_count; i++)
5244                 array_wr32(E1000_UTA, i, ~0);
5245 }
5246
5247 /**
5248  * igb_intr_msi - Interrupt Handler
5249  * @irq: interrupt number
5250  * @data: pointer to a network interface device structure
5251  **/
5252 static irqreturn_t igb_intr_msi(int irq, void *data)
5253 {
5254         struct igb_adapter *adapter = data;
5255         struct igb_q_vector *q_vector = adapter->q_vector[0];
5256         struct e1000_hw *hw = &adapter->hw;
5257         /* reading ICR disables interrupts using IAM */
5258         u32 icr = rd32(E1000_ICR);
5259
5260         igb_write_itr(q_vector);
5261
5262         if (icr & E1000_ICR_DRSTA)
5263                 schedule_work(&adapter->reset_task);
5264
5265         if (icr & E1000_ICR_DOUTSYNC) {
5266                 /* HW is reporting DMA is out of sync */
5267                 adapter->stats.doosync++;
5268         }
5269
5270         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5271                 hw->mac.get_link_status = 1;
5272                 if (!test_bit(__IGB_DOWN, &adapter->state))
5273                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5274         }
5275
5276         napi_schedule(&q_vector->napi);
5277
5278         return IRQ_HANDLED;
5279 }
5280
5281 /**
5282  * igb_intr - Legacy Interrupt Handler
5283  * @irq: interrupt number
5284  * @data: pointer to a network interface device structure
5285  **/
5286 static irqreturn_t igb_intr(int irq, void *data)
5287 {
5288         struct igb_adapter *adapter = data;
5289         struct igb_q_vector *q_vector = adapter->q_vector[0];
5290         struct e1000_hw *hw = &adapter->hw;
5291         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5292          * need for the IMC write */
5293         u32 icr = rd32(E1000_ICR);
5294         if (!icr)
5295                 return IRQ_NONE;  /* Not our interrupt */
5296
5297         igb_write_itr(q_vector);
5298
5299         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5300          * not set, then the adapter didn't send an interrupt */
5301         if (!(icr & E1000_ICR_INT_ASSERTED))
5302                 return IRQ_NONE;
5303
5304         if (icr & E1000_ICR_DRSTA)
5305                 schedule_work(&adapter->reset_task);
5306
5307         if (icr & E1000_ICR_DOUTSYNC) {
5308                 /* HW is reporting DMA is out of sync */
5309                 adapter->stats.doosync++;
5310         }
5311
5312         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5313                 hw->mac.get_link_status = 1;
5314                 /* guard against interrupt when we're going down */
5315                 if (!test_bit(__IGB_DOWN, &adapter->state))
5316                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5317         }
5318
5319         napi_schedule(&q_vector->napi);
5320
5321         return IRQ_HANDLED;
5322 }
5323
5324 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5325 {
5326         struct igb_adapter *adapter = q_vector->adapter;
5327         struct e1000_hw *hw = &adapter->hw;
5328
5329         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5330             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5331                 if (!adapter->msix_entries)
5332                         igb_set_itr(adapter);
5333                 else
5334                         igb_update_ring_itr(q_vector);
5335         }
5336
5337         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5338                 if (adapter->msix_entries)
5339                         wr32(E1000_EIMS, q_vector->eims_value);
5340                 else
5341                         igb_irq_enable(adapter);
5342         }
5343 }
5344
5345 /**
5346  * igb_poll - NAPI Rx polling callback
5347  * @napi: napi polling structure
5348  * @budget: count of how many packets we should handle
5349  **/
5350 static int igb_poll(struct napi_struct *napi, int budget)
5351 {
5352         struct igb_q_vector *q_vector = container_of(napi,
5353                                                      struct igb_q_vector,
5354                                                      napi);
5355         int tx_clean_complete = 1, work_done = 0;
5356
5357 #ifdef CONFIG_IGB_DCA
5358         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5359                 igb_update_dca(q_vector);
5360 #endif
5361         if (q_vector->tx_ring)
5362                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5363
5364         if (q_vector->rx_ring)
5365                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5366
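        /* tx ring not fully cleaned: claim the whole budget so NAPI polls
         * again instead of re-enabling the interrupt */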
5367         if (!tx_clean_complete)
5368                 work_done = budget;
5369
5370         /* If not enough Rx work done, exit the polling mode */
5371         if (work_done < budget) {
5372                 napi_complete(napi);
5373                 igb_ring_irq_enable(q_vector);
5374         }
5375
5376         return work_done;
5377 }
5378
5379 /**
5380  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5381  * @adapter: board private structure
5382  * @shhwtstamps: timestamp structure to update
5383  * @regval: unsigned 64bit system time value.
5384  *
5385  * We need to convert the system time value stored in the RX/TXSTMP registers
5386  * into a hwtstamp which can be used by the upper level timestamping functions
5387  */
5388 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5389                                    struct skb_shared_hwtstamps *shhwtstamps,
5390                                    u64 regval)
5391 {
5392         u64 ns;
5393
5394         /*
5395          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5396          * 24 to match clock shift we setup earlier.
5397          * bit 24 to match the clock shift we set up earlier.
5398         if (adapter->hw.mac.type == e1000_82580)
5399                 regval <<= IGB_82580_TSYNC_SHIFT;
5400
5401         ns = timecounter_cyc2time(&adapter->clock, regval);
5402         timecompare_update(&adapter->compare, ns);
5403         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5404         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5405         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5406 }
5407
5408 /**
5409  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5410  * @q_vector: pointer to q_vector containing needed info
5411  * @buffer: pointer to igb_buffer structure
5412  *
5413  * If we were asked to do hardware stamping and such a time stamp is
5414  * available, then it must have been for this skb here because we allow
5415  * only one such packet into the queue.
5416  */
5417 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5418 {
5419         struct igb_adapter *adapter = q_vector->adapter;
5420         struct e1000_hw *hw = &adapter->hw;
5421         struct skb_shared_hwtstamps shhwtstamps;
5422         u64 regval;
5423
5424         /* if skb does not support hw timestamp or TX stamp not valid exit */
5425         if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5426             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5427                 return;
5428
5429         regval = rd32(E1000_TXSTMPL);
5430         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5431
5432         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5433         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5434 }
5435
5436 /**
5437  * igb_clean_tx_irq - Reclaim resources after transmit completes
5438  * @q_vector: pointer to q_vector containing needed info
5439  * returns true if ring is completely cleaned
5440  **/
5441 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5442 {
5443         struct igb_adapter *adapter = q_vector->adapter;
5444         struct igb_ring *tx_ring = q_vector->tx_ring;
5445         struct net_device *netdev = tx_ring->netdev;
5446         struct e1000_hw *hw = &adapter->hw;
5447         struct igb_buffer *buffer_info;
5448         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5449         unsigned int total_bytes = 0, total_packets = 0;
5450         unsigned int i, eop, count = 0;
5451         bool cleaned = false;
5452
5453         i = tx_ring->next_to_clean;
5454         eop = tx_ring->buffer_info[i].next_to_watch;
5455         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5456
5457         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5458                (count < tx_ring->count)) {
5459                 rmb();  /* read buffer_info after eop_desc status */
5460                 for (cleaned = false; !cleaned; count++) {
5461                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5462                         buffer_info = &tx_ring->buffer_info[i];
5463                         cleaned = (i == eop);
5464
5465                         if (buffer_info->skb) {
5466                                 total_bytes += buffer_info->bytecount;
5467                                 /* gso_segs is currently only valid for tcp */
5468                                 total_packets += buffer_info->gso_segs;
5469                                 igb_tx_hwtstamp(q_vector, buffer_info);
5470                         }
5471
5472                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5473                         tx_desc->wb.status = 0;
5474
5475                         i++;
5476                         if (i == tx_ring->count)
5477                                 i = 0;
5478                 }
5479                 eop = tx_ring->buffer_info[i].next_to_watch;
5480                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5481         }
5482
5483         tx_ring->next_to_clean = i;
5484
5485         if (unlikely(count &&
5486                      netif_carrier_ok(netdev) &&
5487                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5488                 /* Make sure that anybody stopping the queue after this
5489                  * sees the new next_to_clean.
5490                  */
5491                 smp_mb();
5492                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5493                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5494                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5495
5496                         u64_stats_update_begin(&tx_ring->tx_syncp);
5497                         tx_ring->tx_stats.restart_queue++;
5498                         u64_stats_update_end(&tx_ring->tx_syncp);
5499                 }
5500         }
5501
5502         if (tx_ring->detect_tx_hung) {
5503                 /* Detect a transmit hang in hardware; this serializes the
5504                  * check with the clearing of time_stamp and movement of i */
5505                 tx_ring->detect_tx_hung = false;
5506                 if (tx_ring->buffer_info[i].time_stamp &&
5507                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5508                                (adapter->tx_timeout_factor * HZ)) &&
5509                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5510
5511                         /* detected Tx unit hang */
5512                         dev_err(tx_ring->dev,
5513                                 "Detected Tx Unit Hang\n"
5514                                 "  Tx Queue             <%d>\n"
5515                                 "  TDH                  <%x>\n"
5516                                 "  TDT                  <%x>\n"
5517                                 "  next_to_use          <%x>\n"
5518                                 "  next_to_clean        <%x>\n"
5519                                 "buffer_info[next_to_clean]\n"
5520                                 "  time_stamp           <%lx>\n"
5521                                 "  next_to_watch        <%x>\n"
5522                                 "  jiffies              <%lx>\n"
5523                                 "  desc.status          <%x>\n",
5524                                 tx_ring->queue_index,
5525                                 readl(tx_ring->head),
5526                                 readl(tx_ring->tail),
5527                                 tx_ring->next_to_use,
5528                                 tx_ring->next_to_clean,
5529                                 tx_ring->buffer_info[eop].time_stamp,
5530                                 eop,
5531                                 jiffies,
5532                                 eop_desc->wb.status);
5533                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5534                 }
5535         }
5536         tx_ring->total_bytes += total_bytes;
5537         tx_ring->total_packets += total_packets;
5538         u64_stats_update_begin(&tx_ring->tx_syncp);
5539         tx_ring->tx_stats.bytes += total_bytes;
5540         tx_ring->tx_stats.packets += total_packets;
5541         u64_stats_update_end(&tx_ring->tx_syncp);
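             /* true unless an entire ring's worth of descriptors was cleaned
              * in one pass, in which case more tx work may still be pending */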
5542         return count < tx_ring->count;
5543 }
5544
5545 /**
5546  * igb_receive_skb - helper function to handle rx indications
5547  * @q_vector: structure containing interrupt and ring information
5548  * @skb: packet to send up
5549  * @vlan_tag: vlan tag for packet
5550  **/
5551 static void igb_receive_skb(struct igb_q_vector *q_vector,
5552                             struct sk_buff *skb,
5553                             u16 vlan_tag)
5554 {
5555         struct igb_adapter *adapter = q_vector->adapter;
5556
5557         if (vlan_tag && adapter->vlgrp)
5558                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5559                                  vlan_tag, skb);
5560         else
5561                 napi_gro_receive(&q_vector->napi, skb);
5562 }
5563
5564 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5565                                        u32 status_err, struct sk_buff *skb)
5566 {
5567         skb_checksum_none_assert(skb);
5568
5569         /* bail if the Ignore Checksum bit is set or checksum is disabled via ethtool */
5570         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5571              (status_err & E1000_RXD_STAT_IXSM))
5572                 return;
5573
5574         /* TCP/UDP checksum error bit is set */
5575         if (status_err &
5576             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5577                 /*
5578                  * work around an errata with SCTP packets where the TCPE
5579                  * (aka L4E) bit is set incorrectly on 64 byte (60 byte w/o
5580                  * CRC) packets; let the stack verify the CRC32c instead
5581                  */
5582                 if ((skb->len == 60) &&
5583                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5584                         u64_stats_update_begin(&ring->rx_syncp);
5585                         ring->rx_stats.csum_err++;
5586                         u64_stats_update_end(&ring->rx_syncp);
5587                 }
5588                 /* let the stack verify checksum errors */
5589                 return;
5590         }
5591         /* It must be a TCP or UDP packet with a valid checksum */
5592         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5593                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5594
5595         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5596 }
5597
5598 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5599                                    struct sk_buff *skb)
5600 {
5601         struct igb_adapter *adapter = q_vector->adapter;
5602         struct e1000_hw *hw = &adapter->hw;
5603         u64 regval;
5604
5605         /*
5606          * If this bit is set, then the RX registers contain the time stamp. No
5607          * other packet will be time stamped until we read these registers, so
5608          * read the registers to make them available again. Because only one
5609          * packet can be time stamped at a time, we know that the register
5610          * values must belong to this one here and therefore we don't need to
5611          * compare any of the additional attributes stored for it.
5612          *
5613          * If nothing went wrong, then it should have a shared tx_flags that we
5614          * can turn into a skb_shared_hwtstamps.
5615          */
5616         if (staterr & E1000_RXDADV_STAT_TSIP) {
5617                 u32 *stamp = (u32 *)skb->data;
5618                 regval = le32_to_cpu(*(stamp + 2));
5619                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5620                 skb_pull(skb, IGB_TS_HDR_LEN);
5621         } else {
5622                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5623                         return;
5624
5625                 regval = rd32(E1000_RXSTMPL);
5626                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5627         }
5628
5629         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5630 }

5631 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5632                                union e1000_adv_rx_desc *rx_desc)
5633 {
5634         /* HW will not DMA in data larger than the given buffer, even if it
5635          * parses the (NFS, of course) header to be larger.  In that case, it
5636          * fills the header buffer and spills the rest into the page.
5637          */
5638         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5639                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5640         if (hlen > rx_ring->rx_buffer_len)
5641                 hlen = rx_ring->rx_buffer_len;
5642         return hlen;
5643 }
5644
5645 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5646                                  int *work_done, int budget)
5647 {
5648         struct igb_ring *rx_ring = q_vector->rx_ring;
5649         struct net_device *netdev = rx_ring->netdev;
5650         struct device *dev = rx_ring->dev;
5651         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5652         struct igb_buffer *buffer_info, *next_buffer;
5653         struct sk_buff *skb;
5654         bool cleaned = false;
5655         int cleaned_count = 0;
5656         int current_node = numa_node_id();
5657         unsigned int total_bytes = 0, total_packets = 0;
5658         unsigned int i;
5659         u32 staterr;
5660         u16 length;
5661         u16 vlan_tag;
5662
5663         i = rx_ring->next_to_clean;
5664         buffer_info = &rx_ring->buffer_info[i];
5665         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5666         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5667
5668         while (staterr & E1000_RXD_STAT_DD) {
5669                 if (*work_done >= budget)
5670                         break;
5671                 (*work_done)++;
5672                 rmb(); /* read descriptor and rx_buffer_info after status DD */
5673
5674                 skb = buffer_info->skb;
5675                 prefetch(skb->data - NET_IP_ALIGN);
5676                 buffer_info->skb = NULL;
5677
5678                 i++;
5679                 if (i == rx_ring->count)
5680                         i = 0;
5681
5682                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5683                 prefetch(next_rxd);
5684                 next_buffer = &rx_ring->buffer_info[i];
5685
5686                 length = le16_to_cpu(rx_desc->wb.upper.length);
5687                 cleaned = true;
5688                 cleaned_count++;
5689
5690                 if (buffer_info->dma) {
5691                         dma_unmap_single(dev, buffer_info->dma,
5692                                          rx_ring->rx_buffer_len,
5693                                          DMA_FROM_DEVICE);
5694                         buffer_info->dma = 0;
5695                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5696                                 skb_put(skb, length);
5697                                 goto send_up;
5698                         }
5699                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5700                 }
5701
5702                 if (length) {
5703                         dma_unmap_page(dev, buffer_info->page_dma,
5704                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5705                         buffer_info->page_dma = 0;
5706
5707                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5708                                                 buffer_info->page,
5709                                                 buffer_info->page_offset,
5710                                                 length);
5711
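                             /* reuse the other half of the page only if we
                              * hold the sole reference and it is local to
                              * this NUMA node; otherwise drop it so a fresh
                              * page is allocated later */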
5712                         if ((page_count(buffer_info->page) != 1) ||
5713                             (page_to_nid(buffer_info->page) != current_node))
5714                                 buffer_info->page = NULL;
5715                         else
5716                                 get_page(buffer_info->page);
5717
5718                         skb->len += length;
5719                         skb->data_len += length;
5720                         skb->truesize += length;
5721                 }
5722
5723                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5724                         buffer_info->skb = next_buffer->skb;
5725                         buffer_info->dma = next_buffer->dma;
5726                         next_buffer->skb = skb;
5727                         next_buffer->dma = 0;
5728                         goto next_desc;
5729                 }
5730 send_up:
5731                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5732                         dev_kfree_skb_irq(skb);
5733                         goto next_desc;
5734                 }
5735
5736                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5737                         igb_rx_hwtstamp(q_vector, staterr, skb);
5738                 total_bytes += skb->len;
5739                 total_packets++;
5740
5741                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5742
5743                 skb->protocol = eth_type_trans(skb, netdev);
5744                 skb_record_rx_queue(skb, rx_ring->queue_index);
5745
5746                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5747                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5748
5749                 igb_receive_skb(q_vector, skb, vlan_tag);
5750
5751 next_desc:
5752                 rx_desc->wb.upper.status_error = 0;
5753
5754                 /* return some buffers to hardware, one at a time is too slow */
5755                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5756                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5757                         cleaned_count = 0;
5758                 }
5759
5760                 /* use prefetched values */
5761                 rx_desc = next_rxd;
5762                 buffer_info = next_buffer;
5763                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5764         }
5765
5766         rx_ring->next_to_clean = i;
5767         cleaned_count = igb_desc_unused(rx_ring);
5768
5769         if (cleaned_count)
5770                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5771
5772         rx_ring->total_packets += total_packets;
5773         rx_ring->total_bytes += total_bytes;
5774         u64_stats_update_begin(&rx_ring->rx_syncp);
5775         rx_ring->rx_stats.packets += total_packets;
5776         rx_ring->rx_stats.bytes += total_bytes;
5777         u64_stats_update_end(&rx_ring->rx_syncp);
5778         return cleaned;
5779 }
5780
5781 /**
5782  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5783  * @rx_ring: rx descriptor ring to replace buffers on
      * @cleaned_count: number of buffers to replace
5784  **/
5785 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5786 {
5787         struct net_device *netdev = rx_ring->netdev;
5788         union e1000_adv_rx_desc *rx_desc;
5789         struct igb_buffer *buffer_info;
5790         struct sk_buff *skb;
5791         unsigned int i;
5792         int bufsz;
5793
5794         i = rx_ring->next_to_use;
5795         buffer_info = &rx_ring->buffer_info[i];
5796
5797         bufsz = rx_ring->rx_buffer_len;
5798
5799         while (cleaned_count--) {
5800                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5801
5802                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5803                         if (!buffer_info->page) {
5804                                 buffer_info->page = netdev_alloc_page(netdev);
5805                                 if (unlikely(!buffer_info->page)) {
5806                                         u64_stats_update_begin(&rx_ring->rx_syncp);
5807                                         rx_ring->rx_stats.alloc_failed++;
5808                                         u64_stats_update_end(&rx_ring->rx_syncp);
5809                                         goto no_buffers;
5810                                 }
5811                                 buffer_info->page_offset = 0;
5812                         } else {
5813                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5814                         }
5815                         buffer_info->page_dma =
5816                                 dma_map_page(rx_ring->dev, buffer_info->page,
5817                                              buffer_info->page_offset,
5818                                              PAGE_SIZE / 2,
5819                                              DMA_FROM_DEVICE);
5820                         if (dma_mapping_error(rx_ring->dev,
5821                                               buffer_info->page_dma)) {
5822                                 buffer_info->page_dma = 0;
5823                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5824                                 rx_ring->rx_stats.alloc_failed++;
5825                                 u64_stats_update_end(&rx_ring->rx_syncp);
5826                                 goto no_buffers;
5827                         }
5828                 }
5829
5830                 skb = buffer_info->skb;
5831                 if (!skb) {
5832                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5833                         if (unlikely(!skb)) {
5834                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5835                                 rx_ring->rx_stats.alloc_failed++;
5836                                 u64_stats_update_end(&rx_ring->rx_syncp);
5837                                 goto no_buffers;
5838                         }
5839
5840                         buffer_info->skb = skb;
5841                 }
5842                 if (!buffer_info->dma) {
5843                         buffer_info->dma = dma_map_single(rx_ring->dev,
5844                                                           skb->data,
5845                                                           bufsz,
5846                                                           DMA_FROM_DEVICE);
5847                         if (dma_mapping_error(rx_ring->dev,
5848                                               buffer_info->dma)) {
5849                                 buffer_info->dma = 0;
5850                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5851                                 rx_ring->rx_stats.alloc_failed++;
5852                                 u64_stats_update_end(&rx_ring->rx_syncp);
5853                                 goto no_buffers;
5854                         }
5855                 }
5856                 /* Refresh the desc even if buffer_addrs didn't change because
5857                  * each write-back erases this info. */
5858                 if (bufsz < IGB_RXBUFFER_1024) {
5859                         rx_desc->read.pkt_addr =
5860                              cpu_to_le64(buffer_info->page_dma);
5861                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5862                 } else {
5863                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5864                         rx_desc->read.hdr_addr = 0;
5865                 }
5866
5867                 i++;
5868                 if (i == rx_ring->count)
5869                         i = 0;
5870                 buffer_info = &rx_ring->buffer_info[i];
5871         }
5872
5873 no_buffers:
5874         if (rx_ring->next_to_use != i) {
5875                 rx_ring->next_to_use = i;
5876                 if (i == 0)
5877                         i = (rx_ring->count - 1);
5878                 else
5879                         i--;
5880
5881                 /* Force memory writes to complete before letting h/w
5882                  * know there are new descriptors to fetch.  (Only
5883                  * applicable for weak-ordered memory model archs,
5884                  * such as IA-64). */
5885                 wmb();
5886                 writel(i, rx_ring->tail);
5887         }
5888 }
5889
5890 /**
5891  * igb_mii_ioctl - read or write a PHY register via MII ioctl
5892  * @netdev: network interface device structure
5893  * @ifr: interface request structure holding the mii_ioctl_data
5894  * @cmd: SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG
5895  **/
5896 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5897 {
5898         struct igb_adapter *adapter = netdev_priv(netdev);
5899         struct mii_ioctl_data *data = if_mii(ifr);
5900
5901         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5902                 return -EOPNOTSUPP;
5903
5904         switch (cmd) {
5905         case SIOCGMIIPHY:
5906                 data->phy_id = adapter->hw.phy.addr;
5907                 break;
5908         case SIOCGMIIREG:
5909                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5910                                      &data->val_out))
5911                         return -EIO;
5912                 break;
5913         case SIOCSMIIREG:
5914         default:
5915                 return -EOPNOTSUPP;
5916         }
5917         return 0;
5918 }
5919
5920 /**
5921  * igb_hwtstamp_ioctl - control hardware time stamping
5922  * @netdev: network interface device structure
5923  * @ifr: interface request structure holding the hwtstamp_config
5924  * @cmd: SIOCSHWTSTAMP
5925  *
5926  * Outgoing time stamping can be enabled and disabled. Play nice and
5927  * disable it when requested, although it shouldn't cause any overhead
5928  * when no packet needs it. At most one packet in the queue may be
5929  * marked for time stamping, otherwise it would be impossible to tell
5930  * for sure to which packet the hardware time stamp belongs.
5931  *
5932  * Incoming time stamping has to be configured via the hardware
5933  * filters. Not all combinations are supported, in particular event
5934  * type has to be specified. Matching the kind of event packet is
5935  * not supported, with the exception of "all V2 events regardless of
5936  * level 2 or 4".
5937  *
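      * For illustration only (not code from this driver), userspace might
      * enable time stamping of all received packets roughly as follows,
      * assuming an already open socket descriptor fd and the hypothetical
      * interface name "eth0":
      *
      *   struct hwtstamp_config cfg = { .tx_type   = HWTSTAMP_TX_ON,
      *                                  .rx_filter = HWTSTAMP_FILTER_ALL };
      *   struct ifreq ifr;
      *
      *   memset(&ifr, 0, sizeof(ifr));
      *   strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
      *   ifr.ifr_data = (void *)&cfg;
      *   ioctl(fd, SIOCSHWTSTAMP, &ifr);
      *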
5938  **/
5939 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5940                               struct ifreq *ifr, int cmd)
5941 {
5942         struct igb_adapter *adapter = netdev_priv(netdev);
5943         struct e1000_hw *hw = &adapter->hw;
5944         struct hwtstamp_config config;
5945         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5946         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5947         u32 tsync_rx_cfg = 0;
5948         bool is_l4 = false;
5949         bool is_l2 = false;
5950         u32 regval;
5951
5952         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5953                 return -EFAULT;
5954
5955         /* reserved for future extensions */
5956         if (config.flags)
5957                 return -EINVAL;
5958
5959         switch (config.tx_type) {
5960         case HWTSTAMP_TX_OFF:
5961                 tsync_tx_ctl = 0;
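                     /* fall through */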
5962         case HWTSTAMP_TX_ON:
5963                 break;
5964         default:
5965                 return -ERANGE;
5966         }
5967
5968         switch (config.rx_filter) {
5969         case HWTSTAMP_FILTER_NONE:
5970                 tsync_rx_ctl = 0;
5971                 break;
5972         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5973         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5974         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5975         case HWTSTAMP_FILTER_ALL:
5976                 /*
5977                  * register TSYNCRXCFG must be set, therefore it is not
5978                  * possible to time stamp both Sync and Delay_Req messages
5979                  * => fall back to time stamping all packets
5980                  */
5981                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5982                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5983                 break;
5984         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5985                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5986                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5987                 is_l4 = true;
5988                 break;
5989         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5990                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5991                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5992                 is_l4 = true;
5993                 break;
5994         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5995         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5996                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5997                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5998                 is_l2 = true;
5999                 is_l4 = true;
6000                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6001                 break;
6002         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6003         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6004                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6005                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6006                 is_l2 = true;
6007                 is_l4 = true;
6008                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6009                 break;
6010         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6011         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6012         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6013                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6014                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6015                 is_l2 = true;
6016                 break;
6017         default:
6018                 return -ERANGE;
6019         }
6020
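             /* the 82575 has no hardware time stamping support, so reject
              * any request that tries to enable it */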
6021         if (hw->mac.type == e1000_82575) {
6022                 if (tsync_rx_ctl | tsync_tx_ctl)
6023                         return -EINVAL;
6024                 return 0;
6025         }
6026
6027         /*
6028          * Per-packet timestamping only works if all packets are
6029          * timestamped, so enable timestamping in all packets as
6030          * long as one rx filter was configured.
6031          */
6032         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6033                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6034                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6035         }
6036
6037         /* enable/disable TX */
6038         regval = rd32(E1000_TSYNCTXCTL);
6039         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6040         regval |= tsync_tx_ctl;
6041         wr32(E1000_TSYNCTXCTL, regval);
6042
6043         /* enable/disable RX */
6044         regval = rd32(E1000_TSYNCRXCTL);
6045         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6046         regval |= tsync_rx_ctl;
6047         wr32(E1000_TSYNCRXCTL, regval);
6048
6049         /* define which PTP packets are time stamped */
6050         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6051
6052         /* define ethertype filter for timestamped packets */
6053         if (is_l2)
6054                 wr32(E1000_ETQF(3),
6055                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6056                                  E1000_ETQF_1588 | /* enable timestamping */
6057                                  ETH_P_1588));     /* 1588 eth protocol type */
6058         else
6059                 wr32(E1000_ETQF(3), 0);
6060
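     /* IEEE 1588 PTP event messages use UDP destination port 319 */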
6061 #define PTP_PORT 319
6062         /* L4 Queue Filter[3]: filter by destination port and protocol */
6063         if (is_l4) {
6064                 u32 ftqf = (IPPROTO_UDP /* UDP */
6065                         | E1000_FTQF_VF_BP /* VF not compared */
6066                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6067                         | E1000_FTQF_MASK); /* mask all inputs */
6068                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6069
6070                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6071                 wr32(E1000_IMIREXT(3),
6072                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6073                 if (hw->mac.type == e1000_82576) {
6074                         /* enable source port check */
6075                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6076                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6077                 }
6078                 wr32(E1000_FTQF(3), ftqf);
6079         } else {
6080                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6081         }
6082         wrfl();
6083
6084         adapter->hwtstamp_config = config;
6085
6086         /* clear TX/RX time stamp registers, just to be sure */
6087         regval = rd32(E1000_TXSTMPH);
6088         regval = rd32(E1000_RXSTMPH);
6089
6090         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6091                 -EFAULT : 0;
6092 }
6093
6094 /**
6095  * igb_ioctl - dispatch device specific ioctls
6096  * @netdev: network interface device structure
6097  * @ifr: interface request structure
6098  * @cmd: ioctl command
6099  **/
6100 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6101 {
6102         switch (cmd) {
6103         case SIOCGMIIPHY:
6104         case SIOCGMIIREG:
6105         case SIOCSMIIREG:
6106                 return igb_mii_ioctl(netdev, ifr, cmd);
6107         case SIOCSHWTSTAMP:
6108                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6109         default:
6110                 return -EOPNOTSUPP;
6111         }
6112 }
6113
6114 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6115 {
6116         struct igb_adapter *adapter = hw->back;
6117         u16 cap_offset;
6118
6119         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6120         if (!cap_offset)
6121                 return -E1000_ERR_CONFIG;
6122
6123         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6124
6125         return 0;
6126 }
6127
6128 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6129 {
6130         struct igb_adapter *adapter = hw->back;
6131         u16 cap_offset;
6132
6133         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6134         if (!cap_offset)
6135                 return -E1000_ERR_CONFIG;
6136
6137         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6138
6139         return 0;
6140 }
6141
6142 static void igb_vlan_rx_register(struct net_device *netdev,
6143                                  struct vlan_group *grp)
6144 {
6145         struct igb_adapter *adapter = netdev_priv(netdev);
6146         struct e1000_hw *hw = &adapter->hw;
6147         u32 ctrl, rctl;
6148
6149         igb_irq_disable(adapter);
6150         adapter->vlgrp = grp;
6151
6152         if (grp) {
6153                 /* enable VLAN tag insert/strip */
6154                 ctrl = rd32(E1000_CTRL);
6155                 ctrl |= E1000_CTRL_VME;
6156                 wr32(E1000_CTRL, ctrl);
6157
6158                 /* Disable CFI check */
6159                 rctl = rd32(E1000_RCTL);
6160                 rctl &= ~E1000_RCTL_CFIEN;
6161                 wr32(E1000_RCTL, rctl);
6162         } else {
6163                 /* disable VLAN tag insert/strip */
6164                 ctrl = rd32(E1000_CTRL);
6165                 ctrl &= ~E1000_CTRL_VME;
6166                 wr32(E1000_CTRL, ctrl);
6167         }
6168
6169         igb_rlpml_set(adapter);
6170
6171         if (!test_bit(__IGB_DOWN, &adapter->state))
6172                 igb_irq_enable(adapter);
6173 }
6174
6175 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6176 {
6177         struct igb_adapter *adapter = netdev_priv(netdev);
6178         struct e1000_hw *hw = &adapter->hw;
6179         int pf_id = adapter->vfs_allocated_count;
6180
6181         /* attempt to add filter to vlvf array */
6182         igb_vlvf_set(adapter, vid, true, pf_id);
6183
6184         /* add the filter since PF can receive vlans w/o entry in vlvf */
6185         igb_vfta_set(hw, vid, true);
6186 }
6187
6188 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6189 {
6190         struct igb_adapter *adapter = netdev_priv(netdev);
6191         struct e1000_hw *hw = &adapter->hw;
6192         int pf_id = adapter->vfs_allocated_count;
6193         s32 err;
6194
6195         igb_irq_disable(adapter);
6196         vlan_group_set_device(adapter->vlgrp, vid, NULL);
6197
6198         if (!test_bit(__IGB_DOWN, &adapter->state))
6199                 igb_irq_enable(adapter);
6200
6201         /* remove vlan from VLVF table array */
6202         err = igb_vlvf_set(adapter, vid, false, pf_id);
6203
6204         /* if vid was not present in VLVF just remove it from table */
6205         if (err)
6206                 igb_vfta_set(hw, vid, false);
6207 }
6208
6209 static void igb_restore_vlan(struct igb_adapter *adapter)
6210 {
6211         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6212
6213         if (adapter->vlgrp) {
6214                 u16 vid;
6215                 for (vid = 0; vid < VLAN_N_VID; vid++) {
6216                         if (!vlan_group_get_device(adapter->vlgrp, vid))
6217                                 continue;
6218                         igb_vlan_rx_add_vid(adapter->netdev, vid);
6219                 }
6220         }
6221 }
6222
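     /* spddplx encodes the requested forced link setting as the sum of an
      * ethtool SPEED_* and DUPLEX_* value (typically reached via the
      * ethtool set_settings path) */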
6223 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
6224 {
6225         struct pci_dev *pdev = adapter->pdev;
6226         struct e1000_mac_info *mac = &adapter->hw.mac;
6227
6228         mac->autoneg = 0;
6229
6230         /* Fiber NICs only allow 1000 Mbps full duplex */
6231         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6232                 spddplx != (SPEED_1000 + DUPLEX_FULL)) {
6233                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6234                 return -EINVAL;
6235         }
6236
6237         switch (spddplx) {
6238         case SPEED_10 + DUPLEX_HALF:
6239                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6240                 break;
6241         case SPEED_10 + DUPLEX_FULL:
6242                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6243                 break;
6244         case SPEED_100 + DUPLEX_HALF:
6245                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6246                 break;
6247         case SPEED_100 + DUPLEX_FULL:
6248                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6249                 break;
6250         case SPEED_1000 + DUPLEX_FULL:
6251                 mac->autoneg = 1;
6252                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6253                 break;
6254         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6255         default:
6256                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6257                 return -EINVAL;
6258         }
6259         return 0;
6260 }
6261
6262 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6263 {
6264         struct net_device *netdev = pci_get_drvdata(pdev);
6265         struct igb_adapter *adapter = netdev_priv(netdev);
6266         struct e1000_hw *hw = &adapter->hw;
6267         u32 ctrl, rctl, status;
6268         u32 wufc = adapter->wol;
6269 #ifdef CONFIG_PM
6270         int retval = 0;
6271 #endif
6272
6273         netif_device_detach(netdev);
6274
6275         if (netif_running(netdev))
6276                 igb_close(netdev);
6277
6278         igb_clear_interrupt_scheme(adapter);
6279
6280 #ifdef CONFIG_PM
6281         retval = pci_save_state(pdev);
6282         if (retval)
6283                 return retval;
6284 #endif
6285
6286         status = rd32(E1000_STATUS);
6287         if (status & E1000_STATUS_LU)
6288                 wufc &= ~E1000_WUFC_LNKC;
6289
6290         if (wufc) {
6291                 igb_setup_rctl(adapter);
6292                 igb_set_rx_mode(netdev);
6293
6294                 /* turn on all-multi mode if wake on multicast is enabled */
6295                 if (wufc & E1000_WUFC_MC) {
6296                         rctl = rd32(E1000_RCTL);
6297                         rctl |= E1000_RCTL_MPE;
6298                         wr32(E1000_RCTL, rctl);
6299                 }
6300
6301                 ctrl = rd32(E1000_CTRL);
6302                 /* advertise wake from D3Cold */
6303                 #define E1000_CTRL_ADVD3WUC 0x00100000
6304                 /* phy power management enable */
6305                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6306                 ctrl |= E1000_CTRL_ADVD3WUC;
6307                 wr32(E1000_CTRL, ctrl);
6308
6309                 /* Allow time for pending master requests to run */
6310                 igb_disable_pcie_master(hw);
6311
6312                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6313                 wr32(E1000_WUFC, wufc);
6314         } else {
6315                 wr32(E1000_WUC, 0);
6316                 wr32(E1000_WUFC, 0);
6317         }
6318
6319         *enable_wake = wufc || adapter->en_mng_pt;
6320         if (!*enable_wake)
6321                 igb_power_down_link(adapter);
6322         else
6323                 igb_power_up_link(adapter);
6324
6325         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6326          * would have already happened in close and is redundant. */
6327         igb_release_hw_control(adapter);
6328
6329         pci_disable_device(pdev);
6330
6331         return 0;
6332 }
6333
6334 #ifdef CONFIG_PM
6335 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6336 {
6337         int retval;
6338         bool wake;
6339
6340         retval = __igb_shutdown(pdev, &wake);
6341         if (retval)
6342                 return retval;
6343
6344         if (wake) {
6345                 pci_prepare_to_sleep(pdev);
6346         } else {
6347                 pci_wake_from_d3(pdev, false);
6348                 pci_set_power_state(pdev, PCI_D3hot);
6349         }
6350
6351         return 0;
6352 }
6353
6354 static int igb_resume(struct pci_dev *pdev)
6355 {
6356         struct net_device *netdev = pci_get_drvdata(pdev);
6357         struct igb_adapter *adapter = netdev_priv(netdev);
6358         struct e1000_hw *hw = &adapter->hw;
6359         u32 err;
6360
6361         pci_set_power_state(pdev, PCI_D0);
6362         pci_restore_state(pdev);
6363         pci_save_state(pdev);
6364
6365         err = pci_enable_device_mem(pdev);
6366         if (err) {
6367                 dev_err(&pdev->dev,
6368                         "igb: Cannot enable PCI device from suspend\n");
6369                 return err;
6370         }
6371         pci_set_master(pdev);
6372
6373         pci_enable_wake(pdev, PCI_D3hot, 0);
6374         pci_enable_wake(pdev, PCI_D3cold, 0);
6375
6376         if (igb_init_interrupt_scheme(adapter)) {
6377                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6378                 return -ENOMEM;
6379         }
6380
6381         igb_reset(adapter);
6382
6383         /* let the f/w know that the h/w is now under the control of the
6384          * driver. */
6385         igb_get_hw_control(adapter);
6386
6387         wr32(E1000_WUS, ~0);
6388
6389         if (netif_running(netdev)) {
6390                 err = igb_open(netdev);
6391                 if (err)
6392                         return err;
6393         }
6394
6395         netif_device_attach(netdev);
6396
6397         return 0;
6398 }
6399 #endif
6400
6401 static void igb_shutdown(struct pci_dev *pdev)
6402 {
6403         bool wake;
6404
6405         __igb_shutdown(pdev, &wake);
6406
6407         if (system_state == SYSTEM_POWER_OFF) {
6408                 pci_wake_from_d3(pdev, wake);
6409                 pci_set_power_state(pdev, PCI_D3hot);
6410         }
6411 }
6412
6413 #ifdef CONFIG_NET_POLL_CONTROLLER
6414 /*
6415  * Polling 'interrupt' - used by things like netconsole to send skbs
6416  * without having to re-enable interrupts. It's not called while
6417  * the interrupt routine is executing.
6418  */
6419 static void igb_netpoll(struct net_device *netdev)
6420 {
6421         struct igb_adapter *adapter = netdev_priv(netdev);
6422         struct e1000_hw *hw = &adapter->hw;
6423         int i;
6424
6425         if (!adapter->msix_entries) {
6426                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6427                 igb_irq_disable(adapter);
6428                 napi_schedule(&q_vector->napi);
6429                 return;
6430         }
6431
6432         for (i = 0; i < adapter->num_q_vectors; i++) {
6433                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6434                 wr32(E1000_EIMC, q_vector->eims_value);
6435                 napi_schedule(&q_vector->napi);
6436         }
6437 }
6438 #endif /* CONFIG_NET_POLL_CONTROLLER */
6439
6440 /**
6441  * igb_io_error_detected - called when PCI error is detected
6442  * @pdev: Pointer to PCI device
6443  * @state: The current pci connection state
6444  *
6445  * This function is called after a PCI bus error affecting
6446  * this device has been detected.
6447  */
6448 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6449                                               pci_channel_state_t state)
6450 {
6451         struct net_device *netdev = pci_get_drvdata(pdev);
6452         struct igb_adapter *adapter = netdev_priv(netdev);
6453
6454         netif_device_detach(netdev);
6455
6456         if (state == pci_channel_io_perm_failure)
6457                 return PCI_ERS_RESULT_DISCONNECT;
6458
6459         if (netif_running(netdev))
6460                 igb_down(adapter);
6461         pci_disable_device(pdev);
6462
6463         /* Request a slot reset. */
6464         return PCI_ERS_RESULT_NEED_RESET;
6465 }
6466
6467 /**
6468  * igb_io_slot_reset - called after the pci bus has been reset.
6469  * @pdev: Pointer to PCI device
6470  *
6471  * Restart the card from scratch, as if from a cold-boot. Implementation
6472  * resembles the first-half of the igb_resume routine.
6473  */
6474 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6475 {
6476         struct net_device *netdev = pci_get_drvdata(pdev);
6477         struct igb_adapter *adapter = netdev_priv(netdev);
6478         struct e1000_hw *hw = &adapter->hw;
6479         pci_ers_result_t result;
6480         int err;
6481
6482         if (pci_enable_device_mem(pdev)) {
6483                 dev_err(&pdev->dev,
6484                         "Cannot re-enable PCI device after reset.\n");
6485                 result = PCI_ERS_RESULT_DISCONNECT;
6486         } else {
6487                 pci_set_master(pdev);
6488                 pci_restore_state(pdev);
6489                 pci_save_state(pdev);
6490
6491                 pci_enable_wake(pdev, PCI_D3hot, 0);
6492                 pci_enable_wake(pdev, PCI_D3cold, 0);
6493
6494                 igb_reset(adapter);
6495                 wr32(E1000_WUS, ~0);
6496                 result = PCI_ERS_RESULT_RECOVERED;
6497         }
6498
6499         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6500         if (err) {
6501                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6502                         "failed 0x%0x\n", err);
6503                 /* non-fatal, continue */
6504         }
6505
6506         return result;
6507 }
6508
6509 /**
6510  * igb_io_resume - called when traffic can start flowing again.
6511  * @pdev: Pointer to PCI device
6512  *
6513  * This callback is called when the error recovery driver tells us that
6514  * its OK to resume normal operation. Implementation resembles the
6515  * second-half of the igb_resume routine.
6516  */
6517 static void igb_io_resume(struct pci_dev *pdev)
6518 {
6519         struct net_device *netdev = pci_get_drvdata(pdev);
6520         struct igb_adapter *adapter = netdev_priv(netdev);
6521
6522         if (netif_running(netdev)) {
6523                 if (igb_up(adapter)) {
6524                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6525                         return;
6526                 }
6527         }
6528
6529         netif_device_attach(netdev);
6530
6531         /* let the f/w know that the h/w is now under the control of the
6532          * driver. */
6533         igb_get_hw_control(adapter);
6534 }
6535
6536 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6537                              u8 qsel)
6538 {
6539         u32 rar_low, rar_high;
6540         struct e1000_hw *hw = &adapter->hw;
6541
6542         /* HW expects these in little endian so we reverse the byte order
6543          * from network order (big endian) to little endian
6544          */
6545         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6546                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6547         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6548
6549         /* Indicate to hardware the Address is Valid. */
6550         rar_high |= E1000_RAH_AV;
6551
6552         if (hw->mac.type == e1000_82575)
6553                 rar_high |= E1000_RAH_POOL_1 * qsel;
6554         else
6555                 rar_high |= E1000_RAH_POOL_1 << qsel;
6556
6557         wr32(E1000_RAL(index), rar_low);
6558         wrfl();
6559         wr32(E1000_RAH(index), rar_high);
6560         wrfl();
6561 }
6562
6563 static int igb_set_vf_mac(struct igb_adapter *adapter,
6564                           int vf, unsigned char *mac_addr)
6565 {
6566         struct e1000_hw *hw = &adapter->hw;
6567         /* VF MAC addresses start at the end of the receive addresses and
6568          * move towards the first; as a result a collision should not be possible */
6569         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6570
6571         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6572
6573         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6574
6575         return 0;
6576 }
6577
6578 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6579 {
6580         struct igb_adapter *adapter = netdev_priv(netdev);
6581         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6582                 return -EINVAL;
6583         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6584         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6585         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6586                                       " change effective.\n");
6587         if (test_bit(__IGB_DOWN, &adapter->state)) {
6588                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6589                          " but the PF device is not up.\n");
6590                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6591                          " attempting to use the VF device.\n");
6592         }
6593         return igb_set_vf_mac(adapter, vf, mac);
6594 }
6595
6596 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6597 {
6598         return -EOPNOTSUPP;
6599 }
6600
6601 static int igb_ndo_get_vf_config(struct net_device *netdev,
6602                                  int vf, struct ifla_vf_info *ivi)
6603 {
6604         struct igb_adapter *adapter = netdev_priv(netdev);
6605         if (vf >= adapter->vfs_allocated_count)
6606                 return -EINVAL;
6607         ivi->vf = vf;
6608         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6609         ivi->tx_rate = 0;
6610         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6611         ivi->qos = adapter->vf_data[vf].pf_qos;
6612         return 0;
6613 }
6614
6615 static void igb_vmm_control(struct igb_adapter *adapter)
6616 {
6617         struct e1000_hw *hw = &adapter->hw;
6618         u32 reg;
6619
6620         switch (hw->mac.type) {
6621         case e1000_82575:
6622         default:
6623                 /* replication is not supported for 82575 */
6624                 return;
6625         case e1000_82576:
6626                 /* notify HW that the MAC is adding vlan tags */
6627                 reg = rd32(E1000_DTXCTL);
6628                 reg |= E1000_DTXCTL_VLAN_ADDED;
6629                 wr32(E1000_DTXCTL, reg);
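                     /* fall through */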
6630         case e1000_82580:
6631                 /* enable replication vlan tag stripping */
6632                 reg = rd32(E1000_RPLOLR);
6633                 reg |= E1000_RPLOLR_STRVLAN;
6634                 wr32(E1000_RPLOLR, reg);
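                     /* fall through */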
6635         case e1000_i350:
6636                 /* none of the above registers are supported by i350 */
6637                 break;
6638         }
6639
6640         if (adapter->vfs_allocated_count) {
6641                 igb_vmdq_set_loopback_pf(hw, true);
6642                 igb_vmdq_set_replication_pf(hw, true);
6643                 igb_vmdq_set_anti_spoofing_pf(hw, true,
6644                                                 adapter->vfs_allocated_count);
6645         } else {
6646                 igb_vmdq_set_loopback_pf(hw, false);
6647                 igb_vmdq_set_replication_pf(hw, false);
6648         }
6649 }
6650
6651 /* igb_main.c */