igb: warn if max_vfs limit is exceeded
pandora-kernel.git: drivers/net/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #ifdef CONFIG_IGB_DCA
49 #include <linux/dca.h>
50 #endif
51 #include "igb.h"
52
53 #define DRV_VERSION "2.4.13-k2"
54 char igb_driver_name[] = "igb";
55 char igb_driver_version[] = DRV_VERSION;
56 static const char igb_driver_string[] =
57                                 "Intel(R) Gigabit Ethernet Network Driver";
58 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
59
60 static const struct e1000_info *igb_info_tbl[] = {
61         [board_82575] = &e1000_82575_info,
62 };
63
64 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
87         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
88         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
89         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
90         /* required last entry */
91         {0, }
92 };
93
94 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
95
96 void igb_reset(struct igb_adapter *);
97 static int igb_setup_all_tx_resources(struct igb_adapter *);
98 static int igb_setup_all_rx_resources(struct igb_adapter *);
99 static void igb_free_all_tx_resources(struct igb_adapter *);
100 static void igb_free_all_rx_resources(struct igb_adapter *);
101 static void igb_setup_mrqc(struct igb_adapter *);
102 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
103 static void __devexit igb_remove(struct pci_dev *pdev);
104 static int igb_sw_init(struct igb_adapter *);
105 static int igb_open(struct net_device *);
106 static int igb_close(struct net_device *);
107 static void igb_configure_tx(struct igb_adapter *);
108 static void igb_configure_rx(struct igb_adapter *);
109 static void igb_clean_all_tx_rings(struct igb_adapter *);
110 static void igb_clean_all_rx_rings(struct igb_adapter *);
111 static void igb_clean_tx_ring(struct igb_ring *);
112 static void igb_clean_rx_ring(struct igb_ring *);
113 static void igb_set_rx_mode(struct net_device *);
114 static void igb_update_phy_info(unsigned long);
115 static void igb_watchdog(unsigned long);
116 static void igb_watchdog_task(struct work_struct *);
117 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
118 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
119                                                  struct rtnl_link_stats64 *stats);
120 static int igb_change_mtu(struct net_device *, int);
121 static int igb_set_mac(struct net_device *, void *);
122 static void igb_set_uta(struct igb_adapter *adapter);
123 static irqreturn_t igb_intr(int irq, void *);
124 static irqreturn_t igb_intr_msi(int irq, void *);
125 static irqreturn_t igb_msix_other(int irq, void *);
126 static irqreturn_t igb_msix_ring(int irq, void *);
127 #ifdef CONFIG_IGB_DCA
128 static void igb_update_dca(struct igb_q_vector *);
129 static void igb_setup_dca(struct igb_adapter *);
130 #endif /* CONFIG_IGB_DCA */
131 static bool igb_clean_tx_irq(struct igb_q_vector *);
132 static int igb_poll(struct napi_struct *, int);
133 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
134 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
135 static void igb_tx_timeout(struct net_device *);
136 static void igb_reset_task(struct work_struct *);
137 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
138 static void igb_vlan_rx_add_vid(struct net_device *, u16);
139 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
140 static void igb_restore_vlan(struct igb_adapter *);
141 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
142 static void igb_ping_all_vfs(struct igb_adapter *);
143 static void igb_msg_task(struct igb_adapter *);
144 static void igb_vmm_control(struct igb_adapter *);
145 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
146 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
147 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
148 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
149                                int vf, u16 vlan, u8 qos);
150 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
151 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
152                                  struct ifla_vf_info *ivi);
153 static void igb_check_vf_rate_limit(struct igb_adapter *);
154
155 #ifdef CONFIG_PM
156 static int igb_suspend(struct pci_dev *, pm_message_t);
157 static int igb_resume(struct pci_dev *);
158 #endif
159 static void igb_shutdown(struct pci_dev *);
160 #ifdef CONFIG_IGB_DCA
161 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
162 static struct notifier_block dca_notifier = {
163         .notifier_call  = igb_notify_dca,
164         .next           = NULL,
165         .priority       = 0
166 };
167 #endif
168 #ifdef CONFIG_NET_POLL_CONTROLLER
169 /* for netdump / net console */
170 static void igb_netpoll(struct net_device *);
171 #endif
172 #ifdef CONFIG_PCI_IOV
173 static unsigned int max_vfs = 0;
174 module_param(max_vfs, uint, 0);
175 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
176                  "per physical function");
177 #endif /* CONFIG_PCI_IOV */
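/*
 * Usage sketch (an assumption based on the commit subject above, not code
 * shown in this hunk): the SR-IOV parts handled here support at most 7 VFs
 * per PF, so probe-time code is expected to warn and clamp larger values:
 *
 *   # modprobe igb max_vfs=7
 */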
178
179 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
180                      pci_channel_state_t);
181 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
182 static void igb_io_resume(struct pci_dev *);
183
184 static struct pci_error_handlers igb_err_handler = {
185         .error_detected = igb_io_error_detected,
186         .slot_reset = igb_io_slot_reset,
187         .resume = igb_io_resume,
188 };
189
190
191 static struct pci_driver igb_driver = {
192         .name     = igb_driver_name,
193         .id_table = igb_pci_tbl,
194         .probe    = igb_probe,
195         .remove   = __devexit_p(igb_remove),
196 #ifdef CONFIG_PM
197         /* Power Management Hooks */
198         .suspend  = igb_suspend,
199         .resume   = igb_resume,
200 #endif
201         .shutdown = igb_shutdown,
202         .err_handler = &igb_err_handler
203 };
204
205 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
206 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
207 MODULE_LICENSE("GPL");
208 MODULE_VERSION(DRV_VERSION);
209
210 struct igb_reg_info {
211         u32 ofs;
212         char *name;
213 };
214
215 static const struct igb_reg_info igb_reg_info_tbl[] = {
216
217         /* General Registers */
218         {E1000_CTRL, "CTRL"},
219         {E1000_STATUS, "STATUS"},
220         {E1000_CTRL_EXT, "CTRL_EXT"},
221
222         /* Interrupt Registers */
223         {E1000_ICR, "ICR"},
224
225         /* RX Registers */
226         {E1000_RCTL, "RCTL"},
227         {E1000_RDLEN(0), "RDLEN"},
228         {E1000_RDH(0), "RDH"},
229         {E1000_RDT(0), "RDT"},
230         {E1000_RXDCTL(0), "RXDCTL"},
231         {E1000_RDBAL(0), "RDBAL"},
232         {E1000_RDBAH(0), "RDBAH"},
233
234         /* TX Registers */
235         {E1000_TCTL, "TCTL"},
236         {E1000_TDBAL(0), "TDBAL"},
237         {E1000_TDBAH(0), "TDBAH"},
238         {E1000_TDLEN(0), "TDLEN"},
239         {E1000_TDH(0), "TDH"},
240         {E1000_TDT(0), "TDT"},
241         {E1000_TXDCTL(0), "TXDCTL"},
242         {E1000_TDFH, "TDFH"},
243         {E1000_TDFT, "TDFT"},
244         {E1000_TDFHS, "TDFHS"},
245         {E1000_TDFPC, "TDFPC"},
246
247         /* List Terminator */
248         {}
249 };
250
251 /*
252  * igb_regdump - register printout routine
253  */
254 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
255 {
256         int n = 0;
257         char rname[16];
258         u32 regs[8];
259
260         switch (reginfo->ofs) {
261         case E1000_RDLEN(0):
262                 for (n = 0; n < 4; n++)
263                         regs[n] = rd32(E1000_RDLEN(n));
264                 break;
265         case E1000_RDH(0):
266                 for (n = 0; n < 4; n++)
267                         regs[n] = rd32(E1000_RDH(n));
268                 break;
269         case E1000_RDT(0):
270                 for (n = 0; n < 4; n++)
271                         regs[n] = rd32(E1000_RDT(n));
272                 break;
273         case E1000_RXDCTL(0):
274                 for (n = 0; n < 4; n++)
275                         regs[n] = rd32(E1000_RXDCTL(n));
276                 break;
277         case E1000_RDBAL(0):
278                 for (n = 0; n < 4; n++)
279                         regs[n] = rd32(E1000_RDBAL(n));
280                 break;
281         case E1000_RDBAH(0):
282                 for (n = 0; n < 4; n++)
283                         regs[n] = rd32(E1000_RDBAH(n));
284                 break;
285         case E1000_TDBAL(0):
286                 for (n = 0; n < 4; n++)
287                         regs[n] = rd32(E1000_TDBAL(n));
288                 break;
289         case E1000_TDBAH(0):
290                 for (n = 0; n < 4; n++)
291                         regs[n] = rd32(E1000_TDBAH(n));
292                 break;
293         case E1000_TDLEN(0):
294                 for (n = 0; n < 4; n++)
295                         regs[n] = rd32(E1000_TDLEN(n));
296                 break;
297         case E1000_TDH(0):
298                 for (n = 0; n < 4; n++)
299                         regs[n] = rd32(E1000_TDH(n));
300                 break;
301         case E1000_TDT(0):
302                 for (n = 0; n < 4; n++)
303                         regs[n] = rd32(E1000_TDT(n));
304                 break;
305         case E1000_TXDCTL(0):
306                 for (n = 0; n < 4; n++)
307                         regs[n] = rd32(E1000_TXDCTL(n));
308                 break;
309         default:
310                 printk(KERN_INFO "%-15s %08x\n",
311                         reginfo->name, rd32(reginfo->ofs));
312                 return;
313         }
314
315         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
316         printk(KERN_INFO "%-15s ", rname);
317         for (n = 0; n < 4; n++)
318                 printk(KERN_CONT "%08x ", regs[n]);
319         printk(KERN_CONT "\n");
320 }
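/*
 * Illustrative output of igb_regdump() for a queue-indexed register (the
 * values are invented for the example): the four instances print on one
 * row under a "[0-3]" suffix, e.g.
 *
 *   RDLEN[0-3]      00000400 00000400 00000400 00000400
 */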
321
322 /*
323  * igb_dump - Print registers, tx-rings and rx-rings
324  */
325 static void igb_dump(struct igb_adapter *adapter)
326 {
327         struct net_device *netdev = adapter->netdev;
328         struct e1000_hw *hw = &adapter->hw;
329         struct igb_reg_info *reginfo;
330         int n = 0;
331         struct igb_ring *tx_ring;
332         union e1000_adv_tx_desc *tx_desc;
333         struct my_u0 { u64 a; u64 b; } *u0;
334         struct igb_buffer *buffer_info;
335         struct igb_ring *rx_ring;
336         union e1000_adv_rx_desc *rx_desc;
337         u32 staterr;
338         int i = 0;
339
340         if (!netif_msg_hw(adapter))
341                 return;
342
343         /* Print netdevice Info */
344         if (netdev) {
345                 dev_info(&adapter->pdev->dev, "Net device Info\n");
346                 printk(KERN_INFO "Device Name     state            "
347                         "trans_start      last_rx\n");
348                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
349                 netdev->name,
350                 netdev->state,
351                 netdev->trans_start,
352                 netdev->last_rx);
353         }
354
355         /* Print Registers */
356         dev_info(&adapter->pdev->dev, "Register Dump\n");
357         printk(KERN_INFO " Register Name   Value\n");
358         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
359              reginfo->name; reginfo++) {
360                 igb_regdump(hw, reginfo);
361         }
362
363         /* Print TX Ring Summary */
364         if (!netdev || !netif_running(netdev))
365                 goto exit;
366
367         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
368         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
369                 " leng ntw timestamp\n");
370         for (n = 0; n < adapter->num_tx_queues; n++) {
371                 tx_ring = adapter->tx_ring[n];
372                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
373                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
374                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
375                            (u64)buffer_info->dma,
376                            buffer_info->length,
377                            buffer_info->next_to_watch,
378                            (u64)buffer_info->time_stamp);
379         }
380
381         /* Print TX Rings */
382         if (!netif_msg_tx_done(adapter))
383                 goto rx_ring_summary;
384
385         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
386
387         /* Transmit Descriptor Formats
388          *
389          * Advanced Transmit Descriptor
390          *   +--------------------------------------------------------------+
391          * 0 |         Buffer Address [63:0]                                |
392          *   +--------------------------------------------------------------+
393          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
394          *   +--------------------------------------------------------------+
395          *   63      46 45    40 39 38 36 35 32 31   24             15       0
396          */
397
398         for (n = 0; n < adapter->num_tx_queues; n++) {
399                 tx_ring = adapter->tx_ring[n];
400                 printk(KERN_INFO "------------------------------------\n");
401                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
402                 printk(KERN_INFO "------------------------------------\n");
403                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
404                         "[PlPOCIStDDM Ln] [bi->dma       ] "
405                         "leng  ntw timestamp        bi->skb\n");
406
407                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
408                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
409                         buffer_info = &tx_ring->buffer_info[i];
410                         u0 = (struct my_u0 *)tx_desc;
411                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
412                                 " %04X  %3X %016llX %p", i,
413                                 le64_to_cpu(u0->a),
414                                 le64_to_cpu(u0->b),
415                                 (u64)buffer_info->dma,
416                                 buffer_info->length,
417                                 buffer_info->next_to_watch,
418                                 (u64)buffer_info->time_stamp,
419                                 buffer_info->skb);
420                         if (i == tx_ring->next_to_use &&
421                                 i == tx_ring->next_to_clean)
422                                 printk(KERN_CONT " NTC/U\n");
423                         else if (i == tx_ring->next_to_use)
424                                 printk(KERN_CONT " NTU\n");
425                         else if (i == tx_ring->next_to_clean)
426                                 printk(KERN_CONT " NTC\n");
427                         else
428                                 printk(KERN_CONT "\n");
429
430                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
431                                 print_hex_dump(KERN_INFO, "",
432                                         DUMP_PREFIX_ADDRESS,
433                                         16, 1, phys_to_virt(buffer_info->dma),
434                                         buffer_info->length, true);
435                 }
436         }
437
438         /* Print RX Rings Summary */
439 rx_ring_summary:
440         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
441         printk(KERN_INFO "Queue [NTU] [NTC]\n");
442         for (n = 0; n < adapter->num_rx_queues; n++) {
443                 rx_ring = adapter->rx_ring[n];
444                 printk(KERN_INFO " %5d %5X %5X\n", n,
445                            rx_ring->next_to_use, rx_ring->next_to_clean);
446         }
447
448         /* Print RX Rings */
449         if (!netif_msg_rx_status(adapter))
450                 goto exit;
451
452         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
453
454         /* Advanced Receive Descriptor (Read) Format
455          *    63                                           1        0
456          *    +-----------------------------------------------------+
457          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
458          *    +----------------------------------------------+------+
459          *  8 |       Header Buffer Address [63:1]           |  DD  |
460          *    +-----------------------------------------------------+
461          *
462          *
463          * Advanced Receive Descriptor (Write-Back) Format
464          *
465          *   63       48 47    32 31  30      21 20 17 16   4 3     0
466          *   +------------------------------------------------------+
467          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
468          *   | Checksum   Ident  |   |           |    | Type | Type |
469          *   +------------------------------------------------------+
470          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
471          *   +------------------------------------------------------+
472          *   63       48 47    32 31            20 19               0
473          */
474
475         for (n = 0; n < adapter->num_rx_queues; n++) {
476                 rx_ring = adapter->rx_ring[n];
477                 printk(KERN_INFO "------------------------------------\n");
478                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
479                 printk(KERN_INFO "------------------------------------\n");
480                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
481                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
482                         "<-- Adv Rx Read format\n");
483                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
484                         "[vl er S cks ln] ---------------- [bi->skb] "
485                         "<-- Adv Rx Write-Back format\n");
486
487                 for (i = 0; i < rx_ring->count; i++) {
488                         buffer_info = &rx_ring->buffer_info[i];
489                         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
490                         u0 = (struct my_u0 *)rx_desc;
491                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
492                         if (staterr & E1000_RXD_STAT_DD) {
493                                 /* Descriptor Done */
494                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
495                                         "%016llX ---------------- %p", i,
496                                         le64_to_cpu(u0->a),
497                                         le64_to_cpu(u0->b),
498                                         buffer_info->skb);
499                         } else {
500                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
501                                         "%016llX %016llX %p", i,
502                                         le64_to_cpu(u0->a),
503                                         le64_to_cpu(u0->b),
504                                         (u64)buffer_info->dma,
505                                         buffer_info->skb);
506
507                                 if (netif_msg_pktdata(adapter)) {
508                                         print_hex_dump(KERN_INFO, "",
509                                                 DUMP_PREFIX_ADDRESS,
510                                                 16, 1,
511                                                 phys_to_virt(buffer_info->dma),
512                                                 rx_ring->rx_buffer_len, true);
513                                         if (rx_ring->rx_buffer_len
514                                                 < IGB_RXBUFFER_1024)
515                                                 print_hex_dump(KERN_INFO, "",
516                                                   DUMP_PREFIX_ADDRESS,
517                                                   16, 1,
518                                                   phys_to_virt(
519                                                     buffer_info->page_dma +
520                                                     buffer_info->page_offset),
521                                                   PAGE_SIZE/2, true);
522                                 }
523                         }
524
525                         if (i == rx_ring->next_to_use)
526                                 printk(KERN_CONT " NTU\n");
527                         else if (i == rx_ring->next_to_clean)
528                                 printk(KERN_CONT " NTC\n");
529                         else
530                                 printk(KERN_CONT "\n");
531
532                 }
533         }
534
535 exit:
536         return;
537 }
538
539
540 /**
541  * igb_read_clock - read raw cycle counter (to be used by time counter)
542  */
543 static cycle_t igb_read_clock(const struct cyclecounter *tc)
544 {
545         struct igb_adapter *adapter =
546                 container_of(tc, struct igb_adapter, cycles);
547         struct e1000_hw *hw = &adapter->hw;
548         u64 stamp = 0;
549         int shift = 0;
550
551         /*
552          * The timestamp latches on lowest register read. For the 82580
553          * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
554          * adjusted TIMINCA, so SYSTIMR reads as all 0s and we ignore it.
555          */
556         if (hw->mac.type == e1000_82580) {
557                 stamp = rd32(E1000_SYSTIMR) >> 8;
558                 shift = IGB_82580_TSYNC_SHIFT;
559         }
560
561         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
562         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
563         return stamp;
564 }
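/*
 * Sketch of the assembled 64-bit stamp, following directly from the code
 * above (shift is 0 except on 82580, where it is IGB_82580_TSYNC_SHIFT):
 *
 *   stamp = ((u64)SYSTIMH << (shift + 32)) | ((u64)SYSTIML << shift)
 *
 * On 82580 the latched SYSTIMR value is discarded, so the low shift bits
 * stay zero.
 */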
565
566 /**
567  * igb_get_hw_dev - return device
568  * used by hardware layer to print debugging information
569  **/
570 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
571 {
572         struct igb_adapter *adapter = hw->back;
573         return adapter->netdev;
574 }
575
576 /**
577  * igb_init_module - Driver Registration Routine
578  *
579  * igb_init_module is the first routine called when the driver is
580  * loaded. All it does is register with the PCI subsystem.
581  **/
582 static int __init igb_init_module(void)
583 {
584         int ret;
585         printk(KERN_INFO "%s - version %s\n",
586                igb_driver_string, igb_driver_version);
587
588         printk(KERN_INFO "%s\n", igb_copyright);
589
590 #ifdef CONFIG_IGB_DCA
591         dca_register_notify(&dca_notifier);
592 #endif
593         ret = pci_register_driver(&igb_driver);
594         return ret;
595 }
596
597 module_init(igb_init_module);
598
599 /**
600  * igb_exit_module - Driver Exit Cleanup Routine
601  *
602  * igb_exit_module is called just before the driver is removed
603  * from memory.
604  **/
605 static void __exit igb_exit_module(void)
606 {
607 #ifdef CONFIG_IGB_DCA
608         dca_unregister_notify(&dca_notifier);
609 #endif
610         pci_unregister_driver(&igb_driver);
611 }
612
613 module_exit(igb_exit_module);
614
615 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
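/*
 * Worked example of the macro above: Q_IDX_82576() interleaves the low and
 * high halves of the 82576's 16-queue space, matching the VF scheme where
 * VF n owns queues n and n + 8:
 *
 *   i               0  1  2  3  4  5  6  7
 *   Q_IDX_82576(i)  0  8  1  9  2 10  3 11
 */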
616 /**
617  * igb_cache_ring_register - Descriptor ring to register mapping
618  * @adapter: board private structure to initialize
619  *
620  * Once we know the feature-set enabled for the device, we'll cache
621  * the register offset the descriptor ring is assigned to.
622  **/
623 static void igb_cache_ring_register(struct igb_adapter *adapter)
624 {
625         int i = 0, j = 0;
626         u32 rbase_offset = adapter->vfs_allocated_count;
627
628         switch (adapter->hw.mac.type) {
629         case e1000_82576:
630                 /* The queues are allocated for virtualization such that VF 0
631                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
632                  * In order to avoid collision we start at the first free queue
633                  * and continue consuming queues in the same sequence
634                  */
635                 if (adapter->vfs_allocated_count) {
636                         for (; i < adapter->rss_queues; i++)
637                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
638                                                                Q_IDX_82576(i);
639                 }
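                /* Fall through: any remaining RX rings and all TX rings are
                 * mapped linearly from rbase_offset, as for the other MACs. */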
640         case e1000_82575:
641         case e1000_82580:
642         case e1000_i350:
643         default:
644                 for (; i < adapter->num_rx_queues; i++)
645                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
646                 for (; j < adapter->num_tx_queues; j++)
647                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
648                 break;
649         }
650 }
651
652 static void igb_free_queues(struct igb_adapter *adapter)
653 {
654         int i;
655
656         for (i = 0; i < adapter->num_tx_queues; i++) {
657                 kfree(adapter->tx_ring[i]);
658                 adapter->tx_ring[i] = NULL;
659         }
660         for (i = 0; i < adapter->num_rx_queues; i++) {
661                 kfree(adapter->rx_ring[i]);
662                 adapter->rx_ring[i] = NULL;
663         }
664         adapter->num_rx_queues = 0;
665         adapter->num_tx_queues = 0;
666 }
667
668 /**
669  * igb_alloc_queues - Allocate memory for all rings
670  * @adapter: board private structure to initialize
671  *
672  * We allocate one ring per queue at run-time since we don't know the
673  * number of queues at compile-time.
674  **/
675 static int igb_alloc_queues(struct igb_adapter *adapter)
676 {
677         struct igb_ring *ring;
678         int i;
679
680         for (i = 0; i < adapter->num_tx_queues; i++) {
681                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
682                 if (!ring)
683                         goto err;
684                 ring->count = adapter->tx_ring_count;
685                 ring->queue_index = i;
686                 ring->dev = &adapter->pdev->dev;
687                 ring->netdev = adapter->netdev;
688                 /* For 82575, context index must be unique per ring. */
689                 if (adapter->hw.mac.type == e1000_82575)
690                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
691                 adapter->tx_ring[i] = ring;
692         }
693
694         for (i = 0; i < adapter->num_rx_queues; i++) {
695                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
696                 if (!ring)
697                         goto err;
698                 ring->count = adapter->rx_ring_count;
699                 ring->queue_index = i;
700                 ring->dev = &adapter->pdev->dev;
701                 ring->netdev = adapter->netdev;
702                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
703                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
704                 /* set flag indicating ring supports SCTP checksum offload */
705                 if (adapter->hw.mac.type >= e1000_82576)
706                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
707                 adapter->rx_ring[i] = ring;
708         }
709
710         igb_cache_ring_register(adapter);
711
712         return 0;
713
714 err:
715         igb_free_queues(adapter);
716
717         return -ENOMEM;
718 }
719
720 #define IGB_N0_QUEUE -1
721 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
722 {
723         u32 msixbm = 0;
724         struct igb_adapter *adapter = q_vector->adapter;
725         struct e1000_hw *hw = &adapter->hw;
726         u32 ivar, index;
727         int rx_queue = IGB_N0_QUEUE;
728         int tx_queue = IGB_N0_QUEUE;
729
730         if (q_vector->rx_ring)
731                 rx_queue = q_vector->rx_ring->reg_idx;
732         if (q_vector->tx_ring)
733                 tx_queue = q_vector->tx_ring->reg_idx;
734
735         switch (hw->mac.type) {
736         case e1000_82575:
737                 /* The 82575 assigns vectors using a bitmask, which matches the
738                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
739                    or more queues to a vector, we write the appropriate bits
740                    into the MSIXBM register for that vector. */
741                 if (rx_queue > IGB_N0_QUEUE)
742                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
743                 if (tx_queue > IGB_N0_QUEUE)
744                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
745                 if (!adapter->msix_entries && msix_vector == 0)
746                         msixbm |= E1000_EIMS_OTHER;
747                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
748                 q_vector->eims_value = msixbm;
749                 break;
750         case e1000_82576:
751                 /* 82576 uses a table-based method for assigning vectors.
752                    Each queue has a single entry in the table to which we write
753                    a vector number along with a "valid" bit.  Sadly, the layout
754                    of the table is somewhat counterintuitive. */
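                /*
                 * Byte layout of an IVAR0 entry as programmed below (an
                 * illustration inferred from this code, not quoted from
                 * the datasheet); each byte holds the MSI-X vector number
                 * OR'd with E1000_IVAR_VALID:
                 *
                 *   byte 0: RX queue (q & 0x7)        byte 1: TX queue (q & 0x7)
                 *   byte 2: RX queue ((q & 0x7) + 8)  byte 3: TX queue ((q & 0x7) + 8)
                 */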
755                 if (rx_queue > IGB_N0_QUEUE) {
756                         index = (rx_queue & 0x7);
757                         ivar = array_rd32(E1000_IVAR0, index);
758                         if (rx_queue < 8) {
759                                 /* vector goes into low byte of register */
760                                 ivar = ivar & 0xFFFFFF00;
761                                 ivar |= msix_vector | E1000_IVAR_VALID;
762                         } else {
763                                 /* vector goes into third byte of register */
764                                 ivar = ivar & 0xFF00FFFF;
765                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
766                         }
767                         array_wr32(E1000_IVAR0, index, ivar);
768                 }
769                 if (tx_queue > IGB_N0_QUEUE) {
770                         index = (tx_queue & 0x7);
771                         ivar = array_rd32(E1000_IVAR0, index);
772                         if (tx_queue < 8) {
773                                 /* vector goes into second byte of register */
774                                 ivar = ivar & 0xFFFF00FF;
775                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
776                         } else {
777                                 /* vector goes into high byte of register */
778                                 ivar = ivar & 0x00FFFFFF;
779                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
780                         }
781                         array_wr32(E1000_IVAR0, index, ivar);
782                 }
783                 q_vector->eims_value = 1 << msix_vector;
784                 break;
785         case e1000_82580:
786         case e1000_i350:
787                 /* 82580 uses the same table-based approach as 82576 but has fewer
788                    entries; as a result we carry over for queues greater than 4. */
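                /*
                 * Inferred layout for 82580/i350 (two queues per IVAR0
                 * entry, index = queue >> 1; an illustration based on the
                 * code below):
                 *
                 *   byte 0: RX queue 2*index        byte 1: TX queue 2*index
                 *   byte 2: RX queue 2*index + 1    byte 3: TX queue 2*index + 1
                 */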
789                 if (rx_queue > IGB_N0_QUEUE) {
790                         index = (rx_queue >> 1);
791                         ivar = array_rd32(E1000_IVAR0, index);
792                         if (rx_queue & 0x1) {
793                                 /* vector goes into third byte of register */
794                                 ivar = ivar & 0xFF00FFFF;
795                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
796                         } else {
797                                 /* vector goes into low byte of register */
798                                 ivar = ivar & 0xFFFFFF00;
799                                 ivar |= msix_vector | E1000_IVAR_VALID;
800                         }
801                         array_wr32(E1000_IVAR0, index, ivar);
802                 }
803                 if (tx_queue > IGB_N0_QUEUE) {
804                         index = (tx_queue >> 1);
805                         ivar = array_rd32(E1000_IVAR0, index);
806                         if (tx_queue & 0x1) {
807                                 /* vector goes into high byte of register */
808                                 ivar = ivar & 0x00FFFFFF;
809                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
810                         } else {
811                                 /* vector goes into second byte of register */
812                                 ivar = ivar & 0xFFFF00FF;
813                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
814                         }
815                         array_wr32(E1000_IVAR0, index, ivar);
816                 }
817                 q_vector->eims_value = 1 << msix_vector;
818                 break;
819         default:
820                 BUG();
821                 break;
822         }
823
824         /* add q_vector eims value to global eims_enable_mask */
825         adapter->eims_enable_mask |= q_vector->eims_value;
826
827         /* configure q_vector to set itr on first interrupt */
828         q_vector->set_itr = 1;
829 }
830
831 /**
832  * igb_configure_msix - Configure MSI-X hardware
833  *
834  * igb_configure_msix sets up the hardware to properly
835  * generate MSI-X interrupts.
836  **/
837 static void igb_configure_msix(struct igb_adapter *adapter)
838 {
839         u32 tmp;
840         int i, vector = 0;
841         struct e1000_hw *hw = &adapter->hw;
842
843         adapter->eims_enable_mask = 0;
844
845         /* set vector for other causes, i.e. link changes */
846         switch (hw->mac.type) {
847         case e1000_82575:
848                 tmp = rd32(E1000_CTRL_EXT);
849                 /* enable MSI-X PBA support */
850                 tmp |= E1000_CTRL_EXT_PBA_CLR;
851
852                 /* Auto-Mask interrupts upon ICR read. */
853                 tmp |= E1000_CTRL_EXT_EIAME;
854                 tmp |= E1000_CTRL_EXT_IRCA;
855
856                 wr32(E1000_CTRL_EXT, tmp);
857
858                 /* enable msix_other interrupt */
859                 array_wr32(E1000_MSIXBM(0), vector++,
860                                       E1000_EIMS_OTHER);
861                 adapter->eims_other = E1000_EIMS_OTHER;
862
863                 break;
864
865         case e1000_82576:
866         case e1000_82580:
867         case e1000_i350:
868                 /* Turn on MSI-X capability first, or our settings
869                  * won't stick.  And it will take days to debug. */
870                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
871                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
872                                 E1000_GPIE_NSICR);
873
874                 /* enable msix_other interrupt */
875                 adapter->eims_other = 1 << vector;
876                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
877
878                 wr32(E1000_IVAR_MISC, tmp);
879                 break;
880         default:
881                 /* do nothing, since nothing else supports MSI-X */
882                 break;
883         } /* switch (hw->mac.type) */
884
885         adapter->eims_enable_mask |= adapter->eims_other;
886
887         for (i = 0; i < adapter->num_q_vectors; i++)
888                 igb_assign_vector(adapter->q_vector[i], vector++);
889
890         wrfl();
891 }
892
893 /**
894  * igb_request_msix - Initialize MSI-X interrupts
895  *
896  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
897  * kernel.
898  **/
899 static int igb_request_msix(struct igb_adapter *adapter)
900 {
901         struct net_device *netdev = adapter->netdev;
902         struct e1000_hw *hw = &adapter->hw;
903         int i, err = 0, vector = 0;
904
905         err = request_irq(adapter->msix_entries[vector].vector,
906                           igb_msix_other, 0, netdev->name, adapter);
907         if (err)
908                 goto out;
909         vector++;
910
911         for (i = 0; i < adapter->num_q_vectors; i++) {
912                 struct igb_q_vector *q_vector = adapter->q_vector[i];
913
914                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
915
916                 if (q_vector->rx_ring && q_vector->tx_ring)
917                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
918                                 q_vector->rx_ring->queue_index);
919                 else if (q_vector->tx_ring)
920                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
921                                 q_vector->tx_ring->queue_index);
922                 else if (q_vector->rx_ring)
923                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
924                                 q_vector->rx_ring->queue_index);
925                 else
926                         sprintf(q_vector->name, "%s-unused", netdev->name);
927
928                 err = request_irq(adapter->msix_entries[vector].vector,
929                                   igb_msix_ring, 0, q_vector->name,
930                                   q_vector);
931                 if (err)
932                         goto out;
933                 vector++;
934         }
935
936         igb_configure_msix(adapter);
937         return 0;
938 out:
939         return err;
940 }
941
942 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
943 {
944         if (adapter->msix_entries) {
945                 pci_disable_msix(adapter->pdev);
946                 kfree(adapter->msix_entries);
947                 adapter->msix_entries = NULL;
948         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
949                 pci_disable_msi(adapter->pdev);
950         }
951 }
952
953 /**
954  * igb_free_q_vectors - Free memory allocated for interrupt vectors
955  * @adapter: board private structure to initialize
956  *
957  * This function frees the memory allocated to the q_vectors.  In addition if
958  * NAPI is enabled it will delete any references to the NAPI struct prior
959  * to freeing the q_vector.
960  **/
961 static void igb_free_q_vectors(struct igb_adapter *adapter)
962 {
963         int v_idx;
964
965         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
966                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
967                 adapter->q_vector[v_idx] = NULL;
968                 if (!q_vector)
969                         continue;
970                 netif_napi_del(&q_vector->napi);
971                 kfree(q_vector);
972         }
973         adapter->num_q_vectors = 0;
974 }
975
976 /**
977  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
978  *
979  * This function resets the device so that it has 0 rx queues, tx queues, and
980  * MSI-X interrupts allocated.
981  */
982 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
983 {
984         igb_free_queues(adapter);
985         igb_free_q_vectors(adapter);
986         igb_reset_interrupt_capability(adapter);
987 }
988
989 /**
990  * igb_set_interrupt_capability - set MSI or MSI-X if supported
991  *
992  * Attempt to configure interrupts using the best available
993  * capabilities of the hardware and kernel.
994  **/
995 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
996 {
997         int err;
998         int numvecs, i;
999
1000         /* Number of supported queues. */
1001         adapter->num_rx_queues = adapter->rss_queues;
1002         if (adapter->vfs_allocated_count)
1003                 adapter->num_tx_queues = 1;
1004         else
1005                 adapter->num_tx_queues = adapter->rss_queues;
1006
1007         /* start with one vector for every rx queue */
1008         numvecs = adapter->num_rx_queues;
1009
1010         /* if tx handler is separate add 1 for every tx queue */
1011         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1012                 numvecs += adapter->num_tx_queues;
1013
1014         /* store the number of vectors reserved for queues */
1015         adapter->num_q_vectors = numvecs;
1016
1017         /* add 1 vector for link status interrupts */
1018         numvecs++;
1019         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1020                                         GFP_KERNEL);
1021         if (!adapter->msix_entries)
1022                 goto msi_only;
1023
1024         for (i = 0; i < numvecs; i++)
1025                 adapter->msix_entries[i].entry = i;
1026
1027         err = pci_enable_msix(adapter->pdev,
1028                               adapter->msix_entries,
1029                               numvecs);
1030         if (err == 0)
1031                 goto out;
1032
1033         igb_reset_interrupt_capability(adapter);
1034
1035         /* If we can't do MSI-X, try MSI */
1036 msi_only:
1037 #ifdef CONFIG_PCI_IOV
1038         /* disable SR-IOV for non MSI-X configurations */
1039         if (adapter->vf_data) {
1040                 struct e1000_hw *hw = &adapter->hw;
1041                 /* disable iov and allow time for transactions to clear */
1042                 pci_disable_sriov(adapter->pdev);
1043                 msleep(500);
1044
1045                 kfree(adapter->vf_data);
1046                 adapter->vf_data = NULL;
1047                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1048                 msleep(100);
1049                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1050         }
1051 #endif
1052         adapter->vfs_allocated_count = 0;
1053         adapter->rss_queues = 1;
1054         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1055         adapter->num_rx_queues = 1;
1056         adapter->num_tx_queues = 1;
1057         adapter->num_q_vectors = 1;
1058         if (!pci_enable_msi(adapter->pdev))
1059                 adapter->flags |= IGB_FLAG_HAS_MSI;
1060 out:
1061         /* Notify the stack of the (possibly) reduced queue counts. */
1062         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1063         return netif_set_real_num_rx_queues(adapter->netdev,
1064                                             adapter->num_rx_queues);
1065 }
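/*
 * Worked example of the vector counting above: with rss_queues == 4, no
 * VFs, and queue pairing disabled, we request 4 RX + 4 TX + 1 link-status
 * vector == 9 MSI-X vectors; with IGB_FLAG_QUEUE_PAIRS set, TX shares the
 * RX vectors and we request 4 + 1 == 5.
 */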
1066
1067 /**
1068  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1069  * @adapter: board private structure to initialize
1070  *
1071  * We allocate one q_vector per queue interrupt.  If allocation fails we
1072  * return -ENOMEM.
1073  **/
1074 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1075 {
1076         struct igb_q_vector *q_vector;
1077         struct e1000_hw *hw = &adapter->hw;
1078         int v_idx;
1079
1080         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1081                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1082                 if (!q_vector)
1083                         goto err_out;
1084                 q_vector->adapter = adapter;
1085                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1086                 q_vector->itr_val = IGB_START_ITR;
1087                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1088                 adapter->q_vector[v_idx] = q_vector;
1089         }
1090         return 0;
1091
1092 err_out:
1093         igb_free_q_vectors(adapter);
1094         return -ENOMEM;
1095 }
1096
1097 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1098                                       int ring_idx, int v_idx)
1099 {
1100         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1101
1102         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1103         q_vector->rx_ring->q_vector = q_vector;
1104         q_vector->itr_val = adapter->rx_itr_setting;
1105         if (q_vector->itr_val && q_vector->itr_val <= 3)
1106                 q_vector->itr_val = IGB_START_ITR;
1107 }
1108
1109 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1110                                       int ring_idx, int v_idx)
1111 {
1112         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1113
1114         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1115         q_vector->tx_ring->q_vector = q_vector;
1116         q_vector->itr_val = adapter->tx_itr_setting;
1117         if (q_vector->itr_val && q_vector->itr_val <= 3)
1118                 q_vector->itr_val = IGB_START_ITR;
1119 }
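/*
 * Note (an assumption from igb's ITR conventions, not stated in this file):
 * *_itr_setting values of 3 or less encode a moderation mode rather than an
 * interval in usecs, which is why the helpers above program IGB_START_ITR
 * as the initial hardware value in that case.
 */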
1120
1121 /**
1122  * igb_map_ring_to_vector - maps allocated queues to vectors
1123  *
1124  * This function maps the recently allocated queues to vectors.
1125  **/
1126 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1127 {
1128         int i;
1129         int v_idx = 0;
1130
1131         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1132             (adapter->num_q_vectors < adapter->num_tx_queues))
1133                 return -ENOMEM;
1134
1135         if (adapter->num_q_vectors >=
1136             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1137                 for (i = 0; i < adapter->num_rx_queues; i++)
1138                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1139                 for (i = 0; i < adapter->num_tx_queues; i++)
1140                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1141         } else {
1142                 for (i = 0; i < adapter->num_rx_queues; i++) {
1143                         if (i < adapter->num_tx_queues)
1144                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1145                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1146                 }
1147                 for (; i < adapter->num_tx_queues; i++)
1148                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1149         }
1150         return 0;
1151 }
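/*
 * Example of the two mapping strategies above (illustrative): with 4 RX
 * and 4 TX queues and 8 q_vectors, every ring gets a private vector; with
 * only 4 q_vectors, q_vector[i] services both rx_ring[i] and tx_ring[i].
 */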
1152
1153 /**
1154  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1155  *
1156  * This function initializes the interrupts and allocates all of the queues.
1157  **/
1158 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1159 {
1160         struct pci_dev *pdev = adapter->pdev;
1161         int err;
1162
1163         err = igb_set_interrupt_capability(adapter);
1164         if (err)
1165                 return err;
1166
1167         err = igb_alloc_q_vectors(adapter);
1168         if (err) {
1169                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1170                 goto err_alloc_q_vectors;
1171         }
1172
1173         err = igb_alloc_queues(adapter);
1174         if (err) {
1175                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1176                 goto err_alloc_queues;
1177         }
1178
1179         err = igb_map_ring_to_vector(adapter);
1180         if (err) {
1181                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1182                 goto err_map_queues;
1183         }
1184
1185
1186         return 0;
1187 err_map_queues:
1188         igb_free_queues(adapter);
1189 err_alloc_queues:
1190         igb_free_q_vectors(adapter);
1191 err_alloc_q_vectors:
1192         igb_reset_interrupt_capability(adapter);
1193         return err;
1194 }
1195
1196 /**
1197  * igb_request_irq - initialize interrupts
1198  *
1199  * Attempts to configure interrupts using the best available
1200  * capabilities of the hardware and kernel.
1201  **/
1202 static int igb_request_irq(struct igb_adapter *adapter)
1203 {
1204         struct net_device *netdev = adapter->netdev;
1205         struct pci_dev *pdev = adapter->pdev;
1206         int err = 0;
1207
1208         if (adapter->msix_entries) {
1209                 err = igb_request_msix(adapter);
1210                 if (!err)
1211                         goto request_done;
1212                 /* fall back to MSI */
1213                 igb_clear_interrupt_scheme(adapter);
1214                 if (!pci_enable_msi(adapter->pdev))
1215                         adapter->flags |= IGB_FLAG_HAS_MSI;
1216                 igb_free_all_tx_resources(adapter);
1217                 igb_free_all_rx_resources(adapter);
1218                 adapter->num_tx_queues = 1;
1219                 adapter->num_rx_queues = 1;
1220                 adapter->num_q_vectors = 1;
1221                 err = igb_alloc_q_vectors(adapter);
1222                 if (err) {
1223                         dev_err(&pdev->dev,
1224                                 "Unable to allocate memory for vectors\n");
1225                         goto request_done;
1226                 }
1227                 err = igb_alloc_queues(adapter);
1228                 if (err) {
1229                         dev_err(&pdev->dev,
1230                                 "Unable to allocate memory for queues\n");
1231                         igb_free_q_vectors(adapter);
1232                         goto request_done;
1233                 }
1234                 igb_setup_all_tx_resources(adapter);
1235                 igb_setup_all_rx_resources(adapter);
1236         } else {
1237                 igb_assign_vector(adapter->q_vector[0], 0);
1238         }
1239
1240         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1241                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1242                                   netdev->name, adapter);
1243                 if (!err)
1244                         goto request_done;
1245
1246                 /* fall back to legacy interrupts */
1247                 igb_reset_interrupt_capability(adapter);
1248                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1249         }
1250
1251         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1252                           netdev->name, adapter);
1253
1254         if (err)
1255                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1256                         err);
1257
1258 request_done:
1259         return err;
1260 }
1261
1262 static void igb_free_irq(struct igb_adapter *adapter)
1263 {
1264         if (adapter->msix_entries) {
1265                 int vector = 0, i;
1266
1267                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1268
1269                 for (i = 0; i < adapter->num_q_vectors; i++) {
1270                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1271                         free_irq(adapter->msix_entries[vector++].vector,
1272                                  q_vector);
1273                 }
1274         } else {
1275                 free_irq(adapter->pdev->irq, adapter);
1276         }
1277 }
1278
1279 /**
1280  * igb_irq_disable - Mask off interrupt generation on the NIC
1281  * @adapter: board private structure
1282  **/
1283 static void igb_irq_disable(struct igb_adapter *adapter)
1284 {
1285         struct e1000_hw *hw = &adapter->hw;
1286
1287         /*
1288          * we need to be careful when disabling interrupts.  The VFs are also
1289          * mapped into these registers and so clearing the bits can cause
1290          * issues for the VF drivers, so we only clear the bits we set.
1291          */
1292         if (adapter->msix_entries) {
1293                 u32 regval = rd32(E1000_EIAM);
1294                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1295                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1296                 regval = rd32(E1000_EIAC);
1297                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1298         }
1299
1300         wr32(E1000_IAM, 0);
1301         wr32(E1000_IMC, ~0);
1302         wrfl();
1303         if (adapter->msix_entries) {
1304                 int i;
1305                 for (i = 0; i < adapter->num_q_vectors; i++)
1306                         synchronize_irq(adapter->msix_entries[i].vector);
1307         } else {
1308                 synchronize_irq(adapter->pdev->irq);
1309         }
1310 }
1311
1312 /**
1313  * igb_irq_enable - Enable default interrupt generation settings
1314  * @adapter: board private structure
1315  **/
1316 static void igb_irq_enable(struct igb_adapter *adapter)
1317 {
1318         struct e1000_hw *hw = &adapter->hw;
1319
1320         if (adapter->msix_entries) {
1321                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1322                 u32 regval = rd32(E1000_EIAC);
1323                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1324                 regval = rd32(E1000_EIAM);
1325                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1326                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1327                 if (adapter->vfs_allocated_count) {
1328                         wr32(E1000_MBVFIMR, 0xFF);
1329                         ims |= E1000_IMS_VMMB;
1330                 }
1331                 if (adapter->hw.mac.type == e1000_82580)
1332                         ims |= E1000_IMS_DRSTA;
1333
1334                 wr32(E1000_IMS, ims);
1335         } else {
1336                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1337                                 E1000_IMS_DRSTA);
1338                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1339                                 E1000_IMS_DRSTA);
1340         }
1341 }
1342
1343 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1344 {
1345         struct e1000_hw *hw = &adapter->hw;
1346         u16 vid = adapter->hw.mng_cookie.vlan_id;
1347         u16 old_vid = adapter->mng_vlan_id;
1348
1349         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1350                 /* add VID to filter table */
1351                 igb_vfta_set(hw, vid, true);
1352                 adapter->mng_vlan_id = vid;
1353         } else {
1354                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1355         }
1356
1357         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1358             (vid != old_vid) &&
1359             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1360                 /* remove VID from filter table */
1361                 igb_vfta_set(hw, old_vid, false);
1362         }
1363 }
1364
1365 /**
1366  * igb_release_hw_control - release control of the h/w to f/w
1367  * @adapter: address of board private structure
1368  *
1369  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1370  * For ASF and Pass Through versions of f/w this means that the
1371  * driver is no longer loaded.
1372  *
1373  **/
1374 static void igb_release_hw_control(struct igb_adapter *adapter)
1375 {
1376         struct e1000_hw *hw = &adapter->hw;
1377         u32 ctrl_ext;
1378
1379         /* Let firmware take over control of h/w */
1380         ctrl_ext = rd32(E1000_CTRL_EXT);
1381         wr32(E1000_CTRL_EXT,
1382                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1383 }
1384
1385 /**
1386  * igb_get_hw_control - get control of the h/w from f/w
1387  * @adapter: address of board private structure
1388  *
1389  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1390  * For ASF and Pass Through versions of f/w this means that
1391  * the driver is loaded.
1392  *
1393  **/
1394 static void igb_get_hw_control(struct igb_adapter *adapter)
1395 {
1396         struct e1000_hw *hw = &adapter->hw;
1397         u32 ctrl_ext;
1398
1399         /* Let firmware know the driver has taken over */
1400         ctrl_ext = rd32(E1000_CTRL_EXT);
1401         wr32(E1000_CTRL_EXT,
1402                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1403 }
1404
1405 /**
1406  * igb_configure - configure the hardware for RX and TX
1407  * @adapter: private board structure
1408  **/
1409 static void igb_configure(struct igb_adapter *adapter)
1410 {
1411         struct net_device *netdev = adapter->netdev;
1412         int i;
1413
1414         igb_get_hw_control(adapter);
1415         igb_set_rx_mode(netdev);
1416
1417         igb_restore_vlan(adapter);
1418
1419         igb_setup_tctl(adapter);
1420         igb_setup_mrqc(adapter);
1421         igb_setup_rctl(adapter);
1422
1423         igb_configure_tx(adapter);
1424         igb_configure_rx(adapter);
1425
1426         igb_rx_fifo_flush_82575(&adapter->hw);
1427
1428         /* refill using igb_desc_unused(), which always leaves at least
1429          * one descriptor unused; a full ring therefore never makes
1430          * next_to_use equal to next_to_clean */
1431         for (i = 0; i < adapter->num_rx_queues; i++) {
1432                 struct igb_ring *ring = adapter->rx_ring[i];
1433                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1434         }
1435 }
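/*
 * For reference, igb_desc_unused() (a helper in igb.h) computes the number
 * of free slots roughly as sketched below; the trailing "- 1" is what keeps
 * one descriptor permanently unused, so next_to_use == next_to_clean can
 * only ever mean "empty", never "full":
 *
 *	static inline int igb_desc_unused(struct igb_ring *ring)
 *	{
 *		if (ring->next_to_clean > ring->next_to_use)
 *			return ring->next_to_clean - ring->next_to_use - 1;
 *		return ring->count + ring->next_to_clean - ring->next_to_use - 1;
 *	}
 */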
1436
1437 /**
1438  * igb_power_up_link - Power up the phy/serdes link
1439  * @adapter: address of board private structure
1440  **/
1441 void igb_power_up_link(struct igb_adapter *adapter)
1442 {
1443         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1444                 igb_power_up_phy_copper(&adapter->hw);
1445         else
1446                 igb_power_up_serdes_link_82575(&adapter->hw);
1447 }
1448
1449 /**
1450  * igb_power_down_link - Power down the phy/serdes link
1451  * @adapter: address of board private structure
1452  */
1453 static void igb_power_down_link(struct igb_adapter *adapter)
1454 {
1455         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1456                 igb_power_down_phy_copper_82575(&adapter->hw);
1457         else
1458                 igb_shutdown_serdes_link_82575(&adapter->hw);
1459 }
1460
1461 /**
1462  * igb_up - Open the interface and prepare it to handle traffic
1463  * @adapter: board private structure
1464  **/
1465 int igb_up(struct igb_adapter *adapter)
1466 {
1467         struct e1000_hw *hw = &adapter->hw;
1468         int i;
1469
1470         /* hardware has been reset, we need to reload some things */
1471         igb_configure(adapter);
1472
1473         clear_bit(__IGB_DOWN, &adapter->state);
1474
1475         for (i = 0; i < adapter->num_q_vectors; i++) {
1476                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1477                 napi_enable(&q_vector->napi);
1478         }
1479         if (adapter->msix_entries)
1480                 igb_configure_msix(adapter);
1481         else
1482                 igb_assign_vector(adapter->q_vector[0], 0);
1483
1484         /* Clear any pending interrupts. */
1485         rd32(E1000_ICR);
1486         igb_irq_enable(adapter);
1487
1488         /* notify VFs that reset has been completed */
1489         if (adapter->vfs_allocated_count) {
1490                 u32 reg_data = rd32(E1000_CTRL_EXT);
1491                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1492                 wr32(E1000_CTRL_EXT, reg_data);
1493         }
1494
1495         netif_tx_start_all_queues(adapter->netdev);
1496
1497         /* start the watchdog. */
1498         hw->mac.get_link_status = 1;
1499         schedule_work(&adapter->watchdog_task);
1500
1501         return 0;
1502 }
1503
1504 void igb_down(struct igb_adapter *adapter)
1505 {
1506         struct net_device *netdev = adapter->netdev;
1507         struct e1000_hw *hw = &adapter->hw;
1508         u32 tctl, rctl;
1509         int i;
1510
1511         /* signal that we're down so the interrupt handler does not
1512          * reschedule our watchdog timer */
1513         set_bit(__IGB_DOWN, &adapter->state);
1514
1515         /* disable receives in the hardware */
1516         rctl = rd32(E1000_RCTL);
1517         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1518         /* flush and sleep below */
1519
1520         netif_tx_stop_all_queues(netdev);
1521
1522         /* disable transmits in the hardware */
1523         tctl = rd32(E1000_TCTL);
1524         tctl &= ~E1000_TCTL_EN;
1525         wr32(E1000_TCTL, tctl);
1526         /* flush both disables and wait for them to finish */
1527         wrfl();
1528         msleep(10);
1529
1530         for (i = 0; i < adapter->num_q_vectors; i++) {
1531                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1532                 napi_disable(&q_vector->napi);
1533         }
1534
1535         igb_irq_disable(adapter);
1536
1537         del_timer_sync(&adapter->watchdog_timer);
1538         del_timer_sync(&adapter->phy_info_timer);
1539
1540         netif_carrier_off(netdev);
1541
1542         /* record the stats before reset */
1543         spin_lock(&adapter->stats64_lock);
1544         igb_update_stats(adapter, &adapter->stats64);
1545         spin_unlock(&adapter->stats64_lock);
1546
1547         adapter->link_speed = 0;
1548         adapter->link_duplex = 0;
1549
1550         if (!pci_channel_offline(adapter->pdev))
1551                 igb_reset(adapter);
1552         igb_clean_all_tx_rings(adapter);
1553         igb_clean_all_rx_rings(adapter);
1554 #ifdef CONFIG_IGB_DCA
1555
1556         /* since we reset the hardware, DCA settings were cleared */
1557         igb_setup_dca(adapter);
1558 #endif
1559 }
1560
1561 void igb_reinit_locked(struct igb_adapter *adapter)
1562 {
1563         WARN_ON(in_interrupt());
1564         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1565                 msleep(1);
1566         igb_down(adapter);
1567         igb_up(adapter);
1568         clear_bit(__IGB_RESETTING, &adapter->state);
1569 }
1570
1571 void igb_reset(struct igb_adapter *adapter)
1572 {
1573         struct pci_dev *pdev = adapter->pdev;
1574         struct e1000_hw *hw = &adapter->hw;
1575         struct e1000_mac_info *mac = &hw->mac;
1576         struct e1000_fc_info *fc = &hw->fc;
1577         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1578         u16 hwm;
1579
1580         /* Repartition PBA for MTUs greater than 9K.
1581          * CTRL.RST is required for the change to take effect.
1582          */
1583         switch (mac->type) {
1584         case e1000_i350:
1585         case e1000_82580:
1586                 pba = rd32(E1000_RXPBS);
1587                 pba = igb_rxpbs_adjust_82580(pba);
1588                 break;
1589         case e1000_82576:
1590                 pba = rd32(E1000_RXPBS);
1591                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1592                 break;
1593         case e1000_82575:
1594         default:
1595                 pba = E1000_PBA_34K;
1596                 break;
1597         }
1598
1599         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1600             (mac->type < e1000_82576)) {
1601                 /* adjust PBA for jumbo frames */
1602                 wr32(E1000_PBA, pba);
1603
1604                 /* To maintain wire speed transmits, the Tx FIFO should be
1605                  * large enough to accommodate two full transmit packets,
1606                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1607                  * the Rx FIFO should be large enough to accommodate at least
1608                  * one full receive packet and is similarly rounded up and
1609                  * expressed in KB. */
1610                 pba = rd32(E1000_PBA);
1611                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1612                 tx_space = pba >> 16;
1613                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1614                 pba &= 0xffff;
1615                 /* the Tx FIFO also stores 16 bytes of information about each Tx
1616                  * packet; don't count the Ethernet FCS, since hardware appends it */
1617                 min_tx_space = (adapter->max_frame_size +
1618                                 sizeof(union e1000_adv_tx_desc) -
1619                                 ETH_FCS_LEN) * 2;
1620                 min_tx_space = ALIGN(min_tx_space, 1024);
1621                 min_tx_space >>= 10;
1622                 /* software strips receive CRC, so leave room for it */
1623                 min_rx_space = adapter->max_frame_size;
1624                 min_rx_space = ALIGN(min_rx_space, 1024);
1625                 min_rx_space >>= 10;
1626
1627                 /* If current Tx allocation is less than the min Tx FIFO size,
1628                  * and the min Tx FIFO size is less than the current Rx FIFO
1629                  * allocation, take space away from current Rx allocation */
1630                 if (tx_space < min_tx_space &&
1631                     ((min_tx_space - tx_space) < pba)) {
1632                         pba = pba - (min_tx_space - tx_space);
1633
1634                         /* if short on rx space, rx wins and must trump tx
1635                          * adjustment */
1636                         if (pba < min_rx_space)
1637                                 pba = min_rx_space;
1638                 }
1639                 wr32(E1000_PBA, pba);
1640         }
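        /*
         * Worked example (assuming a hypothetical 9000-byte jumbo MTU for
         * illustration): max_frame_size = 9000 + ETH_HLEN + ETH_FCS_LEN = 9018.
         *	min_tx_space = ALIGN((9018 + 16 - 4) * 2, 1024) >> 10 = 18 KB
         *	min_rx_space = ALIGN(9018, 1024) >> 10               =  9 KB
         * so on an 82575 with its 34 KB PBA, space is shaved off the Rx
         * allocation whenever the Tx share falls below 18 KB.
         */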
1641
1642         /* flow control settings */
1643         /* The high water mark must be low enough to fit one full frame
1644          * (or the size used for early receive) above it in the Rx FIFO.
1645          * Set it to the lower of:
1646          * - 90% of the Rx FIFO size, or
1647          * - the full Rx FIFO size minus room for two full frames */
1648         hwm = min(((pba << 10) * 9 / 10),
1649                         ((pba << 10) - 2 * adapter->max_frame_size));
1650
1651         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1652         fc->low_water = fc->high_water - 16;
1653         fc->pause_time = 0xFFFF;
1654         fc->send_xon = 1;
1655         fc->current_mode = fc->requested_mode;
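        /*
         * Worked example (assuming the 82575 default of pba = 34 and the
         * standard 1518-byte max frame): pba << 10 = 34816 bytes, so
         *	90% of the FIFO        = 31334
         *	FIFO - 2 full frames   = 34816 - 2 * 1518 = 31780
         * hwm = min(...) = 31334, giving high_water = 31334 & 0xFFF0 = 31328
         * and low_water = 31312.
         */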
1656
1657         /* notify all VFs of the reset and disable their Rx/Tx queues */
1658         if (adapter->vfs_allocated_count) {
1659                 int i;
1660                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1661                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1662
1663                 /* ping all the active vfs to let them know we are going down */
1664                 igb_ping_all_vfs(adapter);
1665
1666                 /* disable transmits and receives */
1667                 wr32(E1000_VFRE, 0);
1668                 wr32(E1000_VFTE, 0);
1669         }
1670
1671         /* Allow time for pending master requests to run */
1672         hw->mac.ops.reset_hw(hw);
1673         wr32(E1000_WUC, 0);
1674
1675         if (hw->mac.ops.init_hw(hw))
1676                 dev_err(&pdev->dev, "Hardware Error\n");
1677
1678         if (hw->mac.type == e1000_82580) {
1679                 u32 reg = rd32(E1000_PCIEMISC);
1680                 wr32(E1000_PCIEMISC,
1681                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1682         }
1683         if (!netif_running(adapter->netdev))
1684                 igb_power_down_link(adapter);
1685
1686         igb_update_mng_vlan(adapter);
1687
1688         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1689         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1690
1691         igb_get_phy_info(hw);
1692 }
1693
1694 static const struct net_device_ops igb_netdev_ops = {
1695         .ndo_open               = igb_open,
1696         .ndo_stop               = igb_close,
1697         .ndo_start_xmit         = igb_xmit_frame_adv,
1698         .ndo_get_stats64        = igb_get_stats64,
1699         .ndo_set_rx_mode        = igb_set_rx_mode,
1700         .ndo_set_multicast_list = igb_set_rx_mode,
1701         .ndo_set_mac_address    = igb_set_mac,
1702         .ndo_change_mtu         = igb_change_mtu,
1703         .ndo_do_ioctl           = igb_ioctl,
1704         .ndo_tx_timeout         = igb_tx_timeout,
1705         .ndo_validate_addr      = eth_validate_addr,
1706         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1707         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1708         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1709         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1710         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1711         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1712         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1713 #ifdef CONFIG_NET_POLL_CONTROLLER
1714         .ndo_poll_controller    = igb_netpoll,
1715 #endif
1716 };
1717
1718 /**
1719  * igb_probe - Device Initialization Routine
1720  * @pdev: PCI device information struct
1721  * @ent: entry in igb_pci_tbl
1722  *
1723  * Returns 0 on success, negative on failure
1724  *
1725  * igb_probe initializes an adapter identified by a pci_dev structure.
1726  * The OS initialization, configuring of the adapter private structure,
1727  * and a hardware reset occur.
1728  **/
1729 static int __devinit igb_probe(struct pci_dev *pdev,
1730                                const struct pci_device_id *ent)
1731 {
1732         struct net_device *netdev;
1733         struct igb_adapter *adapter;
1734         struct e1000_hw *hw;
1735         u16 eeprom_data = 0;
1736         s32 ret_val;
1737         static int global_quad_port_a; /* global quad port a indication */
1738         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1739         unsigned long mmio_start, mmio_len;
1740         int err, pci_using_dac;
1741         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1742         u8 part_str[E1000_PBANUM_LENGTH];
1743
1744         /* Catch broken hardware that put the wrong VF device ID in
1745          * the PCIe SR-IOV capability.
1746          */
1747         if (pdev->is_virtfn) {
1748                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1749                      pci_name(pdev), pdev->vendor, pdev->device);
1750                 return -EINVAL;
1751         }
1752
1753         err = pci_enable_device_mem(pdev);
1754         if (err)
1755                 return err;
1756
1757         pci_using_dac = 0;
1758         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1759         if (!err) {
1760                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1761                 if (!err)
1762                         pci_using_dac = 1;
1763         } else {
1764                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1765                 if (err) {
1766                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1767                         if (err) {
1768                                 dev_err(&pdev->dev, "No usable DMA "
1769                                         "configuration, aborting\n");
1770                                 goto err_dma;
1771                         }
1772                 }
1773         }
1774
1775         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1776                                            IORESOURCE_MEM),
1777                                            igb_driver_name);
1778         if (err)
1779                 goto err_pci_reg;
1780
1781         pci_enable_pcie_error_reporting(pdev);
1782
1783         pci_set_master(pdev);
1784         pci_save_state(pdev);
1785
1786         err = -ENOMEM;
1787         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1788                                    IGB_ABS_MAX_TX_QUEUES);
1789         if (!netdev)
1790                 goto err_alloc_etherdev;
1791
1792         SET_NETDEV_DEV(netdev, &pdev->dev);
1793
1794         pci_set_drvdata(pdev, netdev);
1795         adapter = netdev_priv(netdev);
1796         adapter->netdev = netdev;
1797         adapter->pdev = pdev;
1798         hw = &adapter->hw;
1799         hw->back = adapter;
1800         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1801
1802         mmio_start = pci_resource_start(pdev, 0);
1803         mmio_len = pci_resource_len(pdev, 0);
1804
1805         err = -EIO;
1806         hw->hw_addr = ioremap(mmio_start, mmio_len);
1807         if (!hw->hw_addr)
1808                 goto err_ioremap;
1809
1810         netdev->netdev_ops = &igb_netdev_ops;
1811         igb_set_ethtool_ops(netdev);
1812         netdev->watchdog_timeo = 5 * HZ;
1813
1814         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1815
1816         netdev->mem_start = mmio_start;
1817         netdev->mem_end = mmio_start + mmio_len;
1818
1819         /* PCI config space info */
1820         hw->vendor_id = pdev->vendor;
1821         hw->device_id = pdev->device;
1822         hw->revision_id = pdev->revision;
1823         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1824         hw->subsystem_device_id = pdev->subsystem_device;
1825
1826         /* Copy the default MAC, PHY and NVM function pointers */
1827         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1828         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1829         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1830         /* Initialize skew-specific constants */
1831         err = ei->get_invariants(hw);
1832         if (err)
1833                 goto err_sw_init;
1834
1835         /* setup the private structure */
1836         err = igb_sw_init(adapter);
1837         if (err)
1838                 goto err_sw_init;
1839
1840         igb_get_bus_info_pcie(hw);
1841
1842         hw->phy.autoneg_wait_to_complete = false;
1843
1844         /* Copper options */
1845         if (hw->phy.media_type == e1000_media_type_copper) {
1846                 hw->phy.mdix = AUTO_ALL_MODES;
1847                 hw->phy.disable_polarity_correction = false;
1848                 hw->phy.ms_type = e1000_ms_hw_default;
1849         }
1850
1851         if (igb_check_reset_block(hw))
1852                 dev_info(&pdev->dev,
1853                         "PHY reset is blocked due to SOL/IDER session.\n");
1854
1855         netdev->features = NETIF_F_SG |
1856                            NETIF_F_IP_CSUM |
1857                            NETIF_F_HW_VLAN_TX |
1858                            NETIF_F_HW_VLAN_RX |
1859                            NETIF_F_HW_VLAN_FILTER;
1860
1861         netdev->features |= NETIF_F_IPV6_CSUM;
1862         netdev->features |= NETIF_F_TSO;
1863         netdev->features |= NETIF_F_TSO6;
1864         netdev->features |= NETIF_F_GRO;
1865
1866         netdev->vlan_features |= NETIF_F_TSO;
1867         netdev->vlan_features |= NETIF_F_TSO6;
1868         netdev->vlan_features |= NETIF_F_IP_CSUM;
1869         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1870         netdev->vlan_features |= NETIF_F_SG;
1871
1872         if (pci_using_dac) {
1873                 netdev->features |= NETIF_F_HIGHDMA;
1874                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1875         }
1876
1877         if (hw->mac.type >= e1000_82576)
1878                 netdev->features |= NETIF_F_SCTP_CSUM;
1879
1880         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1881
1882         /* before reading the NVM, reset the controller to put the device in a
1883          * known good starting state */
1884         hw->mac.ops.reset_hw(hw);
1885
1886         /* make sure the NVM is good */
1887         if (igb_validate_nvm_checksum(hw) < 0) {
1888                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1889                 err = -EIO;
1890                 goto err_eeprom;
1891         }
1892
1893         /* copy the MAC address out of the NVM */
1894         if (hw->mac.ops.read_mac_addr(hw))
1895                 dev_err(&pdev->dev, "NVM Read Error\n");
1896
1897         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1898         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1899
1900         if (!is_valid_ether_addr(netdev->perm_addr)) {
1901                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1902                 err = -EIO;
1903                 goto err_eeprom;
1904         }
1905
1906         setup_timer(&adapter->watchdog_timer, igb_watchdog,
1907                     (unsigned long) adapter);
1908         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1909                     (unsigned long) adapter);
1910
1911         INIT_WORK(&adapter->reset_task, igb_reset_task);
1912         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1913
1914         /* Initialize link properties that are user-changeable */
1915         adapter->fc_autoneg = true;
1916         hw->mac.autoneg = true;
1917         hw->phy.autoneg_advertised = 0x2f;
1918
1919         hw->fc.requested_mode = e1000_fc_default;
1920         hw->fc.current_mode = e1000_fc_default;
1921
1922         igb_validate_mdi_setting(hw);
1923
1924         /* Initial Wake-on-LAN setting: if APM wake is enabled in the EEPROM,
1925          * enable the ACPI Magic Packet filter
1926          */
1927
1928         if (hw->bus.func == 0)
1929                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1930         else if (hw->mac.type == e1000_82580)
1931                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1932                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1933                                  &eeprom_data);
1934         else if (hw->bus.func == 1)
1935                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1936
1937         if (eeprom_data & eeprom_apme_mask)
1938                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1939
1940         /* now that we have the eeprom settings, apply the special cases where
1941          * the eeprom may be wrong or the board simply won't support wake on
1942          * lan on a particular port */
1943         switch (pdev->device) {
1944         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1945                 adapter->eeprom_wol = 0;
1946                 break;
1947         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1948         case E1000_DEV_ID_82576_FIBER:
1949         case E1000_DEV_ID_82576_SERDES:
1950                 /* Wake events are only supported on port A for dual fiber
1951                  * adapters, regardless of the eeprom setting */
1952                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1953                         adapter->eeprom_wol = 0;
1954                 break;
1955         case E1000_DEV_ID_82576_QUAD_COPPER:
1956         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
1957                 /* if quad port adapter, disable WoL on all but port A */
1958                 if (global_quad_port_a != 0)
1959                         adapter->eeprom_wol = 0;
1960                 else
1961                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1962                 /* Reset for multiple quad port adapters */
1963                 if (++global_quad_port_a == 4)
1964                         global_quad_port_a = 0;
1965                 break;
1966         }
1967
1968         /* initialize the wol settings based on the eeprom settings */
1969         adapter->wol = adapter->eeprom_wol;
1970         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1971
1972         /* reset the hardware with the new settings */
1973         igb_reset(adapter);
1974
1975         /* let the f/w know that the h/w is now under the control of the
1976          * driver. */
1977         igb_get_hw_control(adapter);
1978
1979         strcpy(netdev->name, "eth%d");
1980         err = register_netdev(netdev);
1981         if (err)
1982                 goto err_register;
1983
1984         /* carrier off reporting is important to ethtool even BEFORE open */
1985         netif_carrier_off(netdev);
1986
1987 #ifdef CONFIG_IGB_DCA
1988         if (dca_add_requester(&pdev->dev) == 0) {
1989                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1990                 dev_info(&pdev->dev, "DCA enabled\n");
1991                 igb_setup_dca(adapter);
1992         }
1993
1994 #endif
1995         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1996         /* print bus type/speed/width info */
1997         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1998                  netdev->name,
1999                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2000                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2001                                                             "unknown"),
2002                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2003                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2004                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2005                    "unknown"),
2006                  netdev->dev_addr);
2007
2008         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2009         if (ret_val)
2010                 strcpy(part_str, "Unknown");
2011         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2012         dev_info(&pdev->dev,
2013                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2014                 adapter->msix_entries ? "MSI-X" :
2015                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2016                 adapter->num_rx_queues, adapter->num_tx_queues);
2017
2018         return 0;
2019
2020 err_register:
2021         igb_release_hw_control(adapter);
2022 err_eeprom:
2023         if (!igb_check_reset_block(hw))
2024                 igb_reset_phy(hw);
2025
2026         if (hw->flash_address)
2027                 iounmap(hw->flash_address);
2028 err_sw_init:
2029         igb_clear_interrupt_scheme(adapter);
2030         iounmap(hw->hw_addr);
2031 err_ioremap:
2032         free_netdev(netdev);
2033 err_alloc_etherdev:
2034         pci_release_selected_regions(pdev,
2035                                      pci_select_bars(pdev, IORESOURCE_MEM));
2036 err_pci_reg:
2037 err_dma:
2038         pci_disable_device(pdev);
2039         return err;
2040 }
2041
2042 /**
2043  * igb_remove - Device Removal Routine
2044  * @pdev: PCI device information struct
2045  *
2046  * igb_remove is called by the PCI subsystem to alert the driver
2047  * that it should release a PCI device.  This could be caused by a
2048  * Hot-Plug event, or because the driver is going to be removed from
2049  * memory.
2050  **/
2051 static void __devexit igb_remove(struct pci_dev *pdev)
2052 {
2053         struct net_device *netdev = pci_get_drvdata(pdev);
2054         struct igb_adapter *adapter = netdev_priv(netdev);
2055         struct e1000_hw *hw = &adapter->hw;
2056
2057         /*
2058          * Mark the adapter as down first so that the watchdog timer
2059          * and task can no longer reschedule themselves.
2060          */
2061         set_bit(__IGB_DOWN, &adapter->state);
2062         del_timer_sync(&adapter->watchdog_timer);
2063         del_timer_sync(&adapter->phy_info_timer);
2064
2065         cancel_work_sync(&adapter->reset_task);
2066         cancel_work_sync(&adapter->watchdog_task);
2067
2068 #ifdef CONFIG_IGB_DCA
2069         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2070                 dev_info(&pdev->dev, "DCA disabled\n");
2071                 dca_remove_requester(&pdev->dev);
2072                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2073                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2074         }
2075 #endif
2076
2077         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2078          * would have already happened in close and is redundant. */
2079         igb_release_hw_control(adapter);
2080
2081         unregister_netdev(netdev);
2082
2083         igb_clear_interrupt_scheme(adapter);
2084
2085 #ifdef CONFIG_PCI_IOV
2086         /* reclaim resources allocated to VFs */
2087         if (adapter->vf_data) {
2088                 /* disable iov and allow time for transactions to clear */
2089                 pci_disable_sriov(pdev);
2090                 msleep(500);
2091
2092                 kfree(adapter->vf_data);
2093                 adapter->vf_data = NULL;
2094                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2095                 msleep(100);
2096                 dev_info(&pdev->dev, "IOV Disabled\n");
2097         }
2098 #endif
2099
2100         iounmap(hw->hw_addr);
2101         if (hw->flash_address)
2102                 iounmap(hw->flash_address);
2103         pci_release_selected_regions(pdev,
2104                                      pci_select_bars(pdev, IORESOURCE_MEM));
2105
2106         free_netdev(netdev);
2107
2108         pci_disable_pcie_error_reporting(pdev);
2109
2110         pci_disable_device(pdev);
2111 }
2112
2113 /**
2114  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2115  * @adapter: board private structure to initialize
2116  *
2117  * This function initializes the VF-specific data storage and then attempts
2118  * to allocate the VFs.  It is ordered this way because it is much more
2119  * expensive time-wise to disable SR-IOV than it is to allocate and free
2120  * the memory for the VFs.
2121  **/
2122 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2123 {
2124 #ifdef CONFIG_PCI_IOV
2125         struct pci_dev *pdev = adapter->pdev;
2126
2127         if (adapter->vfs_allocated_count) {
2128                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2129                                            sizeof(struct vf_data_storage),
2130                                            GFP_KERNEL);
2131                 /* if allocation failed then we do not support SR-IOV */
2132                 if (!adapter->vf_data) {
2133                         adapter->vfs_allocated_count = 0;
2134                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2135                                 "Data Storage\n");
2136                 }
2137         }
2138
2139         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2140                 kfree(adapter->vf_data);
2141                 adapter->vf_data = NULL;
2142 #endif /* CONFIG_PCI_IOV */
2143                 adapter->vfs_allocated_count = 0;
2144 #ifdef CONFIG_PCI_IOV
2145         } else {
2146                 unsigned char mac_addr[ETH_ALEN];
2147                 int i;
2148                 dev_info(&pdev->dev, "%d vfs allocated\n",
2149                          adapter->vfs_allocated_count);
2150                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2151                         random_ether_addr(mac_addr);
2152                         igb_set_vf_mac(adapter, i, mac_addr);
2153                 }
2154         }
2155 #endif /* CONFIG_PCI_IOV */
2156 }
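/*
 * Note: the preprocessor layout above is deliberate; with CONFIG_PCI_IOV
 * disabled the function body reduces to just
 * "adapter->vfs_allocated_count = 0;", so no SR-IOV state is ever carried
 * on kernels built without PCI IOV support.
 */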
2157
2158
2159 /**
2160  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2161  * @adapter: board private structure to initialize
2162  *
2163  * igb_init_hw_timer initializes the function pointer and values for the hw
2164  * timer found in hardware.
2165  **/
2166 static void igb_init_hw_timer(struct igb_adapter *adapter)
2167 {
2168         struct e1000_hw *hw = &adapter->hw;
2169
2170         switch (hw->mac.type) {
2171         case e1000_i350:
2172         case e1000_82580:
2173                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2174                 adapter->cycles.read = igb_read_clock;
2175                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2176                 adapter->cycles.mult = 1;
2177                 /*
2178                  * The 82580 timesync advances the system timer by 8 ns every
2179                  * 8 ns, and the value cannot be shifted.  Instead we shift
2180                  * the registers to generate a 64bit timer value.  As a result
2181                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2182                  * 24 in order to generate a larger value for synchronization.
2183                  */
2184                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
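                /*
                 * With mult = 1 and shift = 24, the timecounter converts a
                 * raw delta back to nanoseconds as (delta * 1) >> 24.  A
                 * SYSTIML count placed 24 bits up in the raw value therefore
                 * converts back to exactly 1 ns, matching SYSTIML's
                 * nanosecond granularity on this part.
                 */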
2185                 /* disable system timer temporarily by setting bit 31 */
2186                 wr32(E1000_TSAUXC, 0x80000000);
2187                 wrfl();
2188
2189                 /* Set registers so that rollover occurs soon to test this. */
2190                 wr32(E1000_SYSTIMR, 0x00000000);
2191                 wr32(E1000_SYSTIML, 0x80000000);
2192                 wr32(E1000_SYSTIMH, 0x000000FF);
2193                 wrfl();
2194
2195                 /* enable system timer by clearing bit 31 */
2196                 wr32(E1000_TSAUXC, 0x0);
2197                 wrfl();
2198
2199                 timecounter_init(&adapter->clock,
2200                                  &adapter->cycles,
2201                                  ktime_to_ns(ktime_get_real()));
2202                 /*
2203                  * Synchronize our NIC clock against system wall clock. NIC
2204                  * time stamp reading requires ~3us per sample, and samples
2205                  * proved stable even under load, so 10 samples per offset
2206                  * comparison are sufficient.
2207                  */
2208                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2209                 adapter->compare.source = &adapter->clock;
2210                 adapter->compare.target = ktime_get_real;
2211                 adapter->compare.num_samples = 10;
2212                 timecompare_update(&adapter->compare, 0);
2213                 break;
2214         case e1000_82576:
2215                 /*
2216                  * Initialize hardware timer: we keep it running just in case
2217                  * that some program needs it later on.
2218                  */
2219                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2220                 adapter->cycles.read = igb_read_clock;
2221                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2222                 adapter->cycles.mult = 1;
2223                 /*
2224                  * Scale the NIC clock cycle by a large factor so that
2225                  * relatively small clock corrections can be added or
2226                  * subtracted at each clock tick. The drawbacks of a large
2227                  * factor are a) that the clock register overflows more quickly
2228                  * (not such a big deal) and b) that the increment per tick has
2229                  * to fit into 24 bits.  As a result we need to use a shift of
2230                  * 19 so we can fit a value of 16 into the TIMINCA register.
2231                  */
2232                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2233                 wr32(E1000_TIMINCA,
2234                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2235                                 (16 << IGB_82576_TSYNC_SHIFT));
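                /*
                 * Sanity check of the 24-bit constraint mentioned above:
                 * 16 << 19 = 0x800000, which still fits in TIMINCA's 24-bit
                 * increment field (max 0xFFFFFF); a shift of 20 would
                 * already push the increment value out of range.
                 */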
2236
2237                 /* Set registers so that rollover occurs soon to test this. */
2238                 wr32(E1000_SYSTIML, 0x00000000);
2239                 wr32(E1000_SYSTIMH, 0xFF800000);
2240                 wrfl();
2241
2242                 timecounter_init(&adapter->clock,
2243                                  &adapter->cycles,
2244                                  ktime_to_ns(ktime_get_real()));
2245                 /*
2246                  * Synchronize our NIC clock against system wall clock. NIC
2247                  * time stamp reading requires ~3us per sample, and samples
2248                  * proved stable even under load, so 10 samples per offset
2249                  * comparison are sufficient.
2250                  */
2251                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2252                 adapter->compare.source = &adapter->clock;
2253                 adapter->compare.target = ktime_get_real;
2254                 adapter->compare.num_samples = 10;
2255                 timecompare_update(&adapter->compare, 0);
2256                 break;
2257         case e1000_82575:
2258                 /* 82575 does not support timesync */
2259         default:
2260                 break;
2261         }
2262
2263 }
2264
2265 /**
2266  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2267  * @adapter: board private structure to initialize
2268  *
2269  * igb_sw_init initializes the Adapter private data structure.
2270  * Fields are initialized based on PCI device information and
2271  * OS network device settings (MTU size).
2272  **/
2273 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2274 {
2275         struct e1000_hw *hw = &adapter->hw;
2276         struct net_device *netdev = adapter->netdev;
2277         struct pci_dev *pdev = adapter->pdev;
2278
2279         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2280
2281         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2282         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2283         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2284         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2285
2286         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2287         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2288
2289         spin_lock_init(&adapter->stats64_lock);
2290 #ifdef CONFIG_PCI_IOV
2291         switch (hw->mac.type) {
2292         case e1000_82576:
2293         case e1000_i350:
2294                 if (max_vfs > 7) {
2295                         dev_warn(&pdev->dev,
2296                                  "Maximum of 7 VFs per PF, using max\n");
2297                         adapter->vfs_allocated_count = 7;
2298                 } else
2299                         adapter->vfs_allocated_count = max_vfs;
2300                 break;
2301         default:
2302                 break;
2303         }
2304 #endif /* CONFIG_PCI_IOV */
2305         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2306
2307         /*
2308          * if rss_queues > 4, or if more than 6 VFs will be allocated while
2309          * multiple RSS queues are in use, combine the Tx and Rx queues into
2310          * queue pairs in order to conserve the limited supply of interrupts
2311          */
2312         if ((adapter->rss_queues > 4) ||
2313             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2314                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
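        /*
         * Worked example: with rss_queues = 8 and no pairing, Tx and Rx
         * would each need a dedicated vector (16 queue vectors plus one for
         * link and other causes); pairing shares one vector per Tx/Rx pair,
         * halving the demand to 8 + 1.
         */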
2315
2316         /* This call may decrease the number of queues */
2317         if (igb_init_interrupt_scheme(adapter)) {
2318                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2319                 return -ENOMEM;
2320         }
2321
2322         igb_init_hw_timer(adapter);
2323         igb_probe_vfs(adapter);
2324
2325         /* Explicitly disable IRQ since the NIC can be in any state. */
2326         igb_irq_disable(adapter);
2327
2328         set_bit(__IGB_DOWN, &adapter->state);
2329         return 0;
2330 }
2331
2332 /**
2333  * igb_open - Called when a network interface is made active
2334  * @netdev: network interface device structure
2335  *
2336  * Returns 0 on success, negative value on failure
2337  *
2338  * The open entry point is called when a network interface is made
2339  * active by the system (IFF_UP).  At this point all resources needed
2340  * for transmit and receive operations are allocated, the interrupt
2341  * handler is registered with the OS, the watchdog timer is started,
2342  * and the stack is notified that the interface is ready.
2343  **/
2344 static int igb_open(struct net_device *netdev)
2345 {
2346         struct igb_adapter *adapter = netdev_priv(netdev);
2347         struct e1000_hw *hw = &adapter->hw;
2348         int err;
2349         int i;
2350
2351         /* disallow open during test */
2352         if (test_bit(__IGB_TESTING, &adapter->state))
2353                 return -EBUSY;
2354
2355         netif_carrier_off(netdev);
2356
2357         /* allocate transmit descriptors */
2358         err = igb_setup_all_tx_resources(adapter);
2359         if (err)
2360                 goto err_setup_tx;
2361
2362         /* allocate receive descriptors */
2363         err = igb_setup_all_rx_resources(adapter);
2364         if (err)
2365                 goto err_setup_rx;
2366
2367         igb_power_up_link(adapter);
2368
2369         /* before we allocate an interrupt, we must be ready to handle it.
2370          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2371          * as soon as we call pci_request_irq, so we have to set up our
2372          * clean_rx handler before we do so.  */
2373         igb_configure(adapter);
2374
2375         err = igb_request_irq(adapter);
2376         if (err)
2377                 goto err_req_irq;
2378
2379         /* From here on the code is the same as igb_up() */
2380         clear_bit(__IGB_DOWN, &adapter->state);
2381
2382         for (i = 0; i < adapter->num_q_vectors; i++) {
2383                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2384                 napi_enable(&q_vector->napi);
2385         }
2386
2387         /* Clear any pending interrupts. */
2388         rd32(E1000_ICR);
2389
2390         igb_irq_enable(adapter);
2391
2392         /* notify VFs that reset has been completed */
2393         if (adapter->vfs_allocated_count) {
2394                 u32 reg_data = rd32(E1000_CTRL_EXT);
2395                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2396                 wr32(E1000_CTRL_EXT, reg_data);
2397         }
2398
2399         netif_tx_start_all_queues(netdev);
2400
2401         /* start the watchdog. */
2402         hw->mac.get_link_status = 1;
2403         schedule_work(&adapter->watchdog_task);
2404
2405         return 0;
2406
2407 err_req_irq:
2408         igb_release_hw_control(adapter);
2409         igb_power_down_link(adapter);
2410         igb_free_all_rx_resources(adapter);
2411 err_setup_rx:
2412         igb_free_all_tx_resources(adapter);
2413 err_setup_tx:
2414         igb_reset(adapter);
2415
2416         return err;
2417 }
2418
2419 /**
2420  * igb_close - Disables a network interface
2421  * @netdev: network interface device structure
2422  *
2423  * Returns 0, this is not allowed to fail
2424  *
2425  * The close entry point is called when an interface is de-activated
2426  * by the OS.  The hardware is still under the driver's control, but
2427  * needs to be disabled.  A global MAC reset is issued to stop the
2428  * hardware, and all transmit and receive resources are freed.
2429  **/
2430 static int igb_close(struct net_device *netdev)
2431 {
2432         struct igb_adapter *adapter = netdev_priv(netdev);
2433
2434         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2435         igb_down(adapter);
2436
2437         igb_free_irq(adapter);
2438
2439         igb_free_all_tx_resources(adapter);
2440         igb_free_all_rx_resources(adapter);
2441
2442         return 0;
2443 }
2444
2445 /**
2446  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2447  * @tx_ring: tx descriptor ring (for a specific queue) to set up
2448  *
2449  * Return 0 on success, negative on failure
2450  **/
2451 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2452 {
2453         struct device *dev = tx_ring->dev;
2454         int size;
2455
2456         size = sizeof(struct igb_buffer) * tx_ring->count;
2457         tx_ring->buffer_info = vzalloc(size);
2458         if (!tx_ring->buffer_info)
2459                 goto err;
2460
2461         /* round up to nearest 4K */
2462         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2463         tx_ring->size = ALIGN(tx_ring->size, 4096);
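        /*
         * Worked example: each advanced Tx descriptor is 16 bytes, so the
         * default 256-entry ring (IGB_DEFAULT_TXD) needs exactly 4096 bytes
         * and is already 4K aligned; a hypothetical 320-entry ring (5120
         * bytes) would be rounded up to 8192.
         */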
2464
2465         tx_ring->desc = dma_alloc_coherent(dev,
2466                                            tx_ring->size,
2467                                            &tx_ring->dma,
2468                                            GFP_KERNEL);
2469
2470         if (!tx_ring->desc)
2471                 goto err;
2472
2473         tx_ring->next_to_use = 0;
2474         tx_ring->next_to_clean = 0;
2475         return 0;
2476
2477 err:
2478         vfree(tx_ring->buffer_info);
2479         dev_err(dev,
2480                 "Unable to allocate memory for the transmit descriptor ring\n");
2481         return -ENOMEM;
2482 }
2483
2484 /**
2485  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2486  *                                (Descriptors) for all queues
2487  * @adapter: board private structure
2488  *
2489  * Return 0 on success, negative on failure
2490  **/
2491 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2492 {
2493         struct pci_dev *pdev = adapter->pdev;
2494         int i, err = 0;
2495
2496         for (i = 0; i < adapter->num_tx_queues; i++) {
2497                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2498                 if (err) {
2499                         dev_err(&pdev->dev,
2500                                 "Allocation for Tx Queue %u failed\n", i);
2501                         for (i--; i >= 0; i--)
2502                                 igb_free_tx_resources(adapter->tx_ring[i]);
2503                         break;
2504                 }
2505         }
2506
2507         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2508                 int r_idx = i % adapter->num_tx_queues;
2509                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2510         }
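        /*
         * Worked example of the round-robin mapping above: with
         * num_tx_queues = 4, absolute slots 0..15 resolve to rings
         * 0,1,2,3,0,1,2,3,... so any queue index beyond the real ring
         * count still lands on a valid ring.
         */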
2511         return err;
2512 }
2513
2514 /**
2515  * igb_setup_tctl - configure the transmit control registers
2516  * @adapter: Board private structure
2517  **/
2518 void igb_setup_tctl(struct igb_adapter *adapter)
2519 {
2520         struct e1000_hw *hw = &adapter->hw;
2521         u32 tctl;
2522
2523         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2524         wr32(E1000_TXDCTL(0), 0);
2525
2526         /* Program the Transmit Control Register */
2527         tctl = rd32(E1000_TCTL);
2528         tctl &= ~E1000_TCTL_CT;
2529         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2530                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2531
2532         igb_config_collision_dist(hw);
2533
2534         /* Enable transmits */
2535         tctl |= E1000_TCTL_EN;
2536
2537         wr32(E1000_TCTL, tctl);
2538 }
2539
2540 /**
2541  * igb_configure_tx_ring - Configure transmit ring after Reset
2542  * @adapter: board private structure
2543  * @ring: tx ring to configure
2544  *
2545  * Configure a transmit ring after a reset.
2546  **/
2547 void igb_configure_tx_ring(struct igb_adapter *adapter,
2548                            struct igb_ring *ring)
2549 {
2550         struct e1000_hw *hw = &adapter->hw;
2551         u32 txdctl;
2552         u64 tdba = ring->dma;
2553         int reg_idx = ring->reg_idx;
2554
2555         /* disable the queue */
2556         txdctl = rd32(E1000_TXDCTL(reg_idx));
2557         wr32(E1000_TXDCTL(reg_idx),
2558                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2559         wrfl();
2560         mdelay(10);
2561
2562         wr32(E1000_TDLEN(reg_idx),
2563                         ring->count * sizeof(union e1000_adv_tx_desc));
2564         wr32(E1000_TDBAL(reg_idx),
2565                         tdba & 0x00000000ffffffffULL);
2566         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2567
2568         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2569         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2570         writel(0, ring->head);
2571         writel(0, ring->tail);
2572
2573         txdctl |= IGB_TX_PTHRESH;
2574         txdctl |= IGB_TX_HTHRESH << 8;
2575         txdctl |= IGB_TX_WTHRESH << 16;
2576
2577         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2578         wr32(E1000_TXDCTL(reg_idx), txdctl);
2579 }
2580
2581 /**
2582  * igb_configure_tx - Configure transmit Unit after Reset
2583  * @adapter: board private structure
2584  *
2585  * Configure the Tx unit of the MAC after a reset.
2586  **/
2587 static void igb_configure_tx(struct igb_adapter *adapter)
2588 {
2589         int i;
2590
2591         for (i = 0; i < adapter->num_tx_queues; i++)
2592                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2593 }
2594
2595 /**
2596  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2597  * @rx_ring: rx descriptor ring (for a specific queue) to set up
2598  *
2599  * Returns 0 on success, negative on failure
2600  **/
2601 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2602 {
2603         struct device *dev = rx_ring->dev;
2604         int size, desc_len;
2605
2606         size = sizeof(struct igb_buffer) * rx_ring->count;
2607         rx_ring->buffer_info = vzalloc(size);
2608         if (!rx_ring->buffer_info)
2609                 goto err;
2610
2611         desc_len = sizeof(union e1000_adv_rx_desc);
2612
2613         /* Round up to nearest 4K */
2614         rx_ring->size = rx_ring->count * desc_len;
2615         rx_ring->size = ALIGN(rx_ring->size, 4096);
2616
2617         rx_ring->desc = dma_alloc_coherent(dev,
2618                                            rx_ring->size,
2619                                            &rx_ring->dma,
2620                                            GFP_KERNEL);
2621
2622         if (!rx_ring->desc)
2623                 goto err;
2624
2625         rx_ring->next_to_clean = 0;
2626         rx_ring->next_to_use = 0;
2627
2628         return 0;
2629
2630 err:
2631         vfree(rx_ring->buffer_info);
2632         rx_ring->buffer_info = NULL;
2633         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2634                 " ring\n");
2635         return -ENOMEM;
2636 }
2637
2638 /**
2639  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2640  *                                (Descriptors) for all queues
2641  * @adapter: board private structure
2642  *
2643  * Return 0 on success, negative on failure
2644  **/
2645 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2646 {
2647         struct pci_dev *pdev = adapter->pdev;
2648         int i, err = 0;
2649
2650         for (i = 0; i < adapter->num_rx_queues; i++) {
2651                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2652                 if (err) {
2653                         dev_err(&pdev->dev,
2654                                 "Allocation for Rx Queue %u failed\n", i);
2655                         for (i--; i >= 0; i--)
2656                                 igb_free_rx_resources(adapter->rx_ring[i]);
2657                         break;
2658                 }
2659         }
2660
2661         return err;
2662 }
2663
2664 /**
2665  * igb_setup_mrqc - configure the multiple receive queue control registers
2666  * @adapter: Board private structure
2667  **/
2668 static void igb_setup_mrqc(struct igb_adapter *adapter)
2669 {
2670         struct e1000_hw *hw = &adapter->hw;
2671         u32 mrqc, rxcsum;
2672         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2673         union e1000_reta {
2674                 u32 dword;
2675                 u8  bytes[4];
2676         } reta;
2677         static const u8 rsshash[40] = {
2678                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2679                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2680                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2681                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2682
2683         /* Fill out hash function seeds */
2684         for (j = 0; j < 10; j++) {
2685                 u32 rsskey = rsshash[(j * 4)];
2686                 rsskey |= rsshash[(j * 4) + 1] << 8;
2687                 rsskey |= rsshash[(j * 4) + 2] << 16;
2688                 rsskey |= rsshash[(j * 4) + 3] << 24;
2689                 array_wr32(E1000_RSSRK(0), j, rsskey);
2690         }
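        /*
         * Example of the byte packing above: for j = 0 the bytes 0x6d,
         * 0x5a, 0x56, 0xda are assembled little-endian into
         * rsskey = 0xda565a6d before being written to the first RSSRK
         * register.
         */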
2691
2692         num_rx_queues = adapter->rss_queues;
2693
2694         if (adapter->vfs_allocated_count) {
2695                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2696                 switch (hw->mac.type) {
2697                 case e1000_i350:
2698                 case e1000_82580:
2699                         num_rx_queues = 1;
2700                         shift = 0;
2701                         break;
2702                 case e1000_82576:
2703                         shift = 3;
2704                         num_rx_queues = 2;
2705                         break;
2706                 case e1000_82575:
2707                         shift = 2;
2708                         shift2 = 6;
2709                 default:
2710                         break;
2711                 }
2712         } else {
2713                 if (hw->mac.type == e1000_82575)
2714                         shift = 6;
2715         }
2716
2717         for (j = 0; j < (32 * 4); j++) {
2718                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2719                 if (shift2)
2720                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2721                 if ((j & 3) == 3)
2722                         wr32(E1000_RETA(j >> 2), reta.dword);
2723         }
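        /*
         * Example of the RETA fill above: with num_rx_queues = 4 and
         * shift = 0, the 128 table entries cycle 0,1,2,3,0,1,2,3,... and
         * are committed one dword (four entries) at a time, i.e. 32 writes
         * to E1000_RETA(0..31).
         */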
2724
2725         /*
2726          * Disable raw packet checksumming so that RSS hash is placed in
2727          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2728          * offloads as they are enabled by default
2729          */
2730         rxcsum = rd32(E1000_RXCSUM);
2731         rxcsum |= E1000_RXCSUM_PCSD;
2732
2733         if (adapter->hw.mac.type >= e1000_82576)
2734                 /* Enable Receive Checksum Offload for SCTP */
2735                 rxcsum |= E1000_RXCSUM_CRCOFL;
2736
2737         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2738         wr32(E1000_RXCSUM, rxcsum);
2739
2740         /* If VMDq is enabled then we set the appropriate mode for that, else
2741          * we default to RSS so that an RSS hash is calculated per packet even
2742          * if we are only using one queue */
2743         if (adapter->vfs_allocated_count) {
2744                 if (hw->mac.type > e1000_82575) {
2745                         /* Set the default pool for the PF's first queue */
2746                         u32 vtctl = rd32(E1000_VT_CTL);
2747                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2748                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2749                         vtctl |= adapter->vfs_allocated_count <<
2750                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2751                         wr32(E1000_VT_CTL, vtctl);
2752                 }
2753                 if (adapter->rss_queues > 1)
2754                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2755                 else
2756                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2757         } else {
2758                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2759         }
2760         igb_vmm_control(adapter);
2761
2762         /*
2763          * Generate RSS hash based on TCP port numbers and/or
2764          * IPv4/v6 src and dst addresses since UDP cannot be
2765          * hashed reliably due to IP fragmentation
2766          */
2767         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2768                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2769                 E1000_MRQC_RSS_FIELD_IPV6 |
2770                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2771                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2772
2773         wr32(E1000_MRQC, mrqc);
2774 }
2775
2776 /**
2777  * igb_setup_rctl - configure the receive control registers
2778  * @adapter: board private structure
2779  **/
2780 void igb_setup_rctl(struct igb_adapter *adapter)
2781 {
2782         struct e1000_hw *hw = &adapter->hw;
2783         u32 rctl;
2784
2785         rctl = rd32(E1000_RCTL);
2786
2787         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2788         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2789
2790         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2791                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2792
2793         /*
2794          * enable stripping of CRC. It's unlikely this will break BMC
2795          * redirection as it did with e1000. Newer features require
2796          * that the HW strips the CRC.
2797          */
2798         rctl |= E1000_RCTL_SECRC;
2799
2800         /* disable store bad packets and clear size bits. */
2801         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2802
2803         /* enable LPE to prevent packets larger than max_frame_size */
2804         rctl |= E1000_RCTL_LPE;
2805
2806         /* disable queue 0 to prevent tail write w/o re-config */
2807         wr32(E1000_RXDCTL(0), 0);
2808
2809         /* Attention!!!  For SR-IOV PF driver operations you must enable
2810          * queue drop for all VF and PF queues to prevent head of line blocking
2811          * if an untrusted VF does not provide descriptors to hardware.
2812          */
2813         if (adapter->vfs_allocated_count) {
2814                 /* set all queue drop enable bits */
2815                 wr32(E1000_QDE, ALL_QUEUES);
2816         }
2817
2818         wr32(E1000_RCTL, rctl);
2819 }
2820
2821 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2822                                    int vfn)
2823 {
2824         struct e1000_hw *hw = &adapter->hw;
2825         u32 vmolr;
2826
2827         /* if this is a VF rather than the PF, check whether its VLANs are
2828          * enabled and increase the size to make room for the vlan tag */
2829         if (vfn < adapter->vfs_allocated_count &&
2830             adapter->vf_data[vfn].vlans_enabled)
2831                 size += VLAN_TAG_SIZE;
2832
2833         vmolr = rd32(E1000_VMOLR(vfn));
2834         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2835         vmolr |= size | E1000_VMOLR_LPE;
2836         wr32(E1000_VMOLR(vfn), vmolr);
2837
2838         return 0;
2839 }
2840
2841 /**
2842  * igb_rlpml_set - set maximum receive packet size
2843  * @adapter: board private structure
2844  *
2845  * Configure maximum receivable packet size.
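 *
 * For a standard 1500-byte MTU this is typically 1518 bytes (MTU plus
 * Ethernet header and FCS), plus 4 bytes when a VLAN group is
 * configured; with VFs enabled the register is instead set to
 * MAX_JUMBO_FRAME_SIZE as described below.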
2846  **/
2847 static void igb_rlpml_set(struct igb_adapter *adapter)
2848 {
2849         u32 max_frame_size = adapter->max_frame_size;
2850         struct e1000_hw *hw = &adapter->hw;
2851         u16 pf_id = adapter->vfs_allocated_count;
2852
2853         if (adapter->vlgrp)
2854                 max_frame_size += VLAN_TAG_SIZE;
2855
2856         /* if VFs are enabled, set RLPML to the largest possible request
2857          * size and set the per-pool VMOLR RLPML to the size we need */
2858         if (pf_id) {
2859                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2860                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2861         }
2862
2863         wr32(E1000_RLPML, max_frame_size);
2864 }
2865
2866 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2867                                  int vfn, bool aupe)
2868 {
2869         struct e1000_hw *hw = &adapter->hw;
2870         u32 vmolr;
2871
2872         /*
2873          * This register exists only on 82576 and newer, so exit and do
2874          * nothing on older hardware
2875          */
2876         if (hw->mac.type < e1000_82576)
2877                 return;
2878
2879         vmolr = rd32(E1000_VMOLR(vfn));
2880         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2881         if (aupe)
2882                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2883         else
2884                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2885
2886         /* clear all bits that might not be set */
2887         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2888
2889         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2890                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2891         /*
2892          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2893          * multicast packets
2894          */
2895         if (vfn <= adapter->vfs_allocated_count)
2896                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2897
2898         wr32(E1000_VMOLR(vfn), vmolr);
2899 }
2900
2901 /**
2902  * igb_configure_rx_ring - Configure a receive ring after Reset
2903  * @adapter: board private structure
2904  * @ring: receive ring to be configured
2905  *
2906  * Configure the Rx unit of the MAC after a reset.
2907  **/
2908 void igb_configure_rx_ring(struct igb_adapter *adapter,
2909                            struct igb_ring *ring)
2910 {
2911         struct e1000_hw *hw = &adapter->hw;
2912         u64 rdba = ring->dma;
2913         int reg_idx = ring->reg_idx;
2914         u32 srrctl, rxdctl;
2915
2916         /* disable the queue */
2917         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2918         wr32(E1000_RXDCTL(reg_idx),
2919                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2920
2921         /* Set DMA base address registers */
2922         wr32(E1000_RDBAL(reg_idx),
2923              rdba & 0x00000000ffffffffULL);
2924         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2925         wr32(E1000_RDLEN(reg_idx),
2926                        ring->count * sizeof(union e1000_adv_rx_desc));
2927
2928         /* initialize head and tail */
2929         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2930         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2931         writel(0, ring->head);
2932         writel(0, ring->tail);
2933
2934         /* set descriptor configuration */
2935         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2936                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2937                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2938 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2939                 srrctl |= IGB_RXBUFFER_16384 >>
2940                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2941 #else
2942                 srrctl |= (PAGE_SIZE / 2) >>
2943                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2944 #endif
2945                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2946         } else {
2947                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2948                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2949                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2950         }
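        /*
         * Illustrative example: a one-buffer configuration with a
         * 2048-byte rx_buffer_len yields ALIGN(2048, 1024) >> 10 = 2
         * above, i.e. the packet buffer size in 1 KB units (assuming a
         * BSIZEPKT shift of 10).
         */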
2951         if (hw->mac.type == e1000_82580)
2952                 srrctl |= E1000_SRRCTL_TIMESTAMP;
2953         /* Only set Drop Enable if we are supporting multiple queues */
2954         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2955                 srrctl |= E1000_SRRCTL_DROP_EN;
2956
2957         wr32(E1000_SRRCTL(reg_idx), srrctl);
2958
2959         /* set filtering for VMDQ pools */
2960         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2961
2962         /* enable receive descriptor fetching */
2963         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2964         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2965         rxdctl &= 0xFFF00000;
2966         rxdctl |= IGB_RX_PTHRESH;
2967         rxdctl |= IGB_RX_HTHRESH << 8;
2968         rxdctl |= IGB_RX_WTHRESH << 16;
2969         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2970 }
2971
2972 /**
2973  * igb_configure_rx - Configure receive Unit after Reset
2974  * @adapter: board private structure
2975  *
2976  * Configure the Rx unit of the MAC after a reset.
2977  **/
2978 static void igb_configure_rx(struct igb_adapter *adapter)
2979 {
2980         int i;
2981
2982         /* set UTA to appropriate mode */
2983         igb_set_uta(adapter);
2984
2985         /* set the correct pool for the PF default MAC address in entry 0 */
2986         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2987                          adapter->vfs_allocated_count);
2988
2989         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2990          * the Base and Length of the Rx Descriptor Ring */
2991         for (i = 0; i < adapter->num_rx_queues; i++)
2992                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2993 }
2994
2995 /**
2996  * igb_free_tx_resources - Free Tx Resources per Queue
2997  * @tx_ring: Tx descriptor ring for a specific queue
2998  *
2999  * Free all transmit software resources
3000  **/
3001 void igb_free_tx_resources(struct igb_ring *tx_ring)
3002 {
3003         igb_clean_tx_ring(tx_ring);
3004
3005         vfree(tx_ring->buffer_info);
3006         tx_ring->buffer_info = NULL;
3007
3008         /* if not set, then don't free */
3009         if (!tx_ring->desc)
3010                 return;
3011
3012         dma_free_coherent(tx_ring->dev, tx_ring->size,
3013                           tx_ring->desc, tx_ring->dma);
3014
3015         tx_ring->desc = NULL;
3016 }
3017
3018 /**
3019  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3020  * @adapter: board private structure
3021  *
3022  * Free all transmit software resources
3023  **/
3024 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3025 {
3026         int i;
3027
3028         for (i = 0; i < adapter->num_tx_queues; i++)
3029                 igb_free_tx_resources(adapter->tx_ring[i]);
3030 }
3031
3032 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3033                                     struct igb_buffer *buffer_info)
3034 {
3035         if (buffer_info->dma) {
3036                 if (buffer_info->mapped_as_page)
3037                         dma_unmap_page(tx_ring->dev,
3038                                         buffer_info->dma,
3039                                         buffer_info->length,
3040                                         DMA_TO_DEVICE);
3041                 else
3042                         dma_unmap_single(tx_ring->dev,
3043                                         buffer_info->dma,
3044                                         buffer_info->length,
3045                                         DMA_TO_DEVICE);
3046                 buffer_info->dma = 0;
3047         }
3048         if (buffer_info->skb) {
3049                 dev_kfree_skb_any(buffer_info->skb);
3050                 buffer_info->skb = NULL;
3051         }
3052         buffer_info->time_stamp = 0;
3053         buffer_info->length = 0;
3054         buffer_info->next_to_watch = 0;
3055         buffer_info->mapped_as_page = false;
3056 }
3057
3058 /**
3059  * igb_clean_tx_ring - Free Tx Buffers
3060  * @tx_ring: ring to be cleaned
3061  **/
3062 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3063 {
3064         struct igb_buffer *buffer_info;
3065         unsigned long size;
3066         unsigned int i;
3067
3068         if (!tx_ring->buffer_info)
3069                 return;
3070
3071         /* Free all the Tx ring sk_buffs */
3072         for (i = 0; i < tx_ring->count; i++) {
3073                 buffer_info = &tx_ring->buffer_info[i];
3074                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3075         }
3076
3077         size = sizeof(struct igb_buffer) * tx_ring->count;
3078         memset(tx_ring->buffer_info, 0, size);
3079
3080         /* Zero out the descriptor ring */
3081         memset(tx_ring->desc, 0, tx_ring->size);
3082
3083         tx_ring->next_to_use = 0;
3084         tx_ring->next_to_clean = 0;
3085 }
3086
3087 /**
3088  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3089  * @adapter: board private structure
3090  **/
3091 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3092 {
3093         int i;
3094
3095         for (i = 0; i < adapter->num_tx_queues; i++)
3096                 igb_clean_tx_ring(adapter->tx_ring[i]);
3097 }
3098
3099 /**
3100  * igb_free_rx_resources - Free Rx Resources
3101  * @rx_ring: ring to clean the resources from
3102  *
3103  * Free all receive software resources
3104  **/
3105 void igb_free_rx_resources(struct igb_ring *rx_ring)
3106 {
3107         igb_clean_rx_ring(rx_ring);
3108
3109         vfree(rx_ring->buffer_info);
3110         rx_ring->buffer_info = NULL;
3111
3112         /* if not set, then don't free */
3113         if (!rx_ring->desc)
3114                 return;
3115
3116         dma_free_coherent(rx_ring->dev, rx_ring->size,
3117                           rx_ring->desc, rx_ring->dma);
3118
3119         rx_ring->desc = NULL;
3120 }
3121
3122 /**
3123  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3124  * @adapter: board private structure
3125  *
3126  * Free all receive software resources
3127  **/
3128 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3129 {
3130         int i;
3131
3132         for (i = 0; i < adapter->num_rx_queues; i++)
3133                 igb_free_rx_resources(adapter->rx_ring[i]);
3134 }
3135
3136 /**
3137  * igb_clean_rx_ring - Free Rx Buffers per Queue
3138  * @rx_ring: ring to free buffers from
3139  **/
3140 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3141 {
3142         struct igb_buffer *buffer_info;
3143         unsigned long size;
3144         unsigned int i;
3145
3146         if (!rx_ring->buffer_info)
3147                 return;
3148
3149         /* Free all the Rx ring sk_buffs */
3150         for (i = 0; i < rx_ring->count; i++) {
3151                 buffer_info = &rx_ring->buffer_info[i];
3152                 if (buffer_info->dma) {
3153                         dma_unmap_single(rx_ring->dev,
3154                                          buffer_info->dma,
3155                                          rx_ring->rx_buffer_len,
3156                                          DMA_FROM_DEVICE);
3157                         buffer_info->dma = 0;
3158                 }
3159
3160                 if (buffer_info->skb) {
3161                         dev_kfree_skb(buffer_info->skb);
3162                         buffer_info->skb = NULL;
3163                 }
3164                 if (buffer_info->page_dma) {
3165                         dma_unmap_page(rx_ring->dev,
3166                                        buffer_info->page_dma,
3167                                        PAGE_SIZE / 2,
3168                                        DMA_FROM_DEVICE);
3169                         buffer_info->page_dma = 0;
3170                 }
3171                 if (buffer_info->page) {
3172                         put_page(buffer_info->page);
3173                         buffer_info->page = NULL;
3174                         buffer_info->page_offset = 0;
3175                 }
3176         }
3177
3178         size = sizeof(struct igb_buffer) * rx_ring->count;
3179         memset(rx_ring->buffer_info, 0, size);
3180
3181         /* Zero out the descriptor ring */
3182         memset(rx_ring->desc, 0, rx_ring->size);
3183
3184         rx_ring->next_to_clean = 0;
3185         rx_ring->next_to_use = 0;
3186 }
3187
3188 /**
3189  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3190  * @adapter: board private structure
3191  **/
3192 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3193 {
3194         int i;
3195
3196         for (i = 0; i < adapter->num_rx_queues; i++)
3197                 igb_clean_rx_ring(adapter->rx_ring[i]);
3198 }
3199
3200 /**
3201  * igb_set_mac - Change the Ethernet Address of the NIC
3202  * @netdev: network interface device structure
3203  * @p: pointer to an address structure
3204  *
3205  * Returns 0 on success, negative on failure
3206  **/
3207 static int igb_set_mac(struct net_device *netdev, void *p)
3208 {
3209         struct igb_adapter *adapter = netdev_priv(netdev);
3210         struct e1000_hw *hw = &adapter->hw;
3211         struct sockaddr *addr = p;
3212
3213         if (!is_valid_ether_addr(addr->sa_data))
3214                 return -EADDRNOTAVAIL;
3215
3216         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3217         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3218
3219         /* set the correct pool for the new PF MAC address in entry 0 */
3220         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3221                          adapter->vfs_allocated_count);
3222
3223         return 0;
3224 }
3225
3226 /**
3227  * igb_write_mc_addr_list - write multicast addresses to MTA
3228  * @netdev: network interface device structure
3229  *
3230  * Writes multicast address list to the MTA hash table.
3231  * Returns: -ENOMEM on failure
3232  *                0 on no addresses written
3233  *                X on writing X addresses to MTA
3234  **/
3235 static int igb_write_mc_addr_list(struct net_device *netdev)
3236 {
3237         struct igb_adapter *adapter = netdev_priv(netdev);
3238         struct e1000_hw *hw = &adapter->hw;
3239         struct netdev_hw_addr *ha;
3240         u8  *mta_list;
3241         int i;
3242
3243         if (netdev_mc_empty(netdev)) {
3244                 /* nothing to program, so clear mc list */
3245                 igb_update_mc_addr_list(hw, NULL, 0);
3246                 igb_restore_vf_multicasts(adapter);
3247                 return 0;
3248         }
3249
3250         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3251         if (!mta_list)
3252                 return -ENOMEM;
3253
3254         /* The shared function expects a packed array of only addresses. */
3255         i = 0;
3256         netdev_for_each_mc_addr(ha, netdev)
3257                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3258
3259         igb_update_mc_addr_list(hw, mta_list, i);
3260         kfree(mta_list);
3261
3262         return netdev_mc_count(netdev);
3263 }
3264
3265 /**
3266  * igb_write_uc_addr_list - write unicast addresses to RAR table
3267  * @netdev: network interface device structure
3268  *
3269  * Writes unicast address list to the RAR table.
3270  * Returns: -ENOMEM on failure/insufficient address space
3271  *                0 on no addresses written
3272  *                X on writing X addresses to the RAR table
3273  **/
3274 static int igb_write_uc_addr_list(struct net_device *netdev)
3275 {
3276         struct igb_adapter *adapter = netdev_priv(netdev);
3277         struct e1000_hw *hw = &adapter->hw;
3278         unsigned int vfn = adapter->vfs_allocated_count;
3279         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3280         int count = 0;
3281
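        /*
         * Entry 0 holds the PF default MAC and VF MAC addresses are
         * stored from the top of the RAR table downwards, which is why
         * only rar_entry_count - (vfn + 1) entries remain for secondary
         * unicast addresses.
         */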
3282         /* return -ENOMEM if the RAR table cannot hold all requested addresses */
3283         if (netdev_uc_count(netdev) > rar_entries)
3284                 return -ENOMEM;
3285
3286         if (!netdev_uc_empty(netdev) && rar_entries) {
3287                 struct netdev_hw_addr *ha;
3288
3289                 netdev_for_each_uc_addr(ha, netdev) {
3290                         if (!rar_entries)
3291                                 break;
3292                         igb_rar_set_qsel(adapter, ha->addr,
3293                                          rar_entries--,
3294                                          vfn);
3295                         count++;
3296                 }
3297         }
3298         /* clear the remaining RAR entries in reverse order to avoid write combining */
3299         for (; rar_entries > 0 ; rar_entries--) {
3300                 wr32(E1000_RAH(rar_entries), 0);
3301                 wr32(E1000_RAL(rar_entries), 0);
3302         }
3303         wrfl();
3304
3305         return count;
3306 }
3307
3308 /**
3309  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3310  * @netdev: network interface device structure
3311  *
3312  * The set_rx_mode entry point is called whenever the unicast or multicast
3313  * address lists or the network interface flags are updated.  This routine is
3314  * responsible for configuring the hardware for proper unicast, multicast,
3315  * promiscuous mode, and all-multi behavior.
3316  **/
3317 static void igb_set_rx_mode(struct net_device *netdev)
3318 {
3319         struct igb_adapter *adapter = netdev_priv(netdev);
3320         struct e1000_hw *hw = &adapter->hw;
3321         unsigned int vfn = adapter->vfs_allocated_count;
3322         u32 rctl, vmolr = 0;
3323         int count;
3324
3325         /* Check for Promiscuous and All Multicast modes */
3326         rctl = rd32(E1000_RCTL);
3327
3328         /* clear the affected bits */
3329         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3330
3331         if (netdev->flags & IFF_PROMISC) {
3332                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3333                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3334         } else {
3335                 if (netdev->flags & IFF_ALLMULTI) {
3336                         rctl |= E1000_RCTL_MPE;
3337                         vmolr |= E1000_VMOLR_MPME;
3338                 } else {
3339                         /*
3340                          * Write addresses to the MTA; if the attempt fails,
3341                          * fall back to multicast promiscuous mode so that we
3342                          * can at least receive multicast traffic
3343                          */
3344                         count = igb_write_mc_addr_list(netdev);
3345                         if (count < 0) {
3346                                 rctl |= E1000_RCTL_MPE;
3347                                 vmolr |= E1000_VMOLR_MPME;
3348                         } else if (count) {
3349                                 vmolr |= E1000_VMOLR_ROMPE;
3350                         }
3351                 }
3352                 /*
3353                  * Write addresses to the available RAR registers; if there is
3354                  * not enough space to store all of them, enable unicast
3355                  * promiscuous mode
3356                  */
3357                 count = igb_write_uc_addr_list(netdev);
3358                 if (count < 0) {
3359                         rctl |= E1000_RCTL_UPE;
3360                         vmolr |= E1000_VMOLR_ROPE;
3361                 }
3362                 rctl |= E1000_RCTL_VFE;
3363         }
3364         wr32(E1000_RCTL, rctl);
3365
3366         /*
3367          * In order to support SR-IOV and eventually VMDq it is necessary to set
3368          * the VMOLR to enable the appropriate modes.  Without this workaround
3369          * we will have issues with VLAN tag stripping not being done for frames
3370          * that are only arriving because we are the default pool
3371          */
3372         if (hw->mac.type < e1000_82576)
3373                 return;
3374
3375         vmolr |= rd32(E1000_VMOLR(vfn)) &
3376                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3377         wr32(E1000_VMOLR(vfn), vmolr);
3378         igb_restore_vf_multicasts(adapter);
3379 }
3380
3381 static void igb_check_wvbr(struct igb_adapter *adapter)
3382 {
3383         struct e1000_hw *hw = &adapter->hw;
3384         u32 wvbr = 0;
3385
3386         switch (hw->mac.type) {
3387         case e1000_82576:
3388         case e1000_i350:
3389                 if (!(wvbr = rd32(E1000_WVBR)))
3390                         return;
3391                 break;
3392         default:
3393                 break;
3394         }
3395
3396         adapter->wvbr |= wvbr;
3397 }
3398
3399 #define IGB_STAGGERED_QUEUE_OFFSET 8
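/*
 * WVBR reports spoof events per queue: VF j's first queue maps to bit j
 * and its second queue to bit j + IGB_STAGGERED_QUEUE_OFFSET, which is
 * what igb_spoof_check() below tests for.
 */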
3400
3401 static void igb_spoof_check(struct igb_adapter *adapter)
3402 {
3403         int j;
3404
3405         if (!adapter->wvbr)
3406                 return;
3407
3408         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3409                 if (adapter->wvbr & (1 << j) ||
3410                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3411                         dev_warn(&adapter->pdev->dev,
3412                                 "Spoof event(s) detected on VF %d\n", j);
3413                         adapter->wvbr &=
3414                                 ~((1 << j) |
3415                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3416                 }
3417         }
3418 }
3419
3420 /* Need to wait a few seconds after link up to get diagnostic information from
3421  * the phy */
3422 static void igb_update_phy_info(unsigned long data)
3423 {
3424         struct igb_adapter *adapter = (struct igb_adapter *) data;
3425         igb_get_phy_info(&adapter->hw);
3426 }
3427
3428 /**
3429  * igb_has_link - check shared code for link and determine up/down
3430  * @adapter: pointer to driver private info
3431  **/
3432 bool igb_has_link(struct igb_adapter *adapter)
3433 {
3434         struct e1000_hw *hw = &adapter->hw;
3435         bool link_active = false;
3436         s32 ret_val = 0;
3437
3438         /* get_link_status is set on an LSC (link status change) or
3439          * rx sequence error interrupt and is cleared by check_for_link
3440          * once link is established; this logic applies to copper
3441          * adapters ONLY
3442          */
3443         switch (hw->phy.media_type) {
3444         case e1000_media_type_copper:
3445                 if (hw->mac.get_link_status) {
3446                         ret_val = hw->mac.ops.check_for_link(hw);
3447                         link_active = !hw->mac.get_link_status;
3448                 } else {
3449                         link_active = true;
3450                 }
3451                 break;
3452         case e1000_media_type_internal_serdes:
3453                 ret_val = hw->mac.ops.check_for_link(hw);
3454                 link_active = hw->mac.serdes_has_link;
3455                 break;
3456         default:
3457         case e1000_media_type_unknown:
3458                 break;
3459         }
3460
3461         return link_active;
3462 }
3463
3464 /**
3465  * igb_watchdog - Timer Call-back
3466  * @data: pointer to adapter cast into an unsigned long
3467  **/
3468 static void igb_watchdog(unsigned long data)
3469 {
3470         struct igb_adapter *adapter = (struct igb_adapter *)data;
3471         /* Do the rest outside of interrupt context */
3472         schedule_work(&adapter->watchdog_task);
3473 }
3474
3475 static void igb_watchdog_task(struct work_struct *work)
3476 {
3477         struct igb_adapter *adapter = container_of(work,
3478                                                    struct igb_adapter,
3479                                                    watchdog_task);
3480         struct e1000_hw *hw = &adapter->hw;
3481         struct net_device *netdev = adapter->netdev;
3482         u32 link;
3483         int i;
3484
3485         link = igb_has_link(adapter);
3486         if (link) {
3487                 if (!netif_carrier_ok(netdev)) {
3488                         u32 ctrl;
3489                         hw->mac.ops.get_speed_and_duplex(hw,
3490                                                          &adapter->link_speed,
3491                                                          &adapter->link_duplex);
3492
3493                         ctrl = rd32(E1000_CTRL);
3494                         /* Link status message must follow this format */
3495                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3496                                  "Flow Control: %s\n",
3497                                netdev->name,
3498                                adapter->link_speed,
3499                                adapter->link_duplex == FULL_DUPLEX ?
3500                                  "Full Duplex" : "Half Duplex",
3501                                ((ctrl & E1000_CTRL_TFCE) &&
3502                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3503                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3504                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3505
3506                         /* adjust timeout factor according to speed/duplex */
3507                         adapter->tx_timeout_factor = 1;
3508                         switch (adapter->link_speed) {
3509                         case SPEED_10:
3510                                 adapter->tx_timeout_factor = 14;
3511                                 break;
3512                         case SPEED_100:
3513                                 /* maybe add some timeout factor ? */
3514                                 break;
3515                         }
3516
3517                         netif_carrier_on(netdev);
3518
3519                         igb_ping_all_vfs(adapter);
3520                         igb_check_vf_rate_limit(adapter);
3521
3522                         /* link state has changed, schedule phy info update */
3523                         if (!test_bit(__IGB_DOWN, &adapter->state))
3524                                 mod_timer(&adapter->phy_info_timer,
3525                                           round_jiffies(jiffies + 2 * HZ));
3526                 }
3527         } else {
3528                 if (netif_carrier_ok(netdev)) {
3529                         adapter->link_speed = 0;
3530                         adapter->link_duplex = 0;
3531                         /* Link status message must follow this format */
3532                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3533                                netdev->name);
3534                         netif_carrier_off(netdev);
3535
3536                         igb_ping_all_vfs(adapter);
3537
3538                         /* link state has changed, schedule phy info update */
3539                         if (!test_bit(__IGB_DOWN, &adapter->state))
3540                                 mod_timer(&adapter->phy_info_timer,
3541                                           round_jiffies(jiffies + 2 * HZ));
3542                 }
3543         }
3544
3545         spin_lock(&adapter->stats64_lock);
3546         igb_update_stats(adapter, &adapter->stats64);
3547         spin_unlock(&adapter->stats64_lock);
3548
3549         for (i = 0; i < adapter->num_tx_queues; i++) {
3550                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3551                 if (!netif_carrier_ok(netdev)) {
3552                         /* We've lost link, so the controller stops DMA,
3553                          * but we've got queued Tx work that's never going
3554                          * to get done, so reset controller to flush Tx.
3555                          * (Do the reset outside of interrupt context). */
3556                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3557                                 adapter->tx_timeout_count++;
3558                                 schedule_work(&adapter->reset_task);
3559                                 /* return immediately since reset is imminent */
3560                                 return;
3561                         }
3562                 }
3563
3564                 /* Force detection of hung controller every watchdog period */
3565                 tx_ring->detect_tx_hung = true;
3566         }
3567
3568         /* Cause software interrupt to ensure rx ring is cleaned */
3569         if (adapter->msix_entries) {
3570                 u32 eics = 0;
3571                 for (i = 0; i < adapter->num_q_vectors; i++) {
3572                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3573                         eics |= q_vector->eims_value;
3574                 }
3575                 wr32(E1000_EICS, eics);
3576         } else {
3577                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3578         }
3579
3580         igb_spoof_check(adapter);
3581
3582         /* Reset the timer */
3583         if (!test_bit(__IGB_DOWN, &adapter->state))
3584                 mod_timer(&adapter->watchdog_timer,
3585                           round_jiffies(jiffies + 2 * HZ));
3586 }
3587
3588 enum latency_range {
3589         lowest_latency = 0,
3590         low_latency = 1,
3591         bulk_latency = 2,
3592         latency_invalid = 255
3593 };
3594
3595 /**
3596  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3597  *
3598  *      Stores a new ITR value based strictly on packet size.  This
3599  *      algorithm is less sophisticated than that used in igb_update_itr,
3600  *      due to the difficulty of synchronizing statistics across multiple
3601  *      receive rings.  The divisors and thresholds used by this function
3602  *      were determined based on theoretical maximum wire speed and testing
3603  *      data, in order to minimize response time while increasing bulk
3604  *      throughput.
3605  *      This functionality is controlled by the InterruptThrottleRate module
3606  *      parameter (see igb_param.c)
3607  *      NOTE:  This function is called only when operating in a multiqueue
3608  *             receive environment.
3609  * @q_vector: pointer to q_vector
3610  **/
3611 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3612 {
3613         int new_val = q_vector->itr_val;
3614         int avg_wire_size = 0;
3615         struct igb_adapter *adapter = q_vector->adapter;
3616         struct igb_ring *ring;
3617         unsigned int packets;
3618
3619         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3620          * ints/sec - an ITR value of 976 (roughly 244 usecs).
3621          */
3622         if (adapter->link_speed != SPEED_1000) {
3623                 new_val = 976;
3624                 goto set_itr_val;
3625         }
3626
3627         ring = q_vector->rx_ring;
3628         if (ring) {
3629                 packets = ACCESS_ONCE(ring->total_packets);
3630
3631                 if (packets)
3632                         avg_wire_size = ring->total_bytes / packets;
3633         }
3634
3635         ring = q_vector->tx_ring;
3636         if (ring) {
3637                 packets = ACCESS_ONCE(ring->total_packets);
3638
3639                 if (packets)
3640                         avg_wire_size = max_t(u32, avg_wire_size,
3641                                               ring->total_bytes / packets);
3642         }
3643
3644         /* if avg_wire_size isn't set no work was done */
3645         if (!avg_wire_size)
3646                 goto clear_counts;
3647
3648         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3649         avg_wire_size += 24;
3650
3651         /* Don't starve jumbo frames */
3652         avg_wire_size = min(avg_wire_size, 3000);
3653
3654         /* Give a little boost to mid-size frames */
3655         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3656                 new_val = avg_wire_size / 3;
3657         else
3658                 new_val = avg_wire_size / 2;
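        /*
         * Illustrative example: a 600-byte average falls in the mid-size
         * range, giving new_val = 200, roughly 50 usecs between
         * interrupts (~20K ints/sec) assuming 0.25 usec ITR units.
         */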
3659
3660         /* when in itr mode 3 do not exceed 20K ints/sec */
3661         if (adapter->rx_itr_setting == 3 && new_val < 196)
3662                 new_val = 196;
3663
3664 set_itr_val:
3665         if (new_val != q_vector->itr_val) {
3666                 q_vector->itr_val = new_val;
3667                 q_vector->set_itr = 1;
3668         }
3669 clear_counts:
3670         if (q_vector->rx_ring) {
3671                 q_vector->rx_ring->total_bytes = 0;
3672                 q_vector->rx_ring->total_packets = 0;
3673         }
3674         if (q_vector->tx_ring) {
3675                 q_vector->tx_ring->total_bytes = 0;
3676                 q_vector->tx_ring->total_packets = 0;
3677         }
3678 }
3679
3680 /**
3681  * igb_update_itr - update the dynamic ITR value based on statistics
3682  *      Stores a new ITR value based on packets and byte
3683  *      counts during the last interrupt.  The advantage of per interrupt
3684  *      computation is faster updates and more accurate ITR for the current
3685  *      traffic pattern.  Constants in this function were computed
3686  *      based on theoretical maximum wire speed and thresholds were set based
3687  *      on testing data as well as attempting to minimize response time
3688  *      while increasing bulk throughput.
3689  *      This functionality is controlled by the InterruptThrottleRate module
3690  *      parameter (see igb_param.c)
3691  *      NOTE:  These calculations are only valid when operating in a single-
3692  *             queue environment.
3693  * @adapter: pointer to adapter
3694  * @itr_setting: current q_vector->itr_val
3695  * @packets: the number of packets during this measurement interval
3696  * @bytes: the number of bytes during this measurement interval
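 *
 *      Worked example: in the low_latency state, 20 packets totalling
 *      30000 bytes average 1500 bytes/packet, above the 1200-byte
 *      threshold, so the estimate moves to bulk_latency.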
3697  **/
3698 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3699                                    int packets, int bytes)
3700 {
3701         unsigned int retval = itr_setting;
3702
3703         if (packets == 0)
3704                 goto update_itr_done;
3705
3706         switch (itr_setting) {
3707         case lowest_latency:
3708                 /* handle TSO and jumbo frames */
3709                 if (bytes/packets > 8000)
3710                         retval = bulk_latency;
3711                 else if ((packets < 5) && (bytes > 512))
3712                         retval = low_latency;
3713                 break;
3714         case low_latency:  /* 50 usec aka 20000 ints/s */
3715                 if (bytes > 10000) {
3716                         /* this if handles the TSO accounting */
3717                         if (bytes/packets > 8000) {
3718                                 retval = bulk_latency;
3719                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3720                                 retval = bulk_latency;
3721                         } else if (packets > 35) {
3722                                 retval = lowest_latency;
3723                         }
3724                 } else if (bytes/packets > 2000) {
3725                         retval = bulk_latency;
3726                 } else if (packets <= 2 && bytes < 512) {
3727                         retval = lowest_latency;
3728                 }
3729                 break;
3730         case bulk_latency: /* 250 usec aka 4000 ints/s */
3731                 if (bytes > 25000) {
3732                         if (packets > 35)
3733                                 retval = low_latency;
3734                 } else if (bytes < 1500) {
3735                         retval = low_latency;
3736                 }
3737                 break;
3738         }
3739
3740 update_itr_done:
3741         return retval;
3742 }
3743
3744 static void igb_set_itr(struct igb_adapter *adapter)
3745 {
3746         struct igb_q_vector *q_vector = adapter->q_vector[0];
3747         u16 current_itr;
3748         u32 new_itr = q_vector->itr_val;
3749
3750         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3751         if (adapter->link_speed != SPEED_1000) {
3752                 current_itr = 0;
3753                 new_itr = 4000;
3754                 goto set_itr_now;
3755         }
3756
3757         adapter->rx_itr = igb_update_itr(adapter,
3758                                     adapter->rx_itr,
3759                                     q_vector->rx_ring->total_packets,
3760                                     q_vector->rx_ring->total_bytes);
3761
3762         adapter->tx_itr = igb_update_itr(adapter,
3763                                     adapter->tx_itr,
3764                                     q_vector->tx_ring->total_packets,
3765                                     q_vector->tx_ring->total_bytes);
3766         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3767
3768         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3769         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3770                 current_itr = low_latency;
3771
3772         switch (current_itr) {
3773         /* counts and packets in update_itr are dependent on these numbers */
3774         case lowest_latency:
3775                 new_itr = 56;  /* aka 70,000 ints/sec */
3776                 break;
3777         case low_latency:
3778                 new_itr = 196; /* aka 20,000 ints/sec */
3779                 break;
3780         case bulk_latency:
3781                 new_itr = 980; /* aka 4,000 ints/sec */
3782                 break;
3783         default:
3784                 break;
3785         }
3786
3787 set_itr_now:
3788         q_vector->rx_ring->total_bytes = 0;
3789         q_vector->rx_ring->total_packets = 0;
3790         q_vector->tx_ring->total_bytes = 0;
3791         q_vector->tx_ring->total_packets = 0;
3792
3793         if (new_itr != q_vector->itr_val) {
3794                 /* this attempts to bias the interrupt rate towards Bulk
3795                  * by adding intermediate steps when interrupt rate is
3796                  * increasing */
3797                 new_itr = new_itr > q_vector->itr_val ?
3798                              max((new_itr * q_vector->itr_val) /
3799                                  (new_itr + (q_vector->itr_val >> 2)),
3800                                  new_itr) :
3801                              new_itr;
3802                 /* Don't write the value here; it resets the adapter's
3803                  * internal timer, and causes us to delay far longer than
3804                  * we should between interrupts.  Instead, we write the ITR
3805                  * value at the beginning of the next interrupt so the timing
3806                  * ends up being correct.
3807                  */
3808                 q_vector->itr_val = new_itr;
3809                 q_vector->set_itr = 1;
3810         }
3811 }
3812
3813 #define IGB_TX_FLAGS_CSUM               0x00000001
3814 #define IGB_TX_FLAGS_VLAN               0x00000002
3815 #define IGB_TX_FLAGS_TSO                0x00000004
3816 #define IGB_TX_FLAGS_IPV4               0x00000008
3817 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3818 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3819 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
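/*
 * The upper 16 bits of tx_flags carry the VLAN tag; e.g. a tag of 5 is
 * stored as (5 << IGB_TX_FLAGS_VLAN_SHIFT) by the transmit path below.
 */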
3820
3821 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3822                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3823 {
3824         struct e1000_adv_tx_context_desc *context_desc;
3825         unsigned int i;
3826         int err;
3827         struct igb_buffer *buffer_info;
3828         u32 info = 0, tu_cmd = 0;
3829         u32 mss_l4len_idx;
3830         u8 l4len;
3831
3832         if (skb_header_cloned(skb)) {
3833                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3834                 if (err)
3835                         return err;
3836         }
3837
3838         l4len = tcp_hdrlen(skb);
3839         *hdr_len += l4len;
3840
3841         if (skb->protocol == htons(ETH_P_IP)) {
3842                 struct iphdr *iph = ip_hdr(skb);
3843                 iph->tot_len = 0;
3844                 iph->check = 0;
3845                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3846                                                          iph->daddr, 0,
3847                                                          IPPROTO_TCP,
3848                                                          0);
3849         } else if (skb_is_gso_v6(skb)) {
3850                 ipv6_hdr(skb)->payload_len = 0;
3851                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3852                                                        &ipv6_hdr(skb)->daddr,
3853                                                        0, IPPROTO_TCP, 0);
3854         }
3855
3856         i = tx_ring->next_to_use;
3857
3858         buffer_info = &tx_ring->buffer_info[i];
3859         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3860         /* VLAN MACLEN IPLEN */
3861         if (tx_flags & IGB_TX_FLAGS_VLAN)
3862                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3863         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3864         *hdr_len += skb_network_offset(skb);
3865         info |= skb_network_header_len(skb);
3866         *hdr_len += skb_network_header_len(skb);
3867         context_desc->vlan_macip_lens = cpu_to_le32(info);
3868
3869         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3870         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3871
3872         if (skb->protocol == htons(ETH_P_IP))
3873                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3874         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3875
3876         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3877
3878         /* MSS L4LEN IDX */
3879         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3880         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
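        /*
         * Illustrative example: an MSS of 1448 with a 20-byte TCP header
         * packs as (1448 << 16) | (20 << 8), assuming the usual shift
         * values of 16 for MSS and 8 for L4LEN.
         */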
3881
3882         /* For 82575, context index must be unique per ring. */
3883         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3884                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3885
3886         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3887         context_desc->seqnum_seed = 0;
3888
3889         buffer_info->time_stamp = jiffies;
3890         buffer_info->next_to_watch = i;
3891         buffer_info->dma = 0;
3892         i++;
3893         if (i == tx_ring->count)
3894                 i = 0;
3895
3896         tx_ring->next_to_use = i;
3897
3898         return true;
3899 }
3900
3901 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3902                                    struct sk_buff *skb, u32 tx_flags)
3903 {
3904         struct e1000_adv_tx_context_desc *context_desc;
3905         struct device *dev = tx_ring->dev;
3906         struct igb_buffer *buffer_info;
3907         u32 info = 0, tu_cmd = 0;
3908         unsigned int i;
3909
3910         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3911             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3912                 i = tx_ring->next_to_use;
3913                 buffer_info = &tx_ring->buffer_info[i];
3914                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3915
3916                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3917                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3918
3919                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3920                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3921                         info |= skb_network_header_len(skb);
3922
3923                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3924
3925                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3926
3927                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3928                         __be16 protocol;
3929
3930                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3931                                 const struct vlan_ethhdr *vhdr =
3932                                           (const struct vlan_ethhdr *)skb->data;
3933
3934                                 protocol = vhdr->h_vlan_encapsulated_proto;
3935                         } else {
3936                                 protocol = skb->protocol;
3937                         }
3938
3939                         switch (protocol) {
3940                         case cpu_to_be16(ETH_P_IP):
3941                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3942                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3943                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3944                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3945                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3946                                 break;
3947                         case cpu_to_be16(ETH_P_IPV6):
3948                                 /* XXX what about other V6 headers?? */
3949                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3950                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3951                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3952                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3953                                 break;
3954                         default:
3955                                 if (unlikely(net_ratelimit()))
3956                                         dev_warn(dev,
3957                                             "partial checksum but proto=%x!\n",
3958                                             ntohs(skb->protocol));
3959                                 break;
3960                         }
3961                 }
3962
3963                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3964                 context_desc->seqnum_seed = 0;
3965                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3966                         context_desc->mss_l4len_idx =
3967                                 cpu_to_le32(tx_ring->reg_idx << 4);
3968
3969                 buffer_info->time_stamp = jiffies;
3970                 buffer_info->next_to_watch = i;
3971                 buffer_info->dma = 0;
3972
3973                 i++;
3974                 if (i == tx_ring->count)
3975                         i = 0;
3976                 tx_ring->next_to_use = i;
3977
3978                 return true;
3979         }
3980         return false;
3981 }
3982
3983 #define IGB_MAX_TXD_PWR 16
3984 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
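/*
 * Each data descriptor can carry at most 64KB (1 << 16 bytes);
 * igb_tx_map_adv() below asserts that no single mapped chunk reaches
 * this limit.
 */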
3985
3986 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3987                                  unsigned int first)
3988 {
3989         struct igb_buffer *buffer_info;
3990         struct device *dev = tx_ring->dev;
3991         unsigned int hlen = skb_headlen(skb);
3992         unsigned int count = 0, i;
3993         unsigned int f;
3994         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3995
3996         i = tx_ring->next_to_use;
3997
3998         buffer_info = &tx_ring->buffer_info[i];
3999         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4000         buffer_info->length = hlen;
4001         /* set time_stamp *before* dma to help avoid a possible race */
4002         buffer_info->time_stamp = jiffies;
4003         buffer_info->next_to_watch = i;
4004         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4005                                           DMA_TO_DEVICE);
4006         if (dma_mapping_error(dev, buffer_info->dma))
4007                 goto dma_error;
4008
4009         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4010                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4011                 unsigned int len = frag->size;
4012
4013                 count++;
4014                 i++;
4015                 if (i == tx_ring->count)
4016                         i = 0;
4017
4018                 buffer_info = &tx_ring->buffer_info[i];
4019                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4020                 buffer_info->length = len;
4021                 buffer_info->time_stamp = jiffies;
4022                 buffer_info->next_to_watch = i;
4023                 buffer_info->mapped_as_page = true;
4024                 buffer_info->dma = dma_map_page(dev,
4025                                                 frag->page,
4026                                                 frag->page_offset,
4027                                                 len,
4028                                                 DMA_TO_DEVICE);
4029                 if (dma_mapping_error(dev, buffer_info->dma))
4030                         goto dma_error;
4031
4032         }
4033
4034         tx_ring->buffer_info[i].skb = skb;
4035         tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4036         /* multiply data chunks by size of headers */
4037         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4038         tx_ring->buffer_info[i].gso_segs = gso_segs;
4039         tx_ring->buffer_info[first].next_to_watch = i;
4040
4041         return ++count;
4042
4043 dma_error:
4044         dev_err(dev, "TX DMA map failed\n");
4045
4046         /* clear timestamp and dma mappings for failed buffer_info mapping */
4047         buffer_info->dma = 0;
4048         buffer_info->time_stamp = 0;
4049         buffer_info->length = 0;
4050         buffer_info->next_to_watch = 0;
4051         buffer_info->mapped_as_page = false;
4052
4053         /* clear timestamp and dma mappings for remaining portion of packet */
4054         while (count--) {
4055                 if (i == 0)
4056                         i = tx_ring->count;
4057                 i--;
4058                 buffer_info = &tx_ring->buffer_info[i];
4059                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4060         }
4061
4062         return 0;
4063 }
4064
4065 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4066                                     u32 tx_flags, int count, u32 paylen,
4067                                     u8 hdr_len)
4068 {
4069         union e1000_adv_tx_desc *tx_desc;
4070         struct igb_buffer *buffer_info;
4071         u32 olinfo_status = 0, cmd_type_len;
4072         unsigned int i = tx_ring->next_to_use;
4073
4074         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4075                         E1000_ADVTXD_DCMD_DEXT);
4076
4077         if (tx_flags & IGB_TX_FLAGS_VLAN)
4078                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4079
4080         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4081                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4082
4083         if (tx_flags & IGB_TX_FLAGS_TSO) {
4084                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4085
4086                 /* insert tcp checksum */
4087                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4088
4089                 /* insert ip checksum */
4090                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4091                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4092
4093         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4094                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4095         }
4096
4097         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4098             (tx_flags & (IGB_TX_FLAGS_CSUM |
4099                          IGB_TX_FLAGS_TSO |
4100                          IGB_TX_FLAGS_VLAN)))
4101                 olinfo_status |= tx_ring->reg_idx << 4;
4102
4103         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
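        /*
         * Illustrative example: a TSO send with paylen 7332 and hdr_len
         * 66 reports a PAYLEN of 7266 in the upper bits of
         * olinfo_status.
         */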
4104
4105         do {
4106                 buffer_info = &tx_ring->buffer_info[i];
4107                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4108                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4109                 tx_desc->read.cmd_type_len =
4110                         cpu_to_le32(cmd_type_len | buffer_info->length);
4111                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4112                 count--;
4113                 i++;
4114                 if (i == tx_ring->count)
4115                         i = 0;
4116         } while (count > 0);
4117
4118         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4119         /* Force memory writes to complete before letting h/w
4120          * know there are new descriptors to fetch.  (Only
4121          * applicable for weak-ordered memory model archs,
4122          * such as IA-64). */
4123         wmb();
4124
4125         tx_ring->next_to_use = i;
4126         writel(i, tx_ring->tail);
4127         /* we need this if more than one processor can write to our tail
4128          * at a time; it synchronizes IO on IA64/Altix systems */
4129         mmiowb();
4130 }
4131
4132 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4133 {
4134         struct net_device *netdev = tx_ring->netdev;
4135
4136         netif_stop_subqueue(netdev, tx_ring->queue_index);
4137
4138         /* Herbert's original patch had:
4139          *  smp_mb__after_netif_stop_queue();
4140          * but since that doesn't exist yet, just open code it. */
4141         smp_mb();
4142
4143         /* We need to check again in case another CPU has just
4144          * made room available. */
4145         if (igb_desc_unused(tx_ring) < size)
4146                 return -EBUSY;
4147
4148         /* A reprieve! */
4149         netif_wake_subqueue(netdev, tx_ring->queue_index);
4150
4151         u64_stats_update_begin(&tx_ring->tx_syncp2);
4152         tx_ring->tx_stats.restart_queue2++;
4153         u64_stats_update_end(&tx_ring->tx_syncp2);
4154
4155         return 0;
4156 }
4157
4158 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4159 {
4160         if (igb_desc_unused(tx_ring) >= size)
4161                 return 0;
4162         return __igb_maybe_stop_tx(tx_ring, size);
4163 }
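
/*
 * Rough sketch of how the stop/wake protocol above pairs with the
 * cleanup path in igb_clean_tx_irq() (a simplification, not the
 * literal call sequence):
 *
 *   xmit path:                         cleanup path:
 *     netif_stop_subqueue()              advance next_to_clean
 *     smp_mb()                           smp_mb()
 *     re-check igb_desc_unused()         if stopped and room available:
 *     if room appeared: wake queue           netif_wake_subqueue()
 *
 * The paired barriers guarantee at least one side observes the other's
 * update, so the queue cannot stay stopped while descriptors are free.
 */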
4164
4165 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4166                                     struct igb_ring *tx_ring)
4167 {
4168         int tso = 0, count;
4169         u32 tx_flags = 0;
4170         u16 first;
4171         u8 hdr_len = 0;
4172
4173         /* need: 1 descriptor per page,
4174          *       + 2 desc gap to keep tail from touching head,
4175          *       + 1 desc for skb->data,
4176          *       + 1 desc for context descriptor,
4177          * otherwise try next time */
4178         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4179                 /* this is a hard error */
4180                 return NETDEV_TX_BUSY;
4181         }
4182
4183         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4184                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4185                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4186         }
4187
4188         if (vlan_tx_tag_present(skb)) {
4189                 tx_flags |= IGB_TX_FLAGS_VLAN;
4190                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4191         }
4192
4193         if (skb->protocol == htons(ETH_P_IP))
4194                 tx_flags |= IGB_TX_FLAGS_IPV4;
4195
4196         first = tx_ring->next_to_use;
4197         if (skb_is_gso(skb)) {
4198                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4199
4200                 if (tso < 0) {
4201                         dev_kfree_skb_any(skb);
4202                         return NETDEV_TX_OK;
4203                 }
4204         }
4205
4206         if (tso)
4207                 tx_flags |= IGB_TX_FLAGS_TSO;
4208         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4209                  (skb->ip_summed == CHECKSUM_PARTIAL))
4210                 tx_flags |= IGB_TX_FLAGS_CSUM;
4211
4212         /*
4213          * count reflects descriptors mapped; if 0 or less then a mapping
4214          * error has occurred and we need to rewind the descriptor queue
4215          */
4216         count = igb_tx_map_adv(tx_ring, skb, first);
4217         if (!count) {
4218                 dev_kfree_skb_any(skb);
4219                 tx_ring->buffer_info[first].time_stamp = 0;
4220                 tx_ring->next_to_use = first;
4221                 return NETDEV_TX_OK;
4222         }
4223
4224         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4225
4226         /* Make sure there is space in the ring for the next send. */
4227         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4228
4229         return NETDEV_TX_OK;
4230 }
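
/*
 * Worked example of the descriptor budget used above: a TSO skb with
 * skb->data plus three page frags needs one context descriptor, one
 * descriptor for skb->data and three for the frags, i.e. nr_frags + 2
 * in use; reserving nr_frags + 4 keeps a two-descriptor gap between
 * tail and head at all times.
 */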
4231
4232 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4233                                       struct net_device *netdev)
4234 {
4235         struct igb_adapter *adapter = netdev_priv(netdev);
4236         struct igb_ring *tx_ring;
4237         int r_idx = 0;
4238
4239         if (test_bit(__IGB_DOWN, &adapter->state)) {
4240                 dev_kfree_skb_any(skb);
4241                 return NETDEV_TX_OK;
4242         }
4243
4244         if (skb->len <= 0) {
4245                 dev_kfree_skb_any(skb);
4246                 return NETDEV_TX_OK;
4247         }
4248
4249         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4250         tx_ring = adapter->multi_tx_table[r_idx];
4251
4252         /* This goes back to the question of how to logically map a tx queue
4253          * to a flow.  Right now, performance is slightly degraded when
4254          * using multiple tx queues.  If the stack breaks away from a
4255          * single qdisc implementation, we can look at this again. */
4256         return igb_xmit_frame_ring_adv(skb, tx_ring);
4257 }
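
/*
 * The ring lookup above relies on IGB_ABS_MAX_TX_QUEUES being a power
 * of two, so "queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1)" behaves
 * like "queue_mapping % IGB_ABS_MAX_TX_QUEUES"; with 8 queues, for
 * example, a queue_mapping of 13 selects ring 5.
 */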
4258
4259 /**
4260  * igb_tx_timeout - Respond to a Tx Hang
4261  * @netdev: network interface device structure
4262  **/
4263 static void igb_tx_timeout(struct net_device *netdev)
4264 {
4265         struct igb_adapter *adapter = netdev_priv(netdev);
4266         struct e1000_hw *hw = &adapter->hw;
4267
4268         /* Do the reset outside of interrupt context */
4269         adapter->tx_timeout_count++;
4270
4271         if (hw->mac.type == e1000_82580)
4272                 hw->dev_spec._82575.global_device_reset = true;
4273
4274         schedule_work(&adapter->reset_task);
4275         wr32(E1000_EICS,
4276              (adapter->eims_enable_mask & ~adapter->eims_other));
4277 }
4278
4279 static void igb_reset_task(struct work_struct *work)
4280 {
4281         struct igb_adapter *adapter;
4282         adapter = container_of(work, struct igb_adapter, reset_task);
4283
4284         igb_dump(adapter);
4285         netdev_err(adapter->netdev, "Reset adapter\n");
4286         igb_reinit_locked(adapter);
4287 }
4288
4289 /**
4290  * igb_get_stats64 - Get System Network Statistics
4291  * @netdev: network interface device structure
4292  * @stats: rtnl_link_stats64 pointer
4293  *
4294  **/
4295 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4296                                                  struct rtnl_link_stats64 *stats)
4297 {
4298         struct igb_adapter *adapter = netdev_priv(netdev);
4299
4300         spin_lock(&adapter->stats64_lock);
4301         igb_update_stats(adapter, &adapter->stats64);
4302         memcpy(stats, &adapter->stats64, sizeof(*stats));
4303         spin_unlock(&adapter->stats64_lock);
4304
4305         return stats;
4306 }
4307
4308 /**
4309  * igb_change_mtu - Change the Maximum Transfer Unit
4310  * @netdev: network interface device structure
4311  * @new_mtu: new value for maximum frame size
4312  *
4313  * Returns 0 on success, negative on failure
4314  **/
4315 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4316 {
4317         struct igb_adapter *adapter = netdev_priv(netdev);
4318         struct pci_dev *pdev = adapter->pdev;
4319         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4320         u32 rx_buffer_len, i;
4321
4322         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4323                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4324                 return -EINVAL;
4325         }
4326
4327         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4328                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4329                 return -EINVAL;
4330         }
4331
4332         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4333                 msleep(1);
4334
4335         /* igb_down has a dependency on max_frame_size */
4336         adapter->max_frame_size = max_frame;
4337
4338         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4339          * means we reserve 2 more; this pushes us to allocate from the next
4340          * larger slab size.
4341          * i.e. RXBUFFER_2048 --> size-4096 slab
4342          */
4343
4344         if (adapter->hw.mac.type == e1000_82580)
4345                 max_frame += IGB_TS_HDR_LEN;
4346
4347         if (max_frame <= IGB_RXBUFFER_1024)
4348                 rx_buffer_len = IGB_RXBUFFER_1024;
4349         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4350                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4351         else
4352                 rx_buffer_len = IGB_RXBUFFER_128;
4353
4354         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4355              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4356                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4357
4358         if ((adapter->hw.mac.type == e1000_82580) &&
4359             (rx_buffer_len == IGB_RXBUFFER_128))
4360                 rx_buffer_len += IGB_RXBUFFER_64;
4361
4362         if (netif_running(netdev))
4363                 igb_down(adapter);
4364
4365         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4366                  netdev->mtu, new_mtu);
4367         netdev->mtu = new_mtu;
4368
4369         for (i = 0; i < adapter->num_rx_queues; i++)
4370                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4371
4372         if (netif_running(netdev))
4373                 igb_up(adapter);
4374         else
4375                 igb_reset(adapter);
4376
4377         clear_bit(__IGB_RESETTING, &adapter->state);
4378
4379         return 0;
4380 }
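
/*
 * Worked example of the buffer sizing above for a standard 1500-byte
 * MTU: max_frame = 1500 + ETH_HLEN (14) + ETH_FCS_LEN (4) = 1518,
 * which exceeds IGB_RXBUFFER_1024 but fits MAXIMUM_ETHERNET_VLAN_SIZE
 * (1522), so each receive buffer is sized at 1522 bytes.  A 9000-byte
 * jumbo MTU instead falls through to the header-split case with
 * IGB_RXBUFFER_128 buffers backed by pages.
 */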
4381
4382 /**
4383  * igb_update_stats - Update the board statistics counters
4384  * @adapter: board private structure
4385  **/
4386
4387 void igb_update_stats(struct igb_adapter *adapter,
4388                       struct rtnl_link_stats64 *net_stats)
4389 {
4390         struct e1000_hw *hw = &adapter->hw;
4391         struct pci_dev *pdev = adapter->pdev;
4392         u32 reg, mpc;
4393         u16 phy_tmp;
4394         int i;
4395         u64 bytes, packets;
4396         unsigned int start;
4397         u64 _bytes, _packets;
4398
4399 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4400
4401         /*
4402          * Prevent stats update while adapter is being reset, or if the pci
4403          * connection is down.
4404          */
4405         if (adapter->link_speed == 0)
4406                 return;
4407         if (pci_channel_offline(pdev))
4408                 return;
4409
4410         bytes = 0;
4411         packets = 0;
4412         for (i = 0; i < adapter->num_rx_queues; i++) {
4413                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4414                 struct igb_ring *ring = adapter->rx_ring[i];
4415
4416                 ring->rx_stats.drops += rqdpc_tmp;
4417                 net_stats->rx_fifo_errors += rqdpc_tmp;
4418
4419                 do {
4420                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4421                         _bytes = ring->rx_stats.bytes;
4422                         _packets = ring->rx_stats.packets;
4423                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4424                 bytes += _bytes;
4425                 packets += _packets;
4426         }
4427
4428         net_stats->rx_bytes = bytes;
4429         net_stats->rx_packets = packets;
4430
4431         bytes = 0;
4432         packets = 0;
4433         for (i = 0; i < adapter->num_tx_queues; i++) {
4434                 struct igb_ring *ring = adapter->tx_ring[i];
4435                 do {
4436                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4437                         _bytes = ring->tx_stats.bytes;
4438                         _packets = ring->tx_stats.packets;
4439                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4440                 bytes += _bytes;
4441                 packets += _packets;
4442         }
4443         net_stats->tx_bytes = bytes;
4444         net_stats->tx_packets = packets;
4445
4446         /* read stats registers */
4447         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4448         adapter->stats.gprc += rd32(E1000_GPRC);
4449         adapter->stats.gorc += rd32(E1000_GORCL);
4450         rd32(E1000_GORCH); /* clear GORCL */
4451         adapter->stats.bprc += rd32(E1000_BPRC);
4452         adapter->stats.mprc += rd32(E1000_MPRC);
4453         adapter->stats.roc += rd32(E1000_ROC);
4454
4455         adapter->stats.prc64 += rd32(E1000_PRC64);
4456         adapter->stats.prc127 += rd32(E1000_PRC127);
4457         adapter->stats.prc255 += rd32(E1000_PRC255);
4458         adapter->stats.prc511 += rd32(E1000_PRC511);
4459         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4460         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4461         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4462         adapter->stats.sec += rd32(E1000_SEC);
4463
4464         mpc = rd32(E1000_MPC);
4465         adapter->stats.mpc += mpc;
4466         net_stats->rx_fifo_errors += mpc;
4467         adapter->stats.scc += rd32(E1000_SCC);
4468         adapter->stats.ecol += rd32(E1000_ECOL);
4469         adapter->stats.mcc += rd32(E1000_MCC);
4470         adapter->stats.latecol += rd32(E1000_LATECOL);
4471         adapter->stats.dc += rd32(E1000_DC);
4472         adapter->stats.rlec += rd32(E1000_RLEC);
4473         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4474         adapter->stats.xontxc += rd32(E1000_XONTXC);
4475         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4476         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4477         adapter->stats.fcruc += rd32(E1000_FCRUC);
4478         adapter->stats.gptc += rd32(E1000_GPTC);
4479         adapter->stats.gotc += rd32(E1000_GOTCL);
4480         rd32(E1000_GOTCH); /* clear GOTCL */
4481         adapter->stats.rnbc += rd32(E1000_RNBC);
4482         adapter->stats.ruc += rd32(E1000_RUC);
4483         adapter->stats.rfc += rd32(E1000_RFC);
4484         adapter->stats.rjc += rd32(E1000_RJC);
4485         adapter->stats.tor += rd32(E1000_TORH);
4486         adapter->stats.tot += rd32(E1000_TOTH);
4487         adapter->stats.tpr += rd32(E1000_TPR);
4488
4489         adapter->stats.ptc64 += rd32(E1000_PTC64);
4490         adapter->stats.ptc127 += rd32(E1000_PTC127);
4491         adapter->stats.ptc255 += rd32(E1000_PTC255);
4492         adapter->stats.ptc511 += rd32(E1000_PTC511);
4493         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4494         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4495
4496         adapter->stats.mptc += rd32(E1000_MPTC);
4497         adapter->stats.bptc += rd32(E1000_BPTC);
4498
4499         adapter->stats.tpt += rd32(E1000_TPT);
4500         adapter->stats.colc += rd32(E1000_COLC);
4501
4502         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4503         /* read internal phy specific stats */
4504         reg = rd32(E1000_CTRL_EXT);
4505         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4506                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4507                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4508         }
4509
4510         adapter->stats.tsctc += rd32(E1000_TSCTC);
4511         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4512
4513         adapter->stats.iac += rd32(E1000_IAC);
4514         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4515         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4516         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4517         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4518         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4519         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4520         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4521         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4522
4523         /* Fill out the OS statistics structure */
4524         net_stats->multicast = adapter->stats.mprc;
4525         net_stats->collisions = adapter->stats.colc;
4526
4527         /* Rx Errors */
4528
4529         /* RLEC on some newer hardware can be incorrect so build
4530          * our own version based on RUC and ROC */
4531         net_stats->rx_errors = adapter->stats.rxerrc +
4532                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4533                 adapter->stats.ruc + adapter->stats.roc +
4534                 adapter->stats.cexterr;
4535         net_stats->rx_length_errors = adapter->stats.ruc +
4536                                       adapter->stats.roc;
4537         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4538         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4539         net_stats->rx_missed_errors = adapter->stats.mpc;
4540
4541         /* Tx Errors */
4542         net_stats->tx_errors = adapter->stats.ecol +
4543                                adapter->stats.latecol;
4544         net_stats->tx_aborted_errors = adapter->stats.ecol;
4545         net_stats->tx_window_errors = adapter->stats.latecol;
4546         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4547
4548         /* Tx Dropped needs to be maintained elsewhere */
4549
4550         /* Phy Stats */
4551         if (hw->phy.media_type == e1000_media_type_copper) {
4552                 if ((adapter->link_speed == SPEED_1000) &&
4553                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4554                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4555                         adapter->phy_stats.idle_errors += phy_tmp;
4556                 }
4557         }
4558
4559         /* Management Stats */
4560         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4561         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4562         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4563 }
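
/*
 * The per-ring byte/packet reads above use the u64_stats seqcount
 * pattern: a reader retries until it sees a consistent snapshot.  A
 * minimal consumer looks roughly like:
 *
 *   unsigned int start;
 *   u64 bytes;
 *   do {
 *           start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
 *           bytes = ring->rx_stats.bytes;
 *   } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
 *
 * Writers (the clean/receive paths) bracket their updates with
 * u64_stats_update_begin()/u64_stats_update_end() on the same syncp.
 */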
4564
4565 static irqreturn_t igb_msix_other(int irq, void *data)
4566 {
4567         struct igb_adapter *adapter = data;
4568         struct e1000_hw *hw = &adapter->hw;
4569         u32 icr = rd32(E1000_ICR);
4570         /* reading ICR causes bit 31 of EICR to be cleared */
4571
4572         if (icr & E1000_ICR_DRSTA)
4573                 schedule_work(&adapter->reset_task);
4574
4575         if (icr & E1000_ICR_DOUTSYNC) {
4576                 /* HW is reporting DMA is out of sync */
4577                 adapter->stats.doosync++;
4578                 /* The DMA Out of Sync is also an indication of a spoof event
4579                  * in IOV mode. Check the Wrong VM Behavior register to
4580                  * see if it is really a spoof event. */
4581                 igb_check_wvbr(adapter);
4582         }
4583
4584         /* Check for a mailbox event */
4585         if (icr & E1000_ICR_VMMB)
4586                 igb_msg_task(adapter);
4587
4588         if (icr & E1000_ICR_LSC) {
4589                 hw->mac.get_link_status = 1;
4590                 /* guard against interrupt when we're going down */
4591                 if (!test_bit(__IGB_DOWN, &adapter->state))
4592                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4593         }
4594
4595         if (adapter->vfs_allocated_count)
4596                 wr32(E1000_IMS, E1000_IMS_LSC |
4597                                 E1000_IMS_VMMB |
4598                                 E1000_IMS_DOUTSYNC);
4599         else
4600                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4601         wr32(E1000_EIMS, adapter->eims_other);
4602
4603         return IRQ_HANDLED;
4604 }
4605
4606 static void igb_write_itr(struct igb_q_vector *q_vector)
4607 {
4608         struct igb_adapter *adapter = q_vector->adapter;
4609         u32 itr_val = q_vector->itr_val & 0x7FFC;
4610
4611         if (!q_vector->set_itr)
4612                 return;
4613
4614         if (!itr_val)
4615                 itr_val = 0x4;
4616
4617         if (adapter->hw.mac.type == e1000_82575)
4618                 itr_val |= itr_val << 16;
4619         else
4620                 itr_val |= 0x8000000;
4621
4622         writel(itr_val, q_vector->itr_register);
4623         q_vector->set_itr = 0;
4624 }
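
/*
 * Note on the EITR encoding handled above: the interval lives in bits
 * 2..14, hence the 0x7FFC mask and the 0x4 floor.  82575 parts expect
 * the value mirrored into the upper half-word, while later parts take
 * a flag bit (0x8000000) OR'd in alongside the interval instead.
 */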
4625
4626 static irqreturn_t igb_msix_ring(int irq, void *data)
4627 {
4628         struct igb_q_vector *q_vector = data;
4629
4630         /* Write the ITR value calculated from the previous interrupt. */
4631         igb_write_itr(q_vector);
4632
4633         napi_schedule(&q_vector->napi);
4634
4635         return IRQ_HANDLED;
4636 }
4637
4638 #ifdef CONFIG_IGB_DCA
4639 static void igb_update_dca(struct igb_q_vector *q_vector)
4640 {
4641         struct igb_adapter *adapter = q_vector->adapter;
4642         struct e1000_hw *hw = &adapter->hw;
4643         int cpu = get_cpu();
4644
4645         if (q_vector->cpu == cpu)
4646                 goto out_no_update;
4647
4648         if (q_vector->tx_ring) {
4649                 int q = q_vector->tx_ring->reg_idx;
4650                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4651                 if (hw->mac.type == e1000_82575) {
4652                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4653                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4654                 } else {
4655                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4656                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4657                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4658                 }
4659                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4660                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4661         }
4662         if (q_vector->rx_ring) {
4663                 int q = q_vector->rx_ring->reg_idx;
4664                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4665                 if (hw->mac.type == e1000_82575) {
4666                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4667                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4668                 } else {
4669                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4670                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4671                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4672                 }
4673                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4674                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4675                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4676                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4677         }
4678         q_vector->cpu = cpu;
4679 out_no_update:
4680         put_cpu();
4681 }
4682
4683 static void igb_setup_dca(struct igb_adapter *adapter)
4684 {
4685         struct e1000_hw *hw = &adapter->hw;
4686         int i;
4687
4688         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4689                 return;
4690
4691         /* Always use CB2 mode; the difference is masked in the CB driver. */
4692         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4693
4694         for (i = 0; i < adapter->num_q_vectors; i++) {
4695                 adapter->q_vector[i]->cpu = -1;
4696                 igb_update_dca(adapter->q_vector[i]);
4697         }
4698 }
4699
4700 static int __igb_notify_dca(struct device *dev, void *data)
4701 {
4702         struct net_device *netdev = dev_get_drvdata(dev);
4703         struct igb_adapter *adapter = netdev_priv(netdev);
4704         struct pci_dev *pdev = adapter->pdev;
4705         struct e1000_hw *hw = &adapter->hw;
4706         unsigned long event = *(unsigned long *)data;
4707
4708         switch (event) {
4709         case DCA_PROVIDER_ADD:
4710                 /* if already enabled, don't do it again */
4711                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4712                         break;
4713                 if (dca_add_requester(dev) == 0) {
4714                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4715                         dev_info(&pdev->dev, "DCA enabled\n");
4716                         igb_setup_dca(adapter);
4717                         break;
4718                 }
4719                 /* Fall Through since DCA is disabled. */
4720         case DCA_PROVIDER_REMOVE:
4721                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4722                         /* without this a class_device is left
4723                          * hanging around in the sysfs model */
4724                         dca_remove_requester(dev);
4725                         dev_info(&pdev->dev, "DCA disabled\n");
4726                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4727                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4728                 }
4729                 break;
4730         }
4731
4732         return 0;
4733 }
4734
4735 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4736                           void *p)
4737 {
4738         int ret_val;
4739
4740         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4741                                          __igb_notify_dca);
4742
4743         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4744 }
4745 #endif /* CONFIG_IGB_DCA */
4746
4747 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4748 {
4749         struct e1000_hw *hw = &adapter->hw;
4750         u32 ping;
4751         int i;
4752
4753         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4754                 ping = E1000_PF_CONTROL_MSG;
4755                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4756                         ping |= E1000_VT_MSGTYPE_CTS;
4757                 igb_write_mbx(hw, &ping, 1, i);
4758         }
4759 }
4760
4761 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4762 {
4763         struct e1000_hw *hw = &adapter->hw;
4764         u32 vmolr = rd32(E1000_VMOLR(vf));
4765         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4766
4767         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4768                             IGB_VF_FLAG_MULTI_PROMISC);
4769         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4770
4771         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4772                 vmolr |= E1000_VMOLR_MPME;
4773                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4774                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4775         } else {
4776                 /*
4777                  * if we have hashes and we are clearing a multicast promisc
4778                  * flag we need to write the hashes to the MTA as this step
4779                  * was previously skipped
4780                  */
4781                 if (vf_data->num_vf_mc_hashes > 30) {
4782                         vmolr |= E1000_VMOLR_MPME;
4783                 } else if (vf_data->num_vf_mc_hashes) {
4784                         int j;
4785                         vmolr |= E1000_VMOLR_ROMPE;
4786                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4787                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4788                 }
4789         }
4790
4791         wr32(E1000_VMOLR(vf), vmolr);
4792
4793         /* there are flags left unprocessed, likely not supported */
4794         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4795                 return -EINVAL;
4796
4797         return 0;
4798
4799 }
4800
4801 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4802                                   u32 *msgbuf, u32 vf)
4803 {
4804         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4805         u16 *hash_list = (u16 *)&msgbuf[1];
4806         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4807         int i;
4808
4809         /* salt away the number of multicast addresses assigned
4810          * to this VF for later use to restore when the PF multicast
4811          * list changes
4812          */
4813         vf_data->num_vf_mc_hashes = n;
4814
4815         /* only up to 30 hash values supported */
4816         if (n > 30)
4817                 n = 30;
4818
4819         /* store the hashes for later use */
4820         for (i = 0; i < n; i++)
4821                 vf_data->vf_mc_hashes[i] = hash_list[i];
4822
4823         /* Flush and reset the mta with the new values */
4824         igb_set_rx_mode(adapter->netdev);
4825
4826         return 0;
4827 }
4828
4829 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4830 {
4831         struct e1000_hw *hw = &adapter->hw;
4832         struct vf_data_storage *vf_data;
4833         int i, j;
4834
4835         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4836                 u32 vmolr = rd32(E1000_VMOLR(i));
4837                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4838
4839                 vf_data = &adapter->vf_data[i];
4840
4841                 if ((vf_data->num_vf_mc_hashes > 30) ||
4842                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4843                         vmolr |= E1000_VMOLR_MPME;
4844                 } else if (vf_data->num_vf_mc_hashes) {
4845                         vmolr |= E1000_VMOLR_ROMPE;
4846                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4847                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4848                 }
4849                 wr32(E1000_VMOLR(i), vmolr);
4850         }
4851 }
4852
4853 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4854 {
4855         struct e1000_hw *hw = &adapter->hw;
4856         u32 pool_mask, reg, vid;
4857         int i;
4858
4859         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4860
4861         /* Find the vlan filter for this id */
4862         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4863                 reg = rd32(E1000_VLVF(i));
4864
4865                 /* remove the vf from the pool */
4866                 reg &= ~pool_mask;
4867
4868                 /* if pool is empty then remove entry from vfta */
4869                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4870                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4871                         vid = reg & E1000_VLVF_VLANID_MASK;
4872                         reg = 0;
4873                         igb_vfta_set(hw, vid, false);
4874                 }
4875
4876                 wr32(E1000_VLVF(i), reg);
4877         }
4878
4879         adapter->vf_data[vf].vlans_enabled = 0;
4880 }
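
/*
 * Layout of a VLVF entry as manipulated above (a sketch based on the
 * masks used in this file):
 *
 *   bits  0..11  VLAN ID            (E1000_VLVF_VLANID_MASK)
 *   bits 12..19  pool select bits   (one per VF/PF pool)
 *   bit  31      VLAN ID enable     (E1000_VLVF_VLANID_ENABLE)
 *
 * Clearing a VF thus means dropping its pool bit from every entry
 * and, once an entry's pool mask empties, retiring the VID from the
 * VFTA as well.
 */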
4881
4882 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4883 {
4884         struct e1000_hw *hw = &adapter->hw;
4885         u32 reg, i;
4886
4887         /* The vlvf table only exists on 82576 hardware and newer */
4888         if (hw->mac.type < e1000_82576)
4889                 return -1;
4890
4891         /* we only need to do this if VMDq is enabled */
4892         if (!adapter->vfs_allocated_count)
4893                 return -1;
4894
4895         /* Find the vlan filter for this id */
4896         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4897                 reg = rd32(E1000_VLVF(i));
4898                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4899                     vid == (reg & E1000_VLVF_VLANID_MASK))
4900                         break;
4901         }
4902
4903         if (add) {
4904                 if (i == E1000_VLVF_ARRAY_SIZE) {
4905                         /* Did not find a matching VLAN ID entry that was
4906                          * enabled.  Search for a free filter entry, i.e.
4907                          * one without the enable bit set
4908                          */
4909                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4910                                 reg = rd32(E1000_VLVF(i));
4911                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4912                                         break;
4913                         }
4914                 }
4915                 if (i < E1000_VLVF_ARRAY_SIZE) {
4916                         /* Found an enabled/available entry */
4917                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4918
4919                         /* if !enabled we need to set this up in vfta */
4920                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4921                                 /* add VID to filter table */
4922                                 igb_vfta_set(hw, vid, true);
4923                                 reg |= E1000_VLVF_VLANID_ENABLE;
4924                         }
4925                         reg &= ~E1000_VLVF_VLANID_MASK;
4926                         reg |= vid;
4927                         wr32(E1000_VLVF(i), reg);
4928
4929                         /* do not modify RLPML for PF devices */
4930                         if (vf >= adapter->vfs_allocated_count)
4931                                 return 0;
4932
4933                         if (!adapter->vf_data[vf].vlans_enabled) {
4934                                 u32 size;
4935                                 reg = rd32(E1000_VMOLR(vf));
4936                                 size = reg & E1000_VMOLR_RLPML_MASK;
4937                                 size += 4;
4938                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4939                                 reg |= size;
4940                                 wr32(E1000_VMOLR(vf), reg);
4941                         }
4942
4943                         adapter->vf_data[vf].vlans_enabled++;
4944                         return 0;
4945                 }
4946         } else {
4947                 if (i < E1000_VLVF_ARRAY_SIZE) {
4948                         /* remove vf from the pool */
4949                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4950                         /* if pool is empty then remove entry from vfta */
4951                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4952                                 reg = 0;
4953                                 igb_vfta_set(hw, vid, false);
4954                         }
4955                         wr32(E1000_VLVF(i), reg);
4956
4957                         /* do not modify RLPML for PF devices */
4958                         if (vf >= adapter->vfs_allocated_count)
4959                                 return 0;
4960
4961                         adapter->vf_data[vf].vlans_enabled--;
4962                         if (!adapter->vf_data[vf].vlans_enabled) {
4963                                 u32 size;
4964                                 reg = rd32(E1000_VMOLR(vf));
4965                                 size = reg & E1000_VMOLR_RLPML_MASK;
4966                                 size -= 4;
4967                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4968                                 reg |= size;
4969                                 wr32(E1000_VMOLR(vf), reg);
4970                         }
4971                 }
4972         }
4973         return 0;
4974 }
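
/*
 * The RLPML adjustment above grows or shrinks the VF's maximum
 * accepted frame length by 4 bytes (the size of a VLAN tag) when its
 * first VLAN is added or its last one removed, so a tagged full-size
 * frame still fits; e.g. a limit of 1518 becomes 1522 once any VLAN
 * filter is active for the VF.
 */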
4975
4976 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4977 {
4978         struct e1000_hw *hw = &adapter->hw;
4979
4980         if (vid)
4981                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4982         else
4983                 wr32(E1000_VMVIR(vf), 0);
4984 }
4985
4986 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4987                                int vf, u16 vlan, u8 qos)
4988 {
4989         int err = 0;
4990         struct igb_adapter *adapter = netdev_priv(netdev);
4991
4992         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4993                 return -EINVAL;
4994         if (vlan || qos) {
4995                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4996                 if (err)
4997                         goto out;
4998                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4999                 igb_set_vmolr(adapter, vf, !vlan);
5000                 adapter->vf_data[vf].pf_vlan = vlan;
5001                 adapter->vf_data[vf].pf_qos = qos;
5002                 dev_info(&adapter->pdev->dev,
5003                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5004                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5005                         dev_warn(&adapter->pdev->dev,
5006                                  "The VF VLAN has been set,"
5007                                  " but the PF device is not up.\n");
5008                         dev_warn(&adapter->pdev->dev,
5009                                  "Bring the PF device up before"
5010                                  " attempting to use the VF device.\n");
5011                 }
5012         } else {
5013                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5014                                    false, vf);
5015                 igb_set_vmvir(adapter, vlan, vf);
5016                 igb_set_vmolr(adapter, vf, true);
5017                 adapter->vf_data[vf].pf_vlan = 0;
5018                 adapter->vf_data[vf].pf_qos = 0;
5019         }
5020 out:
5021         return err;
5022 }
5023
5024 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5025 {
5026         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5027         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5028
5029         return igb_vlvf_set(adapter, vid, add, vf);
5030 }
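
/*
 * Shape of the mailbox message decoded above (a sketch based on the
 * masks used in this file): msgbuf[0] carries the command in its low
 * 16 bits plus per-command info under E1000_VT_MSGINFO_MASK (here the
 * add/remove flag), while msgbuf[1] holds the VLAN ID in its low 12
 * bits.
 */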
5031
5032 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5033 {
5034         /* clear flags - except flag that indicates PF has set the MAC */
5035         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5036         adapter->vf_data[vf].last_nack = jiffies;
5037
5038         /* reset offloads to defaults */
5039         igb_set_vmolr(adapter, vf, true);
5040
5041         /* reset vlans for device */
5042         igb_clear_vf_vfta(adapter, vf);
5043         if (adapter->vf_data[vf].pf_vlan)
5044                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5045                                     adapter->vf_data[vf].pf_vlan,
5046                                     adapter->vf_data[vf].pf_qos);
5047         else
5048                 igb_clear_vf_vfta(adapter, vf);
5049
5050         /* reset multicast table array for vf */
5051         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5052
5053         /* Flush and reset the mta with the new values */
5054         igb_set_rx_mode(adapter->netdev);
5055 }
5056
5057 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5058 {
5059         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5060
5061         /* generate a new mac address as we were hotplug removed/added */
5062         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5063                 random_ether_addr(vf_mac);
5064
5065         /* process remaining reset events */
5066         igb_vf_reset(adapter, vf);
5067 }
5068
5069 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5070 {
5071         struct e1000_hw *hw = &adapter->hw;
5072         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5073         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5074         u32 reg, msgbuf[3];
5075         u8 *addr = (u8 *)(&msgbuf[1]);
5076
5077         /* process all the same items cleared in a function level reset */
5078         igb_vf_reset(adapter, vf);
5079
5080         /* set vf mac address */
5081         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5082
5083         /* enable transmit and receive for vf */
5084         reg = rd32(E1000_VFTE);
5085         wr32(E1000_VFTE, reg | (1 << vf));
5086         reg = rd32(E1000_VFRE);
5087         wr32(E1000_VFRE, reg | (1 << vf));
5088
5089         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5090
5091         /* reply to reset with ack and vf mac address */
5092         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5093         memcpy(addr, vf_mac, 6);
5094         igb_write_mbx(hw, msgbuf, 3, vf);
5095 }
5096
5097 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5098 {
5099         /*
5100          * The VF MAC Address is stored in a packed array of bytes
5101          * starting at the second 32 bit word of the msg array
5102          */
5103         unsigned char *addr = (unsigned char *)&msg[1];
5104         int err = -1;
5105
5106         if (is_valid_ether_addr(addr))
5107                 err = igb_set_vf_mac(adapter, vf, addr);
5108
5109         return err;
5110 }
5111
5112 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5113 {
5114         struct e1000_hw *hw = &adapter->hw;
5115         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5116         u32 msg = E1000_VT_MSGTYPE_NACK;
5117
5118         /* if device isn't clear to send it shouldn't be reading either */
5119         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5120             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5121                 igb_write_mbx(hw, &msg, 1, vf);
5122                 vf_data->last_nack = jiffies;
5123         }
5124 }
5125
5126 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5127 {
5128         struct pci_dev *pdev = adapter->pdev;
5129         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5130         struct e1000_hw *hw = &adapter->hw;
5131         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5132         s32 retval;
5133
5134         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5135
5136         if (retval) {
5137                 /* if receive failed revoke VF CTS status and restart init */
5138                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5139                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5140                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5141                         return;
5142                 goto out;
5143         }
5144
5145         /* this is a message we already processed, do nothing */
5146         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5147                 return;
5148
5149         /*
5150          * until the vf completes a reset it should not be
5151          * allowed to start any configuration.
5152          */
5153
5154         if (msgbuf[0] == E1000_VF_RESET) {
5155                 igb_vf_reset_msg(adapter, vf);
5156                 return;
5157         }
5158
5159         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5160                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5161                         return;
5162                 retval = -1;
5163                 goto out;
5164         }
5165
5166         switch ((msgbuf[0] & 0xFFFF)) {
5167         case E1000_VF_SET_MAC_ADDR:
5168                 retval = -EINVAL;
5169                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5170                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5171                 else
5172                         dev_warn(&pdev->dev,
5173                                  "VF %d attempted to override administratively "
5174                                  "set MAC address\nReload the VF driver to "
5175                                  "resume operations\n", vf);
5176                 break;
5177         case E1000_VF_SET_PROMISC:
5178                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5179                 break;
5180         case E1000_VF_SET_MULTICAST:
5181                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5182                 break;
5183         case E1000_VF_SET_LPE:
5184                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5185                 break;
5186         case E1000_VF_SET_VLAN:
5187                 retval = -1;
5188                 if (vf_data->pf_vlan)
5189                         dev_warn(&pdev->dev,
5190                                  "VF %d attempted to override administratively "
5191                                  "set VLAN tag\nReload the VF driver to "
5192                                  "resume operations\n", vf);
5193                 else
5194                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5195                 break;
5196         default:
5197                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5198                 retval = -1;
5199                 break;
5200         }
5201
5202         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5203 out:
5204         /* notify the VF of the results of what it sent us */
5205         if (retval)
5206                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5207         else
5208                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5209
5210         igb_write_mbx(hw, msgbuf, 1, vf);
5211 }
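
/*
 * Reply convention used above: the PF echoes the VF's msgbuf[0] with
 * E1000_VT_MSGTYPE_ACK or E1000_VT_MSGTYPE_NACK OR'd in, plus CTS once
 * the VF has completed a reset, so the VF can match each answer to the
 * request it sent and distinguish success from rejection.
 */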
5212
5213 static void igb_msg_task(struct igb_adapter *adapter)
5214 {
5215         struct e1000_hw *hw = &adapter->hw;
5216         u32 vf;
5217
5218         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5219                 /* process any reset requests */
5220                 if (!igb_check_for_rst(hw, vf))
5221                         igb_vf_reset_event(adapter, vf);
5222
5223                 /* process any messages pending */
5224                 if (!igb_check_for_msg(hw, vf))
5225                         igb_rcv_msg_from_vf(adapter, vf);
5226
5227                 /* process any acks */
5228                 if (!igb_check_for_ack(hw, vf))
5229                         igb_rcv_ack_from_vf(adapter, vf);
5230         }
5231 }
5232
5233 /**
5234  *  igb_set_uta - Set unicast filter table address
5235  *  @adapter: board private structure
5236  *
5237  *  The unicast table address is a register array of 32-bit registers.
5238  *  The table is meant to be used in a way similar to how the MTA is used
5239  *  however due to certain limitations in the hardware it is necessary to
5240  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5241  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5242  **/
5243 static void igb_set_uta(struct igb_adapter *adapter)
5244 {
5245         struct e1000_hw *hw = &adapter->hw;
5246         int i;
5247
5248         /* The UTA table only exists on 82576 hardware and newer */
5249         if (hw->mac.type < e1000_82576)
5250                 return;
5251
5252         /* we only need to do this if VMDq is enabled */
5253         if (!adapter->vfs_allocated_count)
5254                 return;
5255
5256         for (i = 0; i < hw->mac.uta_reg_count; i++)
5257                 array_wr32(E1000_UTA, i, ~0);
5258 }
5259
5260 /**
5261  * igb_intr_msi - Interrupt Handler
5262  * @irq: interrupt number
5263  * @data: pointer to a network interface device structure
5264  **/
5265 static irqreturn_t igb_intr_msi(int irq, void *data)
5266 {
5267         struct igb_adapter *adapter = data;
5268         struct igb_q_vector *q_vector = adapter->q_vector[0];
5269         struct e1000_hw *hw = &adapter->hw;
5270         /* reading ICR disables interrupts using IAM */
5271         u32 icr = rd32(E1000_ICR);
5272
5273         igb_write_itr(q_vector);
5274
5275         if (icr & E1000_ICR_DRSTA)
5276                 schedule_work(&adapter->reset_task);
5277
5278         if (icr & E1000_ICR_DOUTSYNC) {
5279                 /* HW is reporting DMA is out of sync */
5280                 adapter->stats.doosync++;
5281         }
5282
5283         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5284                 hw->mac.get_link_status = 1;
5285                 if (!test_bit(__IGB_DOWN, &adapter->state))
5286                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5287         }
5288
5289         napi_schedule(&q_vector->napi);
5290
5291         return IRQ_HANDLED;
5292 }
5293
5294 /**
5295  * igb_intr - Legacy Interrupt Handler
5296  * @irq: interrupt number
5297  * @data: pointer to a network interface device structure
5298  **/
5299 static irqreturn_t igb_intr(int irq, void *data)
5300 {
5301         struct igb_adapter *adapter = data;
5302         struct igb_q_vector *q_vector = adapter->q_vector[0];
5303         struct e1000_hw *hw = &adapter->hw;
5304         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5305          * need for the IMC write */
5306         u32 icr = rd32(E1000_ICR);
5307         if (!icr)
5308                 return IRQ_NONE;  /* Not our interrupt */
5309
5310         igb_write_itr(q_vector);
5311
5312         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5313          * not set, then the adapter didn't send an interrupt */
5314         if (!(icr & E1000_ICR_INT_ASSERTED))
5315                 return IRQ_NONE;
5316
5317         if (icr & E1000_ICR_DRSTA)
5318                 schedule_work(&adapter->reset_task);
5319
5320         if (icr & E1000_ICR_DOUTSYNC) {
5321                 /* HW is reporting DMA is out of sync */
5322                 adapter->stats.doosync++;
5323         }
5324
5325         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5326                 hw->mac.get_link_status = 1;
5327                 /* guard against interrupt when we're going down */
5328                 if (!test_bit(__IGB_DOWN, &adapter->state))
5329                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5330         }
5331
5332         napi_schedule(&q_vector->napi);
5333
5334         return IRQ_HANDLED;
5335 }
5336
5337 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5338 {
5339         struct igb_adapter *adapter = q_vector->adapter;
5340         struct e1000_hw *hw = &adapter->hw;
5341
5342         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5343             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5344                 if (!adapter->msix_entries)
5345                         igb_set_itr(adapter);
5346                 else
5347                         igb_update_ring_itr(q_vector);
5348         }
5349
5350         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5351                 if (adapter->msix_entries)
5352                         wr32(E1000_EIMS, q_vector->eims_value);
5353                 else
5354                         igb_irq_enable(adapter);
5355         }
5356 }
5357
5358 /**
5359  * igb_poll - NAPI Rx polling callback
5360  * @napi: napi polling structure
5361  * @budget: count of how many packets we should handle
5362  **/
5363 static int igb_poll(struct napi_struct *napi, int budget)
5364 {
5365         struct igb_q_vector *q_vector = container_of(napi,
5366                                                      struct igb_q_vector,
5367                                                      napi);
5368         int tx_clean_complete = 1, work_done = 0;
5369
5370 #ifdef CONFIG_IGB_DCA
5371         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5372                 igb_update_dca(q_vector);
5373 #endif
5374         if (q_vector->tx_ring)
5375                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5376
5377         if (q_vector->rx_ring)
5378                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5379
5380         if (!tx_clean_complete)
5381                 work_done = budget;
5382
5383         /* If not enough Rx work done, exit the polling mode */
5384         if (work_done < budget) {
5385                 napi_complete(napi);
5386                 igb_ring_irq_enable(q_vector);
5387         }
5388
5389         return work_done;
5390 }
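
/*
 * NAPI contract followed above: returning a value less than budget
 * after napi_complete() tells the core this vector is idle and its
 * interrupts have been re-enabled; returning the full budget (as
 * forced when tx cleanup is incomplete) keeps the vector on the poll
 * list without touching interrupt state.
 */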
5391
5392 /**
5393  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5394  * @adapter: board private structure
5395  * @shhwtstamps: timestamp structure to update
5396  * @regval: unsigned 64bit system time value.
5397  *
5398  * We need to convert the system time value stored in the RX/TXSTMP registers
5399  * into a hwtstamp which can be used by the upper level timestamping functions
5400  */
5401 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5402                                    struct skb_shared_hwtstamps *shhwtstamps,
5403                                    u64 regval)
5404 {
5405         u64 ns;
5406
5407         /*
5408          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5409          * 24 to match clock shift we setup earlier.
5410          */
5411         if (adapter->hw.mac.type == e1000_82580)
5412                 regval <<= IGB_82580_TSYNC_SHIFT;
5413
5414         ns = timecounter_cyc2time(&adapter->clock, regval);
5415         timecompare_update(&adapter->compare, ns);
5416         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5417         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5418         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5419 }
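
/*
 * Example of the 82580 fixup above: the raw RX/TXSTMP registers count
 * nanoseconds from bit 0, but the timecounter for this part was set
 * up with a 24-bit shift, so the register value is shifted left by
 * IGB_82580_TSYNC_SHIFT before timecounter_cyc2time() converts it to
 * nanoseconds since the clock was initialized.
 */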
5420
5421 /**
5422  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5423  * @q_vector: pointer to q_vector containing needed info
5424  * @buffer: pointer to igb_buffer structure
5425  *
5426  * If we were asked to do hardware stamping and such a time stamp is
5427  * available, then it must have been for this skb here because we
5428  * allow only one such packet into the queue.
5429  */
5430 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5431 {
5432         struct igb_adapter *adapter = q_vector->adapter;
5433         struct e1000_hw *hw = &adapter->hw;
5434         struct skb_shared_hwtstamps shhwtstamps;
5435         u64 regval;
5436
5437         /* if skb does not support hw timestamp or TX stamp not valid exit */
5438         if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5439             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5440                 return;
5441
5442         regval = rd32(E1000_TXSTMPL);
5443         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5444
5445         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5446         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5447 }
5448
5449 /**
5450  * igb_clean_tx_irq - Reclaim resources after transmit completes
5451  * @q_vector: pointer to q_vector containing needed info
5452  * returns true if ring is completely cleaned
5453  **/
5454 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5455 {
5456         struct igb_adapter *adapter = q_vector->adapter;
5457         struct igb_ring *tx_ring = q_vector->tx_ring;
5458         struct net_device *netdev = tx_ring->netdev;
5459         struct e1000_hw *hw = &adapter->hw;
5460         struct igb_buffer *buffer_info;
5461         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5462         unsigned int total_bytes = 0, total_packets = 0;
5463         unsigned int i, eop, count = 0;
5464         bool cleaned = false;
5465
5466         i = tx_ring->next_to_clean;
5467         eop = tx_ring->buffer_info[i].next_to_watch;
5468         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5469
5470         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5471                (count < tx_ring->count)) {
5472                 rmb();  /* read buffer_info after eop_desc status */
5473                 for (cleaned = false; !cleaned; count++) {
5474                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5475                         buffer_info = &tx_ring->buffer_info[i];
5476                         cleaned = (i == eop);
5477
5478                         if (buffer_info->skb) {
5479                                 total_bytes += buffer_info->bytecount;
5480                                 /* gso_segs is currently only valid for tcp */
5481                                 total_packets += buffer_info->gso_segs;
5482                                 igb_tx_hwtstamp(q_vector, buffer_info);
5483                         }
5484
5485                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5486                         tx_desc->wb.status = 0;
5487
5488                         i++;
5489                         if (i == tx_ring->count)
5490                                 i = 0;
5491                 }
5492                 eop = tx_ring->buffer_info[i].next_to_watch;
5493                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5494         }
5495
5496         tx_ring->next_to_clean = i;
5497
5498         if (unlikely(count &&
5499                      netif_carrier_ok(netdev) &&
5500                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5501                 /* Make sure that anybody stopping the queue after this
5502                  * sees the new next_to_clean.
5503                  */
5504                 smp_mb();
5505                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5506                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5507                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5508
5509                         u64_stats_update_begin(&tx_ring->tx_syncp);
5510                         tx_ring->tx_stats.restart_queue++;
5511                         u64_stats_update_end(&tx_ring->tx_syncp);
5512                 }
5513         }
5514
5515         if (tx_ring->detect_tx_hung) {
5516                 /* Detect a transmit hang in hardware; this serializes the
5517                  * check with the clearing of time_stamp and movement of i */
5518                 tx_ring->detect_tx_hung = false;
5519                 if (tx_ring->buffer_info[i].time_stamp &&
5520                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5521                                (adapter->tx_timeout_factor * HZ)) &&
5522                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5523
5524                         /* detected Tx unit hang */
5525                         dev_err(tx_ring->dev,
5526                                 "Detected Tx Unit Hang\n"
5527                                 "  Tx Queue             <%d>\n"
5528                                 "  TDH                  <%x>\n"
5529                                 "  TDT                  <%x>\n"
5530                                 "  next_to_use          <%x>\n"
5531                                 "  next_to_clean        <%x>\n"
5532                                 "buffer_info[next_to_clean]\n"
5533                                 "  time_stamp           <%lx>\n"
5534                                 "  next_to_watch        <%x>\n"
5535                                 "  jiffies              <%lx>\n"
5536                                 "  desc.status          <%x>\n",
5537                                 tx_ring->queue_index,
5538                                 readl(tx_ring->head),
5539                                 readl(tx_ring->tail),
5540                                 tx_ring->next_to_use,
5541                                 tx_ring->next_to_clean,
5542                                 tx_ring->buffer_info[eop].time_stamp,
5543                                 eop,
5544                                 jiffies,
5545                                 eop_desc->wb.status);
5546                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5547                 }
5548         }
5549         tx_ring->total_bytes += total_bytes;
5550         tx_ring->total_packets += total_packets;
5551         u64_stats_update_begin(&tx_ring->tx_syncp);
5552         tx_ring->tx_stats.bytes += total_bytes;
5553         tx_ring->tx_stats.packets += total_packets;
5554         u64_stats_update_end(&tx_ring->tx_syncp);
5555         return count < tx_ring->count;
5556 }
5557
5558 /**
5559  * igb_receive_skb - helper function to handle rx indications
5560  * @q_vector: structure containing interrupt and ring information
5561  * @skb: packet to send up
5562  * @vlan_tag: vlan tag for packet
5563  **/
5564 static void igb_receive_skb(struct igb_q_vector *q_vector,
5565                             struct sk_buff *skb,
5566                             u16 vlan_tag)
5567 {
5568         struct igb_adapter *adapter = q_vector->adapter;
5569
5570         if (vlan_tag && adapter->vlgrp)
5571                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5572                                  vlan_tag, skb);
5573         else
5574                 napi_gro_receive(&q_vector->napi, skb);
5575 }
5576
5577 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5578                                        u32 status_err, struct sk_buff *skb)
5579 {
5580         skb_checksum_none_assert(skb);
5581
5582         /* bail if the Ignore Checksum bit is set or Rx checksum offload is disabled through ethtool */
5583         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5584              (status_err & E1000_RXD_STAT_IXSM))
5585                 return;
5586
5587         /* TCP/UDP checksum error bit is set */
5588         if (status_err &
5589             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5590                 /*
5591                  * work around an errata with sctp packets where the TCPE
5592                  * (aka L4E) bit is set incorrectly on 64 byte (60 byte
5593                  * w/o crc) packets; let the stack verify the crc32c
5594                  */
5595                 if ((skb->len == 60) &&
5596                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5597                         u64_stats_update_begin(&ring->rx_syncp);
5598                         ring->rx_stats.csum_err++;
5599                         u64_stats_update_end(&ring->rx_syncp);
5600                 }
5601                 /* let the stack verify checksum errors */
5602                 return;
5603         }
5604         /* It must be a TCP or UDP packet with a valid checksum */
5605         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5606                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5607
5608         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5609 }
5610
5611 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5612                                    struct sk_buff *skb)
5613 {
5614         struct igb_adapter *adapter = q_vector->adapter;
5615         struct e1000_hw *hw = &adapter->hw;
5616         u64 regval;
5617
5618         /*
5619          * If this bit is set, then the RX registers contain the time stamp. No
5620          * other packet will be time stamped until we read these registers, so
5621          * read the registers to make them available again. Because only one
5622          * packet can be time stamped at a time, we know that the register
5623          * values must belong to this one here and therefore we don't need to
5624          * compare any of the additional attributes stored for it.
5625          *
5626          * If nothing went wrong, then it should have a shared tx_flags that we
5627          * can turn into a skb_shared_hwtstamps.
5628          */
5629         if (staterr & E1000_RXDADV_STAT_TSIP) {
5630                 u32 *stamp = (u32 *)skb->data;
5631                 regval = le32_to_cpu(*(stamp + 2));
5632                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5633                 skb_pull(skb, IGB_TS_HDR_LEN);
5634         } else {
5635                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5636                         return;
5637
5638                 regval = rd32(E1000_RXSTMPL);
5639                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5640         }
5641
5642         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5643 }
5644 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5645                                union e1000_adv_rx_desc *rx_desc)
5646 {
5647         /* HW will not DMA in data larger than the given buffer, even if
5648          * it parses the header (e.g. an NFS header) to be larger.  In that
5649          * case, it fills the header buffer and spills the rest into the page.
5650          */
5651         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5652                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5653         if (hlen > rx_ring->rx_buffer_len)
5654                 hlen = rx_ring->rx_buffer_len;
5655         return hlen;
5656 }
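
/*
 * Worked example (assuming the usual descriptor field layout where
 * E1000_RXDADV_HDRBUFLEN_MASK is 0x7FE0 and the shift is 5): an
 * hdr_info value of 0x0C40 gives (0x0C40 & 0x7FE0) >> 5 = 98, i.e.
 * the hardware wrote a 98 byte header into the buffer, clamped
 * above to rx_buffer_len.
 */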
5657
5658 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5659                                  int *work_done, int budget)
5660 {
5661         struct igb_ring *rx_ring = q_vector->rx_ring;
5662         struct net_device *netdev = rx_ring->netdev;
5663         struct device *dev = rx_ring->dev;
5664         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5665         struct igb_buffer *buffer_info, *next_buffer;
5666         struct sk_buff *skb;
5667         bool cleaned = false;
5668         int cleaned_count = 0;
5669         int current_node = numa_node_id();
5670         unsigned int total_bytes = 0, total_packets = 0;
5671         unsigned int i;
5672         u32 staterr;
5673         u16 length;
5674         u16 vlan_tag;
5675
5676         i = rx_ring->next_to_clean;
5677         buffer_info = &rx_ring->buffer_info[i];
5678         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5679         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5680
5681         while (staterr & E1000_RXD_STAT_DD) {
5682                 if (*work_done >= budget)
5683                         break;
5684                 (*work_done)++;
5685                 rmb(); /* read descriptor and rx_buffer_info after status DD */
5686
5687                 skb = buffer_info->skb;
5688                 prefetch(skb->data - NET_IP_ALIGN);
5689                 buffer_info->skb = NULL;
5690
5691                 i++;
5692                 if (i == rx_ring->count)
5693                         i = 0;
5694
5695                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5696                 prefetch(next_rxd);
5697                 next_buffer = &rx_ring->buffer_info[i];
5698
5699                 length = le16_to_cpu(rx_desc->wb.upper.length);
5700                 cleaned = true;
5701                 cleaned_count++;
5702
5703                 if (buffer_info->dma) {
5704                         dma_unmap_single(dev, buffer_info->dma,
5705                                          rx_ring->rx_buffer_len,
5706                                          DMA_FROM_DEVICE);
5707                         buffer_info->dma = 0;
5708                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5709                                 skb_put(skb, length);
5710                                 goto send_up;
5711                         }
5712                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5713                 }
5714
5715                 if (length) {
5716                         dma_unmap_page(dev, buffer_info->page_dma,
5717                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5718                         buffer_info->page_dma = 0;
5719
5720                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5721                                                 buffer_info->page,
5722                                                 buffer_info->page_offset,
5723                                                 length);
5724
5725                         if ((page_count(buffer_info->page) != 1) ||
5726                             (page_to_nid(buffer_info->page) != current_node))
5727                                 buffer_info->page = NULL;
5728                         else
5729                                 get_page(buffer_info->page);
5730
5731                         skb->len += length;
5732                         skb->data_len += length;
5733                         skb->truesize += length;
5734                 }
5735
5736                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5737                         buffer_info->skb = next_buffer->skb;
5738                         buffer_info->dma = next_buffer->dma;
5739                         next_buffer->skb = skb;
5740                         next_buffer->dma = 0;
5741                         goto next_desc;
5742                 }
5743 send_up:
5744                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5745                         dev_kfree_skb_irq(skb);
5746                         goto next_desc;
5747                 }
5748
5749                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5750                         igb_rx_hwtstamp(q_vector, staterr, skb);
5751                 total_bytes += skb->len;
5752                 total_packets++;
5753
5754                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5755
5756                 skb->protocol = eth_type_trans(skb, netdev);
5757                 skb_record_rx_queue(skb, rx_ring->queue_index);
5758
5759                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5760                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5761
5762                 igb_receive_skb(q_vector, skb, vlan_tag);
5763
5764 next_desc:
5765                 rx_desc->wb.upper.status_error = 0;
5766
5767                 /* return some buffers to hardware, one at a time is too slow */
5768                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5769                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5770                         cleaned_count = 0;
5771                 }
5772
5773                 /* use prefetched values */
5774                 rx_desc = next_rxd;
5775                 buffer_info = next_buffer;
5776                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5777         }
5778
5779         rx_ring->next_to_clean = i;
5780         cleaned_count = igb_desc_unused(rx_ring);
5781
5782         if (cleaned_count)
5783                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5784
5785         rx_ring->total_packets += total_packets;
5786         rx_ring->total_bytes += total_bytes;
5787         u64_stats_update_begin(&rx_ring->rx_syncp);
5788         rx_ring->rx_stats.packets += total_packets;
5789         rx_ring->rx_stats.bytes += total_bytes;
5790         u64_stats_update_end(&rx_ring->rx_syncp);
5791         return cleaned;
5792 }
5793
5794 /**
5795  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5796  * @rx_ring: rx descriptor ring to refill, @cleaned_count: number of buffers to replace
5797  **/
5798 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5799 {
5800         struct net_device *netdev = rx_ring->netdev;
5801         union e1000_adv_rx_desc *rx_desc;
5802         struct igb_buffer *buffer_info;
5803         struct sk_buff *skb;
5804         unsigned int i;
5805         int bufsz;
5806
5807         i = rx_ring->next_to_use;
5808         buffer_info = &rx_ring->buffer_info[i];
5809
5810         bufsz = rx_ring->rx_buffer_len;
5811
5812         while (cleaned_count--) {
5813                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5814
5815                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5816                         if (!buffer_info->page) {
5817                                 buffer_info->page = netdev_alloc_page(netdev);
5818                                 if (unlikely(!buffer_info->page)) {
5819                                         u64_stats_update_begin(&rx_ring->rx_syncp);
5820                                         rx_ring->rx_stats.alloc_failed++;
5821                                         u64_stats_update_end(&rx_ring->rx_syncp);
5822                                         goto no_buffers;
5823                                 }
5824                                 buffer_info->page_offset = 0;
5825                         } else {
5826                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5827                         }
5828                         buffer_info->page_dma =
5829                                 dma_map_page(rx_ring->dev, buffer_info->page,
5830                                              buffer_info->page_offset,
5831                                              PAGE_SIZE / 2,
5832                                              DMA_FROM_DEVICE);
5833                         if (dma_mapping_error(rx_ring->dev,
5834                                               buffer_info->page_dma)) {
5835                                 buffer_info->page_dma = 0;
5836                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5837                                 rx_ring->rx_stats.alloc_failed++;
5838                                 u64_stats_update_end(&rx_ring->rx_syncp);
5839                                 goto no_buffers;
5840                         }
5841                 }
5842
5843                 skb = buffer_info->skb;
5844                 if (!skb) {
5845                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5846                         if (unlikely(!skb)) {
5847                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5848                                 rx_ring->rx_stats.alloc_failed++;
5849                                 u64_stats_update_end(&rx_ring->rx_syncp);
5850                                 goto no_buffers;
5851                         }
5852
5853                         buffer_info->skb = skb;
5854                 }
5855                 if (!buffer_info->dma) {
5856                         buffer_info->dma = dma_map_single(rx_ring->dev,
5857                                                           skb->data,
5858                                                           bufsz,
5859                                                           DMA_FROM_DEVICE);
5860                         if (dma_mapping_error(rx_ring->dev,
5861                                               buffer_info->dma)) {
5862                                 buffer_info->dma = 0;
5863                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5864                                 rx_ring->rx_stats.alloc_failed++;
5865                                 u64_stats_update_end(&rx_ring->rx_syncp);
5866                                 goto no_buffers;
5867                         }
5868                 }
5869                 /* Refresh the desc even if buffer_addrs didn't change because
5870                  * each write-back erases this info. */
5871                 if (bufsz < IGB_RXBUFFER_1024) {
5872                         rx_desc->read.pkt_addr =
5873                              cpu_to_le64(buffer_info->page_dma);
5874                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5875                 } else {
5876                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5877                         rx_desc->read.hdr_addr = 0;
5878                 }
5879
5880                 i++;
5881                 if (i == rx_ring->count)
5882                         i = 0;
5883                 buffer_info = &rx_ring->buffer_info[i];
5884         }
5885
5886 no_buffers:
5887         if (rx_ring->next_to_use != i) {
5888                 rx_ring->next_to_use = i;
5889                 if (i == 0)
5890                         i = (rx_ring->count - 1);
5891                 else
5892                         i--;
5893
5894                 /* Force memory writes to complete before letting h/w
5895                  * know there are new descriptors to fetch.  (Only
5896                  * applicable for weak-ordered memory model archs,
5897                  * such as IA-64). */
5898                 wmb();
5899                 writel(i, rx_ring->tail);
5900         }
5901 }
5902
5903 /**
5904  * igb_mii_ioctl - handle MII register ioctls
5905  * @netdev: network interface device structure
5906  * @ifr: pointer to the ifreq carrying the mii_ioctl_data
5907  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
5908  **/
5909 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5910 {
5911         struct igb_adapter *adapter = netdev_priv(netdev);
5912         struct mii_ioctl_data *data = if_mii(ifr);
5913
5914         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5915                 return -EOPNOTSUPP;
5916
5917         switch (cmd) {
5918         case SIOCGMIIPHY:
5919                 data->phy_id = adapter->hw.phy.addr;
5920                 break;
5921         case SIOCGMIIREG:
5922                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5923                                      &data->val_out))
5924                         return -EIO;
5925                 break;
5926         case SIOCSMIIREG:
5927         default:
5928                 return -EOPNOTSUPP;
5929         }
5930         return 0;
5931 }
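
/*
 * Illustrative user space sketch (not part of the driver): reading
 * the PHY's BMSR through this handler follows the classic mii-tool
 * pattern; "eth0" and sockfd are placeholders and error handling
 * is omitted.
 *
 *	struct ifreq ifr;
 *	struct mii_ioctl_data *mii =
 *			(struct mii_ioctl_data *)&ifr.ifr_data;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ioctl(sockfd, SIOCGMIIPHY, &ifr);  (fills mii->phy_id)
 *	mii->reg_num = MII_BMSR;
 *	ioctl(sockfd, SIOCGMIIREG, &ifr);  (result in mii->val_out)
 */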
5932
5933 /**
5934  * igb_hwtstamp_ioctl - control hardware time stamping
5935  * @netdev: network interface device structure
5936  * @ifr: pointer to the ifreq carrying the hwtstamp_config
5937  * @cmd: ioctl command (SIOCSHWTSTAMP)
5938  *
5939  * Outgoing time stamping can be enabled and disabled. Play nice and
5940  * disable it when requested, although it shouldn't cause any overhead
5941  * when no packet needs it. At most one packet in the queue may be
5942  * marked for time stamping, otherwise it would be impossible to tell
5943  * for sure to which packet the hardware time stamp belongs.
5944  *
5945  * Incoming time stamping has to be configured via the hardware
5946  * filters. Not all combinations are supported, in particular event
5947  * type has to be specified. Matching the kind of event packet is
5948  * not supported, with the exception of "all V2 events regardless of
5949  * level 2 or 4".
5950  *
5951  **/
5952 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5953                               struct ifreq *ifr, int cmd)
5954 {
5955         struct igb_adapter *adapter = netdev_priv(netdev);
5956         struct e1000_hw *hw = &adapter->hw;
5957         struct hwtstamp_config config;
5958         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5959         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5960         u32 tsync_rx_cfg = 0;
5961         bool is_l4 = false;
5962         bool is_l2 = false;
5963         u32 regval;
5964
5965         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5966                 return -EFAULT;
5967
5968         /* reserved for future extensions */
5969         if (config.flags)
5970                 return -EINVAL;
5971
5972         switch (config.tx_type) {
5973         case HWTSTAMP_TX_OFF:
5974                 tsync_tx_ctl = 0;
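                /* fall through - TX_OFF just leaves tsync_tx_ctl cleared */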
5975         case HWTSTAMP_TX_ON:
5976                 break;
5977         default:
5978                 return -ERANGE;
5979         }
5980
5981         switch (config.rx_filter) {
5982         case HWTSTAMP_FILTER_NONE:
5983                 tsync_rx_ctl = 0;
5984                 break;
5985         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5986         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5987         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5988         case HWTSTAMP_FILTER_ALL:
5989                 /*
5990                  * register TSYNCRXCFG must be set, therefore it is not
5991                  * possible to time stamp both Sync and Delay_Req messages
5992                  * => fall back to time stamping all packets
5993                  */
5994                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5995                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5996                 break;
5997         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5998                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5999                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6000                 is_l4 = true;
6001                 break;
6002         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6003                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6004                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6005                 is_l4 = true;
6006                 break;
6007         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6008         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6009                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6010                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6011                 is_l2 = true;
6012                 is_l4 = true;
6013                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6014                 break;
6015         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6016         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6017                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6018                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6019                 is_l2 = true;
6020                 is_l4 = true;
6021                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6022                 break;
6023         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6024         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6025         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6026                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6027                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6028                 is_l2 = true;
6029                 break;
6030         default:
6031                 return -ERANGE;
6032         }
6033
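        /* 82575 silicon provides no hardware time stamping; only an
         * all-disabled config can succeed here */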
6034         if (hw->mac.type == e1000_82575) {
6035                 if (tsync_rx_ctl | tsync_tx_ctl)
6036                         return -EINVAL;
6037                 return 0;
6038         }
6039
6040         /*
6041          * Per-packet timestamping only works if all packets are
6042          * timestamped, so enable timestamping in all packets as
6043          * long as one rx filter was configured.
6044          */
6045         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6046                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6047                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6048         }
6049
6050         /* enable/disable TX */
6051         regval = rd32(E1000_TSYNCTXCTL);
6052         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6053         regval |= tsync_tx_ctl;
6054         wr32(E1000_TSYNCTXCTL, regval);
6055
6056         /* enable/disable RX */
6057         regval = rd32(E1000_TSYNCRXCTL);
6058         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6059         regval |= tsync_rx_ctl;
6060         wr32(E1000_TSYNCRXCTL, regval);
6061
6062         /* define which PTP packets are time stamped */
6063         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6064
6065         /* define ethertype filter for timestamped packets */
6066         if (is_l2)
6067                 wr32(E1000_ETQF(3),
6068                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6069                                  E1000_ETQF_1588 | /* enable timestamping */
6070                                  ETH_P_1588));     /* 1588 eth protocol type */
6071         else
6072                 wr32(E1000_ETQF(3), 0);
6073
6074 #define PTP_PORT 319
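        /* 319 is the well-known UDP port for PTP event messages
         * (Sync, Delay_Req); PTP general messages use port 320 */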
6075         /* L4 Queue Filter[3]: filter by destination port and protocol */
6076         if (is_l4) {
6077                 u32 ftqf = (IPPROTO_UDP /* UDP */
6078                         | E1000_FTQF_VF_BP /* VF not compared */
6079                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6080                         | E1000_FTQF_MASK); /* mask all inputs */
6081                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6082
6083                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6084                 wr32(E1000_IMIREXT(3),
6085                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6086                 if (hw->mac.type == e1000_82576) {
6087                         /* enable source port check */
6088                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6089                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6090                 }
6091                 wr32(E1000_FTQF(3), ftqf);
6092         } else {
6093                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6094         }
6095         wrfl();
6096
6097         adapter->hwtstamp_config = config;
6098
6099         /* clear TX/RX time stamp registers, just to be sure */
6100         regval = rd32(E1000_TXSTMPH);
6101         regval = rd32(E1000_RXSTMPH);
6102
6103         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6104                 -EFAULT : 0;
6105 }
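
/*
 * Illustrative user space sketch (not part of the driver): enabling
 * transmit timestamps plus timestamping of all PTP V2 event packets
 * through SIOCSHWTSTAMP; "eth0" and sockfd are placeholders and
 * error handling is omitted.
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *	};
 *	struct ifreq ifr;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (char *)&cfg;
 *	ioctl(sockfd, SIOCSHWTSTAMP, &ifr);
 *
 * On return cfg.rx_filter holds the filter actually applied, which
 * may be broader than the one requested.
 */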
6106
6107 /**
6108  * igb_ioctl - handle device-specific ioctls
6109  * @netdev: network interface device structure
6110  * @ifr: pointer to the user-supplied ifreq
6111  * @cmd: ioctl command
6112  **/
6113 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6114 {
6115         switch (cmd) {
6116         case SIOCGMIIPHY:
6117         case SIOCGMIIREG:
6118         case SIOCSMIIREG:
6119                 return igb_mii_ioctl(netdev, ifr, cmd);
6120         case SIOCSHWTSTAMP:
6121                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6122         default:
6123                 return -EOPNOTSUPP;
6124         }
6125 }
6126
6127 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6128 {
6129         struct igb_adapter *adapter = hw->back;
6130         u16 cap_offset;
6131
6132         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6133         if (!cap_offset)
6134                 return -E1000_ERR_CONFIG;
6135
6136         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6137
6138         return 0;
6139 }
6140
6141 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6142 {
6143         struct igb_adapter *adapter = hw->back;
6144         u16 cap_offset;
6145
6146         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6147         if (!cap_offset)
6148                 return -E1000_ERR_CONFIG;
6149
6150         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6151
6152         return 0;
6153 }
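
/*
 * Usage sketch: callers pass the offset of a register within the
 * PCI Express capability structure, e.g. (assuming the driver's
 * PCIE_LINK_STATUS define names offset 0x12):
 *
 *	u16 link_status;
 *	igb_read_pcie_cap_reg(hw, PCIE_LINK_STATUS, &link_status);
 */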
6154
6155 static void igb_vlan_rx_register(struct net_device *netdev,
6156                                  struct vlan_group *grp)
6157 {
6158         struct igb_adapter *adapter = netdev_priv(netdev);
6159         struct e1000_hw *hw = &adapter->hw;
6160         u32 ctrl, rctl;
6161
6162         igb_irq_disable(adapter);
6163         adapter->vlgrp = grp;
6164
6165         if (grp) {
6166                 /* enable VLAN tag insert/strip */
6167                 ctrl = rd32(E1000_CTRL);
6168                 ctrl |= E1000_CTRL_VME;
6169                 wr32(E1000_CTRL, ctrl);
6170
6171                 /* Disable CFI check */
6172                 rctl = rd32(E1000_RCTL);
6173                 rctl &= ~E1000_RCTL_CFIEN;
6174                 wr32(E1000_RCTL, rctl);
6175         } else {
6176                 /* disable VLAN tag insert/strip */
6177                 ctrl = rd32(E1000_CTRL);
6178                 ctrl &= ~E1000_CTRL_VME;
6179                 wr32(E1000_CTRL, ctrl);
6180         }
6181
6182         igb_rlpml_set(adapter);
6183
6184         if (!test_bit(__IGB_DOWN, &adapter->state))
6185                 igb_irq_enable(adapter);
6186 }
6187
6188 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6189 {
6190         struct igb_adapter *adapter = netdev_priv(netdev);
6191         struct e1000_hw *hw = &adapter->hw;
6192         int pf_id = adapter->vfs_allocated_count;
6193
6194         /* attempt to add filter to vlvf array */
6195         igb_vlvf_set(adapter, vid, true, pf_id);
6196
6197         /* add the filter since PF can receive vlans w/o entry in vlvf */
6198         igb_vfta_set(hw, vid, true);
6199 }
6200
6201 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6202 {
6203         struct igb_adapter *adapter = netdev_priv(netdev);
6204         struct e1000_hw *hw = &adapter->hw;
6205         int pf_id = adapter->vfs_allocated_count;
6206         s32 err;
6207
6208         igb_irq_disable(adapter);
6209         vlan_group_set_device(adapter->vlgrp, vid, NULL);
6210
6211         if (!test_bit(__IGB_DOWN, &adapter->state))
6212                 igb_irq_enable(adapter);
6213
6214         /* remove vlan from VLVF table array */
6215         err = igb_vlvf_set(adapter, vid, false, pf_id);
6216
6217         /* if vid was not present in VLVF just remove it from table */
6218         if (err)
6219                 igb_vfta_set(hw, vid, false);
6220 }
6221
6222 static void igb_restore_vlan(struct igb_adapter *adapter)
6223 {
6224         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6225
6226         if (adapter->vlgrp) {
6227                 u16 vid;
6228                 for (vid = 0; vid < VLAN_N_VID; vid++) {
6229                         if (!vlan_group_get_device(adapter->vlgrp, vid))
6230                                 continue;
6231                         igb_vlan_rx_add_vid(adapter->netdev, vid);
6232                 }
6233         }
6234 }
6235
6236 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
6237 {
6238         struct pci_dev *pdev = adapter->pdev;
6239         struct e1000_mac_info *mac = &adapter->hw.mac;
6240
6241         mac->autoneg = 0;
6242
6243         /* Fiber NICs only allow 1 Gbps full duplex */
6244         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6245                 spddplx != (SPEED_1000 + DUPLEX_FULL)) {
6246                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6247                 return -EINVAL;
6248         }
6249
6250         switch (spddplx) {
6251         case SPEED_10 + DUPLEX_HALF:
6252                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6253                 break;
6254         case SPEED_10 + DUPLEX_FULL:
6255                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6256                 break;
6257         case SPEED_100 + DUPLEX_HALF:
6258                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6259                 break;
6260         case SPEED_100 + DUPLEX_FULL:
6261                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6262                 break;
6263         case SPEED_1000 + DUPLEX_FULL:
6264                 mac->autoneg = 1;
6265                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6266                 break;
6267         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6268         default:
6269                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6270                 return -EINVAL;
6271         }
6272         return 0;
6273 }
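
/*
 * Usage sketch: the ethtool set_settings path calls this as e.g.
 * igb_set_spd_dplx(adapter, SPEED_100 + DUPLEX_FULL) to force
 * 100 Mbps full duplex; the 1000 Mbps case keeps autonegotiation
 * on but advertises only 1000BASE-T full duplex.
 */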
6274
6275 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6276 {
6277         struct net_device *netdev = pci_get_drvdata(pdev);
6278         struct igb_adapter *adapter = netdev_priv(netdev);
6279         struct e1000_hw *hw = &adapter->hw;
6280         u32 ctrl, rctl, status;
6281         u32 wufc = adapter->wol;
6282 #ifdef CONFIG_PM
6283         int retval = 0;
6284 #endif
6285
6286         netif_device_detach(netdev);
6287
6288         if (netif_running(netdev))
6289                 igb_close(netdev);
6290
6291         igb_clear_interrupt_scheme(adapter);
6292
6293 #ifdef CONFIG_PM
6294         retval = pci_save_state(pdev);
6295         if (retval)
6296                 return retval;
6297 #endif
6298
6299         status = rd32(E1000_STATUS);
6300         if (status & E1000_STATUS_LU)
6301                 wufc &= ~E1000_WUFC_LNKC;
6302
6303         if (wufc) {
6304                 igb_setup_rctl(adapter);
6305                 igb_set_rx_mode(netdev);
6306
6307                 /* turn on all-multi mode if wake on multicast is enabled */
6308                 if (wufc & E1000_WUFC_MC) {
6309                         rctl = rd32(E1000_RCTL);
6310                         rctl |= E1000_RCTL_MPE;
6311                         wr32(E1000_RCTL, rctl);
6312                 }
6313
6314                 ctrl = rd32(E1000_CTRL);
6315                 /* advertise wake from D3Cold */
6316                 #define E1000_CTRL_ADVD3WUC 0x00100000
6317                 /* phy power management enable */
6318                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6319                 ctrl |= E1000_CTRL_ADVD3WUC;
6320                 wr32(E1000_CTRL, ctrl);
6321
6322                 /* Allow time for pending master requests to run */
6323                 igb_disable_pcie_master(hw);
6324
6325                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6326                 wr32(E1000_WUFC, wufc);
6327         } else {
6328                 wr32(E1000_WUC, 0);
6329                 wr32(E1000_WUFC, 0);
6330         }
6331
6332         *enable_wake = wufc || adapter->en_mng_pt;
6333         if (!*enable_wake)
6334                 igb_power_down_link(adapter);
6335         else
6336                 igb_power_up_link(adapter);
6337
6338         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6339          * would have already happened in close and is redundant. */
6340         igb_release_hw_control(adapter);
6341
6342         pci_disable_device(pdev);
6343
6344         return 0;
6345 }
6346
6347 #ifdef CONFIG_PM
6348 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6349 {
6350         int retval;
6351         bool wake;
6352
6353         retval = __igb_shutdown(pdev, &wake);
6354         if (retval)
6355                 return retval;
6356
6357         if (wake) {
6358                 pci_prepare_to_sleep(pdev);
6359         } else {
6360                 pci_wake_from_d3(pdev, false);
6361                 pci_set_power_state(pdev, PCI_D3hot);
6362         }
6363
6364         return 0;
6365 }
6366
6367 static int igb_resume(struct pci_dev *pdev)
6368 {
6369         struct net_device *netdev = pci_get_drvdata(pdev);
6370         struct igb_adapter *adapter = netdev_priv(netdev);
6371         struct e1000_hw *hw = &adapter->hw;
6372         u32 err;
6373
6374         pci_set_power_state(pdev, PCI_D0);
6375         pci_restore_state(pdev);
6376         pci_save_state(pdev);
6377
6378         err = pci_enable_device_mem(pdev);
6379         if (err) {
6380                 dev_err(&pdev->dev,
6381                         "igb: Cannot enable PCI device from suspend\n");
6382                 return err;
6383         }
6384         pci_set_master(pdev);
6385
6386         pci_enable_wake(pdev, PCI_D3hot, 0);
6387         pci_enable_wake(pdev, PCI_D3cold, 0);
6388
6389         if (igb_init_interrupt_scheme(adapter)) {
6390                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6391                 return -ENOMEM;
6392         }
6393
6394         igb_reset(adapter);
6395
6396         /* let the f/w know that the h/w is now under the control of the
6397          * driver. */
6398         igb_get_hw_control(adapter);
6399
6400         wr32(E1000_WUS, ~0);
6401
6402         if (netif_running(netdev)) {
6403                 err = igb_open(netdev);
6404                 if (err)
6405                         return err;
6406         }
6407
6408         netif_device_attach(netdev);
6409
6410         return 0;
6411 }
6412 #endif
6413
6414 static void igb_shutdown(struct pci_dev *pdev)
6415 {
6416         bool wake;
6417
6418         __igb_shutdown(pdev, &wake);
6419
6420         if (system_state == SYSTEM_POWER_OFF) {
6421                 pci_wake_from_d3(pdev, wake);
6422                 pci_set_power_state(pdev, PCI_D3hot);
6423         }
6424 }
6425
6426 #ifdef CONFIG_NET_POLL_CONTROLLER
6427 /*
6428  * Polling 'interrupt' - used by things like netconsole to send skbs
6429  * without having to re-enable interrupts. It's not called while
6430  * the interrupt routine is executing.
6431  */
6432 static void igb_netpoll(struct net_device *netdev)
6433 {
6434         struct igb_adapter *adapter = netdev_priv(netdev);
6435         struct e1000_hw *hw = &adapter->hw;
6436         int i;
6437
6438         if (!adapter->msix_entries) {
6439                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6440                 igb_irq_disable(adapter);
6441                 napi_schedule(&q_vector->napi);
6442                 return;
6443         }
6444
6445         for (i = 0; i < adapter->num_q_vectors; i++) {
6446                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6447                 wr32(E1000_EIMC, q_vector->eims_value);
6448                 napi_schedule(&q_vector->napi);
6449         }
6450 }
6451 #endif /* CONFIG_NET_POLL_CONTROLLER */
6452
6453 /**
6454  * igb_io_error_detected - called when PCI error is detected
6455  * @pdev: Pointer to PCI device
6456  * @state: The current pci connection state
6457  *
6458  * This function is called after a PCI bus error affecting
6459  * this device has been detected.
6460  */
6461 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6462                                               pci_channel_state_t state)
6463 {
6464         struct net_device *netdev = pci_get_drvdata(pdev);
6465         struct igb_adapter *adapter = netdev_priv(netdev);
6466
6467         netif_device_detach(netdev);
6468
6469         if (state == pci_channel_io_perm_failure)
6470                 return PCI_ERS_RESULT_DISCONNECT;
6471
6472         if (netif_running(netdev))
6473                 igb_down(adapter);
6474         pci_disable_device(pdev);
6475
6476         /* Request a slot reset. */
6477         return PCI_ERS_RESULT_NEED_RESET;
6478 }
6479
6480 /**
6481  * igb_io_slot_reset - called after the pci bus has been reset.
6482  * @pdev: Pointer to PCI device
6483  *
6484  * Restart the card from scratch, as if from a cold-boot. Implementation
6485  * resembles the first-half of the igb_resume routine.
6486  */
6487 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6488 {
6489         struct net_device *netdev = pci_get_drvdata(pdev);
6490         struct igb_adapter *adapter = netdev_priv(netdev);
6491         struct e1000_hw *hw = &adapter->hw;
6492         pci_ers_result_t result;
6493         int err;
6494
6495         if (pci_enable_device_mem(pdev)) {
6496                 dev_err(&pdev->dev,
6497                         "Cannot re-enable PCI device after reset.\n");
6498                 result = PCI_ERS_RESULT_DISCONNECT;
6499         } else {
6500                 pci_set_master(pdev);
6501                 pci_restore_state(pdev);
6502                 pci_save_state(pdev);
6503
6504                 pci_enable_wake(pdev, PCI_D3hot, 0);
6505                 pci_enable_wake(pdev, PCI_D3cold, 0);
6506
6507                 igb_reset(adapter);
6508                 wr32(E1000_WUS, ~0);
6509                 result = PCI_ERS_RESULT_RECOVERED;
6510         }
6511
6512         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6513         if (err) {
6514                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6515                         "failed 0x%0x\n", err);
6516                 /* non-fatal, continue */
6517         }
6518
6519         return result;
6520 }
6521
6522 /**
6523  * igb_io_resume - called when traffic can start flowing again.
6524  * @pdev: Pointer to PCI device
6525  *
6526  * This callback is called when the error recovery driver tells us that
6527  * it's OK to resume normal operation. Implementation resembles the
6528  * second-half of the igb_resume routine.
6529  */
6530 static void igb_io_resume(struct pci_dev *pdev)
6531 {
6532         struct net_device *netdev = pci_get_drvdata(pdev);
6533         struct igb_adapter *adapter = netdev_priv(netdev);
6534
6535         if (netif_running(netdev)) {
6536                 if (igb_up(adapter)) {
6537                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6538                         return;
6539                 }
6540         }
6541
6542         netif_device_attach(netdev);
6543
6544         /* let the f/w know that the h/w is now under the control of the
6545          * driver. */
6546         igb_get_hw_control(adapter);
6547 }
6548
6549 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6550                              u8 qsel)
6551 {
6552         u32 rar_low, rar_high;
6553         struct e1000_hw *hw = &adapter->hw;
6554
6555         /* HW expects these in little endian so we reverse the byte order
6556          * from network order (big endian) to little endian
6557          */
6558         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6559                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6560         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6561
6562         /* Indicate to hardware the Address is Valid. */
6563         rar_high |= E1000_RAH_AV;
6564
6565         if (hw->mac.type == e1000_82575)
6566                 rar_high |= E1000_RAH_POOL_1 * qsel;
6567         else
6568                 rar_high |= E1000_RAH_POOL_1 << qsel;
6569
6570         wr32(E1000_RAL(index), rar_low);
6571         wrfl();
6572         wr32(E1000_RAH(index), rar_high);
6573         wrfl();
6574 }
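
/*
 * Worked example: the MAC address 00:1b:21:aa:bb:cc packs into
 * rar_low = 0xaa211b00 and rar_high = 0x0000ccbb before the
 * E1000_RAH_AV valid bit and the pool select bits are OR'd in.
 */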
6575
6576 static int igb_set_vf_mac(struct igb_adapter *adapter,
6577                           int vf, unsigned char *mac_addr)
6578 {
6579         struct e1000_hw *hw = &adapter->hw;
6580         /* VF MAC addresses start at the end of the receive addresses and
6581          * move towards the first; as a result a collision should not be possible */
6582         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6583
6584         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6585
6586         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6587
6588         return 0;
6589 }
6590
6591 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6592 {
6593         struct igb_adapter *adapter = netdev_priv(netdev);
6594         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6595                 return -EINVAL;
6596         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6597         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6598         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6599                                       " change effective.\n");
6600         if (test_bit(__IGB_DOWN, &adapter->state)) {
6601                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6602                          " but the PF device is not up.\n");
6603                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6604                          " attempting to use the VF device.\n");
6605         }
6606         return igb_set_vf_mac(adapter, vf, mac);
6607 }
6608
6609 static int igb_link_mbps(int internal_link_speed)
6610 {
6611         switch (internal_link_speed) {
6612         case SPEED_100:
6613                 return 100;
6614         case SPEED_1000:
6615                 return 1000;
6616         default:
6617                 return 0;
6618         }
6619 }
6620
6621 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6622                                   int link_speed)
6623 {
6624         int rf_dec, rf_int;
6625         u32 bcnrc_val;
6626
6627         if (tx_rate != 0) {
6628                 /* Calculate the rate factor values to set */
6629                 rf_int = link_speed / tx_rate;
6630                 rf_dec = (link_speed - (rf_int * tx_rate));
6631                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6632
6633                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6634                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6635                                E1000_RTTBCNRC_RF_INT_MASK);
6636                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6637         } else {
6638                 bcnrc_val = 0;
6639         }
6640
6641         wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6642         wr32(E1000_RTTBCNRC, bcnrc_val);
6643 }
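
/*
 * Worked example (assuming E1000_RTTBCNRC_RF_INT_SHIFT is 14): with
 * link_speed = 1000 and tx_rate = 300, rf_int = 3 and
 * rf_dec = (100 << 14) / 300 = 5461, so the programmed rate factor
 * is 3 + 5461/16384 ~= 3.333, i.e. link_speed / tx_rate.
 */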
6644
6645 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6646 {
6647         int actual_link_speed, i;
6648         bool reset_rate = false;
6649
6650         /* VF TX rate limit was not set or not supported */
6651         if ((adapter->vf_rate_link_speed == 0) ||
6652             (adapter->hw.mac.type != e1000_82576))
6653                 return;
6654
6655         actual_link_speed = igb_link_mbps(adapter->link_speed);
6656         if (actual_link_speed != adapter->vf_rate_link_speed) {
6657                 reset_rate = true;
6658                 adapter->vf_rate_link_speed = 0;
6659                 dev_info(&adapter->pdev->dev,
6660                          "Link speed has been changed. VF Transmit "
6661                          "rate is disabled\n");
6662         }
6663
6664         for (i = 0; i < adapter->vfs_allocated_count; i++) {
6665                 if (reset_rate)
6666                         adapter->vf_data[i].tx_rate = 0;
6667
6668                 igb_set_vf_rate_limit(&adapter->hw, i,
6669                                       adapter->vf_data[i].tx_rate,
6670                                       actual_link_speed);
6671         }
6672 }
6673
6674 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6675 {
6676         struct igb_adapter *adapter = netdev_priv(netdev);
6677         struct e1000_hw *hw = &adapter->hw;
6678         int actual_link_speed;
6679
6680         if (hw->mac.type != e1000_82576)
6681                 return -EOPNOTSUPP;
6682
6683         actual_link_speed = igb_link_mbps(adapter->link_speed);
6684         if ((vf >= adapter->vfs_allocated_count) ||
6685             (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6686             (tx_rate < 0) || (tx_rate > actual_link_speed))
6687                 return -EINVAL;
6688
6689         adapter->vf_rate_link_speed = actual_link_speed;
6690         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6691         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6692
6693         return 0;
6694 }
6695
6696 static int igb_ndo_get_vf_config(struct net_device *netdev,
6697                                  int vf, struct ifla_vf_info *ivi)
6698 {
6699         struct igb_adapter *adapter = netdev_priv(netdev);
6700         if (vf >= adapter->vfs_allocated_count)
6701                 return -EINVAL;
6702         ivi->vf = vf;
6703         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6704         ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6705         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6706         ivi->qos = adapter->vf_data[vf].pf_qos;
6707         return 0;
6708 }
6709
6710 static void igb_vmm_control(struct igb_adapter *adapter)
6711 {
6712         struct e1000_hw *hw = &adapter->hw;
6713         u32 reg;
6714
6715         switch (hw->mac.type) {
6716         case e1000_82575:
6717         default:
6718                 /* replication is not supported for 82575 */
6719                 return;
6720         case e1000_82576:
6721                 /* notify HW that the MAC is adding vlan tags */
6722                 reg = rd32(E1000_DTXCTL);
6723                 reg |= E1000_DTXCTL_VLAN_ADDED;
6724                 wr32(E1000_DTXCTL, reg);
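                /* fall through - 82576 also needs the vlan strip setting below */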
6725         case e1000_82580:
6726                 /* enable replication vlan tag stripping */
6727                 reg = rd32(E1000_RPLOLR);
6728                 reg |= E1000_RPLOLR_STRVLAN;
6729                 wr32(E1000_RPLOLR, reg);
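                /* fall through */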
6730         case e1000_i350:
6731                 /* none of the above registers are supported by i350 */
6732                 break;
6733         }
6734
6735         if (adapter->vfs_allocated_count) {
6736                 igb_vmdq_set_loopback_pf(hw, true);
6737                 igb_vmdq_set_replication_pf(hw, true);
6738                 igb_vmdq_set_anti_spoofing_pf(hw, true,
6739                                                 adapter->vfs_allocated_count);
6740         } else {
6741                 igb_vmdq_set_loopback_pf(hw, false);
6742                 igb_vmdq_set_replication_pf(hw, false);
6743         }
6744 }
6745
6746 /* igb_main.c */