/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2009 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/

#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"

#define DRV_VERSION "2.4.13-k2"
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
                                "Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";

static const struct e1000_info *igb_info_tbl[] = {
        [board_82575] = &e1000_82575_info,
};

static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
        /* required last entry */
        {0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);

void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
                                                 struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static bool igb_clean_tx_irq(struct igb_q_vector *);
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
                               int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
                                 struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);

#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
        .notifier_call  = igb_notify_dca,
        .next           = NULL,
        .priority       = 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */

static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
                     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
        .error_detected = igb_io_error_detected,
        .slot_reset = igb_io_slot_reset,
        .resume = igb_io_resume,
};

static struct pci_driver igb_driver = {
        .name     = igb_driver_name,
        .id_table = igb_pci_tbl,
        .probe    = igb_probe,
        .remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
        /* Power Management Hooks */
        .suspend  = igb_suspend,
        .resume   = igb_resume,
#endif
        .shutdown = igb_shutdown,
        .err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);

struct igb_reg_info {
        u32 ofs;
        char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {

        /* General Registers */
        {E1000_CTRL, "CTRL"},
        {E1000_STATUS, "STATUS"},
        {E1000_CTRL_EXT, "CTRL_EXT"},

        /* Interrupt Registers */
        {E1000_ICR, "ICR"},

        /* RX Registers */
        {E1000_RCTL, "RCTL"},
        {E1000_RDLEN(0), "RDLEN"},
        {E1000_RDH(0), "RDH"},
        {E1000_RDT(0), "RDT"},
        {E1000_RXDCTL(0), "RXDCTL"},
        {E1000_RDBAL(0), "RDBAL"},
        {E1000_RDBAH(0), "RDBAH"},

        /* TX Registers */
        {E1000_TCTL, "TCTL"},
        {E1000_TDBAL(0), "TDBAL"},
        {E1000_TDBAH(0), "TDBAH"},
        {E1000_TDLEN(0), "TDLEN"},
        {E1000_TDH(0), "TDH"},
        {E1000_TDT(0), "TDT"},
        {E1000_TXDCTL(0), "TXDCTL"},
        {E1000_TDFH, "TDFH"},
        {E1000_TDFT, "TDFT"},
        {E1000_TDFHS, "TDFHS"},
        {E1000_TDFPC, "TDFPC"},

        /* List Terminator */
        {}
};

/*
 * igb_regdump - register printout routine
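 *
 * For the per-queue registers (RDLEN, RDH, RDT, RXDCTL, RDBAL, RDBAH and
 * their TX counterparts) the parts handled here expose four ring
 * instances, so all four copies are read and printed on a single line.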
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
        int n = 0;
        char rname[16];
        u32 regs[8];

        switch (reginfo->ofs) {
        case E1000_RDLEN(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDLEN(n));
                break;
        case E1000_RDH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDH(n));
                break;
        case E1000_RDT(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDT(n));
                break;
        case E1000_RXDCTL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RXDCTL(n));
                break;
        case E1000_RDBAL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDBAL(n));
                break;
        case E1000_RDBAH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDBAH(n));
                break;
        case E1000_TDBAL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDBAL(n));
                break;
        case E1000_TDBAH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDBAH(n));
                break;
        case E1000_TDLEN(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDLEN(n));
                break;
        case E1000_TDH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDH(n));
                break;
        case E1000_TDT(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDT(n));
                break;
        case E1000_TXDCTL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TXDCTL(n));
                break;
        default:
                printk(KERN_INFO "%-15s %08x\n",
                        reginfo->name, rd32(reginfo->ofs));
                return;
        }

        snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
        printk(KERN_INFO "%-15s ", rname);
        for (n = 0; n < 4; n++)
                printk(KERN_CONT "%08x ", regs[n]);
        printk(KERN_CONT "\n");
}

/*
 * igb_dump - Print registers, tx-rings and rx-rings
 */
static void igb_dump(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        struct igb_reg_info *reginfo;
        int n = 0;
        struct igb_ring *tx_ring;
        union e1000_adv_tx_desc *tx_desc;
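        /* overlay used to print any 16-byte descriptor as two 64-bit words */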
        struct my_u0 { u64 a; u64 b; } *u0;
        struct igb_buffer *buffer_info;
        struct igb_ring *rx_ring;
        union e1000_adv_rx_desc *rx_desc;
        u32 staterr;
        int i = 0;

        if (!netif_msg_hw(adapter))
                return;

        /* Print netdevice Info */
        if (netdev) {
                dev_info(&adapter->pdev->dev, "Net device Info\n");
                printk(KERN_INFO "Device Name     state            "
                        "trans_start      last_rx\n");
                printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
                        netdev->name,
                        netdev->state,
                        netdev->trans_start,
                        netdev->last_rx);
        }

        /* Print Registers */
        dev_info(&adapter->pdev->dev, "Register Dump\n");
        printk(KERN_INFO " Register Name   Value\n");
        for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
             reginfo->name; reginfo++) {
                igb_regdump(hw, reginfo);
        }

        /* Print TX Ring Summary */
        if (!netdev || !netif_running(netdev))
                goto exit;

        dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
        printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
                " leng ntw timestamp\n");
        for (n = 0; n < adapter->num_tx_queues; n++) {
                tx_ring = adapter->tx_ring[n];
                buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
                printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
                           n, tx_ring->next_to_use, tx_ring->next_to_clean,
                           (u64)buffer_info->dma,
                           buffer_info->length,
                           buffer_info->next_to_watch,
                           (u64)buffer_info->time_stamp);
        }

        /* Print TX Rings */
        if (!netif_msg_tx_done(adapter))
                goto rx_ring_summary;

        dev_info(&adapter->pdev->dev, "TX Rings Dump\n");

        /* Transmit Descriptor Formats
         *
         * Advanced Transmit Descriptor
         *   +--------------------------------------------------------------+
         * 0 |         Buffer Address [63:0]                                |
         *   +--------------------------------------------------------------+
         * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
         *   +--------------------------------------------------------------+
         *   63      46 45    40 39 38 36 35 32 31   24             15       0
         */

        for (n = 0; n < adapter->num_tx_queues; n++) {
                tx_ring = adapter->tx_ring[n];
                printk(KERN_INFO "------------------------------------\n");
                printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
                printk(KERN_INFO "------------------------------------\n");
                printk(KERN_INFO "T [desc]     [address 63:0  ] "
                        "[PlPOCIStDDM Ln] [bi->dma       ] "
                        "leng  ntw timestamp        bi->skb\n");

                for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
                        tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
                        buffer_info = &tx_ring->buffer_info[i];
                        u0 = (struct my_u0 *)tx_desc;
                        printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
                                " %04X  %3X %016llX %p", i,
                                le64_to_cpu(u0->a),
                                le64_to_cpu(u0->b),
                                (u64)buffer_info->dma,
                                buffer_info->length,
                                buffer_info->next_to_watch,
                                (u64)buffer_info->time_stamp,
                                buffer_info->skb);
                        if (i == tx_ring->next_to_use &&
                                i == tx_ring->next_to_clean)
                                printk(KERN_CONT " NTC/U\n");
                        else if (i == tx_ring->next_to_use)
                                printk(KERN_CONT " NTU\n");
                        else if (i == tx_ring->next_to_clean)
                                printk(KERN_CONT " NTC\n");
                        else
                                printk(KERN_CONT "\n");

                        if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
                                print_hex_dump(KERN_INFO, "",
                                        DUMP_PREFIX_ADDRESS,
                                        16, 1, phys_to_virt(buffer_info->dma),
                                        buffer_info->length, true);
                }
        }

        /* Print RX Rings Summary */
rx_ring_summary:
        dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
        printk(KERN_INFO "Queue [NTU] [NTC]\n");
        for (n = 0; n < adapter->num_rx_queues; n++) {
                rx_ring = adapter->rx_ring[n];
                printk(KERN_INFO " %5d %5X %5X\n", n,
                           rx_ring->next_to_use, rx_ring->next_to_clean);
        }

        /* Print RX Rings */
        if (!netif_msg_rx_status(adapter))
                goto exit;

        dev_info(&adapter->pdev->dev, "RX Rings Dump\n");

        /* Advanced Receive Descriptor (Read) Format
         *    63                                           1        0
         *    +-----------------------------------------------------+
         *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
         *    +----------------------------------------------+------+
         *  8 |       Header Buffer Address [63:1]           |  DD  |
         *    +-----------------------------------------------------+
         *
         *
         * Advanced Receive Descriptor (Write-Back) Format
         *
         *   63       48 47    32 31  30      21 20 17 16   4 3     0
         *   +------------------------------------------------------+
         * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
         *   | Checksum   Ident  |   |           |    | Type | Type |
         *   +------------------------------------------------------+
         * 8 | VLAN Tag | Length | Extended Error | Extended Status |
         *   +------------------------------------------------------+
         *   63       48 47    32 31            20 19               0
         */

        for (n = 0; n < adapter->num_rx_queues; n++) {
                rx_ring = adapter->rx_ring[n];
                printk(KERN_INFO "------------------------------------\n");
                printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
                printk(KERN_INFO "------------------------------------\n");
                printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
                        "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
                        "<-- Adv Rx Read format\n");
                printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
                        "[vl er S cks ln] ---------------- [bi->skb] "
                        "<-- Adv Rx Write-Back format\n");

                for (i = 0; i < rx_ring->count; i++) {
                        buffer_info = &rx_ring->buffer_info[i];
                        rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
                        u0 = (struct my_u0 *)rx_desc;
                        staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
                        if (staterr & E1000_RXD_STAT_DD) {
                                /* Descriptor Done */
                                printk(KERN_INFO "RWB[0x%03X]     %016llX "
                                        "%016llX ---------------- %p", i,
                                        le64_to_cpu(u0->a),
                                        le64_to_cpu(u0->b),
                                        buffer_info->skb);
                        } else {
                                printk(KERN_INFO "R  [0x%03X]     %016llX "
                                        "%016llX %016llX %p", i,
                                        le64_to_cpu(u0->a),
                                        le64_to_cpu(u0->b),
                                        (u64)buffer_info->dma,
                                        buffer_info->skb);

                                if (netif_msg_pktdata(adapter)) {
                                        print_hex_dump(KERN_INFO, "",
                                                DUMP_PREFIX_ADDRESS,
                                                16, 1,
                                                phys_to_virt(buffer_info->dma),
                                                rx_ring->rx_buffer_len, true);
                                        if (rx_ring->rx_buffer_len
                                                < IGB_RXBUFFER_1024)
                                                print_hex_dump(KERN_INFO, "",
                                                  DUMP_PREFIX_ADDRESS,
                                                  16, 1,
                                                  phys_to_virt(
                                                    buffer_info->page_dma +
                                                    buffer_info->page_offset),
                                                  PAGE_SIZE/2, true);
                                }
                        }

                        if (i == rx_ring->next_to_use)
                                printk(KERN_CONT " NTU\n");
                        else if (i == rx_ring->next_to_clean)
                                printk(KERN_CONT " NTC\n");
                        else
                                printk(KERN_CONT "\n");

                }
        }

exit:
        return;
}

/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 */
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
        struct igb_adapter *adapter =
                container_of(tc, struct igb_adapter, cycles);
        struct e1000_hw *hw = &adapter->hw;
        u64 stamp = 0;
        int shift = 0;

        /*
         * The timestamp latches on lowest register read. For the 82580
         * the lowest register is SYSTIMR instead of SYSTIML.  However we never
         * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
         */
        if (hw->mac.type == e1000_82580) {
                stamp = rd32(E1000_SYSTIMR) >> 8;
                shift = IGB_82580_TSYNC_SHIFT;
        }

        stamp |= (u64)rd32(E1000_SYSTIML) << shift;
        stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
        return stamp;
}

/**
 * igb_get_hw_dev - return device
 * used by hardware layer to print debugging information
 **/
struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
{
        struct igb_adapter *adapter = hw->back;
        return adapter->netdev;
}

/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
        int ret;

        printk(KERN_INFO "%s - version %s\n",
               igb_driver_string, igb_driver_version);

        printk(KERN_INFO "%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
        dca_register_notify(&dca_notifier);
#endif
        ret = pci_register_driver(&igb_driver);
        return ret;
}

module_init(igb_init_module);

/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
        dca_unregister_notify(&dca_notifier);
#endif
        pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);

#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
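/*
 * Q_IDX_82576 interleaves ring i across the two halves of the 82576 queue
 * register space: even i lands in the low half, odd i in the high half,
 * e.g. Q_IDX_82576(0) = 0, Q_IDX_82576(1) = 8, Q_IDX_82576(2) = 1,
 * Q_IDX_82576(3) = 9, ...  This matches the VF queue layout described in
 * igb_cache_ring_register() below.
 */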
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
        int i = 0, j = 0;
        u32 rbase_offset = adapter->vfs_allocated_count;

        switch (adapter->hw.mac.type) {
        case e1000_82576:
                /* The queues are allocated for virtualization such that VF 0
                 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
                 * In order to avoid collision we start at the first free queue
                 * and continue consuming queues in the same sequence.
                 */
                if (adapter->vfs_allocated_count) {
                        for (; i < adapter->rss_queues; i++)
                                adapter->rx_ring[i]->reg_idx = rbase_offset +
                                                               Q_IDX_82576(i);
                }
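                /* fall through - any queues left over get the default
                 * one-to-one register mapping below */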
        case e1000_82575:
        case e1000_82580:
        case e1000_i350:
        default:
                for (; i < adapter->num_rx_queues; i++)
                        adapter->rx_ring[i]->reg_idx = rbase_offset + i;
                for (; j < adapter->num_tx_queues; j++)
                        adapter->tx_ring[j]->reg_idx = rbase_offset + j;
                break;
        }
}

static void igb_free_queues(struct igb_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++) {
                kfree(adapter->tx_ring[i]);
                adapter->tx_ring[i] = NULL;
        }
        for (i = 0; i < adapter->num_rx_queues; i++) {
                kfree(adapter->rx_ring[i]);
                adapter->rx_ring[i] = NULL;
        }
        adapter->num_rx_queues = 0;
        adapter->num_tx_queues = 0;
}

/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
        struct igb_ring *ring;
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++) {
                ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
                if (!ring)
                        goto err;
                ring->count = adapter->tx_ring_count;
                ring->queue_index = i;
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;
                /* For 82575, context index must be unique per ring. */
                if (adapter->hw.mac.type == e1000_82575)
                        ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
                adapter->tx_ring[i] = ring;
        }

        for (i = 0; i < adapter->num_rx_queues; i++) {
                ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
                if (!ring)
                        goto err;
                ring->count = adapter->rx_ring_count;
                ring->queue_index = i;
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;
                ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
                ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
                /* set flag indicating ring supports SCTP checksum offload */
                if (adapter->hw.mac.type >= e1000_82576)
                        ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
                adapter->rx_ring[i] = ring;
        }

        igb_cache_ring_register(adapter);

        return 0;

err:
        igb_free_queues(adapter);

        return -ENOMEM;
}

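/* sentinel value: the q_vector has no rx or tx queue of the given type */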
#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
        u32 msixbm = 0;
        struct igb_adapter *adapter = q_vector->adapter;
        struct e1000_hw *hw = &adapter->hw;
        u32 ivar, index;
        int rx_queue = IGB_N0_QUEUE;
        int tx_queue = IGB_N0_QUEUE;

        if (q_vector->rx_ring)
                rx_queue = q_vector->rx_ring->reg_idx;
        if (q_vector->tx_ring)
                tx_queue = q_vector->tx_ring->reg_idx;

        switch (hw->mac.type) {
        case e1000_82575:
                /* The 82575 assigns vectors using a bitmask, which matches the
                   bitmask for the EICR/EIMS/EIMC registers.  To assign one
                   or more queues to a vector, we write the appropriate bits
                   into the MSIXBM register for that vector. */
                if (rx_queue > IGB_N0_QUEUE)
                        msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
                if (tx_queue > IGB_N0_QUEUE)
                        msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
                if (!adapter->msix_entries && msix_vector == 0)
                        msixbm |= E1000_EIMS_OTHER;
                array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
                q_vector->eims_value = msixbm;
                break;
        case e1000_82576:
                /* 82576 uses a table-based method for assigning vectors.
                   Each queue has a single entry in the table to which we write
                   a vector number along with a "valid" bit.  Sadly, the layout
                   of the table is somewhat counterintuitive. */
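                /* Byte layout of IVAR0[n] (n = queue & 0x7), as encoded by
                 * the masks and shifts below:
                 *   byte 0 - RX queue n        byte 1 - TX queue n
                 *   byte 2 - RX queue n + 8    byte 3 - TX queue n + 8
                 */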
                if (rx_queue > IGB_N0_QUEUE) {
                        index = (rx_queue & 0x7);
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (rx_queue < 8) {
                                /* vector goes into low byte of register */
                                ivar = ivar & 0xFFFFFF00;
                                ivar |= msix_vector | E1000_IVAR_VALID;
                        } else {
                                /* vector goes into third byte of register */
                                ivar = ivar & 0xFF00FFFF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
                        }
                        array_wr32(E1000_IVAR0, index, ivar);
                }
                if (tx_queue > IGB_N0_QUEUE) {
                        index = (tx_queue & 0x7);
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (tx_queue < 8) {
                                /* vector goes into second byte of register */
                                ivar = ivar & 0xFFFF00FF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
                        } else {
                                /* vector goes into high byte of register */
                                ivar = ivar & 0x00FFFFFF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
                        }
                        array_wr32(E1000_IVAR0, index, ivar);
                }
                q_vector->eims_value = 1 << msix_vector;
                break;
        case e1000_82580:
        case e1000_i350:
                /* The 82580 uses the same table-based approach as the 82576,
                   but has fewer IVAR entries, so each table entry carries a
                   pair of queues (index = queue >> 1). */
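                /* Byte layout of IVAR0[n] (n = queue >> 1) on these parts:
                 *   byte 0 - RX queue 2n        byte 1 - TX queue 2n
                 *   byte 2 - RX queue 2n + 1    byte 3 - TX queue 2n + 1
                 */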
                if (rx_queue > IGB_N0_QUEUE) {
                        index = (rx_queue >> 1);
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (rx_queue & 0x1) {
                                /* vector goes into third byte of register */
                                ivar = ivar & 0xFF00FFFF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
                        } else {
                                /* vector goes into low byte of register */
                                ivar = ivar & 0xFFFFFF00;
                                ivar |= msix_vector | E1000_IVAR_VALID;
                        }
                        array_wr32(E1000_IVAR0, index, ivar);
                }
                if (tx_queue > IGB_N0_QUEUE) {
                        index = (tx_queue >> 1);
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (tx_queue & 0x1) {
                                /* vector goes into high byte of register */
                                ivar = ivar & 0x00FFFFFF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
                        } else {
                                /* vector goes into second byte of register */
                                ivar = ivar & 0xFFFF00FF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
                        }
                        array_wr32(E1000_IVAR0, index, ivar);
                }
                q_vector->eims_value = 1 << msix_vector;
                break;
        default:
                BUG();
                break;
        }

        /* add q_vector eims value to global eims_enable_mask */
        adapter->eims_enable_mask |= q_vector->eims_value;

        /* configure q_vector to set itr on first interrupt */
        q_vector->set_itr = 1;
}

/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
        u32 tmp;
        int i, vector = 0;
        struct e1000_hw *hw = &adapter->hw;

        adapter->eims_enable_mask = 0;

        /* set vector for other causes, i.e. link changes */
        switch (hw->mac.type) {
        case e1000_82575:
                tmp = rd32(E1000_CTRL_EXT);
                /* enable MSI-X PBA support */
                tmp |= E1000_CTRL_EXT_PBA_CLR;

                /* Auto-Mask interrupts upon ICR read. */
                tmp |= E1000_CTRL_EXT_EIAME;
                tmp |= E1000_CTRL_EXT_IRCA;

                wr32(E1000_CTRL_EXT, tmp);

                /* enable msix_other interrupt */
                array_wr32(E1000_MSIXBM(0), vector++,
                                      E1000_EIMS_OTHER);
                adapter->eims_other = E1000_EIMS_OTHER;

                break;

        case e1000_82576:
        case e1000_82580:
        case e1000_i350:
                /* Turn on MSI-X capability first, or our settings
                 * won't stick.  And it will take days to debug. */
                wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
                                E1000_GPIE_PBA | E1000_GPIE_EIAME |
                                E1000_GPIE_NSICR);

                /* enable msix_other interrupt */
                adapter->eims_other = 1 << vector;
                tmp = (vector++ | E1000_IVAR_VALID) << 8;

                wr32(E1000_IVAR_MISC, tmp);
                break;
        default:
                /* do nothing, since nothing else supports MSI-X */
                break;
        } /* switch (hw->mac.type) */

        adapter->eims_enable_mask |= adapter->eims_other;

        for (i = 0; i < adapter->num_q_vectors; i++)
                igb_assign_vector(adapter->q_vector[i], vector++);

        wrfl();
}

/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        int i, err = 0, vector = 0;

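        /* vector 0 handles link and other non-queue causes (see
         * igb_configure_msix()); the per-queue vectors follow it */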
        err = request_irq(adapter->msix_entries[vector].vector,
                          igb_msix_other, 0, netdev->name, adapter);
        if (err)
                goto out;
        vector++;

        for (i = 0; i < adapter->num_q_vectors; i++) {
                struct igb_q_vector *q_vector = adapter->q_vector[i];

                q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

                if (q_vector->rx_ring && q_vector->tx_ring)
                        sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
                                q_vector->rx_ring->queue_index);
                else if (q_vector->tx_ring)
                        sprintf(q_vector->name, "%s-tx-%u", netdev->name,
                                q_vector->tx_ring->queue_index);
                else if (q_vector->rx_ring)
                        sprintf(q_vector->name, "%s-rx-%u", netdev->name,
                                q_vector->rx_ring->queue_index);
                else
                        sprintf(q_vector->name, "%s-unused", netdev->name);

                err = request_irq(adapter->msix_entries[vector].vector,
                                  igb_msix_ring, 0, q_vector->name,
                                  q_vector);
                if (err)
                        goto out;
                vector++;
        }

        igb_configure_msix(adapter);
        return 0;
out:
        return err;
}

static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
        if (adapter->msix_entries) {
                pci_disable_msix(adapter->pdev);
                kfree(adapter->msix_entries);
                adapter->msix_entries = NULL;
        } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
                pci_disable_msi(adapter->pdev);
        }
}

/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
        int v_idx;

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
                struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
                adapter->q_vector[v_idx] = NULL;
                if (!q_vector)
                        continue;
                netif_napi_del(&q_vector->napi);
                kfree(q_vector);
        }
        adapter->num_q_vectors = 0;
}

/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
        igb_free_queues(adapter);
        igb_free_q_vectors(adapter);
        igb_reset_interrupt_capability(adapter);
}

/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
        int err;
        int numvecs, i;

        /* Number of supported queues. */
        adapter->num_rx_queues = adapter->rss_queues;
        if (adapter->vfs_allocated_count)
                adapter->num_tx_queues = 1;
        else
                adapter->num_tx_queues = adapter->rss_queues;

        /* start with one vector for every rx queue */
        numvecs = adapter->num_rx_queues;

        /* if tx handler is separate add 1 for every tx queue */
        if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
                numvecs += adapter->num_tx_queues;

        /* store the number of vectors reserved for queues */
        adapter->num_q_vectors = numvecs;

        /* add 1 vector for link status interrupts */
        numvecs++;
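        /*
         * e.g. with rss_queues = 4 and queue pairing disabled this gives
         * 4 rx + 4 tx + 1 link = 9 vectors; with pairing, 4 + 1 = 5.
         */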
        adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
                                        GFP_KERNEL);
        if (!adapter->msix_entries)
                goto msi_only;

        for (i = 0; i < numvecs; i++)
                adapter->msix_entries[i].entry = i;

        err = pci_enable_msix(adapter->pdev,
                              adapter->msix_entries,
                              numvecs);
        if (err == 0)
                goto out;

        igb_reset_interrupt_capability(adapter);

        /* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
        /* disable SR-IOV for non MSI-X configurations */
        if (adapter->vf_data) {
                struct e1000_hw *hw = &adapter->hw;
                /* disable iov and allow time for transactions to clear */
                pci_disable_sriov(adapter->pdev);
                msleep(500);

                kfree(adapter->vf_data);
                adapter->vf_data = NULL;
                wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
                msleep(100);
                dev_info(&adapter->pdev->dev, "IOV Disabled\n");
        }
#endif
        adapter->vfs_allocated_count = 0;
        adapter->rss_queues = 1;
        adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
        adapter->num_rx_queues = 1;
        adapter->num_tx_queues = 1;
        adapter->num_q_vectors = 1;
        if (!pci_enable_msi(adapter->pdev))
                adapter->flags |= IGB_FLAG_HAS_MSI;
out:
        /* Notify the stack of the (possibly) reduced queue counts. */
        netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
        return netif_set_real_num_rx_queues(adapter->netdev,
                                            adapter->num_rx_queues);
}

/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
        struct igb_q_vector *q_vector;
        struct e1000_hw *hw = &adapter->hw;
        int v_idx;

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
                q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
                if (!q_vector)
                        goto err_out;
                q_vector->adapter = adapter;
                q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
                q_vector->itr_val = IGB_START_ITR;
                netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
                adapter->q_vector[v_idx] = q_vector;
        }
        return 0;

err_out:
        igb_free_q_vectors(adapter);
        return -ENOMEM;
}

static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

        q_vector->rx_ring = adapter->rx_ring[ring_idx];
        q_vector->rx_ring->q_vector = q_vector;
        q_vector->itr_val = adapter->rx_itr_setting;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

        q_vector->tx_ring = adapter->tx_ring[ring_idx];
        q_vector->tx_ring->q_vector = q_vector;
        q_vector->itr_val = adapter->tx_itr_setting;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}

/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
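 * If there are enough vectors, each ring gets its own; otherwise rx and
 * tx rings with the same index share a vector (queue pairing).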
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
        int i;
        int v_idx = 0;

        if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
            (adapter->num_q_vectors < adapter->num_tx_queues))
                return -ENOMEM;

        if (adapter->num_q_vectors >=
            (adapter->num_rx_queues + adapter->num_tx_queues)) {
                for (i = 0; i < adapter->num_rx_queues; i++)
                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
                for (i = 0; i < adapter->num_tx_queues; i++)
                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
        } else {
                for (i = 0; i < adapter->num_rx_queues; i++) {
                        if (i < adapter->num_tx_queues)
                                igb_map_tx_ring_to_vector(adapter, i, v_idx);
                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
                }
                for (; i < adapter->num_tx_queues; i++)
                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
        }
        return 0;
}

/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
        struct pci_dev *pdev = adapter->pdev;
        int err;

        err = igb_set_interrupt_capability(adapter);
        if (err)
                return err;

        err = igb_alloc_q_vectors(adapter);
        if (err) {
                dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
                goto err_alloc_q_vectors;
        }

        err = igb_alloc_queues(adapter);
        if (err) {
                dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
                goto err_alloc_queues;
        }

        err = igb_map_ring_to_vector(adapter);
        if (err) {
                dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
                goto err_map_queues;
        }

        return 0;
err_map_queues:
        igb_free_queues(adapter);
err_alloc_queues:
        igb_free_q_vectors(adapter);
err_alloc_q_vectors:
        igb_reset_interrupt_capability(adapter);
        return err;
}

/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct pci_dev *pdev = adapter->pdev;
        int err = 0;

        if (adapter->msix_entries) {
                err = igb_request_msix(adapter);
                if (!err)
                        goto request_done;
                /* fall back to MSI */
                igb_clear_interrupt_scheme(adapter);
                if (!pci_enable_msi(adapter->pdev))
                        adapter->flags |= IGB_FLAG_HAS_MSI;
                igb_free_all_tx_resources(adapter);
                igb_free_all_rx_resources(adapter);
                adapter->num_tx_queues = 1;
                adapter->num_rx_queues = 1;
                adapter->num_q_vectors = 1;
                err = igb_alloc_q_vectors(adapter);
                if (err) {
                        dev_err(&pdev->dev,
                                "Unable to allocate memory for vectors\n");
                        goto request_done;
                }
                err = igb_alloc_queues(adapter);
                if (err) {
                        dev_err(&pdev->dev,
                                "Unable to allocate memory for queues\n");
                        igb_free_q_vectors(adapter);
                        goto request_done;
                }
                igb_setup_all_tx_resources(adapter);
                igb_setup_all_rx_resources(adapter);
        } else {
                igb_assign_vector(adapter->q_vector[0], 0);
        }

        if (adapter->flags & IGB_FLAG_HAS_MSI) {
                err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
                                  netdev->name, adapter);
                if (!err)
                        goto request_done;

                /* fall back to legacy interrupts */
                igb_reset_interrupt_capability(adapter);
                adapter->flags &= ~IGB_FLAG_HAS_MSI;
        }

        err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
                          netdev->name, adapter);

        if (err)
                dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
                        err);

request_done:
        return err;
}

static void igb_free_irq(struct igb_adapter *adapter)
{
        if (adapter->msix_entries) {
                int vector = 0, i;

                free_irq(adapter->msix_entries[vector++].vector, adapter);

                for (i = 0; i < adapter->num_q_vectors; i++) {
                        struct igb_q_vector *q_vector = adapter->q_vector[i];
                        free_irq(adapter->msix_entries[vector++].vector,
                                 q_vector);
                }
        } else {
                free_irq(adapter->pdev->irq, adapter);
        }
}

/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;

        /*
         * we need to be careful when disabling interrupts.  The VFs are also
         * mapped into these registers and so clearing the bits can cause
         * issues on the VF drivers so we only need to clear what we set
         */
        if (adapter->msix_entries) {
                u32 regval = rd32(E1000_EIAM);
                wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
                wr32(E1000_EIMC, adapter->eims_enable_mask);
                regval = rd32(E1000_EIAC);
                wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
        }

        wr32(E1000_IAM, 0);
        wr32(E1000_IMC, ~0);
        wrfl();
        if (adapter->msix_entries) {
                int i;
                for (i = 0; i < adapter->num_q_vectors; i++)
                        synchronize_irq(adapter->msix_entries[i].vector);
        } else {
                synchronize_irq(adapter->pdev->irq);
        }
}

/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;

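        /* in MSI-X mode, re-arm the auto-clear (EIAC) and auto-mask (EIAM)
         * bits for our vectors and unmask them via EIMS; the legacy/MSI
         * path below just writes the classic IMS/IAM masks */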
        if (adapter->msix_entries) {
                u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
                u32 regval = rd32(E1000_EIAC);
                wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
                regval = rd32(E1000_EIAM);
                wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
                wr32(E1000_EIMS, adapter->eims_enable_mask);
                if (adapter->vfs_allocated_count) {
                        wr32(E1000_MBVFIMR, 0xFF);
                        ims |= E1000_IMS_VMMB;
                }
                if (adapter->hw.mac.type == e1000_82580)
                        ims |= E1000_IMS_DRSTA;

                wr32(E1000_IMS, ims);
        } else {
                wr32(E1000_IMS, IMS_ENABLE_MASK |
                                E1000_IMS_DRSTA);
                wr32(E1000_IAM, IMS_ENABLE_MASK |
                                E1000_IMS_DRSTA);
        }
}

static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u16 vid = adapter->hw.mng_cookie.vlan_id;
        u16 old_vid = adapter->mng_vlan_id;

        if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
                /* add VID to filter table */
                igb_vfta_set(hw, vid, true);
                adapter->mng_vlan_id = vid;
        } else {
                adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
        }

        if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
            (vid != old_vid) &&
            !vlan_group_get_device(adapter->vlgrp, old_vid)) {
                /* remove VID from filter table */
                igb_vfta_set(hw, old_vid, false);
        }
}

/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 *
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 ctrl_ext;

        /* Let firmware take over control of h/w */
        ctrl_ext = rd32(E1000_CTRL_EXT);
        wr32(E1000_CTRL_EXT,
                        ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 *
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 ctrl_ext;

        /* Let firmware know the driver has taken over */
        ctrl_ext = rd32(E1000_CTRL_EXT);
        wr32(E1000_CTRL_EXT,
                        ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        int i;

        igb_get_hw_control(adapter);
        igb_set_rx_mode(netdev);

        igb_restore_vlan(adapter);

        igb_setup_tctl(adapter);
        igb_setup_mrqc(adapter);
        igb_setup_rctl(adapter);

        igb_configure_tx(adapter);
        igb_configure_rx(adapter);

        igb_rx_fifo_flush_82575(&adapter->hw);

        /* call igb_desc_unused which always leaves
         * at least 1 descriptor unused to make sure
         * next_to_use != next_to_clean */
        for (i = 0; i < adapter->num_rx_queues; i++) {
                struct igb_ring *ring = adapter->rx_ring[i];
                igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1434         }
1435 }
1436
1437 /**
1438  * igb_power_up_link - Power up the phy/serdes link
1439  * @adapter: address of board private structure
1440  **/
1441 void igb_power_up_link(struct igb_adapter *adapter)
1442 {
1443         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1444                 igb_power_up_phy_copper(&adapter->hw);
1445         else
1446                 igb_power_up_serdes_link_82575(&adapter->hw);
1447 }
1448
1449 /**
1450  * igb_power_down_link - Power down the phy/serdes link
1451  * @adapter: address of board private structure
1452  */
1453 static void igb_power_down_link(struct igb_adapter *adapter)
1454 {
1455         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1456                 igb_power_down_phy_copper_82575(&adapter->hw);
1457         else
1458                 igb_shutdown_serdes_link_82575(&adapter->hw);
1459 }
1460
1461 /**
1462  * igb_up - Open the interface and prepare it to handle traffic
1463  * @adapter: board private structure
1464  **/
1465 int igb_up(struct igb_adapter *adapter)
1466 {
1467         struct e1000_hw *hw = &adapter->hw;
1468         int i;
1469
1470         /* hardware has been reset, we need to reload some things */
1471         igb_configure(adapter);
1472
1473         clear_bit(__IGB_DOWN, &adapter->state);
1474
1475         for (i = 0; i < adapter->num_q_vectors; i++) {
1476                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1477                 napi_enable(&q_vector->napi);
1478         }
1479         if (adapter->msix_entries)
1480                 igb_configure_msix(adapter);
1481         else
1482                 igb_assign_vector(adapter->q_vector[0], 0);
1483
1484         /* Clear any pending interrupts. */
1485         rd32(E1000_ICR);
1486         igb_irq_enable(adapter);
1487
1488         /* notify VFs that reset has been completed */
1489         if (adapter->vfs_allocated_count) {
1490                 u32 reg_data = rd32(E1000_CTRL_EXT);
1491                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1492                 wr32(E1000_CTRL_EXT, reg_data);
1493         }
1494
1495         netif_tx_start_all_queues(adapter->netdev);
1496
1497         /* start the watchdog. */
1498         hw->mac.get_link_status = 1;
1499         schedule_work(&adapter->watchdog_task);
1500
1501         return 0;
1502 }
1503
1504 void igb_down(struct igb_adapter *adapter)
1505 {
1506         struct net_device *netdev = adapter->netdev;
1507         struct e1000_hw *hw = &adapter->hw;
1508         u32 tctl, rctl;
1509         int i;
1510
1511         /* signal that we're down so the interrupt handler does not
1512          * reschedule our watchdog timer */
1513         set_bit(__IGB_DOWN, &adapter->state);
1514
1515         /* disable receives in the hardware */
1516         rctl = rd32(E1000_RCTL);
1517         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1518         /* flush and sleep below */
1519
1520         netif_tx_stop_all_queues(netdev);
1521
1522         /* disable transmits in the hardware */
1523         tctl = rd32(E1000_TCTL);
1524         tctl &= ~E1000_TCTL_EN;
1525         wr32(E1000_TCTL, tctl);
1526         /* flush both disables and wait for them to finish */
1527         wrfl();
1528         msleep(10);
1529
1530         for (i = 0; i < adapter->num_q_vectors; i++) {
1531                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1532                 napi_disable(&q_vector->napi);
1533         }
1534
1535         igb_irq_disable(adapter);
1536
1537         del_timer_sync(&adapter->watchdog_timer);
1538         del_timer_sync(&adapter->phy_info_timer);
1539
1540         netif_carrier_off(netdev);
1541
1542         /* record the stats before reset */
1543         spin_lock(&adapter->stats64_lock);
1544         igb_update_stats(adapter, &adapter->stats64);
1545         spin_unlock(&adapter->stats64_lock);
1546
1547         adapter->link_speed = 0;
1548         adapter->link_duplex = 0;
1549
1550         if (!pci_channel_offline(adapter->pdev))
1551                 igb_reset(adapter);
1552         igb_clean_all_tx_rings(adapter);
1553         igb_clean_all_rx_rings(adapter);
1554 #ifdef CONFIG_IGB_DCA
1555
1556         /* since we reset the hardware DCA settings were cleared */
1557         igb_setup_dca(adapter);
1558 #endif
1559 }
1560
1561 void igb_reinit_locked(struct igb_adapter *adapter)
1562 {
1563         WARN_ON(in_interrupt());
1564         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1565                 msleep(1);
1566         igb_down(adapter);
1567         igb_up(adapter);
1568         clear_bit(__IGB_RESETTING, &adapter->state);
1569 }
1570
1571 void igb_reset(struct igb_adapter *adapter)
1572 {
1573         struct pci_dev *pdev = adapter->pdev;
1574         struct e1000_hw *hw = &adapter->hw;
1575         struct e1000_mac_info *mac = &hw->mac;
1576         struct e1000_fc_info *fc = &hw->fc;
1577         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1578         u16 hwm;
1579
1580         /* Repartition the PBA for MTUs greater than 9k.
1581          * CTRL.RST must be asserted for the change to take effect.
1582          */
1583         switch (mac->type) {
1584         case e1000_i350:
1585         case e1000_82580:
1586                 pba = rd32(E1000_RXPBS);
1587                 pba = igb_rxpbs_adjust_82580(pba);
1588                 break;
1589         case e1000_82576:
1590                 pba = rd32(E1000_RXPBS);
1591                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1592                 break;
1593         case e1000_82575:
1594         default:
1595                 pba = E1000_PBA_34K;
1596                 break;
1597         }
1598
1599         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1600             (mac->type < e1000_82576)) {
1601                 /* adjust PBA for jumbo frames */
1602                 wr32(E1000_PBA, pba);
1603
1604                 /* To maintain wire speed transmits, the Tx FIFO should be
1605                  * large enough to accommodate two full transmit packets,
1606                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1607                  * the Rx FIFO should be large enough to accommodate at least
1608                  * one full receive packet and is similarly rounded up and
1609                  * expressed in KB. */
1610                 pba = rd32(E1000_PBA);
1611                 /* the upper 16 bits hold the Tx packet buffer allocation in KB */
1612                 tx_space = pba >> 16;
1613                 /* the lower 16 bits hold the Rx packet buffer allocation in KB */
1614                 pba &= 0xffff;
1615                 /* the Tx FIFO also stores 16 bytes of descriptor per packet,
1616                  * but doesn't count the Ethernet FCS because hardware appends it */
1617                 min_tx_space = (adapter->max_frame_size +
1618                                 sizeof(union e1000_adv_tx_desc) -
1619                                 ETH_FCS_LEN) * 2;
1620                 min_tx_space = ALIGN(min_tx_space, 1024);
1621                 min_tx_space >>= 10;
1622                 /* software strips receive CRC, so leave room for it */
1623                 min_rx_space = adapter->max_frame_size;
1624                 min_rx_space = ALIGN(min_rx_space, 1024);
1625                 min_rx_space >>= 10;
1626
1627                 /* If current Tx allocation is less than the min Tx FIFO size,
1628                  * and the min Tx FIFO size is less than the current Rx FIFO
1629                  * allocation, take space away from current Rx allocation */
1630                 if (tx_space < min_tx_space &&
1631                     ((min_tx_space - tx_space) < pba)) {
1632                         pba = pba - (min_tx_space - tx_space);
1633
1634                         /* if short on rx space, rx wins and must trump tx
1635                          * adjustment */
1636                         if (pba < min_rx_space)
1637                                 pba = min_rx_space;
1638                 }
1639                 wr32(E1000_PBA, pba);
1640         }
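        /*
         * Illustrative arithmetic for the block above (assuming a 9018 byte
         * max frame): min_tx_space = (9018 + 16 - 4) * 2 = 18060 bytes,
         * rounded up to 18432 and expressed as 18KB; min_rx_space = 9018
         * rounds up to 9216, i.e. 9KB.  Only when tx_space falls short of
         * that 18KB is the difference carved out of the Rx allocation.
         */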
1641
1642         /* flow control settings */
1643         /* The high water mark must be low enough to fit one full frame
1644          * (or the size used for early receive) above it in the Rx FIFO.
1645          * Set it to the lower of:
1646          * - 90% of the Rx FIFO size, or
1647          * - the full Rx FIFO size minus two full frames */
1648         hwm = min(((pba << 10) * 9 / 10),
1649                         ((pba << 10) - 2 * adapter->max_frame_size));
1650
1651         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1652         fc->low_water = fc->high_water - 16;
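        /*
         * Worked example (assuming the 82575 default 34KB PBA and a 1522
         * byte max frame): pba << 10 = 34816, 90% of that is 31334, and
         * 34816 - 2 * 1522 = 31772, so hwm = 31334; the 0xFFF0 mask above
         * rounds the high water mark down to 31328.
         */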
1653         fc->pause_time = 0xFFFF;
1654         fc->send_xon = 1;
1655         fc->current_mode = fc->requested_mode;
1656
1657         /* quiesce the VFs: clear their flags and disable their queues */
1658         if (adapter->vfs_allocated_count) {
1659                 int i;
1660                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1661                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1662
1663                 /* ping all the active vfs to let them know we are going down */
1664                 igb_ping_all_vfs(adapter);
1665
1666                 /* disable transmits and receives */
1667                 wr32(E1000_VFRE, 0);
1668                 wr32(E1000_VFTE, 0);
1669         }
1670
1671         /* Allow time for pending master requests to run */
1672         hw->mac.ops.reset_hw(hw);
1673         wr32(E1000_WUC, 0);
1674
1675         if (hw->mac.ops.init_hw(hw))
1676                 dev_err(&pdev->dev, "Hardware Error\n");
1677
1678         if (hw->mac.type == e1000_82580) {
1679                 u32 reg = rd32(E1000_PCIEMISC);
1680                 wr32(E1000_PCIEMISC,
1681                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1682         }
1683         if (!netif_running(adapter->netdev))
1684                 igb_power_down_link(adapter);
1685
1686         igb_update_mng_vlan(adapter);
1687
1688         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1689         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1690
1691         igb_get_phy_info(hw);
1692 }
1693
1694 static const struct net_device_ops igb_netdev_ops = {
1695         .ndo_open               = igb_open,
1696         .ndo_stop               = igb_close,
1697         .ndo_start_xmit         = igb_xmit_frame_adv,
1698         .ndo_get_stats64        = igb_get_stats64,
1699         .ndo_set_rx_mode        = igb_set_rx_mode,
1700         .ndo_set_multicast_list = igb_set_rx_mode,
1701         .ndo_set_mac_address    = igb_set_mac,
1702         .ndo_change_mtu         = igb_change_mtu,
1703         .ndo_do_ioctl           = igb_ioctl,
1704         .ndo_tx_timeout         = igb_tx_timeout,
1705         .ndo_validate_addr      = eth_validate_addr,
1706         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1707         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1708         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1709         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1710         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1711         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1712         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1713 #ifdef CONFIG_NET_POLL_CONTROLLER
1714         .ndo_poll_controller    = igb_netpoll,
1715 #endif
1716 };
1717
1718 /**
1719  * igb_probe - Device Initialization Routine
1720  * @pdev: PCI device information struct
1721  * @ent: entry in igb_pci_tbl
1722  *
1723  * Returns 0 on success, negative on failure
1724  *
1725  * igb_probe initializes an adapter identified by a pci_dev structure.
1726  * The OS initialization, configuring of the adapter private structure,
1727  * and a hardware reset occur.
1728  **/
1729 static int __devinit igb_probe(struct pci_dev *pdev,
1730                                const struct pci_device_id *ent)
1731 {
1732         struct net_device *netdev;
1733         struct igb_adapter *adapter;
1734         struct e1000_hw *hw;
1735         u16 eeprom_data = 0;
1736         s32 ret_val;
1737         static int global_quad_port_a; /* global quad port a indication */
1738         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1739         unsigned long mmio_start, mmio_len;
1740         int err, pci_using_dac;
1741         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1742         u8 part_str[E1000_PBANUM_LENGTH];
1743
1744         /* Catch broken hardware that put the wrong VF device ID in
1745          * the PCIe SR-IOV capability.
1746          */
1747         if (pdev->is_virtfn) {
1748                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1749                      pci_name(pdev), pdev->vendor, pdev->device);
1750                 return -EINVAL;
1751         }
1752
1753         err = pci_enable_device_mem(pdev);
1754         if (err)
1755                 return err;
1756
1757         pci_using_dac = 0;
1758         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1759         if (!err) {
1760                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1761                 if (!err)
1762                         pci_using_dac = 1;
1763         } else {
1764                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1765                 if (err) {
1766                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1767                         if (err) {
1768                                 dev_err(&pdev->dev, "No usable DMA "
1769                                         "configuration, aborting\n");
1770                                 goto err_dma;
1771                         }
1772                 }
1773         }
1774
1775         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1776                                            IORESOURCE_MEM),
1777                                            igb_driver_name);
1778         if (err)
1779                 goto err_pci_reg;
1780
1781         pci_enable_pcie_error_reporting(pdev);
1782
1783         pci_set_master(pdev);
1784         pci_save_state(pdev);
1785
1786         err = -ENOMEM;
1787         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1788                                    IGB_ABS_MAX_TX_QUEUES);
1789         if (!netdev)
1790                 goto err_alloc_etherdev;
1791
1792         SET_NETDEV_DEV(netdev, &pdev->dev);
1793
1794         pci_set_drvdata(pdev, netdev);
1795         adapter = netdev_priv(netdev);
1796         adapter->netdev = netdev;
1797         adapter->pdev = pdev;
1798         hw = &adapter->hw;
1799         hw->back = adapter;
1800         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1801
1802         mmio_start = pci_resource_start(pdev, 0);
1803         mmio_len = pci_resource_len(pdev, 0);
1804
1805         err = -EIO;
1806         hw->hw_addr = ioremap(mmio_start, mmio_len);
1807         if (!hw->hw_addr)
1808                 goto err_ioremap;
1809
1810         netdev->netdev_ops = &igb_netdev_ops;
1811         igb_set_ethtool_ops(netdev);
1812         netdev->watchdog_timeo = 5 * HZ;
1813
1814         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1815
1816         netdev->mem_start = mmio_start;
1817         netdev->mem_end = mmio_start + mmio_len;
1818
1819         /* PCI config space info */
1820         hw->vendor_id = pdev->vendor;
1821         hw->device_id = pdev->device;
1822         hw->revision_id = pdev->revision;
1823         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1824         hw->subsystem_device_id = pdev->subsystem_device;
1825
1826         /* Copy the default MAC, PHY and NVM function pointers */
1827         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1828         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1829         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1830         /* Initialize skew-specific constants */
1831         err = ei->get_invariants(hw);
1832         if (err)
1833                 goto err_sw_init;
1834
1835         /* setup the private structure */
1836         err = igb_sw_init(adapter);
1837         if (err)
1838                 goto err_sw_init;
1839
1840         igb_get_bus_info_pcie(hw);
1841
1842         hw->phy.autoneg_wait_to_complete = false;
1843
1844         /* Copper options */
1845         if (hw->phy.media_type == e1000_media_type_copper) {
1846                 hw->phy.mdix = AUTO_ALL_MODES;
1847                 hw->phy.disable_polarity_correction = false;
1848                 hw->phy.ms_type = e1000_ms_hw_default;
1849         }
1850
1851         if (igb_check_reset_block(hw))
1852                 dev_info(&pdev->dev,
1853                         "PHY reset is blocked due to SOL/IDER session.\n");
1854
1855         netdev->features = NETIF_F_SG |
1856                            NETIF_F_IP_CSUM |
1857                            NETIF_F_HW_VLAN_TX |
1858                            NETIF_F_HW_VLAN_RX |
1859                            NETIF_F_HW_VLAN_FILTER;
1860
1861         netdev->features |= NETIF_F_IPV6_CSUM;
1862         netdev->features |= NETIF_F_TSO;
1863         netdev->features |= NETIF_F_TSO6;
1864         netdev->features |= NETIF_F_GRO;
1865
1866         netdev->vlan_features |= NETIF_F_TSO;
1867         netdev->vlan_features |= NETIF_F_TSO6;
1868         netdev->vlan_features |= NETIF_F_IP_CSUM;
1869         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1870         netdev->vlan_features |= NETIF_F_SG;
1871
1872         if (pci_using_dac) {
1873                 netdev->features |= NETIF_F_HIGHDMA;
1874                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1875         }
1876
1877         if (hw->mac.type >= e1000_82576)
1878                 netdev->features |= NETIF_F_SCTP_CSUM;
1879
1880         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1881
1882         /* before reading the NVM, reset the controller to put the device in a
1883          * known good starting state */
1884         hw->mac.ops.reset_hw(hw);
1885
1886         /* make sure the NVM is good */
1887         if (igb_validate_nvm_checksum(hw) < 0) {
1888                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1889                 err = -EIO;
1890                 goto err_eeprom;
1891         }
1892
1893         /* copy the MAC address out of the NVM */
1894         if (hw->mac.ops.read_mac_addr(hw))
1895                 dev_err(&pdev->dev, "NVM Read Error\n");
1896
1897         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1898         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1899
1900         if (!is_valid_ether_addr(netdev->perm_addr)) {
1901                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1902                 err = -EIO;
1903                 goto err_eeprom;
1904         }
1905
1906         setup_timer(&adapter->watchdog_timer, igb_watchdog,
1907                     (unsigned long) adapter);
1908         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1909                     (unsigned long) adapter);
1910
1911         INIT_WORK(&adapter->reset_task, igb_reset_task);
1912         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1913
1914         /* Initialize link properties that are user-changeable */
1915         adapter->fc_autoneg = true;
1916         hw->mac.autoneg = true;
1917         hw->phy.autoneg_advertised = 0x2f;  /* 10/100 half+full and 1000 full */
1918
1919         hw->fc.requested_mode = e1000_fc_default;
1920         hw->fc.current_mode = e1000_fc_default;
1921
1922         igb_validate_mdi_setting(hw);
1923
1924         /* Initial Wake-on-LAN setting: if APM wake is enabled in the
1925          * EEPROM, enable the ACPI Magic Packet filter
1926          */
1927
1928         if (hw->bus.func == 0)
1929                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1930         else if (hw->mac.type == e1000_82580)
1931                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1932                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1933                                  &eeprom_data);
1934         else if (hw->bus.func == 1)
1935                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1936
1937         if (eeprom_data & eeprom_apme_mask)
1938                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1939
1940         /* now that we have the eeprom settings, apply the special cases where
1941          * the eeprom may be wrong or the board simply won't support wake on
1942          * lan on a particular port */
1943         switch (pdev->device) {
1944         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1945                 adapter->eeprom_wol = 0;
1946                 break;
1947         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1948         case E1000_DEV_ID_82576_FIBER:
1949         case E1000_DEV_ID_82576_SERDES:
1950                 /* Wake events only supported on port A for dual fiber
1951                  * regardless of eeprom setting */
1952                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1953                         adapter->eeprom_wol = 0;
1954                 break;
1955         case E1000_DEV_ID_82576_QUAD_COPPER:
1956         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
1957                 /* if quad port adapter, disable WoL on all but port A */
1958                 if (global_quad_port_a != 0)
1959                         adapter->eeprom_wol = 0;
1960                 else
1961                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1962                 /* Reset for multiple quad port adapters */
1963                 if (++global_quad_port_a == 4)
1964                         global_quad_port_a = 0;
1965                 break;
1966         }
1967
1968         /* initialize the wol settings based on the eeprom settings */
1969         adapter->wol = adapter->eeprom_wol;
1970         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1971
1972         /* reset the hardware with the new settings */
1973         igb_reset(adapter);
1974
1975         /* let the f/w know that the h/w is now under the control of the
1976          * driver. */
1977         igb_get_hw_control(adapter);
1978
1979         strcpy(netdev->name, "eth%d");
1980         err = register_netdev(netdev);
1981         if (err)
1982                 goto err_register;
1983
1984         /* carrier off reporting is important to ethtool even BEFORE open */
1985         netif_carrier_off(netdev);
1986
1987 #ifdef CONFIG_IGB_DCA
1988         if (dca_add_requester(&pdev->dev) == 0) {
1989                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1990                 dev_info(&pdev->dev, "DCA enabled\n");
1991                 igb_setup_dca(adapter);
1992         }
1993
1994 #endif
1995         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1996         /* print bus type/speed/width info */
1997         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1998                  netdev->name,
1999                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2000                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2001                                                             "unknown"),
2002                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2003                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2004                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2005                    "unknown"),
2006                  netdev->dev_addr);
2007
2008         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2009         if (ret_val)
2010                 strcpy(part_str, "Unknown");
2011         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2012         dev_info(&pdev->dev,
2013                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2014                 adapter->msix_entries ? "MSI-X" :
2015                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2016                 adapter->num_rx_queues, adapter->num_tx_queues);
2017         switch (hw->mac.type) {
2018         case e1000_i350:
2019                 igb_set_eee_i350(hw);
2020                 break;
2021         default:
2022                 break;
2023         }
2024         return 0;
2025
2026 err_register:
2027         igb_release_hw_control(adapter);
2028 err_eeprom:
2029         if (!igb_check_reset_block(hw))
2030                 igb_reset_phy(hw);
2031
2032         if (hw->flash_address)
2033                 iounmap(hw->flash_address);
2034 err_sw_init:
2035         igb_clear_interrupt_scheme(adapter);
2036         iounmap(hw->hw_addr);
2037 err_ioremap:
2038         free_netdev(netdev);
2039 err_alloc_etherdev:
2040         pci_release_selected_regions(pdev,
2041                                      pci_select_bars(pdev, IORESOURCE_MEM));
2042 err_pci_reg:
2043 err_dma:
2044         pci_disable_device(pdev);
2045         return err;
2046 }
2047
2048 /**
2049  * igb_remove - Device Removal Routine
2050  * @pdev: PCI device information struct
2051  *
2052  * igb_remove is called by the PCI subsystem to alert the driver
2053  * that it should release a PCI device.  This could be caused by a
2054  * Hot-Plug event, or because the driver is going to be removed from
2055  * memory.
2056  **/
2057 static void __devexit igb_remove(struct pci_dev *pdev)
2058 {
2059         struct net_device *netdev = pci_get_drvdata(pdev);
2060         struct igb_adapter *adapter = netdev_priv(netdev);
2061         struct e1000_hw *hw = &adapter->hw;
2062
2063         /*
2064          * The watchdog timer may be rescheduled, so explicitly
2065          * disable watchdog from being rescheduled.
2066          */
2067         set_bit(__IGB_DOWN, &adapter->state);
2068         del_timer_sync(&adapter->watchdog_timer);
2069         del_timer_sync(&adapter->phy_info_timer);
2070
2071         cancel_work_sync(&adapter->reset_task);
2072         cancel_work_sync(&adapter->watchdog_task);
2073
2074 #ifdef CONFIG_IGB_DCA
2075         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2076                 dev_info(&pdev->dev, "DCA disabled\n");
2077                 dca_remove_requester(&pdev->dev);
2078                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2079                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2080         }
2081 #endif
2082
2083         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2084          * would have already happened in close and is redundant. */
2085         igb_release_hw_control(adapter);
2086
2087         unregister_netdev(netdev);
2088
2089         igb_clear_interrupt_scheme(adapter);
2090
2091 #ifdef CONFIG_PCI_IOV
2092         /* reclaim resources allocated to VFs */
2093         if (adapter->vf_data) {
2094                 /* disable iov and allow time for transactions to clear */
2095                 pci_disable_sriov(pdev);
2096                 msleep(500);
2097
2098                 kfree(adapter->vf_data);
2099                 adapter->vf_data = NULL;
2100                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2101                 msleep(100);
2102                 dev_info(&pdev->dev, "IOV Disabled\n");
2103         }
2104 #endif
2105
2106         iounmap(hw->hw_addr);
2107         if (hw->flash_address)
2108                 iounmap(hw->flash_address);
2109         pci_release_selected_regions(pdev,
2110                                      pci_select_bars(pdev, IORESOURCE_MEM));
2111
2112         free_netdev(netdev);
2113
2114         pci_disable_pcie_error_reporting(pdev);
2115
2116         pci_disable_device(pdev);
2117 }
2118
2119 /**
2120  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2121  * @adapter: board private structure to initialize
2122  *
2123  * This function initializes the vf specific data storage and then attempts to
2124  * allocate the VFs.  The reason for this ordering is that it is much
2125  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2126  * the memory for the VFs.
2127  **/
2128 static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
2129 {
2130 #ifdef CONFIG_PCI_IOV
2131         struct pci_dev *pdev = adapter->pdev;
2132
2133         if (adapter->vfs_allocated_count) {
2134                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2135                                            sizeof(struct vf_data_storage),
2136                                            GFP_KERNEL);
2137                 /* if allocation failed then we do not support SR-IOV */
2138                 if (!adapter->vf_data) {
2139                         adapter->vfs_allocated_count = 0;
2140                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2141                                 "Data Storage\n");
2142                 }
2143         }
2144
2145         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2146                 kfree(adapter->vf_data);
2147                 adapter->vf_data = NULL;
2148 #endif /* CONFIG_PCI_IOV */
2149                 adapter->vfs_allocated_count = 0;
2150 #ifdef CONFIG_PCI_IOV
2151         } else {
2152                 unsigned char mac_addr[ETH_ALEN];
2153                 int i;
2154                 dev_info(&pdev->dev, "%d vfs allocated\n",
2155                          adapter->vfs_allocated_count);
2156                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2157                         random_ether_addr(mac_addr);
2158                         igb_set_vf_mac(adapter, i, mac_addr);
2159                 }
2160         }
2161 #endif /* CONFIG_PCI_IOV */
2162 }
2163
2164
2165 /**
2166  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2167  * @adapter: board private structure to initialize
2168  *
2169  * igb_init_hw_timer initializes the function pointer and values for the hw
2170  * timer found in hardware.
2171  **/
2172 static void igb_init_hw_timer(struct igb_adapter *adapter)
2173 {
2174         struct e1000_hw *hw = &adapter->hw;
2175
2176         switch (hw->mac.type) {
2177         case e1000_i350:
2178         case e1000_82580:
2179                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2180                 adapter->cycles.read = igb_read_clock;
2181                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2182                 adapter->cycles.mult = 1;
2183                 /*
2184                  * The 82580 timesync advances the system timer by 8ns every
2185                  * 8ns, and the value cannot be scaled.  Instead we shift the
2186                  * register reads to build a 64bit timer value.  As a result
2187                  * SYSTIMR/L/H, TXSTMPL/H and RXSTMPL/H all have to be shifted
2188                  * left by 24 bits to generate a larger value for synchronization.
2189                  */
2190                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
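                /*
                 * igb_read_clock() builds its 64bit value with SYSTIML/H
                 * already shifted left by this same amount, so with mult == 1
                 * the timecounter's (cycles * mult) >> shift conversion
                 * yields plain nanoseconds.
                 */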
2191                 /* disable system timer temporarily by setting bit 31 */
2192                 wr32(E1000_TSAUXC, 0x80000000);
2193                 wrfl();
2194
2195                 /* Set registers so that rollover occurs soon to test this. */
2196                 wr32(E1000_SYSTIMR, 0x00000000);
2197                 wr32(E1000_SYSTIML, 0x80000000);
2198                 wr32(E1000_SYSTIMH, 0x000000FF);
2199                 wrfl();
2200
2201                 /* enable system timer by clearing bit 31 */
2202                 wr32(E1000_TSAUXC, 0x0);
2203                 wrfl();
2204
2205                 timecounter_init(&adapter->clock,
2206                                  &adapter->cycles,
2207                                  ktime_to_ns(ktime_get_real()));
2208                 /*
2209                  * Synchronize our NIC clock against system wall clock. NIC
2210                  * time stamp reading requires ~3us per sample, each sample
2211                  * was pretty stable even under load => only require 10
2212                  * samples for each offset comparison.
2213                  */
2214                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2215                 adapter->compare.source = &adapter->clock;
2216                 adapter->compare.target = ktime_get_real;
2217                 adapter->compare.num_samples = 10;
2218                 timecompare_update(&adapter->compare, 0);
2219                 break;
2220         case e1000_82576:
2221                 /*
2222                  * Initialize hardware timer: we keep it running just in case
2223                  * that some program needs it later on.
2224                  */
2225                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2226                 adapter->cycles.read = igb_read_clock;
2227                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2228                 adapter->cycles.mult = 1;
2229                 /*
2230                  * Scale the NIC clock cycle by a large factor so that
2231                  * relatively small clock corrections can be added or
2232                  * subtracted at each clock tick. The drawbacks of a large
2233                  * factor are a) that the clock register overflows more quickly
2234                  * (not such a big deal) and b) that the increment per tick has
2235                  * to fit into 24 bits.  As a result we need to use a shift of
2236                  * 19 so we can fit a value of 16 into the TIMINCA register.
2237                  */
2238                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2239                 wr32(E1000_TIMINCA,
2240                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2241                                 (16 << IGB_82576_TSYNC_SHIFT));
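                /*
                 * Concretely, the increment field holds 16 << 19 == 0x800000,
                 * which just fits in 24 bits; SYSTIM gains that amount every
                 * 16ns, so one counter unit is 2^-19 ns and cycles >> 19
                 * converts back to nanoseconds.
                 */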
2242
2243                 /* Set registers so that rollover occurs soon to test this. */
2244                 wr32(E1000_SYSTIML, 0x00000000);
2245                 wr32(E1000_SYSTIMH, 0xFF800000);
2246                 wrfl();
2247
2248                 timecounter_init(&adapter->clock,
2249                                  &adapter->cycles,
2250                                  ktime_to_ns(ktime_get_real()));
2251                 /*
2252                  * Synchronize our NIC clock against system wall clock. NIC
2253                  * time stamp reading requires ~3us per sample, each sample
2254                  * was pretty stable even under load => only require 10
2255                  * samples for each offset comparison.
2256                  */
2257                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2258                 adapter->compare.source = &adapter->clock;
2259                 adapter->compare.target = ktime_get_real;
2260                 adapter->compare.num_samples = 10;
2261                 timecompare_update(&adapter->compare, 0);
2262                 break;
2263         case e1000_82575:
2264                 /* 82575 does not support timesync */
2265         default:
2266                 break;
2267         }
2268
2269 }
2270
2271 /**
2272  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2273  * @adapter: board private structure to initialize
2274  *
2275  * igb_sw_init initializes the Adapter private data structure.
2276  * Fields are initialized based on PCI device information and
2277  * OS network device settings (MTU size).
2278  **/
2279 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2280 {
2281         struct e1000_hw *hw = &adapter->hw;
2282         struct net_device *netdev = adapter->netdev;
2283         struct pci_dev *pdev = adapter->pdev;
2284
2285         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2286
2287         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2288         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2289         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2290         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2291
2292         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2293         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2294
2295         spin_lock_init(&adapter->stats64_lock);
2296 #ifdef CONFIG_PCI_IOV
2297         switch (hw->mac.type) {
2298         case e1000_82576:
2299         case e1000_i350:
2300                 if (max_vfs > 7) {
2301                         dev_warn(&pdev->dev,
2302                                  "Maximum of 7 VFs per PF, using max\n");
2303                         adapter->vfs_allocated_count = 7;
2304                 } else
2305                         adapter->vfs_allocated_count = max_vfs;
2306                 break;
2307         default:
2308                 break;
2309         }
2310 #endif /* CONFIG_PCI_IOV */
2311         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2312
2313         /*
2314          * If rss_queues > 4, or VFs are going to be allocated alongside
2315          * multiple RSS queues, combine each Tx and Rx queue into a queue
2316          * pair in order to conserve the limited supply of interrupt vectors.
2317          */
2318         if ((adapter->rss_queues > 4) ||
2319             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2320                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
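        /*
         * e.g. with 8 RSS queues and no VFs, pairing puts each Tx ring and
         * its Rx ring on a shared q_vector, halving the demand from 16
         * queue vectors to 8 (plus one vector for link and other causes).
         */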
2321
2322         /* This call may decrease the number of queues */
2323         if (igb_init_interrupt_scheme(adapter)) {
2324                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2325                 return -ENOMEM;
2326         }
2327
2328         igb_init_hw_timer(adapter);
2329         igb_probe_vfs(adapter);
2330
2331         /* Explicitly disable IRQ since the NIC can be in any state. */
2332         igb_irq_disable(adapter);
2333
2334         set_bit(__IGB_DOWN, &adapter->state);
2335         return 0;
2336 }
2337
2338 /**
2339  * igb_open - Called when a network interface is made active
2340  * @netdev: network interface device structure
2341  *
2342  * Returns 0 on success, negative value on failure
2343  *
2344  * The open entry point is called when a network interface is made
2345  * active by the system (IFF_UP).  At this point all resources needed
2346  * for transmit and receive operations are allocated, the interrupt
2347  * handler is registered with the OS, the watchdog timer is started,
2348  * and the stack is notified that the interface is ready.
2349  **/
2350 static int igb_open(struct net_device *netdev)
2351 {
2352         struct igb_adapter *adapter = netdev_priv(netdev);
2353         struct e1000_hw *hw = &adapter->hw;
2354         int err;
2355         int i;
2356
2357         /* disallow open during test */
2358         if (test_bit(__IGB_TESTING, &adapter->state))
2359                 return -EBUSY;
2360
2361         netif_carrier_off(netdev);
2362
2363         /* allocate transmit descriptors */
2364         err = igb_setup_all_tx_resources(adapter);
2365         if (err)
2366                 goto err_setup_tx;
2367
2368         /* allocate receive descriptors */
2369         err = igb_setup_all_rx_resources(adapter);
2370         if (err)
2371                 goto err_setup_rx;
2372
2373         igb_power_up_link(adapter);
2374
2375         /* before we allocate an interrupt, we must be ready to handle it.
2376          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2377          * as soon as we call pci_request_irq, so we have to set up our
2378          * clean_rx handler before we do so.  */
2379         igb_configure(adapter);
2380
2381         err = igb_request_irq(adapter);
2382         if (err)
2383                 goto err_req_irq;
2384
2385         /* From here on the code is the same as igb_up() */
2386         clear_bit(__IGB_DOWN, &adapter->state);
2387
2388         for (i = 0; i < adapter->num_q_vectors; i++) {
2389                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2390                 napi_enable(&q_vector->napi);
2391         }
2392
2393         /* Clear any pending interrupts. */
2394         rd32(E1000_ICR);
2395
2396         igb_irq_enable(adapter);
2397
2398         /* notify VFs that reset has been completed */
2399         if (adapter->vfs_allocated_count) {
2400                 u32 reg_data = rd32(E1000_CTRL_EXT);
2401                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2402                 wr32(E1000_CTRL_EXT, reg_data);
2403         }
2404
2405         netif_tx_start_all_queues(netdev);
2406
2407         /* start the watchdog. */
2408         hw->mac.get_link_status = 1;
2409         schedule_work(&adapter->watchdog_task);
2410
2411         return 0;
2412
2413 err_req_irq:
2414         igb_release_hw_control(adapter);
2415         igb_power_down_link(adapter);
2416         igb_free_all_rx_resources(adapter);
2417 err_setup_rx:
2418         igb_free_all_tx_resources(adapter);
2419 err_setup_tx:
2420         igb_reset(adapter);
2421
2422         return err;
2423 }
2424
2425 /**
2426  * igb_close - Disables a network interface
2427  * @netdev: network interface device structure
2428  *
2429  * Returns 0, this is not allowed to fail
2430  *
2431  * The close entry point is called when an interface is de-activated
2432  * by the OS.  The hardware is still under the driver's control, but
2433  * needs to be disabled.  A global MAC reset is issued to stop the
2434  * hardware, and all transmit and receive resources are freed.
2435  **/
2436 static int igb_close(struct net_device *netdev)
2437 {
2438         struct igb_adapter *adapter = netdev_priv(netdev);
2439
2440         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2441         igb_down(adapter);
2442
2443         igb_free_irq(adapter);
2444
2445         igb_free_all_tx_resources(adapter);
2446         igb_free_all_rx_resources(adapter);
2447
2448         return 0;
2449 }
2450
2451 /**
2452  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2453  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2454  *
2455  * Return 0 on success, negative on failure
2456  **/
2457 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2458 {
2459         struct device *dev = tx_ring->dev;
2460         int size;
2461
2462         size = sizeof(struct igb_buffer) * tx_ring->count;
2463         tx_ring->buffer_info = vzalloc(size);
2464         if (!tx_ring->buffer_info)
2465                 goto err;
2466
2467         /* round up to nearest 4K */
2468         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2469         tx_ring->size = ALIGN(tx_ring->size, 4096);
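        /* e.g. a 256-entry ring: 256 * 16 bytes == 4096, already 4K aligned;
         * other counts get padded up to the next 4K boundary */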
2470
2471         tx_ring->desc = dma_alloc_coherent(dev,
2472                                            tx_ring->size,
2473                                            &tx_ring->dma,
2474                                            GFP_KERNEL);
2475
2476         if (!tx_ring->desc)
2477                 goto err;
2478
2479         tx_ring->next_to_use = 0;
2480         tx_ring->next_to_clean = 0;
2481         return 0;
2482
2483 err:
2484         vfree(tx_ring->buffer_info);
2485         dev_err(dev,
2486                 "Unable to allocate memory for the transmit descriptor ring\n");
2487         return -ENOMEM;
2488 }
2489
2490 /**
2491  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2492  *                                (Descriptors) for all queues
2493  * @adapter: board private structure
2494  *
2495  * Return 0 on success, negative on failure
2496  **/
2497 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2498 {
2499         struct pci_dev *pdev = adapter->pdev;
2500         int i, err = 0;
2501
2502         for (i = 0; i < adapter->num_tx_queues; i++) {
2503                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2504                 if (err) {
2505                         dev_err(&pdev->dev,
2506                                 "Allocation for Tx Queue %u failed\n", i);
2507                         for (i--; i >= 0; i--)
2508                                 igb_free_tx_resources(adapter->tx_ring[i]);
2509                         break;
2510                 }
2511         }
2512
2513         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2514                 int r_idx = i % adapter->num_tx_queues;
2515                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2516         }
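        /*
         * e.g. if the adapter ended up with 4 Tx queues, the table entries
         * map 0, 1, 2, 3, 0, 1, ... so any queue index the stack selects
         * still lands on a valid ring.
         */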
2517         return err;
2518 }
2519
2520 /**
2521  * igb_setup_tctl - configure the transmit control registers
2522  * @adapter: Board private structure
2523  **/
2524 void igb_setup_tctl(struct igb_adapter *adapter)
2525 {
2526         struct e1000_hw *hw = &adapter->hw;
2527         u32 tctl;
2528
2529         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2530         wr32(E1000_TXDCTL(0), 0);
2531
2532         /* Program the Transmit Control Register */
2533         tctl = rd32(E1000_TCTL);
2534         tctl &= ~E1000_TCTL_CT;
2535         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2536                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2537
2538         igb_config_collision_dist(hw);
2539
2540         /* Enable transmits */
2541         tctl |= E1000_TCTL_EN;
2542
2543         wr32(E1000_TCTL, tctl);
2544 }
2545
2546 /**
2547  * igb_configure_tx_ring - Configure transmit ring after Reset
2548  * @adapter: board private structure
2549  * @ring: tx ring to configure
2550  *
2551  * Configure a transmit ring after a reset.
2552  **/
2553 void igb_configure_tx_ring(struct igb_adapter *adapter,
2554                            struct igb_ring *ring)
2555 {
2556         struct e1000_hw *hw = &adapter->hw;
2557         u32 txdctl;
2558         u64 tdba = ring->dma;
2559         int reg_idx = ring->reg_idx;
2560
2561         /* disable the queue */
2562         txdctl = rd32(E1000_TXDCTL(reg_idx));
2563         wr32(E1000_TXDCTL(reg_idx),
2564                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2565         wrfl();
2566         mdelay(10);
2567
2568         wr32(E1000_TDLEN(reg_idx),
2569                         ring->count * sizeof(union e1000_adv_tx_desc));
2570         wr32(E1000_TDBAL(reg_idx),
2571                         tdba & 0x00000000ffffffffULL);
2572         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2573
2574         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2575         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2576         writel(0, ring->head);
2577         writel(0, ring->tail);
2578
2579         txdctl |= IGB_TX_PTHRESH;
2580         txdctl |= IGB_TX_HTHRESH << 8;
2581         txdctl |= IGB_TX_WTHRESH << 16;
2582
2583         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2584         wr32(E1000_TXDCTL(reg_idx), txdctl);
2585 }
2586
2587 /**
2588  * igb_configure_tx - Configure transmit Unit after Reset
2589  * @adapter: board private structure
2590  *
2591  * Configure the Tx unit of the MAC after a reset.
2592  **/
2593 static void igb_configure_tx(struct igb_adapter *adapter)
2594 {
2595         int i;
2596
2597         for (i = 0; i < adapter->num_tx_queues; i++)
2598                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2599 }
2600
2601 /**
2602  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2603  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2604  *
2605  * Returns 0 on success, negative on failure
2606  **/
2607 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2608 {
2609         struct device *dev = rx_ring->dev;
2610         int size, desc_len;
2611
2612         size = sizeof(struct igb_buffer) * rx_ring->count;
2613         rx_ring->buffer_info = vzalloc(size);
2614         if (!rx_ring->buffer_info)
2615                 goto err;
2616
2617         desc_len = sizeof(union e1000_adv_rx_desc);
2618
2619         /* Round up to nearest 4K */
2620         rx_ring->size = rx_ring->count * desc_len;
2621         rx_ring->size = ALIGN(rx_ring->size, 4096);
2622
2623         rx_ring->desc = dma_alloc_coherent(dev,
2624                                            rx_ring->size,
2625                                            &rx_ring->dma,
2626                                            GFP_KERNEL);
2627
2628         if (!rx_ring->desc)
2629                 goto err;
2630
2631         rx_ring->next_to_clean = 0;
2632         rx_ring->next_to_use = 0;
2633
2634         return 0;
2635
2636 err:
2637         vfree(rx_ring->buffer_info);
2638         rx_ring->buffer_info = NULL;
2639         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2640                 " ring\n");
2641         return -ENOMEM;
2642 }
2643
2644 /**
2645  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2646  *                                (Descriptors) for all queues
2647  * @adapter: board private structure
2648  *
2649  * Return 0 on success, negative on failure
2650  **/
2651 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2652 {
2653         struct pci_dev *pdev = adapter->pdev;
2654         int i, err = 0;
2655
2656         for (i = 0; i < adapter->num_rx_queues; i++) {
2657                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2658                 if (err) {
2659                         dev_err(&pdev->dev,
2660                                 "Allocation for Rx Queue %u failed\n", i);
2661                         for (i--; i >= 0; i--)
2662                                 igb_free_rx_resources(adapter->rx_ring[i]);
2663                         break;
2664                 }
2665         }
2666
2667         return err;
2668 }
2669
2670 /**
2671  * igb_setup_mrqc - configure the multiple receive queue control registers
2672  * @adapter: Board private structure
2673  **/
2674 static void igb_setup_mrqc(struct igb_adapter *adapter)
2675 {
2676         struct e1000_hw *hw = &adapter->hw;
2677         u32 mrqc, rxcsum;
2678         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2679         union e1000_reta {
2680                 u32 dword;
2681                 u8  bytes[4];
2682         } reta;
2683         static const u8 rsshash[40] = {
2684                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2685                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2686                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2687                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2688
2689         /* Fill out hash function seeds */
2690         for (j = 0; j < 10; j++) {
2691                 u32 rsskey = rsshash[(j * 4)];
2692                 rsskey |= rsshash[(j * 4) + 1] << 8;
2693                 rsskey |= rsshash[(j * 4) + 2] << 16;
2694                 rsskey |= rsshash[(j * 4) + 3] << 24;
2695                 array_wr32(E1000_RSSRK(0), j, rsskey);
2696         }
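        /* e.g. the first write above packs rsshash[0..3] as
         * 0x6d | 0x5a << 8 | 0x56 << 16 | 0xda << 24 == 0xda565a6d */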
2697
2698         num_rx_queues = adapter->rss_queues;
2699
2700         if (adapter->vfs_allocated_count) {
2701                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2702                 switch (hw->mac.type) {
2703                 case e1000_i350:
2704                 case e1000_82580:
2705                         num_rx_queues = 1;
2706                         shift = 0;
2707                         break;
2708                 case e1000_82576:
2709                         shift = 3;
2710                         num_rx_queues = 2;
2711                         break;
2712                 case e1000_82575:
2713                         shift = 2;
2714                         shift2 = 6;
2715                 default:
2716                         break;
2717                 }
2718         } else {
2719                 if (hw->mac.type == e1000_82575)
2720                         shift = 6;
2721         }
2722
2723         for (j = 0; j < (32 * 4); j++) {
2724                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2725                 if (shift2)
2726                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2727                 if ((j & 3) == 3)
2728                         wr32(E1000_RETA(j >> 2), reta.dword);
2729         }
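        /*
         * e.g. with four RSS queues and no shift the bytes cycle through
         * 0, 1, 2, 3, so (on a little-endian CPU) every RETA dword written
         * above is 0x03020100, spreading hashes evenly across the queues.
         */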
2730
2731         /*
2732          * Disable raw packet checksumming so that RSS hash is placed in
2733          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2734          * offloads as they are enabled by default
2735          */
2736         rxcsum = rd32(E1000_RXCSUM);
2737         rxcsum |= E1000_RXCSUM_PCSD;
2738
2739         if (adapter->hw.mac.type >= e1000_82576)
2740                 /* Enable Receive Checksum Offload for SCTP */
2741                 rxcsum |= E1000_RXCSUM_CRCOFL;
2742
2743         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2744         wr32(E1000_RXCSUM, rxcsum);
2745
2746         /* If VMDq is enabled then we set the appropriate mode for that, else
2747          * we default to RSS so that an RSS hash is calculated per packet even
2748          * if we are only using one queue */
2749         if (adapter->vfs_allocated_count) {
2750                 if (hw->mac.type > e1000_82575) {
2751                         /* Set the default pool for the PF's first queue */
2752                         u32 vtctl = rd32(E1000_VT_CTL);
2753                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2754                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2755                         vtctl |= adapter->vfs_allocated_count <<
2756                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2757                         wr32(E1000_VT_CTL, vtctl);
2758                 }
2759                 if (adapter->rss_queues > 1)
2760                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2761                 else
2762                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2763         } else {
2764                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2765         }
2766         igb_vmm_control(adapter);
2767
2768         /*
2769          * Generate RSS hash based on TCP port numbers and/or
2770          * IPv4/v6 src and dst addresses since UDP cannot be
2771          * hashed reliably due to IP fragmentation
2772          */
2773         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2774                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2775                 E1000_MRQC_RSS_FIELD_IPV6 |
2776                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2777                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2778
2779         wr32(E1000_MRQC, mrqc);
2780 }
2781
2782 /**
2783  * igb_setup_rctl - configure the receive control registers
2784  * @adapter: board private structure
2785  **/
2786 void igb_setup_rctl(struct igb_adapter *adapter)
2787 {
2788         struct e1000_hw *hw = &adapter->hw;
2789         u32 rctl;
2790
2791         rctl = rd32(E1000_RCTL);
2792
2793         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2794         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2795
2796         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2797                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2798
2799         /*
2800          * enable stripping of CRC. It's unlikely this will break BMC
2801          * redirection as it did with e1000. Newer features require
2802          * that the HW strips the CRC.
2803          */
2804         rctl |= E1000_RCTL_SECRC;
2805
2806         /* disable store bad packets and clear size bits. */
2807         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2808
2809         /* enable LPE to prevent packets larger than max_frame_size */
2810         rctl |= E1000_RCTL_LPE;
2811
2812         /* disable queue 0 to prevent tail write w/o re-config */
2813         wr32(E1000_RXDCTL(0), 0);
2814
2815         /* Attention!!!  For SR-IOV PF driver operations you must enable
2816          * queue drop for all VF and PF queues to prevent head-of-line blocking
2817          * if an untrusted VF does not provide descriptors to hardware.
2818          */
2819         if (adapter->vfs_allocated_count) {
2820                 /* set all queue drop enable bits */
2821                 wr32(E1000_QDE, ALL_QUEUES);
2822         }
2823
2824         wr32(E1000_RCTL, rctl);
2825 }
2826
2827 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2828                                    int vfn)
2829 {
2830         struct e1000_hw *hw = &adapter->hw;
2831         u32 vmolr;
2832
2833         /* if this is a VF (not the PF), check whether it has VLANs
2834          * enabled and increase the size to allow for the VLAN tag */
2835         if (vfn < adapter->vfs_allocated_count &&
2836             adapter->vf_data[vfn].vlans_enabled)
2837                 size += VLAN_TAG_SIZE;
2838
2839         vmolr = rd32(E1000_VMOLR(vfn));
2840         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2841         vmolr |= size | E1000_VMOLR_LPE;
2842         wr32(E1000_VMOLR(vfn), vmolr);
2843
2844         return 0;
2845 }
2846
2847 /**
2848  * igb_rlpml_set - set maximum receive packet size
2849  * @adapter: board private structure
2850  *
2851  * Configure maximum receivable packet size.
2852  **/
2853 static void igb_rlpml_set(struct igb_adapter *adapter)
2854 {
2855         u32 max_frame_size = adapter->max_frame_size;
2856         struct e1000_hw *hw = &adapter->hw;
2857         u16 pf_id = adapter->vfs_allocated_count;
2858
2859         if (adapter->vlgrp)
2860                 max_frame_size += VLAN_TAG_SIZE;
2861
2862         /* if VFs are enabled we set RLPML to the largest possible request
2863          * size and set the VMOLR RLPML to the size we need */
2864         if (pf_id) {
2865                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2866                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2867         }
2868
2869         wr32(E1000_RLPML, max_frame_size);
2870 }
2871
2872 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2873                                  int vfn, bool aupe)
2874 {
2875         struct e1000_hw *hw = &adapter->hw;
2876         u32 vmolr;
2877
2878         /*
2879          * This register exists only on 82576 and newer, so on older
2880          * hardware just exit and do nothing
2881          */
2882         if (hw->mac.type < e1000_82576)
2883                 return;
2884
2885         vmolr = rd32(E1000_VMOLR(vfn));
2886         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2887         if (aupe)
2888                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2889         else
2890                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2891
2892         /* clear all bits that might not be set */
2893         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2894
2895         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2896                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2897         /*
2898          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2899          * multicast packets
2900          */
2901         if (vfn <= adapter->vfs_allocated_count)
2902                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2903
2904         wr32(E1000_VMOLR(vfn), vmolr);
2905 }
2906
2907 /**
2908  * igb_configure_rx_ring - Configure a receive ring after Reset
2909  * @adapter: board private structure
2910  * @ring: receive ring to be configured
2911  *
2912  * Configure the Rx unit of the MAC after a reset.
2913  **/
2914 void igb_configure_rx_ring(struct igb_adapter *adapter,
2915                            struct igb_ring *ring)
2916 {
2917         struct e1000_hw *hw = &adapter->hw;
2918         u64 rdba = ring->dma;
2919         int reg_idx = ring->reg_idx;
2920         u32 srrctl, rxdctl;
2921
2922         /* disable the queue */
2923         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2924         wr32(E1000_RXDCTL(reg_idx),
2925                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2926
2927         /* Set DMA base address registers */
2928         wr32(E1000_RDBAL(reg_idx),
2929              rdba & 0x00000000ffffffffULL);
2930         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2931         wr32(E1000_RDLEN(reg_idx),
2932                        ring->count * sizeof(union e1000_adv_rx_desc));
2933
2934         /* initialize head and tail */
2935         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2936         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2937         writel(0, ring->head);
2938         writel(0, ring->tail);
2939
2940         /* set descriptor configuration */
2941         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2942                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2943                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
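                /* the packet buffer gets half a page, capped at 16KB, so
                 * two receive buffers can be carved from each page */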
2944 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2945                 srrctl |= IGB_RXBUFFER_16384 >>
2946                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2947 #else
2948                 srrctl |= (PAGE_SIZE / 2) >>
2949                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2950 #endif
2951                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2952         } else {
2953                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2954                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2955                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2956         }
2957         if (hw->mac.type == e1000_82580)
2958                 srrctl |= E1000_SRRCTL_TIMESTAMP;
2959         /* Only set Drop Enable if we are supporting multiple queues */
2960         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2961                 srrctl |= E1000_SRRCTL_DROP_EN;
2962
2963         wr32(E1000_SRRCTL(reg_idx), srrctl);
2964
2965         /* set filtering for VMDQ pools */
2966         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2967
2968         /* enable receive descriptor fetching */
2969         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2970         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2971         rxdctl &= 0xFFF00000;
2972         rxdctl |= IGB_RX_PTHRESH;
2973         rxdctl |= IGB_RX_HTHRESH << 8;
2974         rxdctl |= IGB_RX_WTHRESH << 16;
2975         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2976 }
2977
2978 /**
2979  * igb_configure_rx - Configure receive Unit after Reset
2980  * @adapter: board private structure
2981  *
2982  * Configure the Rx unit of the MAC after a reset.
2983  **/
2984 static void igb_configure_rx(struct igb_adapter *adapter)
2985 {
2986         int i;
2987
2988         /* set UTA to appropriate mode */
2989         igb_set_uta(adapter);
2990
2991         /* set the correct pool for the PF default MAC address in entry 0 */
2992         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2993                          adapter->vfs_allocated_count);
2994
2995         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2996          * the Base and Length of the Rx Descriptor Ring */
2997         for (i = 0; i < adapter->num_rx_queues; i++)
2998                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2999 }
3000
3001 /**
3002  * igb_free_tx_resources - Free Tx Resources per Queue
3003  * @tx_ring: Tx descriptor ring for a specific queue
3004  *
3005  * Free all transmit software resources
3006  **/
3007 void igb_free_tx_resources(struct igb_ring *tx_ring)
3008 {
3009         igb_clean_tx_ring(tx_ring);
3010
3011         vfree(tx_ring->buffer_info);
3012         tx_ring->buffer_info = NULL;
3013
3014         /* if not set, then don't free */
3015         if (!tx_ring->desc)
3016                 return;
3017
3018         dma_free_coherent(tx_ring->dev, tx_ring->size,
3019                           tx_ring->desc, tx_ring->dma);
3020
3021         tx_ring->desc = NULL;
3022 }
3023
3024 /**
3025  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3026  * @adapter: board private structure
3027  *
3028  * Free all transmit software resources
3029  **/
3030 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3031 {
3032         int i;
3033
3034         for (i = 0; i < adapter->num_tx_queues; i++)
3035                 igb_free_tx_resources(adapter->tx_ring[i]);
3036 }
3037
3038 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3039                                     struct igb_buffer *buffer_info)
3040 {
3041         if (buffer_info->dma) {
3042                 if (buffer_info->mapped_as_page)
3043                         dma_unmap_page(tx_ring->dev,
3044                                         buffer_info->dma,
3045                                         buffer_info->length,
3046                                         DMA_TO_DEVICE);
3047                 else
3048                         dma_unmap_single(tx_ring->dev,
3049                                         buffer_info->dma,
3050                                         buffer_info->length,
3051                                         DMA_TO_DEVICE);
3052                 buffer_info->dma = 0;
3053         }
3054         if (buffer_info->skb) {
3055                 dev_kfree_skb_any(buffer_info->skb);
3056                 buffer_info->skb = NULL;
3057         }
3058         buffer_info->time_stamp = 0;
3059         buffer_info->length = 0;
3060         buffer_info->next_to_watch = 0;
3061         buffer_info->mapped_as_page = false;
3062 }
3063
3064 /**
3065  * igb_clean_tx_ring - Free Tx Buffers
3066  * @tx_ring: ring to be cleaned
3067  **/
3068 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3069 {
3070         struct igb_buffer *buffer_info;
3071         unsigned long size;
3072         unsigned int i;
3073
3074         if (!tx_ring->buffer_info)
3075                 return;
3076         /* Free all the Tx ring sk_buffs */
3077
3078         for (i = 0; i < tx_ring->count; i++) {
3079                 buffer_info = &tx_ring->buffer_info[i];
3080                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3081         }
3082
3083         size = sizeof(struct igb_buffer) * tx_ring->count;
3084         memset(tx_ring->buffer_info, 0, size);
3085
3086         /* Zero out the descriptor ring */
3087         memset(tx_ring->desc, 0, tx_ring->size);
3088
3089         tx_ring->next_to_use = 0;
3090         tx_ring->next_to_clean = 0;
3091 }
3092
3093 /**
3094  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3095  * @adapter: board private structure
3096  **/
3097 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3098 {
3099         int i;
3100
3101         for (i = 0; i < adapter->num_tx_queues; i++)
3102                 igb_clean_tx_ring(adapter->tx_ring[i]);
3103 }
3104
3105 /**
3106  * igb_free_rx_resources - Free Rx Resources
3107  * @rx_ring: ring to clean the resources from
3108  *
3109  * Free all receive software resources
3110  **/
3111 void igb_free_rx_resources(struct igb_ring *rx_ring)
3112 {
3113         igb_clean_rx_ring(rx_ring);
3114
3115         vfree(rx_ring->buffer_info);
3116         rx_ring->buffer_info = NULL;
3117
3118         /* if not set, then don't free */
3119         if (!rx_ring->desc)
3120                 return;
3121
3122         dma_free_coherent(rx_ring->dev, rx_ring->size,
3123                           rx_ring->desc, rx_ring->dma);
3124
3125         rx_ring->desc = NULL;
3126 }
3127
3128 /**
3129  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3130  * @adapter: board private structure
3131  *
3132  * Free all receive software resources
3133  **/
3134 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3135 {
3136         int i;
3137
3138         for (i = 0; i < adapter->num_rx_queues; i++)
3139                 igb_free_rx_resources(adapter->rx_ring[i]);
3140 }
3141
3142 /**
3143  * igb_clean_rx_ring - Free Rx Buffers per Queue
3144  * @rx_ring: ring to free buffers from
3145  **/
3146 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3147 {
3148         struct igb_buffer *buffer_info;
3149         unsigned long size;
3150         unsigned int i;
3151
3152         if (!rx_ring->buffer_info)
3153                 return;
3154
3155         /* Free all the Rx ring sk_buffs */
3156         for (i = 0; i < rx_ring->count; i++) {
3157                 buffer_info = &rx_ring->buffer_info[i];
3158                 if (buffer_info->dma) {
3159                         dma_unmap_single(rx_ring->dev,
3160                                          buffer_info->dma,
3161                                          rx_ring->rx_buffer_len,
3162                                          DMA_FROM_DEVICE);
3163                         buffer_info->dma = 0;
3164                 }
3165
3166                 if (buffer_info->skb) {
3167                         dev_kfree_skb(buffer_info->skb);
3168                         buffer_info->skb = NULL;
3169                 }
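                /* half-page buffers used for packet split are unmapped
                 * and released separately */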
3170                 if (buffer_info->page_dma) {
3171                         dma_unmap_page(rx_ring->dev,
3172                                        buffer_info->page_dma,
3173                                        PAGE_SIZE / 2,
3174                                        DMA_FROM_DEVICE);
3175                         buffer_info->page_dma = 0;
3176                 }
3177                 if (buffer_info->page) {
3178                         put_page(buffer_info->page);
3179                         buffer_info->page = NULL;
3180                         buffer_info->page_offset = 0;
3181                 }
3182         }
3183
3184         size = sizeof(struct igb_buffer) * rx_ring->count;
3185         memset(rx_ring->buffer_info, 0, size);
3186
3187         /* Zero out the descriptor ring */
3188         memset(rx_ring->desc, 0, rx_ring->size);
3189
3190         rx_ring->next_to_clean = 0;
3191         rx_ring->next_to_use = 0;
3192 }
3193
3194 /**
3195  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3196  * @adapter: board private structure
3197  **/
3198 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3199 {
3200         int i;
3201
3202         for (i = 0; i < adapter->num_rx_queues; i++)
3203                 igb_clean_rx_ring(adapter->rx_ring[i]);
3204 }
3205
3206 /**
3207  * igb_set_mac - Change the Ethernet Address of the NIC
3208  * @netdev: network interface device structure
3209  * @p: pointer to an address structure
3210  *
3211  * Returns 0 on success, negative on failure
3212  **/
3213 static int igb_set_mac(struct net_device *netdev, void *p)
3214 {
3215         struct igb_adapter *adapter = netdev_priv(netdev);
3216         struct e1000_hw *hw = &adapter->hw;
3217         struct sockaddr *addr = p;
3218
3219         if (!is_valid_ether_addr(addr->sa_data))
3220                 return -EADDRNOTAVAIL;
3221
3222         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3223         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3224
3225         /* set the correct pool for the new PF MAC address in entry 0 */
3226         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3227                          adapter->vfs_allocated_count);
3228
3229         return 0;
3230 }
3231
3232 /**
3233  * igb_write_mc_addr_list - write multicast addresses to MTA
3234  * @netdev: network interface device structure
3235  *
3236  * Writes multicast address list to the MTA hash table.
3237  * Returns: -ENOMEM on failure
3238  *                0 on no addresses written
3239  *                X on writing X addresses to MTA
3240  **/
3241 static int igb_write_mc_addr_list(struct net_device *netdev)
3242 {
3243         struct igb_adapter *adapter = netdev_priv(netdev);
3244         struct e1000_hw *hw = &adapter->hw;
3245         struct netdev_hw_addr *ha;
3246         u8  *mta_list;
3247         int i;
3248
3249         if (netdev_mc_empty(netdev)) {
3250                 /* nothing to program, so clear mc list */
3251                 igb_update_mc_addr_list(hw, NULL, 0);
3252                 igb_restore_vf_multicasts(adapter);
3253                 return 0;
3254         }
3255
3256         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3257         if (!mta_list)
3258                 return -ENOMEM;
3259
3260         /* The shared function expects a packed array of only addresses. */
3261         i = 0;
3262         netdev_for_each_mc_addr(ha, netdev)
3263                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3264
3265         igb_update_mc_addr_list(hw, mta_list, i);
3266         kfree(mta_list);
3267
3268         return netdev_mc_count(netdev);
3269 }
3270
3271 /**
3272  * igb_write_uc_addr_list - write unicast addresses to RAR table
3273  * @netdev: network interface device structure
3274  *
3275  * Writes unicast address list to the RAR table.
3276  * Returns: -ENOMEM on failure/insufficient address space
3277  *                0 on no addresses written
3278  *                X on writing X addresses to the RAR table
3279  **/
3280 static int igb_write_uc_addr_list(struct net_device *netdev)
3281 {
3282         struct igb_adapter *adapter = netdev_priv(netdev);
3283         struct e1000_hw *hw = &adapter->hw;
3284         unsigned int vfn = adapter->vfs_allocated_count;
3285         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3286         int count = 0;
3287
3288         /* return -ENOMEM to indicate insufficient RAR space for the addresses */
3289         if (netdev_uc_count(netdev) > rar_entries)
3290                 return -ENOMEM;
3291
3292         if (!netdev_uc_empty(netdev) && rar_entries) {
3293                 struct netdev_hw_addr *ha;
3294
3295                 netdev_for_each_uc_addr(ha, netdev) {
3296                         if (!rar_entries)
3297                                 break;
3298                         igb_rar_set_qsel(adapter, ha->addr,
3299                                          rar_entries--,
3300                                          vfn);
3301                         count++;
3302                 }
3303         }
3304         /* zero the unused RAR entries in reverse order to avoid write combining */
3305         for (; rar_entries > 0 ; rar_entries--) {
3306                 wr32(E1000_RAH(rar_entries), 0);
3307                 wr32(E1000_RAL(rar_entries), 0);
3308         }
3309         wrfl();
3310
3311         return count;
3312 }
3313
3314 /**
3315  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3316  * @netdev: network interface device structure
3317  *
3318  * The set_rx_mode entry point is called whenever the unicast or multicast
3319  * address lists or the network interface flags are updated.  This routine is
3320  * responsible for configuring the hardware for proper unicast, multicast,
3321  * promiscuous mode, and all-multi behavior.
3322  **/
3323 static void igb_set_rx_mode(struct net_device *netdev)
3324 {
3325         struct igb_adapter *adapter = netdev_priv(netdev);
3326         struct e1000_hw *hw = &adapter->hw;
3327         unsigned int vfn = adapter->vfs_allocated_count;
3328         u32 rctl, vmolr = 0;
3329         int count;
3330
3331         /* Check for Promiscuous and All Multicast modes */
3332         rctl = rd32(E1000_RCTL);
3333
3334         /* clear the affected bits */
3335         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3336
3337         if (netdev->flags & IFF_PROMISC) {
3338                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3339                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3340         } else {
3341                 if (netdev->flags & IFF_ALLMULTI) {
3342                         rctl |= E1000_RCTL_MPE;
3343                         vmolr |= E1000_VMOLR_MPME;
3344                 } else {
3345                         /*
3346                          * Write addresses to the MTA; if the attempt fails
3347                          * then just turn on multicast promiscuous mode so
3348                          * that we can at least receive multicast traffic
3349                          */
3350                         count = igb_write_mc_addr_list(netdev);
3351                         if (count < 0) {
3352                                 rctl |= E1000_RCTL_MPE;
3353                                 vmolr |= E1000_VMOLR_MPME;
3354                         } else if (count) {
3355                                 vmolr |= E1000_VMOLR_ROMPE;
3356                         }
3357                 }
3358                 /*
3359                  * Write addresses to available RAR registers; if there is not
3360                  * sufficient space to store all the addresses then enable
3361                  * unicast promiscuous mode
3362                  */
3363                 count = igb_write_uc_addr_list(netdev);
3364                 if (count < 0) {
3365                         rctl |= E1000_RCTL_UPE;
3366                         vmolr |= E1000_VMOLR_ROPE;
3367                 }
3368                 rctl |= E1000_RCTL_VFE;
3369         }
3370         wr32(E1000_RCTL, rctl);
3371
3372         /*
3373          * In order to support SR-IOV and eventually VMDq it is necessary to set
3374          * the VMOLR to enable the appropriate modes.  Without this workaround
3375          * we will have issues with VLAN tag stripping not being done for frames
3376          * that are only arriving because we are the default pool
3377          */
3378         if (hw->mac.type < e1000_82576)
3379                 return;
3380
3381         vmolr |= rd32(E1000_VMOLR(vfn)) &
3382                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3383         wr32(E1000_VMOLR(vfn), vmolr);
3384         igb_restore_vf_multicasts(adapter);
3385 }
3386
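/* Collect any MAC/VLAN anti-spoof events the hardware has recorded in the
 * WVBR register (82576 and i350 only); igb_spoof_check() reports them. */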
3387 static void igb_check_wvbr(struct igb_adapter *adapter)
3388 {
3389         struct e1000_hw *hw = &adapter->hw;
3390         u32 wvbr = 0;
3391
3392         switch (hw->mac.type) {
3393         case e1000_82576:
3394         case e1000_i350:
3395                 if (!(wvbr = rd32(E1000_WVBR)))
3396                         return;
3397                 break;
3398         default:
3399                 break;
3400         }
3401
3402         adapter->wvbr |= wvbr;
3403 }
3404
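/* WVBR records two event bits per VF; the second set starts 8 bits above
 * the first, hence the staggered offset below. */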
3405 #define IGB_STAGGERED_QUEUE_OFFSET 8
3406
3407 static void igb_spoof_check(struct igb_adapter *adapter)
3408 {
3409         int j;
3410
3411         if (!adapter->wvbr)
3412                 return;
3413
3414         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3415                 if (adapter->wvbr & (1 << j) ||
3416                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3417                         dev_warn(&adapter->pdev->dev,
3418                                 "Spoof event(s) detected on VF %d\n", j);
3419                         adapter->wvbr &=
3420                                 ~((1 << j) |
3421                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3422                 }
3423         }
3424 }
3425
3426 /* Need to wait a few seconds after link up to get diagnostic information from
3427  * the PHY */
3428 static void igb_update_phy_info(unsigned long data)
3429 {
3430         struct igb_adapter *adapter = (struct igb_adapter *) data;
3431         igb_get_phy_info(&adapter->hw);
3432 }
3433
3434 /**
3435  * igb_has_link - check shared code for link and determine up/down
3436  * @adapter: pointer to driver private info
3437  **/
3438 bool igb_has_link(struct igb_adapter *adapter)
3439 {
3440         struct e1000_hw *hw = &adapter->hw;
3441         bool link_active = false;
3442         s32 ret_val = 0;
3443
3444         /* get_link_status is set on LSC (link status) interrupt or
3445          * rx sequence error interrupt.  get_link_status will stay
3446          * true until e1000_check_for_link establishes link
3447          * for copper adapters ONLY
3448          */
3449         switch (hw->phy.media_type) {
3450         case e1000_media_type_copper:
3451                 if (hw->mac.get_link_status) {
3452                         ret_val = hw->mac.ops.check_for_link(hw);
3453                         link_active = !hw->mac.get_link_status;
3454                 } else {
3455                         link_active = true;
3456                 }
3457                 break;
3458         case e1000_media_type_internal_serdes:
3459                 ret_val = hw->mac.ops.check_for_link(hw);
3460                 link_active = hw->mac.serdes_has_link;
3461                 break;
3462         default:
3463         case e1000_media_type_unknown:
3464                 break;
3465         }
3466
3467         return link_active;
3468 }
3469
3470 /**
3471  * igb_watchdog - Timer Call-back
3472  * @data: pointer to adapter cast into an unsigned long
3473  **/
3474 static void igb_watchdog(unsigned long data)
3475 {
3476         struct igb_adapter *adapter = (struct igb_adapter *)data;
3477         /* Do the rest outside of interrupt context */
3478         schedule_work(&adapter->watchdog_task);
3479 }
3480
3481 static void igb_watchdog_task(struct work_struct *work)
3482 {
3483         struct igb_adapter *adapter = container_of(work,
3484                                                    struct igb_adapter,
3485                                                    watchdog_task);
3486         struct e1000_hw *hw = &adapter->hw;
3487         struct net_device *netdev = adapter->netdev;
3488         u32 link;
3489         int i;
3490
3491         link = igb_has_link(adapter);
3492         if (link) {
3493                 if (!netif_carrier_ok(netdev)) {
3494                         u32 ctrl;
3495                         hw->mac.ops.get_speed_and_duplex(hw,
3496                                                          &adapter->link_speed,
3497                                                          &adapter->link_duplex);
3498
3499                         ctrl = rd32(E1000_CTRL);
3500                         /* Link status message must follow this format */
3501                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3502                                  "Flow Control: %s\n",
3503                                netdev->name,
3504                                adapter->link_speed,
3505                                adapter->link_duplex == FULL_DUPLEX ?
3506                                  "Full Duplex" : "Half Duplex",
3507                                ((ctrl & E1000_CTRL_TFCE) &&
3508                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3509                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3510                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3511
3512                         /* adjust timeout factor according to speed/duplex */
3513                         adapter->tx_timeout_factor = 1;
3514                         switch (adapter->link_speed) {
3515                         case SPEED_10:
3516                                 adapter->tx_timeout_factor = 14;
3517                                 break;
3518                         case SPEED_100:
3519                                 /* maybe add some timeout factor ? */
3520                                 break;
3521                         }
3522
3523                         netif_carrier_on(netdev);
3524
3525                         igb_ping_all_vfs(adapter);
3526                         igb_check_vf_rate_limit(adapter);
3527
3528                         /* link state has changed, schedule phy info update */
3529                         if (!test_bit(__IGB_DOWN, &adapter->state))
3530                                 mod_timer(&adapter->phy_info_timer,
3531                                           round_jiffies(jiffies + 2 * HZ));
3532                 }
3533         } else {
3534                 if (netif_carrier_ok(netdev)) {
3535                         adapter->link_speed = 0;
3536                         adapter->link_duplex = 0;
3537                         /* Link status message must follow this format */
3538                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3539                                netdev->name);
3540                         netif_carrier_off(netdev);
3541
3542                         igb_ping_all_vfs(adapter);
3543
3544                         /* link state has changed, schedule phy info update */
3545                         if (!test_bit(__IGB_DOWN, &adapter->state))
3546                                 mod_timer(&adapter->phy_info_timer,
3547                                           round_jiffies(jiffies + 2 * HZ));
3548                 }
3549         }
3550
3551         spin_lock(&adapter->stats64_lock);
3552         igb_update_stats(adapter, &adapter->stats64);
3553         spin_unlock(&adapter->stats64_lock);
3554
3555         for (i = 0; i < adapter->num_tx_queues; i++) {
3556                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3557                 if (!netif_carrier_ok(netdev)) {
3558                         /* We've lost link, so the controller stops DMA,
3559                          * but we've got queued Tx work that's never going
3560                          * to get done, so reset controller to flush Tx.
3561                          * (Do the reset outside of interrupt context). */
3562                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3563                                 adapter->tx_timeout_count++;
3564                                 schedule_work(&adapter->reset_task);
3565                                 /* return immediately since reset is imminent */
3566                                 return;
3567                         }
3568                 }
3569
3570                 /* Force detection of hung controller every watchdog period */
3571                 tx_ring->detect_tx_hung = true;
3572         }
3573
3574         /* Cause software interrupt to ensure rx ring is cleaned */
3575         if (adapter->msix_entries) {
3576                 u32 eics = 0;
3577                 for (i = 0; i < adapter->num_q_vectors; i++) {
3578                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3579                         eics |= q_vector->eims_value;
3580                 }
3581                 wr32(E1000_EICS, eics);
3582         } else {
3583                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3584         }
3585
3586         igb_spoof_check(adapter);
3587
3588         /* Reset the timer */
3589         if (!test_bit(__IGB_DOWN, &adapter->state))
3590                 mod_timer(&adapter->watchdog_timer,
3591                           round_jiffies(jiffies + 2 * HZ));
3592 }
3593
3594 enum latency_range {
3595         lowest_latency = 0,
3596         low_latency = 1,
3597         bulk_latency = 2,
3598         latency_invalid = 255
3599 };
3600
3601 /**
3602  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3603  *
3604  *      Stores a new ITR value based strictly on packet size.  This
3605  *      algorithm is less sophisticated than that used in igb_update_itr,
3606  *      due to the difficulty of synchronizing statistics across multiple
3607  *      receive rings.  The divisors and thresholds used by this function
3608  *      were determined based on theoretical maximum wire speed and testing
3609  *      data, in order to minimize response time while increasing bulk
3610  *      throughput.
3611  *      This functionality is controlled by the InterruptThrottleRate module
3612  *      parameter (see igb_param.c)
3613  *      NOTE:  This function is called only when operating in a multiqueue
3614  *             receive environment.
3615  * @q_vector: pointer to q_vector
3616  **/
3617 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3618 {
3619         int new_val = q_vector->itr_val;
3620         int avg_wire_size = 0;
3621         struct igb_adapter *adapter = q_vector->adapter;
3622         struct igb_ring *ring;
3623         unsigned int packets;
3624
3625         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3626          * ints/sec - an ITR value of 976.
3627          */
3628         if (adapter->link_speed != SPEED_1000) {
3629                 new_val = 976;
3630                 goto set_itr_val;
3631         }
3632
3633         ring = q_vector->rx_ring;
3634         if (ring) {
3635                 packets = ACCESS_ONCE(ring->total_packets);
3636
3637                 if (packets)
3638                         avg_wire_size = ring->total_bytes / packets;
3639         }
3640
3641         ring = q_vector->tx_ring;
3642         if (ring) {
3643                 packets = ACCESS_ONCE(ring->total_packets);
3644
3645                 if (packets)
3646                         avg_wire_size = max_t(u32, avg_wire_size,
3647                                               ring->total_bytes / packets);
3648         }
3649
3650         /* if avg_wire_size isn't set no work was done */
3651         if (!avg_wire_size)
3652                 goto clear_counts;
3653
3654         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3655         avg_wire_size += 24;
3656
3657         /* Don't starve jumbo frames */
3658         avg_wire_size = min(avg_wire_size, 3000);
3659
3660         /* Give a little boost to mid-size frames */
3661         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3662                 new_val = avg_wire_size / 3;
3663         else
3664                 new_val = avg_wire_size / 2;
3665
3666         /* when in itr mode 3 do not exceed 20K ints/sec */
3667         if (adapter->rx_itr_setting == 3 && new_val < 196)
3668                 new_val = 196;
3669
3670 set_itr_val:
3671         if (new_val != q_vector->itr_val) {
3672                 q_vector->itr_val = new_val;
3673                 q_vector->set_itr = 1;
3674         }
3675 clear_counts:
3676         if (q_vector->rx_ring) {
3677                 q_vector->rx_ring->total_bytes = 0;
3678                 q_vector->rx_ring->total_packets = 0;
3679         }
3680         if (q_vector->tx_ring) {
3681                 q_vector->tx_ring->total_bytes = 0;
3682                 q_vector->tx_ring->total_packets = 0;
3683         }
3684 }
3685
3686 /**
3687  * igb_update_itr - update the dynamic ITR value based on statistics
3688  *      Stores a new ITR value based on packets and byte
3689  *      counts during the last interrupt.  The advantage of per interrupt
3690  *      computation is faster updates and more accurate ITR for the current
3691  *      traffic pattern.  Constants in this function were computed
3692  *      based on theoretical maximum wire speed and thresholds were set based
3693  *      on testing data as well as attempting to minimize response time
3694  *      while increasing bulk throughput.
3695  *      This functionality is controlled by the InterruptThrottleRate module
3696  *      parameter (see igb_param.c)
3697  *      NOTE:  These calculations are only valid when operating in a single-
3698  *             queue environment.
3699  * @adapter: pointer to adapter
3700  * @itr_setting: current q_vector->itr_val
3701  * @packets: the number of packets during this measurement interval
3702  * @bytes: the number of bytes during this measurement interval
3703  **/
3704 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3705                                    int packets, int bytes)
3706 {
3707         unsigned int retval = itr_setting;
3708
3709         if (packets == 0)
3710                 goto update_itr_done;
3711
3712         switch (itr_setting) {
3713         case lowest_latency:
3714                 /* handle TSO and jumbo frames */
3715                 if (bytes/packets > 8000)
3716                         retval = bulk_latency;
3717                 else if ((packets < 5) && (bytes > 512))
3718                         retval = low_latency;
3719                 break;
3720         case low_latency:  /* 50 usec aka 20000 ints/s */
3721                 if (bytes > 10000) {
3722                         /* this if handles the TSO accounting */
3723                         if (bytes/packets > 8000) {
3724                                 retval = bulk_latency;
3725                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3726                                 retval = bulk_latency;
3727                         } else if (packets > 35) {
3728                                 retval = lowest_latency;
3729                         }
3730                 } else if (bytes/packets > 2000) {
3731                         retval = bulk_latency;
3732                 } else if (packets <= 2 && bytes < 512) {
3733                         retval = lowest_latency;
3734                 }
3735                 break;
3736         case bulk_latency: /* 250 usec aka 4000 ints/s */
3737                 if (bytes > 25000) {
3738                         if (packets > 35)
3739                                 retval = low_latency;
3740                 } else if (bytes < 1500) {
3741                         retval = low_latency;
3742                 }
3743                 break;
3744         }
3745
3746 update_itr_done:
3747         return retval;
3748 }
3749
3750 static void igb_set_itr(struct igb_adapter *adapter)
3751 {
3752         struct igb_q_vector *q_vector = adapter->q_vector[0];
3753         u16 current_itr;
3754         u32 new_itr = q_vector->itr_val;
3755
3756         /* for non-gigabit speeds, fix the interrupt rate at 4000 ints/sec */
3757         if (adapter->link_speed != SPEED_1000) {
3758                 current_itr = 0;
3759                 new_itr = 980; /* aka 4,000 ints/sec, matching igb_update_ring_itr */
3760                 goto set_itr_now;
3761         }
3762
3763         adapter->rx_itr = igb_update_itr(adapter,
3764                                     adapter->rx_itr,
3765                                     q_vector->rx_ring->total_packets,
3766                                     q_vector->rx_ring->total_bytes);
3767
3768         adapter->tx_itr = igb_update_itr(adapter,
3769                                     adapter->tx_itr,
3770                                     q_vector->tx_ring->total_packets,
3771                                     q_vector->tx_ring->total_bytes);
3772         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3773
3774         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3775         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3776                 current_itr = low_latency;
3777
3778         switch (current_itr) {
3779         /* counts and packets in update_itr are dependent on these numbers */
3780         case lowest_latency:
3781                 new_itr = 56;  /* aka 70,000 ints/sec */
3782                 break;
3783         case low_latency:
3784                 new_itr = 196; /* aka 20,000 ints/sec */
3785                 break;
3786         case bulk_latency:
3787                 new_itr = 980; /* aka 4,000 ints/sec */
3788                 break;
3789         default:
3790                 break;
3791         }
3792
3793 set_itr_now:
3794         q_vector->rx_ring->total_bytes = 0;
3795         q_vector->rx_ring->total_packets = 0;
3796         q_vector->tx_ring->total_bytes = 0;
3797         q_vector->tx_ring->total_packets = 0;
3798
3799         if (new_itr != q_vector->itr_val) {
3800                 /* this attempts to bias the interrupt rate towards Bulk
3801                  * by adding intermediate steps when interrupt rate is
3802                  * increasing */
3803                 new_itr = new_itr > q_vector->itr_val ?
3804                              max((new_itr * q_vector->itr_val) /
3805                                  (new_itr + (q_vector->itr_val >> 2)),
3806                                  new_itr) :
3807                              new_itr;
3808                 /* Don't write the value here; it resets the adapter's
3809                  * internal timer, and causes us to delay far longer than
3810                  * we should between interrupts.  Instead, we write the ITR
3811                  * value at the beginning of the next interrupt so the timing
3812                  * ends up being correct.
3813                  */
3814                 q_vector->itr_val = new_itr;
3815                 q_vector->set_itr = 1;
3816         }
3817 }
3818
3819 #define IGB_TX_FLAGS_CSUM               0x00000001
3820 #define IGB_TX_FLAGS_VLAN               0x00000002
3821 #define IGB_TX_FLAGS_TSO                0x00000004
3822 #define IGB_TX_FLAGS_IPV4               0x00000008
3823 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3824 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3825 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3826
3827 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3828                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3829 {
3830         struct e1000_adv_tx_context_desc *context_desc;
3831         unsigned int i;
3832         int err;
3833         struct igb_buffer *buffer_info;
3834         u32 info = 0, tu_cmd = 0;
3835         u32 mss_l4len_idx;
3836         u8 l4len;
3837
3838         if (skb_header_cloned(skb)) {
3839                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3840                 if (err)
3841                         return err;
3842         }
3843
3844         l4len = tcp_hdrlen(skb);
3845         *hdr_len += l4len;
3846
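        /* seed the L4 checksum with the pseudo-header sum; the length fields
         * are zeroed because the hardware fills them in for each segment */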
3847         if (skb->protocol == htons(ETH_P_IP)) {
3848                 struct iphdr *iph = ip_hdr(skb);
3849                 iph->tot_len = 0;
3850                 iph->check = 0;
3851                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3852                                                          iph->daddr, 0,
3853                                                          IPPROTO_TCP,
3854                                                          0);
3855         } else if (skb_is_gso_v6(skb)) {
3856                 ipv6_hdr(skb)->payload_len = 0;
3857                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3858                                                        &ipv6_hdr(skb)->daddr,
3859                                                        0, IPPROTO_TCP, 0);
3860         }
3861
3862         i = tx_ring->next_to_use;
3863
3864         buffer_info = &tx_ring->buffer_info[i];
3865         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3866         /* VLAN MACLEN IPLEN */
3867         if (tx_flags & IGB_TX_FLAGS_VLAN)
3868                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3869         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3870         *hdr_len += skb_network_offset(skb);
3871         info |= skb_network_header_len(skb);
3872         *hdr_len += skb_network_header_len(skb);
3873         context_desc->vlan_macip_lens = cpu_to_le32(info);
3874
3875         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3876         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3877
3878         if (skb->protocol == htons(ETH_P_IP))
3879                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3880         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3881
3882         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3883
3884         /* MSS L4LEN IDX */
3885         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3886         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3887
3888         /* For 82575, context index must be unique per ring. */
3889         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3890                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3891
3892         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3893         context_desc->seqnum_seed = 0;
3894
3895         buffer_info->time_stamp = jiffies;
3896         buffer_info->next_to_watch = i;
3897         buffer_info->dma = 0;
3898         i++;
3899         if (i == tx_ring->count)
3900                 i = 0;
3901
3902         tx_ring->next_to_use = i;
3903
3904         return true;
3905 }
3906
3907 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3908                                    struct sk_buff *skb, u32 tx_flags)
3909 {
3910         struct e1000_adv_tx_context_desc *context_desc;
3911         struct device *dev = tx_ring->dev;
3912         struct igb_buffer *buffer_info;
3913         u32 info = 0, tu_cmd = 0;
3914         unsigned int i;
3915
3916         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3917             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3918                 i = tx_ring->next_to_use;
3919                 buffer_info = &tx_ring->buffer_info[i];
3920                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3921
3922                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3923                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3924
3925                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3926                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3927                         info |= skb_network_header_len(skb);
3928
3929                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3930
3931                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3932
3933                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3934                         __be16 protocol;
3935
3936                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3937                                 const struct vlan_ethhdr *vhdr =
3938                                           (const struct vlan_ethhdr*)skb->data;
3939
3940                                 protocol = vhdr->h_vlan_encapsulated_proto;
3941                         } else {
3942                                 protocol = skb->protocol;
3943                         }
3944
3945                         switch (protocol) {
3946                         case cpu_to_be16(ETH_P_IP):
3947                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3948                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3949                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3950                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3951                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3952                                 break;
3953                         case cpu_to_be16(ETH_P_IPV6):
3954                                 /* XXX what about other V6 headers?? */
3955                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3956                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3957                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3958                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3959                                 break;
3960                         default:
3961                                 if (unlikely(net_ratelimit()))
3962                                         dev_warn(dev,
3963                                             "partial checksum but proto=%x!\n",
3964                                             skb->protocol);
3965                                 break;
3966                         }
3967                 }
3968
3969                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3970                 context_desc->seqnum_seed = 0;
3971                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3972                         context_desc->mss_l4len_idx =
3973                                 cpu_to_le32(tx_ring->reg_idx << 4);
3974
3975                 buffer_info->time_stamp = jiffies;
3976                 buffer_info->next_to_watch = i;
3977                 buffer_info->dma = 0;
3978
3979                 i++;
3980                 if (i == tx_ring->count)
3981                         i = 0;
3982                 tx_ring->next_to_use = i;
3983
3984                 return true;
3985         }
3986         return false;
3987 }
3988
3989 #define IGB_MAX_TXD_PWR 16
3990 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3991
3992 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3993                                  unsigned int first)
3994 {
3995         struct igb_buffer *buffer_info;
3996         struct device *dev = tx_ring->dev;
3997         unsigned int hlen = skb_headlen(skb);
3998         unsigned int count = 0, i;
3999         unsigned int f;
4000         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4001
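        /* map the linear portion of the skb first, then each paged fragment */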
4002         i = tx_ring->next_to_use;
4003
4004         buffer_info = &tx_ring->buffer_info[i];
4005         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4006         buffer_info->length = hlen;
4007         /* set time_stamp *before* dma to help avoid a possible race */
4008         buffer_info->time_stamp = jiffies;
4009         buffer_info->next_to_watch = i;
4010         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4011                                           DMA_TO_DEVICE);
4012         if (dma_mapping_error(dev, buffer_info->dma))
4013                 goto dma_error;
4014
4015         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4016                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4017                 unsigned int len = frag->size;
4018
4019                 count++;
4020                 i++;
4021                 if (i == tx_ring->count)
4022                         i = 0;
4023
4024                 buffer_info = &tx_ring->buffer_info[i];
4025                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4026                 buffer_info->length = len;
4027                 buffer_info->time_stamp = jiffies;
4028                 buffer_info->next_to_watch = i;
4029                 buffer_info->mapped_as_page = true;
4030                 buffer_info->dma = dma_map_page(dev,
4031                                                 frag->page,
4032                                                 frag->page_offset,
4033                                                 len,
4034                                                 DMA_TO_DEVICE);
4035                 if (dma_mapping_error(dev, buffer_info->dma))
4036                         goto dma_error;
4037
4038         }
4039
4040         tx_ring->buffer_info[i].skb = skb;
4041         tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4042         /* multiply data chunks by size of headers */
4043         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4044         tx_ring->buffer_info[i].gso_segs = gso_segs;
4045         tx_ring->buffer_info[first].next_to_watch = i;
4046
4047         return ++count;
4048
4049 dma_error:
4050         dev_err(dev, "TX DMA map failed\n");
4051
4052         /* clear timestamp and dma mappings for failed buffer_info mapping */
4053         buffer_info->dma = 0;
4054         buffer_info->time_stamp = 0;
4055         buffer_info->length = 0;
4056         buffer_info->next_to_watch = 0;
4057         buffer_info->mapped_as_page = false;
4058
4059         /* clear timestamp and dma mappings for remaining portion of packet */
4060         while (count--) {
4061                 if (i == 0)
4062                         i = tx_ring->count;
4063                 i--;
4064                 buffer_info = &tx_ring->buffer_info[i];
4065                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4066         }
4067
4068         return 0;
4069 }
4070
4071 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4072                                     u32 tx_flags, int count, u32 paylen,
4073                                     u8 hdr_len)
4074 {
4075         union e1000_adv_tx_desc *tx_desc;
4076         struct igb_buffer *buffer_info;
4077         u32 olinfo_status = 0, cmd_type_len;
4078         unsigned int i = tx_ring->next_to_use;
4079
4080         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4081                         E1000_ADVTXD_DCMD_DEXT);
4082
4083         if (tx_flags & IGB_TX_FLAGS_VLAN)
4084                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4085
4086         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4087                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4088
4089         if (tx_flags & IGB_TX_FLAGS_TSO) {
4090                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4091
4092                 /* insert tcp checksum */
4093                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4094
4095                 /* insert ip checksum */
4096                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4097                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4098
4099         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4100                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4101         }
4102
4103         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4104             (tx_flags & (IGB_TX_FLAGS_CSUM |
4105                          IGB_TX_FLAGS_TSO |
4106                          IGB_TX_FLAGS_VLAN)))
4107                 olinfo_status |= tx_ring->reg_idx << 4;
4108
4109         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4110
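        /* write one advanced data descriptor per mapped buffer; all of them
         * share the same command and offload status fields */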
4111         do {
4112                 buffer_info = &tx_ring->buffer_info[i];
4113                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4114                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4115                 tx_desc->read.cmd_type_len =
4116                         cpu_to_le32(cmd_type_len | buffer_info->length);
4117                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4118                 count--;
4119                 i++;
4120                 if (i == tx_ring->count)
4121                         i = 0;
4122         } while (count > 0);
4123
4124         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4125         /* Force memory writes to complete before letting h/w
4126          * know there are new descriptors to fetch.  (Only
4127          * applicable for weak-ordered memory model archs,
4128          * such as IA-64). */
4129         wmb();
4130
4131         tx_ring->next_to_use = i;
4132         writel(i, tx_ring->tail);
4133         /* we need this if more than one processor can write to our tail
4134          * at a time; it synchronizes IO on IA64/Altix systems */
4135         mmiowb();
4136 }
4137
4138 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4139 {
4140         struct net_device *netdev = tx_ring->netdev;
4141
4142         netif_stop_subqueue(netdev, tx_ring->queue_index);
4143
4144         /* Herbert's original patch had:
4145          *  smp_mb__after_netif_stop_queue();
4146          * but since that doesn't exist yet, just open code it. */
4147         smp_mb();
4148
4149         /* We need to check again in case another CPU has just
4150          * made room available. */
4151         if (igb_desc_unused(tx_ring) < size)
4152                 return -EBUSY;
4153
4154         /* A reprieve! */
4155         netif_wake_subqueue(netdev, tx_ring->queue_index);
4156
4157         u64_stats_update_begin(&tx_ring->tx_syncp2);
4158         tx_ring->tx_stats.restart_queue2++;
4159         u64_stats_update_end(&tx_ring->tx_syncp2);
4160
4161         return 0;
4162 }
4163
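/**
 * igb_maybe_stop_tx - fast path wrapper around __igb_maybe_stop_tx
 * @tx_ring: ring to check
 * @size: number of descriptors the next send needs
 *
 * Returns 0 when enough descriptors are free, -EBUSY otherwise.
 **/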
4164 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4165 {
4166         if (igb_desc_unused(tx_ring) >= size)
4167                 return 0;
4168         return __igb_maybe_stop_tx(tx_ring, size);
4169 }
4170
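/**
 * igb_xmit_frame_ring_adv - prepare and send one skb on a given ring
 * @skb: packet to transmit
 * @tx_ring: ring to place the packet on
 *
 * Reserves descriptors, latches timestamp/VLAN flags, performs the TSO
 * or checksum context setup, maps the buffers and finally queues the
 * descriptors to hardware.  On a mapping failure the descriptor queue
 * is rewound and the skb is dropped.
 **/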
4171 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4172                                     struct igb_ring *tx_ring)
4173 {
4174         int tso = 0, count;
4175         u32 tx_flags = 0;
4176         u16 first;
4177         u8 hdr_len = 0;
4178
4179         /* need: 1 descriptor per page fragment,
4180          *       + 1 desc for skb->data,
4181          *       + 1 desc for the context descriptor,
4182          *       + 2 desc gap to keep tail from touching head,
4183          * otherwise try again next time */
4184         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4185                 /* this is a hard error */
4186                 return NETDEV_TX_BUSY;
4187         }
4188
4189         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4190                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4191                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4192         }
4193
4194         if (vlan_tx_tag_present(skb)) {
4195                 tx_flags |= IGB_TX_FLAGS_VLAN;
4196                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4197         }
4198
4199         if (skb->protocol == htons(ETH_P_IP))
4200                 tx_flags |= IGB_TX_FLAGS_IPV4;
4201
4202         first = tx_ring->next_to_use;
4203         if (skb_is_gso(skb)) {
4204                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4205
4206                 if (tso < 0) {
4207                         dev_kfree_skb_any(skb);
4208                         return NETDEV_TX_OK;
4209                 }
4210         }
4211
4212         if (tso)
4213                 tx_flags |= IGB_TX_FLAGS_TSO;
4214         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4215                  (skb->ip_summed == CHECKSUM_PARTIAL))
4216                 tx_flags |= IGB_TX_FLAGS_CSUM;
4217
4218         /*
4219          * count reflects descriptors mapped; if it is 0 a mapping error
4220          * has occurred and we need to rewind the descriptor queue
4221          */
4222         count = igb_tx_map_adv(tx_ring, skb, first);
4223         if (!count) {
4224                 dev_kfree_skb_any(skb);
4225                 tx_ring->buffer_info[first].time_stamp = 0;
4226                 tx_ring->next_to_use = first;
4227                 return NETDEV_TX_OK;
4228         }
4229
4230         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4231
4232         /* Make sure there is space in the ring for the next send. */
4233         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4234
4235         return NETDEV_TX_OK;
4236 }
4237
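/**
 * igb_xmit_frame_adv - ndo_start_xmit entry point
 * @skb: packet to transmit
 * @netdev: network interface device structure
 *
 * Drops the packet if the adapter is going down or the skb is empty,
 * otherwise selects a tx ring from the skb's queue mapping and hands
 * the skb to igb_xmit_frame_ring_adv().
 **/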
4238 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4239                                       struct net_device *netdev)
4240 {
4241         struct igb_adapter *adapter = netdev_priv(netdev);
4242         struct igb_ring *tx_ring;
4243         int r_idx = 0;
4244
4245         if (test_bit(__IGB_DOWN, &adapter->state)) {
4246                 dev_kfree_skb_any(skb);
4247                 return NETDEV_TX_OK;
4248         }
4249
4250         if (skb->len <= 0) {
4251                 dev_kfree_skb_any(skb);
4252                 return NETDEV_TX_OK;
4253         }
4254
4255         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4256         tx_ring = adapter->multi_tx_table[r_idx];
4257
4258         /* This goes back to the question of how to logically map a tx queue
4259          * to a flow.  Right now, performance is impacted slightly negatively
4260          * if using multiple tx queues.  If the stack breaks away from a
4261          * single qdisc implementation, we can look at this again. */
4262         return igb_xmit_frame_ring_adv(skb, tx_ring);
4263 }
4264
4265 /**
4266  * igb_tx_timeout - Respond to a Tx Hang
4267  * @netdev: network interface device structure
4268  **/
4269 static void igb_tx_timeout(struct net_device *netdev)
4270 {
4271         struct igb_adapter *adapter = netdev_priv(netdev);
4272         struct e1000_hw *hw = &adapter->hw;
4273
4274         /* Do the reset outside of interrupt context */
4275         adapter->tx_timeout_count++;
4276
4277         if (hw->mac.type == e1000_82580)
4278                 hw->dev_spec._82575.global_device_reset = true;
4279
4280         schedule_work(&adapter->reset_task);
4281         wr32(E1000_EICS,
4282              (adapter->eims_enable_mask & ~adapter->eims_other));
4283 }
4284
4285 static void igb_reset_task(struct work_struct *work)
4286 {
4287         struct igb_adapter *adapter;
4288         adapter = container_of(work, struct igb_adapter, reset_task);
4289
4290         igb_dump(adapter);
4291         netdev_err(adapter->netdev, "Reset adapter\n");
4292         igb_reinit_locked(adapter);
4293 }
4294
4295 /**
4296  * igb_get_stats64 - Get System Network Statistics
4297  * @netdev: network interface device structure
4298  * @stats: rtnl_link_stats64 pointer
4299  *
4300  **/
4301 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4302                                                  struct rtnl_link_stats64 *stats)
4303 {
4304         struct igb_adapter *adapter = netdev_priv(netdev);
4305
4306         spin_lock(&adapter->stats64_lock);
4307         igb_update_stats(adapter, &adapter->stats64);
4308         memcpy(stats, &adapter->stats64, sizeof(*stats));
4309         spin_unlock(&adapter->stats64_lock);
4310
4311         return stats;
4312 }
4313
4314 /**
4315  * igb_change_mtu - Change the Maximum Transfer Unit
4316  * @netdev: network interface device structure
4317  * @new_mtu: new value for maximum frame size
4318  *
4319  * Returns 0 on success, negative on failure
4320  **/
4321 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4322 {
4323         struct igb_adapter *adapter = netdev_priv(netdev);
4324         struct pci_dev *pdev = adapter->pdev;
4325         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4326         u32 rx_buffer_len, i;
4327
4328         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4329                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4330                 return -EINVAL;
4331         }
4332
4333         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4334                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4335                 return -EINVAL;
4336         }
4337
4338         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4339                 msleep(1);
4340
4341         /* igb_down has a dependency on max_frame_size */
4342         adapter->max_frame_size = max_frame;
4343
4344         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4345          * means we reserve 2 more, this pushes us to allocate from the next
4346          * larger slab size.
4347          * i.e. RXBUFFER_2048 --> size-4096 slab
4348          */
4349
4350         if (adapter->hw.mac.type == e1000_82580)
4351                 max_frame += IGB_TS_HDR_LEN;
4352
4353         if (max_frame <= IGB_RXBUFFER_1024)
4354                 rx_buffer_len = IGB_RXBUFFER_1024;
4355         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4356                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4357         else
4358                 rx_buffer_len = IGB_RXBUFFER_128;
4359
4360         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4361              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4362                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4363
4364         if ((adapter->hw.mac.type == e1000_82580) &&
4365             (rx_buffer_len == IGB_RXBUFFER_128))
4366                 rx_buffer_len += IGB_RXBUFFER_64;
4367
4368         if (netif_running(netdev))
4369                 igb_down(adapter);
4370
4371         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4372                  netdev->mtu, new_mtu);
4373         netdev->mtu = new_mtu;
4374
4375         for (i = 0; i < adapter->num_rx_queues; i++)
4376                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4377
4378         if (netif_running(netdev))
4379                 igb_up(adapter);
4380         else
4381                 igb_reset(adapter);
4382
4383         clear_bit(__IGB_RESETTING, &adapter->state);
4384
4385         return 0;
4386 }
4387
4388 /**
4389  * igb_update_stats - Update the board statistics counters
4390  * @adapter: board private structure
4391  **/
4392
4393 void igb_update_stats(struct igb_adapter *adapter,
4394                       struct rtnl_link_stats64 *net_stats)
4395 {
4396         struct e1000_hw *hw = &adapter->hw;
4397         struct pci_dev *pdev = adapter->pdev;
4398         u32 reg, mpc;
4399         u16 phy_tmp;
4400         int i;
4401         u64 bytes, packets;
4402         unsigned int start;
4403         u64 _bytes, _packets;
4404
4405 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4406
4407         /*
4408          * Prevent stats update while adapter is being reset, or if the pci
4409          * connection is down.
4410          */
4411         if (adapter->link_speed == 0)
4412                 return;
4413         if (pci_channel_offline(pdev))
4414                 return;
4415
4416         bytes = 0;
4417         packets = 0;
4418         for (i = 0; i < adapter->num_rx_queues; i++) {
4419                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4420                 struct igb_ring *ring = adapter->rx_ring[i];
4421
4422                 ring->rx_stats.drops += rqdpc_tmp;
4423                 net_stats->rx_fifo_errors += rqdpc_tmp;
4424
4425                 do {
4426                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4427                         _bytes = ring->rx_stats.bytes;
4428                         _packets = ring->rx_stats.packets;
4429                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4430                 bytes += _bytes;
4431                 packets += _packets;
4432         }
4433
4434         net_stats->rx_bytes = bytes;
4435         net_stats->rx_packets = packets;
4436
4437         bytes = 0;
4438         packets = 0;
4439         for (i = 0; i < adapter->num_tx_queues; i++) {
4440                 struct igb_ring *ring = adapter->tx_ring[i];
4441                 do {
4442                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4443                         _bytes = ring->tx_stats.bytes;
4444                         _packets = ring->tx_stats.packets;
4445                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4446                 bytes += _bytes;
4447                 packets += _packets;
4448         }
4449         net_stats->tx_bytes = bytes;
4450         net_stats->tx_packets = packets;
4451
4452         /* read stats registers */
4453         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4454         adapter->stats.gprc += rd32(E1000_GPRC);
4455         adapter->stats.gorc += rd32(E1000_GORCL);
4456         rd32(E1000_GORCH); /* clear GORCL */
4457         adapter->stats.bprc += rd32(E1000_BPRC);
4458         adapter->stats.mprc += rd32(E1000_MPRC);
4459         adapter->stats.roc += rd32(E1000_ROC);
4460
4461         adapter->stats.prc64 += rd32(E1000_PRC64);
4462         adapter->stats.prc127 += rd32(E1000_PRC127);
4463         adapter->stats.prc255 += rd32(E1000_PRC255);
4464         adapter->stats.prc511 += rd32(E1000_PRC511);
4465         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4466         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4467         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4468         adapter->stats.sec += rd32(E1000_SEC);
4469
4470         mpc = rd32(E1000_MPC);
4471         adapter->stats.mpc += mpc;
4472         net_stats->rx_fifo_errors += mpc;
4473         adapter->stats.scc += rd32(E1000_SCC);
4474         adapter->stats.ecol += rd32(E1000_ECOL);
4475         adapter->stats.mcc += rd32(E1000_MCC);
4476         adapter->stats.latecol += rd32(E1000_LATECOL);
4477         adapter->stats.dc += rd32(E1000_DC);
4478         adapter->stats.rlec += rd32(E1000_RLEC);
4479         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4480         adapter->stats.xontxc += rd32(E1000_XONTXC);
4481         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4482         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4483         adapter->stats.fcruc += rd32(E1000_FCRUC);
4484         adapter->stats.gptc += rd32(E1000_GPTC);
4485         adapter->stats.gotc += rd32(E1000_GOTCL);
4486         rd32(E1000_GOTCH); /* clear GOTCL */
4487         adapter->stats.rnbc += rd32(E1000_RNBC);
4488         adapter->stats.ruc += rd32(E1000_RUC);
4489         adapter->stats.rfc += rd32(E1000_RFC);
4490         adapter->stats.rjc += rd32(E1000_RJC);
4491         adapter->stats.tor += rd32(E1000_TORH);
4492         adapter->stats.tot += rd32(E1000_TOTH);
4493         adapter->stats.tpr += rd32(E1000_TPR);
4494
4495         adapter->stats.ptc64 += rd32(E1000_PTC64);
4496         adapter->stats.ptc127 += rd32(E1000_PTC127);
4497         adapter->stats.ptc255 += rd32(E1000_PTC255);
4498         adapter->stats.ptc511 += rd32(E1000_PTC511);
4499         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4500         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4501
4502         adapter->stats.mptc += rd32(E1000_MPTC);
4503         adapter->stats.bptc += rd32(E1000_BPTC);
4504
4505         adapter->stats.tpt += rd32(E1000_TPT);
4506         adapter->stats.colc += rd32(E1000_COLC);
4507
4508         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4509         /* read internal phy specific stats */
4510         reg = rd32(E1000_CTRL_EXT);
4511         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4512                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4513                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4514         }
4515
4516         adapter->stats.tsctc += rd32(E1000_TSCTC);
4517         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4518
4519         adapter->stats.iac += rd32(E1000_IAC);
4520         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4521         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4522         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4523         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4524         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4525         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4526         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4527         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4528
4529         /* Fill out the OS statistics structure */
4530         net_stats->multicast = adapter->stats.mprc;
4531         net_stats->collisions = adapter->stats.colc;
4532
4533         /* Rx Errors */
4534
4535         /* RLEC on some newer hardware can be incorrect so build
4536          * our own version based on RUC and ROC */
4537         net_stats->rx_errors = adapter->stats.rxerrc +
4538                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4539                 adapter->stats.ruc + adapter->stats.roc +
4540                 adapter->stats.cexterr;
4541         net_stats->rx_length_errors = adapter->stats.ruc +
4542                                       adapter->stats.roc;
4543         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4544         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4545         net_stats->rx_missed_errors = adapter->stats.mpc;
4546
4547         /* Tx Errors */
4548         net_stats->tx_errors = adapter->stats.ecol +
4549                                adapter->stats.latecol;
4550         net_stats->tx_aborted_errors = adapter->stats.ecol;
4551         net_stats->tx_window_errors = adapter->stats.latecol;
4552         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4553
4554         /* Tx Dropped needs to be maintained elsewhere */
4555
4556         /* Phy Stats */
4557         if (hw->phy.media_type == e1000_media_type_copper) {
4558                 if ((adapter->link_speed == SPEED_1000) &&
4559                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4560                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4561                         adapter->phy_stats.idle_errors += phy_tmp;
4562                 }
4563         }
4564
4565         /* Management Stats */
4566         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4567         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4568         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4569
4570         /* OS2BMC Stats */
4571         reg = rd32(E1000_MANC);
4572         if (reg & E1000_MANC_EN_BMC2OS) {
4573                 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4574                 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4575                 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4576                 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4577         }
4578 }
4579
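/**
 * igb_msix_other - handle the "other causes" MSI-X vector
 * @irq: interrupt number
 * @data: pointer to our adapter
 *
 * Services the non-queue interrupt causes: device reset requests, DMA
 * out-of-sync events, VF mailbox messages and link status changes,
 * then re-arms the other-causes bit in EIMS.
 **/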
4580 static irqreturn_t igb_msix_other(int irq, void *data)
4581 {
4582         struct igb_adapter *adapter = data;
4583         struct e1000_hw *hw = &adapter->hw;
4584         u32 icr = rd32(E1000_ICR);
4585         /* reading ICR causes bit 31 of EICR to be cleared */
4586
4587         if (icr & E1000_ICR_DRSTA)
4588                 schedule_work(&adapter->reset_task);
4589
4590         if (icr & E1000_ICR_DOUTSYNC) {
4591                 /* HW is reporting DMA is out of sync */
4592                 adapter->stats.doosync++;
4593                 /* The DMA Out of Sync is also an indication of a spoof
4594                  * event in IOV mode. Check the Wrong VM Behavior register
4595                  * to see if it is really a spoof event. */
4596                 igb_check_wvbr(adapter);
4597         }
4598
4599         /* Check for a mailbox event */
4600         if (icr & E1000_ICR_VMMB)
4601                 igb_msg_task(adapter);
4602
4603         if (icr & E1000_ICR_LSC) {
4604                 hw->mac.get_link_status = 1;
4605                 /* guard against interrupt when we're going down */
4606                 if (!test_bit(__IGB_DOWN, &adapter->state))
4607                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4608         }
4609
4610         if (adapter->vfs_allocated_count)
4611                 wr32(E1000_IMS, E1000_IMS_LSC |
4612                                 E1000_IMS_VMMB |
4613                                 E1000_IMS_DOUTSYNC);
4614         else
4615                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4616         wr32(E1000_EIMS, adapter->eims_other);
4617
4618         return IRQ_HANDLED;
4619 }
4620
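/**
 * igb_write_itr - commit a new interrupt throttle rate to hardware
 * @q_vector: vector whose EITR register should be updated
 *
 * Writes the ITR value calculated from the previous interrupt to the
 * vector's EITR register; on 82575 the value is replicated into the
 * upper half of the register, on later parts a control bit is set in
 * the written value instead.
 **/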
4621 static void igb_write_itr(struct igb_q_vector *q_vector)
4622 {
4623         struct igb_adapter *adapter = q_vector->adapter;
4624         u32 itr_val = q_vector->itr_val & 0x7FFC;
4625
4626         if (!q_vector->set_itr)
4627                 return;
4628
4629         if (!itr_val)
4630                 itr_val = 0x4;
4631
4632         if (adapter->hw.mac.type == e1000_82575)
4633                 itr_val |= itr_val << 16;
4634         else
4635                 itr_val |= 0x8000000;
4636
4637         writel(itr_val, q_vector->itr_register);
4638         q_vector->set_itr = 0;
4639 }
4640
4641 static irqreturn_t igb_msix_ring(int irq, void *data)
4642 {
4643         struct igb_q_vector *q_vector = data;
4644
4645         /* Write the ITR value calculated from the previous interrupt. */
4646         igb_write_itr(q_vector);
4647
4648         napi_schedule(&q_vector->napi);
4649
4650         return IRQ_HANDLED;
4651 }
4652
4653 #ifdef CONFIG_IGB_DCA
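/**
 * igb_update_dca - retarget DCA hints at the CPU running this vector
 * @q_vector: vector whose rings should be retagged
 *
 * If the vector has migrated to a new CPU, rewrites the per-queue
 * DCA_TXCTRL/DCA_RXCTRL registers with the new CPU's DCA tag so that
 * descriptor (and, for rx, header and payload) DMA writes can target
 * that CPU's cache.
 **/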
4654 static void igb_update_dca(struct igb_q_vector *q_vector)
4655 {
4656         struct igb_adapter *adapter = q_vector->adapter;
4657         struct e1000_hw *hw = &adapter->hw;
4658         int cpu = get_cpu();
4659
4660         if (q_vector->cpu == cpu)
4661                 goto out_no_update;
4662
4663         if (q_vector->tx_ring) {
4664                 int q = q_vector->tx_ring->reg_idx;
4665                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4666                 if (hw->mac.type == e1000_82575) {
4667                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4668                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4669                 } else {
4670                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4671                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4672                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4673                 }
4674                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4675                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4676         }
4677         if (q_vector->rx_ring) {
4678                 int q = q_vector->rx_ring->reg_idx;
4679                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4680                 if (hw->mac.type == e1000_82575) {
4681                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4682                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4683                 } else {
4684                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4685                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4686                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4687                 }
4688                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4689                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4690                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4691                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4692         }
4693         q_vector->cpu = cpu;
4694 out_no_update:
4695         put_cpu();
4696 }
4697
4698 static void igb_setup_dca(struct igb_adapter *adapter)
4699 {
4700         struct e1000_hw *hw = &adapter->hw;
4701         int i;
4702
4703         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4704                 return;
4705
4706         /* Always use CB2 mode, difference is masked in the CB driver. */
4707         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4708
4709         for (i = 0; i < adapter->num_q_vectors; i++) {
4710                 adapter->q_vector[i]->cpu = -1;
4711                 igb_update_dca(adapter->q_vector[i]);
4712         }
4713 }
4714
4715 static int __igb_notify_dca(struct device *dev, void *data)
4716 {
4717         struct net_device *netdev = dev_get_drvdata(dev);
4718         struct igb_adapter *adapter = netdev_priv(netdev);
4719         struct pci_dev *pdev = adapter->pdev;
4720         struct e1000_hw *hw = &adapter->hw;
4721         unsigned long event = *(unsigned long *)data;
4722
4723         switch (event) {
4724         case DCA_PROVIDER_ADD:
4725                 /* if already enabled, don't do it again */
4726                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4727                         break;
4728                 if (dca_add_requester(dev) == 0) {
4729                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4730                         dev_info(&pdev->dev, "DCA enabled\n");
4731                         igb_setup_dca(adapter);
4732                         break;
4733                 }
4734                 /* Fall Through since DCA is disabled. */
4735         case DCA_PROVIDER_REMOVE:
4736                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4737                         /* without this a class_device is left
4738                          * hanging around in the sysfs model */
4739                         dca_remove_requester(dev);
4740                         dev_info(&pdev->dev, "DCA disabled\n");
4741                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4742                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4743                 }
4744                 break;
4745         }
4746
4747         return 0;
4748 }
4749
4750 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4751                           void *p)
4752 {
4753         int ret_val;
4754
4755         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4756                                          __igb_notify_dca);
4757
4758         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4759 }
4760 #endif /* CONFIG_IGB_DCA */
4761
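/**
 * igb_ping_all_vfs - notify all VFs of a PF state change
 * @adapter: board private structure
 *
 * Writes a control message into each VF mailbox, flagging it with CTS
 * when the VF has already completed its reset handshake.
 **/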
4762 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4763 {
4764         struct e1000_hw *hw = &adapter->hw;
4765         u32 ping;
4766         int i;
4767
4768         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4769                 ping = E1000_PF_CONTROL_MSG;
4770                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4771                         ping |= E1000_VT_MSGTYPE_CTS;
4772                 igb_write_mbx(hw, &ping, 1, i);
4773         }
4774 }
4775
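/**
 * igb_set_vf_promisc - handle a VF promiscuous mode request
 * @adapter: board private structure
 * @msgbuf: mailbox message from the VF
 * @vf: VF making the request
 *
 * Enables multicast promiscuous mode when requested; on clearing it,
 * replays the VF's stored multicast hashes into the MTA, or keeps
 * promiscuous mode if there are more than the 30 supported hashes.
 **/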
4776 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4777 {
4778         struct e1000_hw *hw = &adapter->hw;
4779         u32 vmolr = rd32(E1000_VMOLR(vf));
4780         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4781
4782         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4783                             IGB_VF_FLAG_MULTI_PROMISC);
4784         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4785
4786         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4787                 vmolr |= E1000_VMOLR_MPME;
4788                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4789                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4790         } else {
4791                 /*
4792                  * if we have hashes and we are clearing a multicast promisc
4793                  * flag we need to write the hashes to the MTA as this step
4794                  * was previously skipped
4795                  */
4796                 if (vf_data->num_vf_mc_hashes > 30) {
4797                         vmolr |= E1000_VMOLR_MPME;
4798                 } else if (vf_data->num_vf_mc_hashes) {
4799                         int j;
4800                         vmolr |= E1000_VMOLR_ROMPE;
4801                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4802                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4803                 }
4804         }
4805
4806         wr32(E1000_VMOLR(vf), vmolr);
4807
4808         /* if there are flags left unprocessed, they are likely not supported */
4809         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4810                 return -EINVAL;
4811
4812         return 0;
4813
4814 }
4815
4816 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4817                                   u32 *msgbuf, u32 vf)
4818 {
4819         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4820         u16 *hash_list = (u16 *)&msgbuf[1];
4821         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4822         int i;
4823
4824         /* salt away the number of multicast addresses assigned
4825          * to this VF for later use to restore when the PF multicast
4826          * list changes
4827          */
4828         vf_data->num_vf_mc_hashes = n;
4829
4830         /* only up to 30 hash values supported */
4831         if (n > 30)
4832                 n = 30;
4833
4834         /* store the hashes for later use */
4835         for (i = 0; i < n; i++)
4836                 vf_data->vf_mc_hashes[i] = hash_list[i];
4837
4838         /* Flush and reset the mta with the new values */
4839         igb_set_rx_mode(adapter->netdev);
4840
4841         return 0;
4842 }
4843
4844 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4845 {
4846         struct e1000_hw *hw = &adapter->hw;
4847         struct vf_data_storage *vf_data;
4848         int i, j;
4849
4850         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4851                 u32 vmolr = rd32(E1000_VMOLR(i));
4852                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4853
4854                 vf_data = &adapter->vf_data[i];
4855
4856                 if ((vf_data->num_vf_mc_hashes > 30) ||
4857                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4858                         vmolr |= E1000_VMOLR_MPME;
4859                 } else if (vf_data->num_vf_mc_hashes) {
4860                         vmolr |= E1000_VMOLR_ROMPE;
4861                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4862                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4863                 }
4864                 wr32(E1000_VMOLR(i), vmolr);
4865         }
4866 }
4867
4868 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4869 {
4870         struct e1000_hw *hw = &adapter->hw;
4871         u32 pool_mask, reg, vid;
4872         int i;
4873
4874         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4875
4876         /* Find the vlan filter for this id */
4877         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4878                 reg = rd32(E1000_VLVF(i));
4879
4880                 /* remove the vf from the pool */
4881                 reg &= ~pool_mask;
4882
4883                 /* if pool is empty then remove entry from vfta */
4884                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4885                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4886                         vid = reg & E1000_VLVF_VLANID_MASK;
4887                         reg = 0;
4888                         igb_vfta_set(hw, vid, false);
4889                 }
4890
4891                 wr32(E1000_VLVF(i), reg);
4892         }
4893
4894         adapter->vf_data[vf].vlans_enabled = 0;
4895 }
4896
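/**
 * igb_vlvf_set - add or remove a VF from a VLAN pool filter
 * @adapter: board private structure
 * @vid: VLAN id being configured
 * @add: true to add the VF to the filter, false to remove it
 * @vf: pool/VF number to update
 *
 * Looks up (or, on add, allocates) the VLVF entry for @vid, updates
 * its pool membership and mirrors the change into the VFTA.  The VF's
 * max receive packet size is grown or shrunk by 4 bytes as its first
 * vlan is added or its last vlan removed.
 **/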
4897 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4898 {
4899         struct e1000_hw *hw = &adapter->hw;
4900         u32 reg, i;
4901
4902         /* The vlvf table only exists on 82576 hardware and newer */
4903         if (hw->mac.type < e1000_82576)
4904                 return -1;
4905
4906         /* we only need to do this if VMDq is enabled */
4907         if (!adapter->vfs_allocated_count)
4908                 return -1;
4909
4910         /* Find the vlan filter for this id */
4911         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4912                 reg = rd32(E1000_VLVF(i));
4913                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4914                     vid == (reg & E1000_VLVF_VLANID_MASK))
4915                         break;
4916         }
4917
4918         if (add) {
4919                 if (i == E1000_VLVF_ARRAY_SIZE) {
4920                         /* Did not find a matching VLAN ID entry that was
4921                          * enabled.  Search for a free filter entry, i.e.
4922                          * one without the enable bit set
4923                          */
4924                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4925                                 reg = rd32(E1000_VLVF(i));
4926                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4927                                         break;
4928                         }
4929                 }
4930                 if (i < E1000_VLVF_ARRAY_SIZE) {
4931                         /* Found an enabled/available entry */
4932                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4933
4934                         /* if !enabled we need to set this up in vfta */
4935                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4936                                 /* add VID to filter table */
4937                                 igb_vfta_set(hw, vid, true);
4938                                 reg |= E1000_VLVF_VLANID_ENABLE;
4939                         }
4940                         reg &= ~E1000_VLVF_VLANID_MASK;
4941                         reg |= vid;
4942                         wr32(E1000_VLVF(i), reg);
4943
4944                         /* do not modify RLPML for PF devices */
4945                         if (vf >= adapter->vfs_allocated_count)
4946                                 return 0;
4947
4948                         if (!adapter->vf_data[vf].vlans_enabled) {
4949                                 u32 size;
4950                                 reg = rd32(E1000_VMOLR(vf));
4951                                 size = reg & E1000_VMOLR_RLPML_MASK;
4952                                 size += 4;
4953                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4954                                 reg |= size;
4955                                 wr32(E1000_VMOLR(vf), reg);
4956                         }
4957
4958                         adapter->vf_data[vf].vlans_enabled++;
4959                         return 0;
4960                 }
4961         } else {
4962                 if (i < E1000_VLVF_ARRAY_SIZE) {
4963                         /* remove vf from the pool */
4964                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4965                         /* if pool is empty then remove entry from vfta */
4966                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4967                                 reg = 0;
4968                                 igb_vfta_set(hw, vid, false);
4969                         }
4970                         wr32(E1000_VLVF(i), reg);
4971
4972                         /* do not modify RLPML for PF devices */
4973                         if (vf >= adapter->vfs_allocated_count)
4974                                 return 0;
4975
4976                         adapter->vf_data[vf].vlans_enabled--;
4977                         if (!adapter->vf_data[vf].vlans_enabled) {
4978                                 u32 size;
4979                                 reg = rd32(E1000_VMOLR(vf));
4980                                 size = reg & E1000_VMOLR_RLPML_MASK;
4981                                 size -= 4;
4982                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4983                                 reg |= size;
4984                                 wr32(E1000_VMOLR(vf), reg);
4985                         }
4986                 }
4987         }
4988         return 0;
4989 }
4990
4991 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4992 {
4993         struct e1000_hw *hw = &adapter->hw;
4994
4995         if (vid)
4996                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4997         else
4998                 wr32(E1000_VMVIR(vf), 0);
4999 }
5000
5001 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5002                                int vf, u16 vlan, u8 qos)
5003 {
5004         int err = 0;
5005         struct igb_adapter *adapter = netdev_priv(netdev);
5006
5007         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5008                 return -EINVAL;
5009         if (vlan || qos) {
5010                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5011                 if (err)
5012                         goto out;
5013                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5014                 igb_set_vmolr(adapter, vf, !vlan);
5015                 adapter->vf_data[vf].pf_vlan = vlan;
5016                 adapter->vf_data[vf].pf_qos = qos;
5017                 dev_info(&adapter->pdev->dev,
5018                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5019                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5020                         dev_warn(&adapter->pdev->dev,
5021                                  "The VF VLAN has been set,"
5022                                  " but the PF device is not up.\n");
5023                         dev_warn(&adapter->pdev->dev,
5024                                  "Bring the PF device up before"
5025                                  " attempting to use the VF device.\n");
5026                 }
5027         } else {
5028                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5029                                    false, vf);
5030                 igb_set_vmvir(adapter, vlan, vf);
5031                 igb_set_vmolr(adapter, vf, true);
5032                 adapter->vf_data[vf].pf_vlan = 0;
5033                 adapter->vf_data[vf].pf_qos = 0;
5034         }
5035 out:
5036         return err;
5037 }
5038
5039 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5040 {
5041         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5042         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5043
5044         return igb_vlvf_set(adapter, vid, add, vf);
5045 }
5046
5047 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5048 {
5049         /* clear flags - except flag that indicates PF has set the MAC */
5050         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5051         adapter->vf_data[vf].last_nack = jiffies;
5052
5053         /* reset offloads to defaults */
5054         igb_set_vmolr(adapter, vf, true);
5055
5056         /* reset vlans for device */
5057         igb_clear_vf_vfta(adapter, vf);
5058         if (adapter->vf_data[vf].pf_vlan)
5059                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5060                                     adapter->vf_data[vf].pf_vlan,
5061                                     adapter->vf_data[vf].pf_qos);
5062         else
5063                 igb_clear_vf_vfta(adapter, vf);
5064
5065         /* reset multicast table array for vf */
5066         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5067
5068         /* Flush and reset the mta with the new values */
5069         igb_set_rx_mode(adapter->netdev);
5070 }
5071
5072 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5073 {
5074         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5075
5076         /* generate a new mac address as we were hotplug removed/added */
5077         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5078                 random_ether_addr(vf_mac);
5079
5080         /* process remaining reset events */
5081         igb_vf_reset(adapter, vf);
5082 }
5083
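/**
 * igb_vf_reset_msg - respond to a VF reset request
 * @adapter: board private structure
 * @vf: VF requesting the reset
 *
 * Performs the common per-VF reset, programs the VF MAC address into
 * its dedicated RAR entry, enables transmit and receive for the VF and
 * then acks the reset over the mailbox with the MAC address attached.
 **/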
5084 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5085 {
5086         struct e1000_hw *hw = &adapter->hw;
5087         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5088         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5089         u32 reg, msgbuf[3];
5090         u8 *addr = (u8 *)(&msgbuf[1]);
5091
5092         /* process all the same items cleared in a function level reset */
5093         igb_vf_reset(adapter, vf);
5094
5095         /* set vf mac address */
5096         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5097
5098         /* enable transmit and receive for vf */
5099         reg = rd32(E1000_VFTE);
5100         wr32(E1000_VFTE, reg | (1 << vf));
5101         reg = rd32(E1000_VFRE);
5102         wr32(E1000_VFRE, reg | (1 << vf));
5103
5104         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5105
5106         /* reply to reset with ack and vf mac address */
5107         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5108         memcpy(addr, vf_mac, 6);
5109         igb_write_mbx(hw, msgbuf, 3, vf);
5110 }
5111
5112 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5113 {
5114         /*
5115          * The VF MAC Address is stored in a packed array of bytes
5116          * starting at the second 32 bit word of the msg array
5117          */
5118         unsigned char *addr = (unsigned char *)&msg[1];
5119         int err = -1;
5120
5121         if (is_valid_ether_addr(addr))
5122                 err = igb_set_vf_mac(adapter, vf, addr);
5123
5124         return err;
5125 }
5126
5127 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5128 {
5129         struct e1000_hw *hw = &adapter->hw;
5130         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5131         u32 msg = E1000_VT_MSGTYPE_NACK;
5132
5133         /* if device isn't clear to send it shouldn't be reading either */
5134         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5135             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5136                 igb_write_mbx(hw, &msg, 1, vf);
5137                 vf_data->last_nack = jiffies;
5138         }
5139 }
5140
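/**
 * igb_rcv_msg_from_vf - dispatch one mailbox message from a VF
 * @adapter: board private structure
 * @vf: VF the message came from
 *
 * Reads the message, rejects configuration requests from VFs that have
 * not completed a reset handshake, routes the request to the
 * appropriate handler and finally acks or nacks it back to the VF.
 **/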
5141 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5142 {
5143         struct pci_dev *pdev = adapter->pdev;
5144         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5145         struct e1000_hw *hw = &adapter->hw;
5146         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5147         s32 retval;
5148
5149         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5150
5151         if (retval) {
5152                 /* if receive failed, revoke the VF's CTS status and restart init */
5153                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5154                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5155                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5156                         return;
5157                 goto out;
5158         }
5159
5160         /* this is a message we already processed, do nothing */
5161         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5162                 return;
5163
5164         /*
5165          * until the vf completes a reset it should not be
5166          * allowed to start any configuration.
5167          */
5168
5169         if (msgbuf[0] == E1000_VF_RESET) {
5170                 igb_vf_reset_msg(adapter, vf);
5171                 return;
5172         }
5173
5174         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5175                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5176                         return;
5177                 retval = -1;
5178                 goto out;
5179         }
5180
5181         switch ((msgbuf[0] & 0xFFFF)) {
5182         case E1000_VF_SET_MAC_ADDR:
5183                 retval = -EINVAL;
5184                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5185                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5186                 else
5187                         dev_warn(&pdev->dev,
5188                                  "VF %d attempted to override administratively "
5189                                  "set MAC address\nReload the VF driver to "
5190                                  "resume operations\n", vf);
5191                 break;
5192         case E1000_VF_SET_PROMISC:
5193                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5194                 break;
5195         case E1000_VF_SET_MULTICAST:
5196                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5197                 break;
5198         case E1000_VF_SET_LPE:
5199                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5200                 break;
5201         case E1000_VF_SET_VLAN:
5202                 retval = -1;
5203                 if (vf_data->pf_vlan)
5204                         dev_warn(&pdev->dev,
5205                                  "VF %d attempted to override administratively "
5206                                  "set VLAN tag\nReload the VF driver to "
5207                                  "resume operations\n", vf);
5208                 else
5209                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5210                 break;
5211         default:
5212                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5213                 retval = -1;
5214                 break;
5215         }
5216
5217         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5218 out:
5219         /* notify the VF of the results of what it sent us */
5220         if (retval)
5221                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5222         else
5223                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5224
5225         igb_write_mbx(hw, msgbuf, 1, vf);
5226 }
5227
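/**
 * igb_msg_task - service pending mailbox events for all VFs
 * @adapter: board private structure
 *
 * Polls each VF's mailbox for reset requests, messages and acks and
 * handles whichever are pending.
 **/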
5228 static void igb_msg_task(struct igb_adapter *adapter)
5229 {
5230         struct e1000_hw *hw = &adapter->hw;
5231         u32 vf;
5232
5233         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5234                 /* process any reset requests */
5235                 if (!igb_check_for_rst(hw, vf))
5236                         igb_vf_reset_event(adapter, vf);
5237
5238                 /* process any messages pending */
5239                 if (!igb_check_for_msg(hw, vf))
5240                         igb_rcv_msg_from_vf(adapter, vf);
5241
5242                 /* process any acks */
5243                 if (!igb_check_for_ack(hw, vf))
5244                         igb_rcv_ack_from_vf(adapter, vf);
5245         }
5246 }
5247
5248 /**
5249  *  igb_set_uta - Set unicast filter table address
5250  *  @adapter: board private structure
5251  *
5252  *  The unicast table address is a register array of 32-bit registers.
5253  *  The table is meant to be used in a way similar to how the MTA is used;
5254  *  however, due to certain limitations in the hardware it is necessary to
5255  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5256  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5257  **/
5258 static void igb_set_uta(struct igb_adapter *adapter)
5259 {
5260         struct e1000_hw *hw = &adapter->hw;
5261         int i;
5262
5263         /* The UTA table only exists on 82576 hardware and newer */
5264         if (hw->mac.type < e1000_82576)
5265                 return;
5266
5267         /* we only need to do this if VMDq is enabled */
5268         if (!adapter->vfs_allocated_count)
5269                 return;
5270
5271         for (i = 0; i < hw->mac.uta_reg_count; i++)
5272                 array_wr32(E1000_UTA, i, ~0);
5273 }
5274
5275 /**
5276  * igb_intr_msi - Interrupt Handler
5277  * @irq: interrupt number
5278  * @data: pointer to a network interface device structure
5279  **/
5280 static irqreturn_t igb_intr_msi(int irq, void *data)
5281 {
5282         struct igb_adapter *adapter = data;
5283         struct igb_q_vector *q_vector = adapter->q_vector[0];
5284         struct e1000_hw *hw = &adapter->hw;
5285         /* read ICR disables interrupts using IAM */
5286         u32 icr = rd32(E1000_ICR);
5287
5288         igb_write_itr(q_vector);
5289
5290         if (icr & E1000_ICR_DRSTA)
5291                 schedule_work(&adapter->reset_task);
5292
5293         if (icr & E1000_ICR_DOUTSYNC) {
5294                 /* HW is reporting DMA is out of sync */
5295                 adapter->stats.doosync++;
5296         }
5297
5298         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5299                 hw->mac.get_link_status = 1;
5300                 if (!test_bit(__IGB_DOWN, &adapter->state))
5301                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5302         }
5303
5304         napi_schedule(&q_vector->napi);
5305
5306         return IRQ_HANDLED;
5307 }
5308
5309 /**
5310  * igb_intr - Legacy Interrupt Handler
5311  * @irq: interrupt number
5312  * @data: pointer to a network interface device structure
5313  **/
5314 static irqreturn_t igb_intr(int irq, void *data)
5315 {
5316         struct igb_adapter *adapter = data;
5317         struct igb_q_vector *q_vector = adapter->q_vector[0];
5318         struct e1000_hw *hw = &adapter->hw;
5319         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5320          * need for the IMC write */
5321         u32 icr = rd32(E1000_ICR);
5322         if (!icr)
5323                 return IRQ_NONE;  /* Not our interrupt */
5324
5325         igb_write_itr(q_vector);
5326
5327         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5328          * not set, then the adapter didn't send an interrupt */
5329         if (!(icr & E1000_ICR_INT_ASSERTED))
5330                 return IRQ_NONE;
5331
5332         if (icr & E1000_ICR_DRSTA)
5333                 schedule_work(&adapter->reset_task);
5334
5335         if (icr & E1000_ICR_DOUTSYNC) {
5336                 /* HW is reporting DMA is out of sync */
5337                 adapter->stats.doosync++;
5338         }
5339
5340         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5341                 hw->mac.get_link_status = 1;
5342                 /* guard against interrupt when we're going down */
5343                 if (!test_bit(__IGB_DOWN, &adapter->state))
5344                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5345         }
5346
5347         napi_schedule(&q_vector->napi);
5348
5349         return IRQ_HANDLED;
5350 }
5351
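/**
 * igb_ring_irq_enable - re-enable interrupts for a q_vector after polling
 * @q_vector: vector to re-arm
 *
 * Updates the adaptive ITR for the vector when dynamic moderation is
 * enabled, then re-enables its interrupt (the per-vector EIMS bit in
 * MSI-X mode, the global mask otherwise) unless the adapter is going
 * down.
 **/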
5352 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5353 {
5354         struct igb_adapter *adapter = q_vector->adapter;
5355         struct e1000_hw *hw = &adapter->hw;
5356
5357         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5358             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5359                 if (!adapter->msix_entries)
5360                         igb_set_itr(adapter);
5361                 else
5362                         igb_update_ring_itr(q_vector);
5363         }
5364
5365         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5366                 if (adapter->msix_entries)
5367                         wr32(E1000_EIMS, q_vector->eims_value);
5368                 else
5369                         igb_irq_enable(adapter);
5370         }
5371 }
5372
5373 /**
5374  * igb_poll - NAPI Rx polling callback
5375  * @napi: napi polling structure
5376  * @budget: count of how many packets we should handle
5377  **/
5378 static int igb_poll(struct napi_struct *napi, int budget)
5379 {
5380         struct igb_q_vector *q_vector = container_of(napi,
5381                                                      struct igb_q_vector,
5382                                                      napi);
5383         int tx_clean_complete = 1, work_done = 0;
5384
5385 #ifdef CONFIG_IGB_DCA
5386         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5387                 igb_update_dca(q_vector);
5388 #endif
5389         if (q_vector->tx_ring)
5390                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5391
5392         if (q_vector->rx_ring)
5393                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5394
5395         if (!tx_clean_complete)
5396                 work_done = budget;
5397
5398         /* If not enough Rx work done, exit the polling mode */
5399         if (work_done < budget) {
5400                 napi_complete(napi);
5401                 igb_ring_irq_enable(q_vector);
5402         }
5403
5404         return work_done;
5405 }
5406
5407 /**
5408  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5409  * @adapter: board private structure
5410  * @shhwtstamps: timestamp structure to update
5411  * @regval: unsigned 64bit system time value.
5412  *
5413  * We need to convert the system time value stored in the RX/TXSTMP registers
5414  * into a hwtstamp which can be used by the upper level timestamping functions
5415  */
5416 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5417                                    struct skb_shared_hwtstamps *shhwtstamps,
5418                                    u64 regval)
5419 {
5420         u64 ns;
5421
5422         /*
5423          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5424          * 24 to match clock shift we setup earlier.
5425          */
5426         if (adapter->hw.mac.type == e1000_82580)
5427                 regval <<= IGB_82580_TSYNC_SHIFT;
5428
5429         ns = timecounter_cyc2time(&adapter->clock, regval);
5430         timecompare_update(&adapter->compare, ns);
5431         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5432         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5433         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5434 }
5435
5436 /**
5437  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5438  * @q_vector: pointer to q_vector containing needed info
5439  * @buffer_info: pointer to igb_buffer structure
5440  *
5441  * If we were asked to do hardware stamping and such a time stamp is
5442  * available, then it must have been for this skb here because we
5443  * allow only one such packet into the queue.
5444  */
5445 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5446 {
5447         struct igb_adapter *adapter = q_vector->adapter;
5448         struct e1000_hw *hw = &adapter->hw;
5449         struct skb_shared_hwtstamps shhwtstamps;
5450         u64 regval;
5451
5452         /* if skb does not support hw timestamp or TX stamp not valid exit */
5453         if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5454             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5455                 return;
5456
5457         regval = rd32(E1000_TXSTMPL);
5458         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5459
5460         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5461         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5462 }
5463
5464 /**
5465  * igb_clean_tx_irq - Reclaim resources after transmit completes
5466  * @q_vector: pointer to q_vector containing needed info
5467  * returns true if ring is completely cleaned
5468  **/
5469 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5470 {
5471         struct igb_adapter *adapter = q_vector->adapter;
5472         struct igb_ring *tx_ring = q_vector->tx_ring;
5473         struct net_device *netdev = tx_ring->netdev;
5474         struct e1000_hw *hw = &adapter->hw;
5475         struct igb_buffer *buffer_info;
5476         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5477         unsigned int total_bytes = 0, total_packets = 0;
5478         unsigned int i, eop, count = 0;
5479         bool cleaned = false;
5480
5481         i = tx_ring->next_to_clean;
5482         eop = tx_ring->buffer_info[i].next_to_watch;
5483         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5484
5485         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5486                (count < tx_ring->count)) {
5487                 rmb();  /* read buffer_info after eop_desc status */
5488                 for (cleaned = false; !cleaned; count++) {
5489                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5490                         buffer_info = &tx_ring->buffer_info[i];
5491                         cleaned = (i == eop);
5492
5493                         if (buffer_info->skb) {
5494                                 total_bytes += buffer_info->bytecount;
5495                                 /* gso_segs is currently only valid for tcp */
5496                                 total_packets += buffer_info->gso_segs;
5497                                 igb_tx_hwtstamp(q_vector, buffer_info);
5498                         }
5499
5500                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5501                         tx_desc->wb.status = 0;
5502
5503                         i++;
5504                         if (i == tx_ring->count)
5505                                 i = 0;
5506                 }
5507                 eop = tx_ring->buffer_info[i].next_to_watch;
5508                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5509         }
5510
5511         tx_ring->next_to_clean = i;
5512
5513         if (unlikely(count &&
5514                      netif_carrier_ok(netdev) &&
5515                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5516                 /* Make sure that anybody stopping the queue after this
5517                  * sees the new next_to_clean.
5518                  */
5519                 smp_mb();
5520                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5521                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5522                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5523
5524                         u64_stats_update_begin(&tx_ring->tx_syncp);
5525                         tx_ring->tx_stats.restart_queue++;
5526                         u64_stats_update_end(&tx_ring->tx_syncp);
5527                 }
5528         }
5529
5530         if (tx_ring->detect_tx_hung) {
                /* Detect a transmit hang in hardware; this serializes the
                 * check with the clearing of time_stamp and movement of i */
5533                 tx_ring->detect_tx_hung = false;
5534                 if (tx_ring->buffer_info[i].time_stamp &&
5535                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5536                                (adapter->tx_timeout_factor * HZ)) &&
5537                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5538
5539                         /* detected Tx unit hang */
5540                         dev_err(tx_ring->dev,
5541                                 "Detected Tx Unit Hang\n"
5542                                 "  Tx Queue             <%d>\n"
5543                                 "  TDH                  <%x>\n"
5544                                 "  TDT                  <%x>\n"
5545                                 "  next_to_use          <%x>\n"
5546                                 "  next_to_clean        <%x>\n"
5547                                 "buffer_info[next_to_clean]\n"
5548                                 "  time_stamp           <%lx>\n"
5549                                 "  next_to_watch        <%x>\n"
5550                                 "  jiffies              <%lx>\n"
5551                                 "  desc.status          <%x>\n",
5552                                 tx_ring->queue_index,
5553                                 readl(tx_ring->head),
5554                                 readl(tx_ring->tail),
5555                                 tx_ring->next_to_use,
5556                                 tx_ring->next_to_clean,
5557                                 tx_ring->buffer_info[eop].time_stamp,
5558                                 eop,
5559                                 jiffies,
5560                                 eop_desc->wb.status);
5561                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5562                 }
5563         }
5564         tx_ring->total_bytes += total_bytes;
5565         tx_ring->total_packets += total_packets;
5566         u64_stats_update_begin(&tx_ring->tx_syncp);
5567         tx_ring->tx_stats.bytes += total_bytes;
5568         tx_ring->tx_stats.packets += total_packets;
5569         u64_stats_update_end(&tx_ring->tx_syncp);
5570         return count < tx_ring->count;
5571 }
5572
5573 /**
5574  * igb_receive_skb - helper function to handle rx indications
5575  * @q_vector: structure containing interrupt and ring information
5576  * @skb: packet to send up
5577  * @vlan_tag: vlan tag for packet
5578  **/
5579 static void igb_receive_skb(struct igb_q_vector *q_vector,
5580                             struct sk_buff *skb,
5581                             u16 vlan_tag)
5582 {
5583         struct igb_adapter *adapter = q_vector->adapter;
5584
5585         if (vlan_tag && adapter->vlgrp)
5586                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5587                                  vlan_tag, skb);
5588         else
5589                 napi_gro_receive(&q_vector->napi, skb);
5590 }
5591
5592 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5593                                        u32 status_err, struct sk_buff *skb)
5594 {
5595         skb_checksum_none_assert(skb);
5596
        /* bail if the Ignore Checksum bit is set or rx checksum
         * offload has been disabled through ethtool */
5598         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5599              (status_err & E1000_RXD_STAT_IXSM))
5600                 return;
5601
5602         /* TCP/UDP checksum error bit is set */
5603         if (status_err &
5604             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
                /*
                 * work around errata with sctp packets where the TCPE aka
                 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
                 * packets: do not count those false hits as checksum
                 * errors, and let the stack verify the crc32c instead
                 */
                if (!((skb->len == 60) &&
                      (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))) {
                        u64_stats_update_begin(&ring->rx_syncp);
                        ring->rx_stats.csum_err++;
                        u64_stats_update_end(&ring->rx_syncp);
                }
5616                 /* let the stack verify checksum errors */
5617                 return;
5618         }
5619         /* It must be a TCP or UDP packet with a valid checksum */
5620         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5621                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5622
5623         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5624 }
5625
5626 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5627                                    struct sk_buff *skb)
5628 {
5629         struct igb_adapter *adapter = q_vector->adapter;
5630         struct e1000_hw *hw = &adapter->hw;
5631         u64 regval;
5632
5633         /*
5634          * If this bit is set, then the RX registers contain the time stamp. No
5635          * other packet will be time stamped until we read these registers, so
5636          * read the registers to make them available again. Because only one
5637          * packet can be time stamped at a time, we know that the register
5638          * values must belong to this one here and therefore we don't need to
5639          * compare any of the additional attributes stored for it.
5640          *
5641          * If nothing went wrong, then it should have a shared tx_flags that we
5642          * can turn into a skb_shared_hwtstamps.
5643          */
5644         if (staterr & E1000_RXDADV_STAT_TSIP) {
5645                 u32 *stamp = (u32 *)skb->data;
5646                 regval = le32_to_cpu(*(stamp + 2));
5647                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5648                 skb_pull(skb, IGB_TS_HDR_LEN);
5649         } else {
                if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5651                         return;
5652
5653                 regval = rd32(E1000_RXSTMPL);
5654                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5655         }
5656
5657         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5658 }
5659 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5660                                union e1000_adv_rx_desc *rx_desc)
5661 {
5662         /* HW will not DMA in data larger than the given buffer, even if it
5663          * parses the (NFS, of course) header to be larger.  In that case, it
5664          * fills the header buffer and spills the rest into the page.
5665          */
5666         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5667                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
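        /*
         * e.g. a 66 byte Ethernet+IP+TCP header split out by the hardware
         * yields hlen == 66 here; the remaining hdr_info bits encode the
         * parsed packet type and are masked off above.
         */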
5668         if (hlen > rx_ring->rx_buffer_len)
5669                 hlen = rx_ring->rx_buffer_len;
5670         return hlen;
5671 }
5672
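/*
 * Receive completion for the packet-split layout: hardware places the
 * parsed header in the small skb buffer and the payload in half-page
 * fragments; non-EOP descriptors are chained into a single skb.  A page
 * half is recycled only while we hold the sole reference to the page and
 * it is local to the current NUMA node.
 */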
5673 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5674                                  int *work_done, int budget)
5675 {
5676         struct igb_ring *rx_ring = q_vector->rx_ring;
5677         struct net_device *netdev = rx_ring->netdev;
5678         struct device *dev = rx_ring->dev;
        union e1000_adv_rx_desc *rx_desc, *next_rxd;
        struct igb_buffer *buffer_info, *next_buffer;
5681         struct sk_buff *skb;
5682         bool cleaned = false;
5683         int cleaned_count = 0;
5684         int current_node = numa_node_id();
5685         unsigned int total_bytes = 0, total_packets = 0;
5686         unsigned int i;
5687         u32 staterr;
5688         u16 length;
5689         u16 vlan_tag;
5690
5691         i = rx_ring->next_to_clean;
5692         buffer_info = &rx_ring->buffer_info[i];
5693         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5694         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5695
5696         while (staterr & E1000_RXD_STAT_DD) {
5697                 if (*work_done >= budget)
5698                         break;
5699                 (*work_done)++;
5700                 rmb(); /* read descriptor and rx_buffer_info after status DD */
5701
5702                 skb = buffer_info->skb;
5703                 prefetch(skb->data - NET_IP_ALIGN);
5704                 buffer_info->skb = NULL;
5705
5706                 i++;
5707                 if (i == rx_ring->count)
5708                         i = 0;
5709
5710                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5711                 prefetch(next_rxd);
5712                 next_buffer = &rx_ring->buffer_info[i];
5713
5714                 length = le16_to_cpu(rx_desc->wb.upper.length);
5715                 cleaned = true;
5716                 cleaned_count++;
5717
5718                 if (buffer_info->dma) {
5719                         dma_unmap_single(dev, buffer_info->dma,
5720                                          rx_ring->rx_buffer_len,
5721                                          DMA_FROM_DEVICE);
5722                         buffer_info->dma = 0;
5723                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5724                                 skb_put(skb, length);
5725                                 goto send_up;
5726                         }
5727                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5728                 }
5729
5730                 if (length) {
5731                         dma_unmap_page(dev, buffer_info->page_dma,
5732                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5733                         buffer_info->page_dma = 0;
5734
5735                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5736                                                 buffer_info->page,
5737                                                 buffer_info->page_offset,
5738                                                 length);
5739
5740                         if ((page_count(buffer_info->page) != 1) ||
5741                             (page_to_nid(buffer_info->page) != current_node))
5742                                 buffer_info->page = NULL;
5743                         else
5744                                 get_page(buffer_info->page);
5745
5746                         skb->len += length;
5747                         skb->data_len += length;
5748                         skb->truesize += length;
5749                 }
5750
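                /*
                 * Not end-of-packet: hand the accumulating skb to the next
                 * slot and take over that slot's spare skb/dma mapping so
                 * this descriptor can be refilled.
                 */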
5751                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5752                         buffer_info->skb = next_buffer->skb;
5753                         buffer_info->dma = next_buffer->dma;
5754                         next_buffer->skb = skb;
5755                         next_buffer->dma = 0;
5756                         goto next_desc;
5757                 }
5758 send_up:
5759                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5760                         dev_kfree_skb_irq(skb);
5761                         goto next_desc;
5762                 }
5763
5764                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5765                         igb_rx_hwtstamp(q_vector, staterr, skb);
5766                 total_bytes += skb->len;
5767                 total_packets++;
5768
5769                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5770
5771                 skb->protocol = eth_type_trans(skb, netdev);
5772                 skb_record_rx_queue(skb, rx_ring->queue_index);
5773
5774                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5775                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5776
5777                 igb_receive_skb(q_vector, skb, vlan_tag);
5778
5779 next_desc:
5780                 rx_desc->wb.upper.status_error = 0;
5781
5782                 /* return some buffers to hardware, one at a time is too slow */
5783                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5784                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5785                         cleaned_count = 0;
5786                 }
5787
5788                 /* use prefetched values */
5789                 rx_desc = next_rxd;
5790                 buffer_info = next_buffer;
5791                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5792         }
5793
5794         rx_ring->next_to_clean = i;
5795         cleaned_count = igb_desc_unused(rx_ring);
5796
5797         if (cleaned_count)
5798                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5799
5800         rx_ring->total_packets += total_packets;
5801         rx_ring->total_bytes += total_bytes;
5802         u64_stats_update_begin(&rx_ring->rx_syncp);
5803         rx_ring->rx_stats.packets += total_packets;
5804         rx_ring->rx_stats.bytes += total_bytes;
5805         u64_stats_update_end(&rx_ring->rx_syncp);
5806         return cleaned;
5807 }
5808
5809 /**
5810  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
 * @rx_ring: pointer to the ring whose rx buffers are replenished
 * @cleaned_count: number of descriptors to refill
5812  **/
5813 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5814 {
5815         struct net_device *netdev = rx_ring->netdev;
5816         union e1000_adv_rx_desc *rx_desc;
5817         struct igb_buffer *buffer_info;
5818         struct sk_buff *skb;
5819         unsigned int i;
5820         int bufsz;
5821
5822         i = rx_ring->next_to_use;
5823         buffer_info = &rx_ring->buffer_info[i];
5824
5825         bufsz = rx_ring->rx_buffer_len;
5826
5827         while (cleaned_count--) {
5828                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5829
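                /*
                 * In packet-split mode (rx_buffer_len < IGB_RXBUFFER_1024)
                 * the payload is DMA'd into half pages; page_offset flips
                 * between the two halves so one page backs two buffers.
                 */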
5830                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5831                         if (!buffer_info->page) {
5832                                 buffer_info->page = netdev_alloc_page(netdev);
5833                                 if (unlikely(!buffer_info->page)) {
5834                                         u64_stats_update_begin(&rx_ring->rx_syncp);
5835                                         rx_ring->rx_stats.alloc_failed++;
5836                                         u64_stats_update_end(&rx_ring->rx_syncp);
5837                                         goto no_buffers;
5838                                 }
5839                                 buffer_info->page_offset = 0;
5840                         } else {
5841                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5842                         }
5843                         buffer_info->page_dma =
5844                                 dma_map_page(rx_ring->dev, buffer_info->page,
5845                                              buffer_info->page_offset,
5846                                              PAGE_SIZE / 2,
5847                                              DMA_FROM_DEVICE);
5848                         if (dma_mapping_error(rx_ring->dev,
5849                                               buffer_info->page_dma)) {
5850                                 buffer_info->page_dma = 0;
5851                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5852                                 rx_ring->rx_stats.alloc_failed++;
5853                                 u64_stats_update_end(&rx_ring->rx_syncp);
5854                                 goto no_buffers;
5855                         }
5856                 }
5857
5858                 skb = buffer_info->skb;
5859                 if (!skb) {
5860                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5861                         if (unlikely(!skb)) {
5862                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5863                                 rx_ring->rx_stats.alloc_failed++;
5864                                 u64_stats_update_end(&rx_ring->rx_syncp);
5865                                 goto no_buffers;
5866                         }
5867
5868                         buffer_info->skb = skb;
5869                 }
5870                 if (!buffer_info->dma) {
5871                         buffer_info->dma = dma_map_single(rx_ring->dev,
5872                                                           skb->data,
5873                                                           bufsz,
5874                                                           DMA_FROM_DEVICE);
5875                         if (dma_mapping_error(rx_ring->dev,
5876                                               buffer_info->dma)) {
5877                                 buffer_info->dma = 0;
5878                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5879                                 rx_ring->rx_stats.alloc_failed++;
5880                                 u64_stats_update_end(&rx_ring->rx_syncp);
5881                                 goto no_buffers;
5882                         }
5883                 }
5884                 /* Refresh the desc even if buffer_addrs didn't change because
5885                  * each write-back erases this info. */
5886                 if (bufsz < IGB_RXBUFFER_1024) {
5887                         rx_desc->read.pkt_addr =
5888                              cpu_to_le64(buffer_info->page_dma);
5889                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5890                 } else {
5891                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5892                         rx_desc->read.hdr_addr = 0;
5893                 }
5894
5895                 i++;
5896                 if (i == rx_ring->count)
5897                         i = 0;
5898                 buffer_info = &rx_ring->buffer_info[i];
5899         }
5900
5901 no_buffers:
5902         if (rx_ring->next_to_use != i) {
5903                 rx_ring->next_to_use = i;
5904                 if (i == 0)
5905                         i = (rx_ring->count - 1);
5906                 else
5907                         i--;
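                /* i now points at the last initialized descriptor: writing
                 * that, rather than next_to_use, keeps tail from catching
                 * up with head when the ring is fully stocked */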
5908
5909                 /* Force memory writes to complete before letting h/w
5910                  * know there are new descriptors to fetch.  (Only
5911                  * applicable for weak-ordered memory model archs,
5912                  * such as IA-64). */
5913                 wmb();
5914                 writel(i, rx_ring->tail);
5915         }
5916 }
5917
/**
 * igb_mii_ioctl - handle MII register ioctls
 * @netdev: network interface device structure
 * @ifr: interface request containing the MII data
 * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
 **/
5924 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5925 {
5926         struct igb_adapter *adapter = netdev_priv(netdev);
5927         struct mii_ioctl_data *data = if_mii(ifr);
5928
5929         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5930                 return -EOPNOTSUPP;
5931
5932         switch (cmd) {
5933         case SIOCGMIIPHY:
5934                 data->phy_id = adapter->hw.phy.addr;
5935                 break;
5936         case SIOCGMIIREG:
5937                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5938                                      &data->val_out))
5939                         return -EIO;
5940                 break;
5941         case SIOCSMIIREG:
5942         default:
5943                 return -EOPNOTSUPP;
5944         }
5945         return 0;
5946 }
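
/*
 * Illustrative userspace sketch (not part of this driver) of how the MII
 * ioctls above are typically driven, e.g. to read the PHY's BMSR
 * (register 1):
 *
 *	struct ifreq ifr;
 *	struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	ioctl(sockfd, SIOCGMIIPHY, &ifr);	(fills mii->phy_id)
 *	mii->reg_num = MII_BMSR;
 *	ioctl(sockfd, SIOCGMIIREG, &ifr);	(value lands in mii->val_out)
 */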
5947
/**
 * igb_hwtstamp_ioctl - control hardware time stamping
 * @netdev: network interface device structure
 * @ifr: interface request containing the hwtstamp_config
 * @cmd: ioctl command (SIOCSHWTSTAMP)
 *
 * Outgoing time stamping can be enabled and disabled. Play nice and
 * disable it when requested, although it shouldn't cause any overhead
 * when no packet needs it. At most one packet in the queue may be
 * marked for time stamping, otherwise it would be impossible to tell
 * for sure to which packet the hardware time stamp belongs.
 *
 * Incoming time stamping has to be configured via the hardware
 * filters. Not all combinations are supported, in particular the event
 * type has to be specified. Matching the kind of event packet is
 * not supported, with the exception of "all V2 events regardless of
 * layer 2 or 4".
 **/
5967 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5968                               struct ifreq *ifr, int cmd)
5969 {
5970         struct igb_adapter *adapter = netdev_priv(netdev);
5971         struct e1000_hw *hw = &adapter->hw;
5972         struct hwtstamp_config config;
5973         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5974         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5975         u32 tsync_rx_cfg = 0;
5976         bool is_l4 = false;
5977         bool is_l2 = false;
5978         u32 regval;
5979
5980         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5981                 return -EFAULT;
5982
5983         /* reserved for future extensions */
5984         if (config.flags)
5985                 return -EINVAL;
5986
5987         switch (config.tx_type) {
5988         case HWTSTAMP_TX_OFF:
5989                 tsync_tx_ctl = 0;
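                /* fall through */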
5990         case HWTSTAMP_TX_ON:
5991                 break;
5992         default:
5993                 return -ERANGE;
5994         }
5995
5996         switch (config.rx_filter) {
5997         case HWTSTAMP_FILTER_NONE:
5998                 tsync_rx_ctl = 0;
5999                 break;
6000         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6001         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6002         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6003         case HWTSTAMP_FILTER_ALL:
                /*
                 * TSYNCRXCFG can only select a single message type, so it
                 * is not possible to time stamp both Sync and Delay_Req
                 * messages selectively => fall back to time stamping all
                 * packets
                 */
6009                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6010                 config.rx_filter = HWTSTAMP_FILTER_ALL;
6011                 break;
6012         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6013                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6014                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6015                 is_l4 = true;
6016                 break;
6017         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6018                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6019                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6020                 is_l4 = true;
6021                 break;
6022         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6023         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6024                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6025                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6026                 is_l2 = true;
6027                 is_l4 = true;
6028                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6029                 break;
6030         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6031         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6032                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6033                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6034                 is_l2 = true;
6035                 is_l4 = true;
6036                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6037                 break;
6038         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6039         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6040         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6041                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6042                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6043                 is_l2 = true;
6044                 break;
6045         default:
6046                 return -ERANGE;
6047         }
6048
6049         if (hw->mac.type == e1000_82575) {
                if (tsync_rx_ctl || tsync_tx_ctl)
6051                         return -EINVAL;
6052                 return 0;
6053         }
6054
6055         /*
6056          * Per-packet timestamping only works if all packets are
6057          * timestamped, so enable timestamping in all packets as
6058          * long as one rx filter was configured.
6059          */
6060         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6061                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6062                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6063         }
6064
6065         /* enable/disable TX */
6066         regval = rd32(E1000_TSYNCTXCTL);
6067         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6068         regval |= tsync_tx_ctl;
6069         wr32(E1000_TSYNCTXCTL, regval);
6070
6071         /* enable/disable RX */
6072         regval = rd32(E1000_TSYNCRXCTL);
6073         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6074         regval |= tsync_rx_ctl;
6075         wr32(E1000_TSYNCRXCTL, regval);
6076
6077         /* define which PTP packets are time stamped */
6078         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6079
6080         /* define ethertype filter for timestamped packets */
6081         if (is_l2)
6082                 wr32(E1000_ETQF(3),
6083                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6084                                  E1000_ETQF_1588 | /* enable timestamping */
6085                                  ETH_P_1588));     /* 1588 eth protocol type */
6086         else
6087                 wr32(E1000_ETQF(3), 0);
6088
6089 #define PTP_PORT 319
6090         /* L4 Queue Filter[3]: filter by destination port and protocol */
6091         if (is_l4) {
6092                 u32 ftqf = (IPPROTO_UDP /* UDP */
6093                         | E1000_FTQF_VF_BP /* VF not compared */
6094                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6095                         | E1000_FTQF_MASK); /* mask all inputs */
6096                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6097
6098                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6099                 wr32(E1000_IMIREXT(3),
6100                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6101                 if (hw->mac.type == e1000_82576) {
6102                         /* enable source port check */
6103                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6104                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6105                 }
6106                 wr32(E1000_FTQF(3), ftqf);
6107         } else {
6108                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6109         }
6110         wrfl();
6111
6112         adapter->hwtstamp_config = config;
6113
6114         /* clear TX/RX time stamp registers, just to be sure */
6115         regval = rd32(E1000_TXSTMPH);
6116         regval = rd32(E1000_RXSTMPH);
6117
6118         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6119                 -EFAULT : 0;
6120 }
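
/*
 * Illustrative userspace sketch (not part of this driver) of driving the
 * ioctl above; the chosen filter is just one possible configuration:
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *	};
 *	struct ifreq ifr;
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(sockfd, SIOCSHWTSTAMP, &ifr);
 *
 * On return cfg.rx_filter holds the filter actually applied, which may be
 * broader than requested (see the fallbacks above).
 */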
6121
/**
 * igb_ioctl - entry point for device-specific ioctls
 * @netdev: network interface device structure
 * @ifr: interface request data
 * @cmd: ioctl command
 **/
6128 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6129 {
6130         switch (cmd) {
6131         case SIOCGMIIPHY:
6132         case SIOCGMIIREG:
6133         case SIOCSMIIREG:
6134                 return igb_mii_ioctl(netdev, ifr, cmd);
6135         case SIOCSHWTSTAMP:
6136                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6137         default:
6138                 return -EOPNOTSUPP;
6139         }
6140 }
6141
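/*
 * The next two helpers let MAC-family code shared with other e1000
 * drivers access PCIe capability registers without knowing about struct
 * pci_dev.  A hypothetical call, using the standard register offsets
 * from linux/pci_regs.h:
 *
 *	u16 devctl;
 *	igb_read_pcie_cap_reg(hw, PCI_EXP_DEVCTL, &devctl);
 */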
6142 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6143 {
6144         struct igb_adapter *adapter = hw->back;
6145         u16 cap_offset;
6146
6147         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6148         if (!cap_offset)
6149                 return -E1000_ERR_CONFIG;
6150
6151         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6152
6153         return 0;
6154 }
6155
6156 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6157 {
6158         struct igb_adapter *adapter = hw->back;
6159         u16 cap_offset;
6160
6161         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6162         if (!cap_offset)
6163                 return -E1000_ERR_CONFIG;
6164
6165         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6166
6167         return 0;
6168 }
6169
6170 static void igb_vlan_rx_register(struct net_device *netdev,
6171                                  struct vlan_group *grp)
6172 {
6173         struct igb_adapter *adapter = netdev_priv(netdev);
6174         struct e1000_hw *hw = &adapter->hw;
6175         u32 ctrl, rctl;
6176
6177         igb_irq_disable(adapter);
6178         adapter->vlgrp = grp;
6179
6180         if (grp) {
6181                 /* enable VLAN tag insert/strip */
6182                 ctrl = rd32(E1000_CTRL);
6183                 ctrl |= E1000_CTRL_VME;
6184                 wr32(E1000_CTRL, ctrl);
6185
6186                 /* Disable CFI check */
6187                 rctl = rd32(E1000_RCTL);
6188                 rctl &= ~E1000_RCTL_CFIEN;
6189                 wr32(E1000_RCTL, rctl);
6190         } else {
6191                 /* disable VLAN tag insert/strip */
6192                 ctrl = rd32(E1000_CTRL);
6193                 ctrl &= ~E1000_CTRL_VME;
6194                 wr32(E1000_CTRL, ctrl);
6195         }
6196
6197         igb_rlpml_set(adapter);
6198
6199         if (!test_bit(__IGB_DOWN, &adapter->state))
6200                 igb_irq_enable(adapter);
6201 }
6202
6203 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6204 {
6205         struct igb_adapter *adapter = netdev_priv(netdev);
6206         struct e1000_hw *hw = &adapter->hw;
6207         int pf_id = adapter->vfs_allocated_count;
6208
6209         /* attempt to add filter to vlvf array */
6210         igb_vlvf_set(adapter, vid, true, pf_id);
6211
6212         /* add the filter since PF can receive vlans w/o entry in vlvf */
6213         igb_vfta_set(hw, vid, true);
6214 }
6215
6216 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6217 {
6218         struct igb_adapter *adapter = netdev_priv(netdev);
6219         struct e1000_hw *hw = &adapter->hw;
6220         int pf_id = adapter->vfs_allocated_count;
6221         s32 err;
6222
6223         igb_irq_disable(adapter);
6224         vlan_group_set_device(adapter->vlgrp, vid, NULL);
6225
6226         if (!test_bit(__IGB_DOWN, &adapter->state))
6227                 igb_irq_enable(adapter);
6228
6229         /* remove vlan from VLVF table array */
6230         err = igb_vlvf_set(adapter, vid, false, pf_id);
6231
6232         /* if vid was not present in VLVF just remove it from table */
6233         if (err)
6234                 igb_vfta_set(hw, vid, false);
6235 }
6236
6237 static void igb_restore_vlan(struct igb_adapter *adapter)
6238 {
6239         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6240
6241         if (adapter->vlgrp) {
6242                 u16 vid;
6243                 for (vid = 0; vid < VLAN_N_VID; vid++) {
6244                         if (!vlan_group_get_device(adapter->vlgrp, vid))
6245                                 continue;
6246                         igb_vlan_rx_add_vid(adapter->netdev, vid);
6247                 }
6248         }
6249 }
6250
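/*
 * spddplx packs an ethtool speed/duplex pair into a single value: the
 * SPEED_* constants are the speed in Mbps and DUPLEX_HALF/DUPLEX_FULL
 * are 0/1, so e.g. SPEED_100 + DUPLEX_FULL == 101 requests 100 Mbps
 * full duplex.
 */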
6251 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
6252 {
6253         struct pci_dev *pdev = adapter->pdev;
6254         struct e1000_mac_info *mac = &adapter->hw.mac;
6255
6256         mac->autoneg = 0;
6257
        /* Fiber NICs only allow 1000 Mbps full duplex */
6259         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6260                 spddplx != (SPEED_1000 + DUPLEX_FULL)) {
6261                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6262                 return -EINVAL;
6263         }
6264
6265         switch (spddplx) {
6266         case SPEED_10 + DUPLEX_HALF:
6267                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6268                 break;
6269         case SPEED_10 + DUPLEX_FULL:
6270                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6271                 break;
6272         case SPEED_100 + DUPLEX_HALF:
6273                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6274                 break;
6275         case SPEED_100 + DUPLEX_FULL:
6276                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6277                 break;
6278         case SPEED_1000 + DUPLEX_FULL:
6279                 mac->autoneg = 1;
6280                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6281                 break;
6282         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6283         default:
6284                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6285                 return -EINVAL;
6286         }
6287         return 0;
6288 }
6289
6290 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6291 {
6292         struct net_device *netdev = pci_get_drvdata(pdev);
6293         struct igb_adapter *adapter = netdev_priv(netdev);
6294         struct e1000_hw *hw = &adapter->hw;
6295         u32 ctrl, rctl, status;
6296         u32 wufc = adapter->wol;
6297 #ifdef CONFIG_PM
6298         int retval = 0;
6299 #endif
6300
6301         netif_device_detach(netdev);
6302
6303         if (netif_running(netdev))
6304                 igb_close(netdev);
6305
6306         igb_clear_interrupt_scheme(adapter);
6307
6308 #ifdef CONFIG_PM
6309         retval = pci_save_state(pdev);
6310         if (retval)
6311                 return retval;
6312 #endif
6313
6314         status = rd32(E1000_STATUS);
6315         if (status & E1000_STATUS_LU)
6316                 wufc &= ~E1000_WUFC_LNKC;
6317
6318         if (wufc) {
6319                 igb_setup_rctl(adapter);
6320                 igb_set_rx_mode(netdev);
6321
6322                 /* turn on all-multi mode if wake on multicast is enabled */
6323                 if (wufc & E1000_WUFC_MC) {
6324                         rctl = rd32(E1000_RCTL);
6325                         rctl |= E1000_RCTL_MPE;
6326                         wr32(E1000_RCTL, rctl);
6327                 }
6328
6329                 ctrl = rd32(E1000_CTRL);
6330                 /* advertise wake from D3Cold */
6331                 #define E1000_CTRL_ADVD3WUC 0x00100000
6332                 /* phy power management enable */
6333                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6334                 ctrl |= E1000_CTRL_ADVD3WUC;
6335                 wr32(E1000_CTRL, ctrl);
6336
6337                 /* Allow time for pending master requests to run */
6338                 igb_disable_pcie_master(hw);
6339
6340                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6341                 wr32(E1000_WUFC, wufc);
6342         } else {
6343                 wr32(E1000_WUC, 0);
6344                 wr32(E1000_WUFC, 0);
6345         }
6346
6347         *enable_wake = wufc || adapter->en_mng_pt;
6348         if (!*enable_wake)
6349                 igb_power_down_link(adapter);
6350         else
6351                 igb_power_up_link(adapter);
6352
6353         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6354          * would have already happened in close and is redundant. */
6355         igb_release_hw_control(adapter);
6356
6357         pci_disable_device(pdev);
6358
6359         return 0;
6360 }
6361
6362 #ifdef CONFIG_PM
6363 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6364 {
6365         int retval;
6366         bool wake;
6367
6368         retval = __igb_shutdown(pdev, &wake);
6369         if (retval)
6370                 return retval;
6371
6372         if (wake) {
6373                 pci_prepare_to_sleep(pdev);
6374         } else {
6375                 pci_wake_from_d3(pdev, false);
6376                 pci_set_power_state(pdev, PCI_D3hot);
6377         }
6378
6379         return 0;
6380 }
6381
6382 static int igb_resume(struct pci_dev *pdev)
6383 {
6384         struct net_device *netdev = pci_get_drvdata(pdev);
6385         struct igb_adapter *adapter = netdev_priv(netdev);
6386         struct e1000_hw *hw = &adapter->hw;
        int err;
6388
6389         pci_set_power_state(pdev, PCI_D0);
6390         pci_restore_state(pdev);
6391         pci_save_state(pdev);
6392
6393         err = pci_enable_device_mem(pdev);
6394         if (err) {
6395                 dev_err(&pdev->dev,
6396                         "igb: Cannot enable PCI device from suspend\n");
6397                 return err;
6398         }
6399         pci_set_master(pdev);
6400
6401         pci_enable_wake(pdev, PCI_D3hot, 0);
6402         pci_enable_wake(pdev, PCI_D3cold, 0);
6403
6404         if (igb_init_interrupt_scheme(adapter)) {
6405                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6406                 return -ENOMEM;
6407         }
6408
6409         igb_reset(adapter);
6410
6411         /* let the f/w know that the h/w is now under the control of the
6412          * driver. */
6413         igb_get_hw_control(adapter);
6414
6415         wr32(E1000_WUS, ~0);
6416
6417         if (netif_running(netdev)) {
6418                 err = igb_open(netdev);
6419                 if (err)
6420                         return err;
6421         }
6422
6423         netif_device_attach(netdev);
6424
6425         return 0;
6426 }
6427 #endif
6428
6429 static void igb_shutdown(struct pci_dev *pdev)
6430 {
6431         bool wake;
6432
6433         __igb_shutdown(pdev, &wake);
6434
6435         if (system_state == SYSTEM_POWER_OFF) {
6436                 pci_wake_from_d3(pdev, wake);
6437                 pci_set_power_state(pdev, PCI_D3hot);
6438         }
6439 }
6440
6441 #ifdef CONFIG_NET_POLL_CONTROLLER
6442 /*
6443  * Polling 'interrupt' - used by things like netconsole to send skbs
6444  * without having to re-enable interrupts. It's not called while
6445  * the interrupt routine is executing.
6446  */
6447 static void igb_netpoll(struct net_device *netdev)
6448 {
6449         struct igb_adapter *adapter = netdev_priv(netdev);
6450         struct e1000_hw *hw = &adapter->hw;
6451         int i;
6452
6453         if (!adapter->msix_entries) {
6454                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6455                 igb_irq_disable(adapter);
6456                 napi_schedule(&q_vector->napi);
6457                 return;
6458         }
6459
6460         for (i = 0; i < adapter->num_q_vectors; i++) {
6461                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6462                 wr32(E1000_EIMC, q_vector->eims_value);
6463                 napi_schedule(&q_vector->napi);
6464         }
6465 }
6466 #endif /* CONFIG_NET_POLL_CONTROLLER */
6467
6468 /**
6469  * igb_io_error_detected - called when PCI error is detected
6470  * @pdev: Pointer to PCI device
6471  * @state: The current pci connection state
6472  *
6473  * This function is called after a PCI bus error affecting
6474  * this device has been detected.
6475  */
6476 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6477                                               pci_channel_state_t state)
6478 {
6479         struct net_device *netdev = pci_get_drvdata(pdev);
6480         struct igb_adapter *adapter = netdev_priv(netdev);
6481
6482         netif_device_detach(netdev);
6483
6484         if (state == pci_channel_io_perm_failure)
6485                 return PCI_ERS_RESULT_DISCONNECT;
6486
6487         if (netif_running(netdev))
6488                 igb_down(adapter);
6489         pci_disable_device(pdev);
6490
        /* Request a slot reset. */
6492         return PCI_ERS_RESULT_NEED_RESET;
6493 }
6494
6495 /**
6496  * igb_io_slot_reset - called after the pci bus has been reset.
6497  * @pdev: Pointer to PCI device
6498  *
6499  * Restart the card from scratch, as if from a cold-boot. Implementation
6500  * resembles the first-half of the igb_resume routine.
6501  */
6502 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6503 {
6504         struct net_device *netdev = pci_get_drvdata(pdev);
6505         struct igb_adapter *adapter = netdev_priv(netdev);
6506         struct e1000_hw *hw = &adapter->hw;
6507         pci_ers_result_t result;
6508         int err;
6509
6510         if (pci_enable_device_mem(pdev)) {
6511                 dev_err(&pdev->dev,
6512                         "Cannot re-enable PCI device after reset.\n");
6513                 result = PCI_ERS_RESULT_DISCONNECT;
6514         } else {
6515                 pci_set_master(pdev);
6516                 pci_restore_state(pdev);
6517                 pci_save_state(pdev);
6518
6519                 pci_enable_wake(pdev, PCI_D3hot, 0);
6520                 pci_enable_wake(pdev, PCI_D3cold, 0);
6521
6522                 igb_reset(adapter);
6523                 wr32(E1000_WUS, ~0);
6524                 result = PCI_ERS_RESULT_RECOVERED;
6525         }
6526
6527         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6528         if (err) {
6529                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6530                         "failed 0x%0x\n", err);
6531                 /* non-fatal, continue */
6532         }
6533
6534         return result;
6535 }
6536
6537 /**
6538  * igb_io_resume - called when traffic can start flowing again.
6539  * @pdev: Pointer to PCI device
6540  *
6541  * This callback is called when the error recovery driver tells us that
 * it's OK to resume normal operation. Implementation resembles the
6543  * second-half of the igb_resume routine.
6544  */
6545 static void igb_io_resume(struct pci_dev *pdev)
6546 {
6547         struct net_device *netdev = pci_get_drvdata(pdev);
6548         struct igb_adapter *adapter = netdev_priv(netdev);
6549
6550         if (netif_running(netdev)) {
6551                 if (igb_up(adapter)) {
6552                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6553                         return;
6554                 }
6555         }
6556
6557         netif_device_attach(netdev);
6558
6559         /* let the f/w know that the h/w is now under the control of the
6560          * driver. */
6561         igb_get_hw_control(adapter);
6562 }
6563
6564 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6565                              u8 qsel)
6566 {
6567         u32 rar_low, rar_high;
6568         struct e1000_hw *hw = &adapter->hw;
6569
6570         /* HW expects these in little endian so we reverse the byte order
6571          * from network order (big endian) to little endian
6572          */
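        /* e.g. 00:1b:21:aa:bb:cc becomes rar_low = 0xaa211b00 and
         * rar_high = 0x0000ccbb before the valid bit is OR'd in */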
6573         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6574                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6575         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6576
6577         /* Indicate to hardware the Address is Valid. */
6578         rar_high |= E1000_RAH_AV;
6579
6580         if (hw->mac.type == e1000_82575)
6581                 rar_high |= E1000_RAH_POOL_1 * qsel;
6582         else
6583                 rar_high |= E1000_RAH_POOL_1 << qsel;
6584
6585         wr32(E1000_RAL(index), rar_low);
6586         wrfl();
6587         wr32(E1000_RAH(index), rar_high);
6588         wrfl();
6589 }
6590
6591 static int igb_set_vf_mac(struct igb_adapter *adapter,
6592                           int vf, unsigned char *mac_addr)
6593 {
6594         struct e1000_hw *hw = &adapter->hw;
        /* VF MAC addresses start at the end of the receive address
         * registers and move towards the first, so a collision should
         * not be possible */
6597         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6598
6599         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6600
6601         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6602
6603         return 0;
6604 }
6605
6606 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6607 {
6608         struct igb_adapter *adapter = netdev_priv(netdev);
6609         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6610                 return -EINVAL;
6611         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6612         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
        dev_info(&adapter->pdev->dev,
                 "Reload the VF driver to make this change effective.\n");
6615         if (test_bit(__IGB_DOWN, &adapter->state)) {
6616                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6617                          " but the PF device is not up.\n");
6618                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6619                          " attempting to use the VF device.\n");
6620         }
6621         return igb_set_vf_mac(adapter, vf, mac);
6622 }
6623
6624 static int igb_link_mbps(int internal_link_speed)
6625 {
6626         switch (internal_link_speed) {
6627         case SPEED_100:
6628                 return 100;
6629         case SPEED_1000:
6630                 return 1000;
6631         default:
6632                 return 0;
6633         }
6634 }
6635
6636 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6637                                   int link_speed)
6638 {
6639         int rf_dec, rf_int;
6640         u32 bcnrc_val;
6641
6642         if (tx_rate != 0) {
6643                 /* Calculate the rate factor values to set */
6644                 rf_int = link_speed / tx_rate;
6645                 rf_dec = (link_speed - (rf_int * tx_rate));
6646                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
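                /*
                 * e.g. link_speed 1000 with tx_rate 300 gives rf_int = 3
                 * and rf_dec = (100 << E1000_RTTBCNRC_RF_INT_SHIFT) / 300,
                 * an integer.fraction encoding of the divider 1000/300
                 */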
6647
6648                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6649                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6650                                E1000_RTTBCNRC_RF_INT_MASK);
6651                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6652         } else {
6653                 bcnrc_val = 0;
6654         }
6655
6656         wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6657         wr32(E1000_RTTBCNRC, bcnrc_val);
6658 }
6659
6660 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6661 {
6662         int actual_link_speed, i;
6663         bool reset_rate = false;
6664
6665         /* VF TX rate limit was not set or not supported */
6666         if ((adapter->vf_rate_link_speed == 0) ||
6667             (adapter->hw.mac.type != e1000_82576))
6668                 return;
6669
6670         actual_link_speed = igb_link_mbps(adapter->link_speed);
6671         if (actual_link_speed != adapter->vf_rate_link_speed) {
6672                 reset_rate = true;
6673                 adapter->vf_rate_link_speed = 0;
6674                 dev_info(&adapter->pdev->dev,
6675                          "Link speed has been changed. VF Transmit "
6676                          "rate is disabled\n");
6677         }
6678
6679         for (i = 0; i < adapter->vfs_allocated_count; i++) {
6680                 if (reset_rate)
6681                         adapter->vf_data[i].tx_rate = 0;
6682
6683                 igb_set_vf_rate_limit(&adapter->hw, i,
6684                                       adapter->vf_data[i].tx_rate,
6685                                       actual_link_speed);
6686         }
6687 }
6688
6689 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6690 {
6691         struct igb_adapter *adapter = netdev_priv(netdev);
6692         struct e1000_hw *hw = &adapter->hw;
6693         int actual_link_speed;
6694
6695         if (hw->mac.type != e1000_82576)
6696                 return -EOPNOTSUPP;
6697
6698         actual_link_speed = igb_link_mbps(adapter->link_speed);
6699         if ((vf >= adapter->vfs_allocated_count) ||
6700             (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6701             (tx_rate < 0) || (tx_rate > actual_link_speed))
6702                 return -EINVAL;
6703
6704         adapter->vf_rate_link_speed = actual_link_speed;
6705         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6706         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6707
6708         return 0;
6709 }
6710
6711 static int igb_ndo_get_vf_config(struct net_device *netdev,
6712                                  int vf, struct ifla_vf_info *ivi)
6713 {
6714         struct igb_adapter *adapter = netdev_priv(netdev);
6715         if (vf >= adapter->vfs_allocated_count)
6716                 return -EINVAL;
6717         ivi->vf = vf;
6718         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6719         ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6720         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6721         ivi->qos = adapter->vf_data[vf].pf_qos;
6722         return 0;
6723 }
6724
6725 static void igb_vmm_control(struct igb_adapter *adapter)
6726 {
6727         struct e1000_hw *hw = &adapter->hw;
6728         u32 reg;
6729
6730         switch (hw->mac.type) {
6731         case e1000_82575:
6732         default:
6733                 /* replication is not supported for 82575 */
6734                 return;
6735         case e1000_82576:
6736                 /* notify HW that the MAC is adding vlan tags */
6737                 reg = rd32(E1000_DTXCTL);
6738                 reg |= E1000_DTXCTL_VLAN_ADDED;
6739                 wr32(E1000_DTXCTL, reg);
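                /* fall through */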
6740         case e1000_82580:
6741                 /* enable replication vlan tag stripping */
6742                 reg = rd32(E1000_RPLOLR);
6743                 reg |= E1000_RPLOLR_STRVLAN;
6744                 wr32(E1000_RPLOLR, reg);
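                /* fall through */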
6745         case e1000_i350:
6746                 /* none of the above registers are supported by i350 */
6747                 break;
6748         }
6749
6750         if (adapter->vfs_allocated_count) {
6751                 igb_vmdq_set_loopback_pf(hw, true);
6752                 igb_vmdq_set_replication_pf(hw, true);
6753                 igb_vmdq_set_anti_spoofing_pf(hw, true,
6754                                                 adapter->vfs_allocated_count);
6755         } else {
6756                 igb_vmdq_set_loopback_pf(hw, false);
6757                 igb_vmdq_set_replication_pf(hw, false);
6758         }
6759 }
6760
6761 /* igb_main.c */