[pandora-kernel.git] / drivers / net / igb / igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2011 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/bitops.h>
32 #include <linux/vmalloc.h>
33 #include <linux/pagemap.h>
34 #include <linux/netdevice.h>
35 #include <linux/ipv6.h>
36 #include <linux/slab.h>
37 #include <net/checksum.h>
38 #include <net/ip6_checksum.h>
39 #include <linux/net_tstamp.h>
40 #include <linux/mii.h>
41 #include <linux/ethtool.h>
42 #include <linux/if_vlan.h>
43 #include <linux/pci.h>
44 #include <linux/pci-aspm.h>
45 #include <linux/delay.h>
46 #include <linux/interrupt.h>
47 #include <linux/if_ether.h>
48 #include <linux/aer.h>
49 #include <linux/prefetch.h>
50
51 #ifdef CONFIG_IGB_DCA
52 #include <linux/dca.h>
53 #endif
54 #include "igb.h"
55
56 #define MAJ 3
57 #define MIN 0
58 #define BUILD 6
59 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
60 __stringify(BUILD) "-k"
61 char igb_driver_name[] = "igb";
62 char igb_driver_version[] = DRV_VERSION;
63 static const char igb_driver_string[] =
64                                 "Intel(R) Gigabit Ethernet Network Driver";
65 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
66
67 static const struct e1000_info *igb_info_tbl[] = {
68         [board_82575] = &e1000_82575_info,
69 };
70
71 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
87         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
88         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
89         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
90         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
91         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
92         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
93         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
94         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
95         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
96         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
97         /* required last entry */
98         {0, }
99 };
100
101 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
102
103 void igb_reset(struct igb_adapter *);
104 static int igb_setup_all_tx_resources(struct igb_adapter *);
105 static int igb_setup_all_rx_resources(struct igb_adapter *);
106 static void igb_free_all_tx_resources(struct igb_adapter *);
107 static void igb_free_all_rx_resources(struct igb_adapter *);
108 static void igb_setup_mrqc(struct igb_adapter *);
109 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
110 static void __devexit igb_remove(struct pci_dev *pdev);
111 static void igb_init_hw_timer(struct igb_adapter *adapter);
112 static int igb_sw_init(struct igb_adapter *);
113 static int igb_open(struct net_device *);
114 static int igb_close(struct net_device *);
115 static void igb_configure_tx(struct igb_adapter *);
116 static void igb_configure_rx(struct igb_adapter *);
117 static void igb_clean_all_tx_rings(struct igb_adapter *);
118 static void igb_clean_all_rx_rings(struct igb_adapter *);
119 static void igb_clean_tx_ring(struct igb_ring *);
120 static void igb_clean_rx_ring(struct igb_ring *);
121 static void igb_set_rx_mode(struct net_device *);
122 static void igb_update_phy_info(unsigned long);
123 static void igb_watchdog(unsigned long);
124 static void igb_watchdog_task(struct work_struct *);
125 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
126 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
127                                                  struct rtnl_link_stats64 *stats);
128 static int igb_change_mtu(struct net_device *, int);
129 static int igb_set_mac(struct net_device *, void *);
130 static void igb_set_uta(struct igb_adapter *adapter);
131 static irqreturn_t igb_intr(int irq, void *);
132 static irqreturn_t igb_intr_msi(int irq, void *);
133 static irqreturn_t igb_msix_other(int irq, void *);
134 static irqreturn_t igb_msix_ring(int irq, void *);
135 #ifdef CONFIG_IGB_DCA
136 static void igb_update_dca(struct igb_q_vector *);
137 static void igb_setup_dca(struct igb_adapter *);
138 #endif /* CONFIG_IGB_DCA */
139 static bool igb_clean_tx_irq(struct igb_q_vector *);
140 static int igb_poll(struct napi_struct *, int);
141 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
142 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
143 static void igb_tx_timeout(struct net_device *);
144 static void igb_reset_task(struct work_struct *);
145 static void igb_vlan_mode(struct net_device *netdev, u32 features);
146 static void igb_vlan_rx_add_vid(struct net_device *, u16);
147 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
148 static void igb_restore_vlan(struct igb_adapter *);
149 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
150 static void igb_ping_all_vfs(struct igb_adapter *);
151 static void igb_msg_task(struct igb_adapter *);
152 static void igb_vmm_control(struct igb_adapter *);
153 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
154 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
155 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
156 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
157                                int vf, u16 vlan, u8 qos);
158 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
159 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
160                                  struct ifla_vf_info *ivi);
161 static void igb_check_vf_rate_limit(struct igb_adapter *);
162
163 #ifdef CONFIG_PM
164 static int igb_suspend(struct pci_dev *, pm_message_t);
165 static int igb_resume(struct pci_dev *);
166 #endif
167 static void igb_shutdown(struct pci_dev *);
168 #ifdef CONFIG_IGB_DCA
169 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
170 static struct notifier_block dca_notifier = {
171         .notifier_call  = igb_notify_dca,
172         .next           = NULL,
173         .priority       = 0
174 };
175 #endif
176 #ifdef CONFIG_NET_POLL_CONTROLLER
177 /* for netdump / net console */
178 static void igb_netpoll(struct net_device *);
179 #endif
180 #ifdef CONFIG_PCI_IOV
181 static unsigned int max_vfs = 0;
182 module_param(max_vfs, uint, 0);
183 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
184                  "per physical function");
185 #endif /* CONFIG_PCI_IOV */
186
187 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
188                      pci_channel_state_t);
189 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
190 static void igb_io_resume(struct pci_dev *);
191
192 static struct pci_error_handlers igb_err_handler = {
193         .error_detected = igb_io_error_detected,
194         .slot_reset = igb_io_slot_reset,
195         .resume = igb_io_resume,
196 };
197
198
199 static struct pci_driver igb_driver = {
200         .name     = igb_driver_name,
201         .id_table = igb_pci_tbl,
202         .probe    = igb_probe,
203         .remove   = __devexit_p(igb_remove),
204 #ifdef CONFIG_PM
205         /* Power Management Hooks */
206         .suspend  = igb_suspend,
207         .resume   = igb_resume,
208 #endif
209         .shutdown = igb_shutdown,
210         .err_handler = &igb_err_handler
211 };
212
213 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
214 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
215 MODULE_LICENSE("GPL");
216 MODULE_VERSION(DRV_VERSION);
217
218 struct igb_reg_info {
219         u32 ofs;
220         char *name;
221 };
222
223 static const struct igb_reg_info igb_reg_info_tbl[] = {
224
225         /* General Registers */
226         {E1000_CTRL, "CTRL"},
227         {E1000_STATUS, "STATUS"},
228         {E1000_CTRL_EXT, "CTRL_EXT"},
229
230         /* Interrupt Registers */
231         {E1000_ICR, "ICR"},
232
233         /* RX Registers */
234         {E1000_RCTL, "RCTL"},
235         {E1000_RDLEN(0), "RDLEN"},
236         {E1000_RDH(0), "RDH"},
237         {E1000_RDT(0), "RDT"},
238         {E1000_RXDCTL(0), "RXDCTL"},
239         {E1000_RDBAL(0), "RDBAL"},
240         {E1000_RDBAH(0), "RDBAH"},
241
242         /* TX Registers */
243         {E1000_TCTL, "TCTL"},
244         {E1000_TDBAL(0), "TDBAL"},
245         {E1000_TDBAH(0), "TDBAH"},
246         {E1000_TDLEN(0), "TDLEN"},
247         {E1000_TDH(0), "TDH"},
248         {E1000_TDT(0), "TDT"},
249         {E1000_TXDCTL(0), "TXDCTL"},
250         {E1000_TDFH, "TDFH"},
251         {E1000_TDFT, "TDFT"},
252         {E1000_TDFHS, "TDFHS"},
253         {E1000_TDFPC, "TDFPC"},
254
255         /* List Terminator */
256         {}
257 };
258
259 /*
260  * igb_regdump - register printout routine
261  */
262 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
263 {
264         int n = 0;
265         char rname[16];
266         u32 regs[8];
267
268         switch (reginfo->ofs) {
269         case E1000_RDLEN(0):
270                 for (n = 0; n < 4; n++)
271                         regs[n] = rd32(E1000_RDLEN(n));
272                 break;
273         case E1000_RDH(0):
274                 for (n = 0; n < 4; n++)
275                         regs[n] = rd32(E1000_RDH(n));
276                 break;
277         case E1000_RDT(0):
278                 for (n = 0; n < 4; n++)
279                         regs[n] = rd32(E1000_RDT(n));
280                 break;
281         case E1000_RXDCTL(0):
282                 for (n = 0; n < 4; n++)
283                         regs[n] = rd32(E1000_RXDCTL(n));
284                 break;
285         case E1000_RDBAL(0):
286                 for (n = 0; n < 4; n++)
287                         regs[n] = rd32(E1000_RDBAL(n));
288                 break;
289         case E1000_RDBAH(0):
290                 for (n = 0; n < 4; n++)
291                         regs[n] = rd32(E1000_RDBAH(n));
292                 break;
293         case E1000_TDBAL(0):
294                 for (n = 0; n < 4; n++)
295                         regs[n] = rd32(E1000_TDBAL(n));
296                 break;
297         case E1000_TDBAH(0):
298                 for (n = 0; n < 4; n++)
299                         regs[n] = rd32(E1000_TDBAH(n));
300                 break;
301         case E1000_TDLEN(0):
302                 for (n = 0; n < 4; n++)
303                         regs[n] = rd32(E1000_TDLEN(n));
304                 break;
305         case E1000_TDH(0):
306                 for (n = 0; n < 4; n++)
307                         regs[n] = rd32(E1000_TDH(n));
308                 break;
309         case E1000_TDT(0):
310                 for (n = 0; n < 4; n++)
311                         regs[n] = rd32(E1000_TDT(n));
312                 break;
313         case E1000_TXDCTL(0):
314                 for (n = 0; n < 4; n++)
315                         regs[n] = rd32(E1000_TXDCTL(n));
316                 break;
317         default:
318                 printk(KERN_INFO "%-15s %08x\n",
319                         reginfo->name, rd32(reginfo->ofs));
320                 return;
321         }
322
323         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
324         printk(KERN_INFO "%-15s ", rname);
325         for (n = 0; n < 4; n++)
326                 printk(KERN_CONT "%08x ", regs[n]);
327         printk(KERN_CONT "\n");
328 }
329
330 /*
331  * igb_dump - Print registers, tx-rings and rx-rings
332  */
333 static void igb_dump(struct igb_adapter *adapter)
334 {
335         struct net_device *netdev = adapter->netdev;
336         struct e1000_hw *hw = &adapter->hw;
337         struct igb_reg_info *reginfo;
338         int n = 0;
339         struct igb_ring *tx_ring;
340         union e1000_adv_tx_desc *tx_desc;
341         struct my_u0 { u64 a; u64 b; } *u0;
342         struct igb_buffer *buffer_info;
343         struct igb_ring *rx_ring;
344         union e1000_adv_rx_desc *rx_desc;
345         u32 staterr;
346         int i = 0;
347
348         if (!netif_msg_hw(adapter))
349                 return;
350
351         /* Print netdevice Info */
352         if (netdev) {
353                 dev_info(&adapter->pdev->dev, "Net device Info\n");
354                 printk(KERN_INFO "Device Name     state            "
355                         "trans_start      last_rx\n");
356                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
357                 netdev->name,
358                 netdev->state,
359                 netdev->trans_start,
360                 netdev->last_rx);
361         }
362
363         /* Print Registers */
364         dev_info(&adapter->pdev->dev, "Register Dump\n");
365         printk(KERN_INFO " Register Name   Value\n");
366         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
367              reginfo->name; reginfo++) {
368                 igb_regdump(hw, reginfo);
369         }
370
371         /* Print TX Ring Summary */
372         if (!netdev || !netif_running(netdev))
373                 goto exit;
374
375         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
376         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
377                 " leng ntw timestamp\n");
378         for (n = 0; n < adapter->num_tx_queues; n++) {
379                 tx_ring = adapter->tx_ring[n];
380                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
381                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
382                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
383                            (u64)buffer_info->dma,
384                            buffer_info->length,
385                            buffer_info->next_to_watch,
386                            (u64)buffer_info->time_stamp);
387         }
388
389         /* Print TX Rings */
390         if (!netif_msg_tx_done(adapter))
391                 goto rx_ring_summary;
392
393         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
394
395         /* Transmit Descriptor Formats
396          *
397          * Advanced Transmit Descriptor
398          *   +--------------------------------------------------------------+
399          * 0 |         Buffer Address [63:0]                                |
400          *   +--------------------------------------------------------------+
401          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
402          *   +--------------------------------------------------------------+
403          *   63      46 45    40 39 38 36 35 32 31   24             15       0
404          */
405
406         for (n = 0; n < adapter->num_tx_queues; n++) {
407                 tx_ring = adapter->tx_ring[n];
408                 printk(KERN_INFO "------------------------------------\n");
409                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
410                 printk(KERN_INFO "------------------------------------\n");
411                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
412                         "[PlPOCIStDDM Ln] [bi->dma       ] "
413                         "leng  ntw timestamp        bi->skb\n");
414
415                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
416                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
417                         buffer_info = &tx_ring->buffer_info[i];
418                         u0 = (struct my_u0 *)tx_desc;
419                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
420                                 " %04X  %3X %016llX %p", i,
421                                 le64_to_cpu(u0->a),
422                                 le64_to_cpu(u0->b),
423                                 (u64)buffer_info->dma,
424                                 buffer_info->length,
425                                 buffer_info->next_to_watch,
426                                 (u64)buffer_info->time_stamp,
427                                 buffer_info->skb);
428                         if (i == tx_ring->next_to_use &&
429                                 i == tx_ring->next_to_clean)
430                                 printk(KERN_CONT " NTC/U\n");
431                         else if (i == tx_ring->next_to_use)
432                                 printk(KERN_CONT " NTU\n");
433                         else if (i == tx_ring->next_to_clean)
434                                 printk(KERN_CONT " NTC\n");
435                         else
436                                 printk(KERN_CONT "\n");
437
438                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
439                                 print_hex_dump(KERN_INFO, "",
440                                         DUMP_PREFIX_ADDRESS,
441                                         16, 1, phys_to_virt(buffer_info->dma),
442                                         buffer_info->length, true);
443                 }
444         }
445
446         /* Print RX Rings Summary */
447 rx_ring_summary:
448         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
449         printk(KERN_INFO "Queue [NTU] [NTC]\n");
450         for (n = 0; n < adapter->num_rx_queues; n++) {
451                 rx_ring = adapter->rx_ring[n];
452                 printk(KERN_INFO " %5d %5X %5X\n", n,
453                            rx_ring->next_to_use, rx_ring->next_to_clean);
454         }
455
456         /* Print RX Rings */
457         if (!netif_msg_rx_status(adapter))
458                 goto exit;
459
460         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
461
462         /* Advanced Receive Descriptor (Read) Format
463          *    63                                           1        0
464          *    +-----------------------------------------------------+
465          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
466          *    +----------------------------------------------+------+
467          *  8 |       Header Buffer Address [63:1]           |  DD  |
468          *    +-----------------------------------------------------+
469          *
470          *
471          * Advanced Receive Descriptor (Write-Back) Format
472          *
473          *   63       48 47    32 31  30      21 20 17 16   4 3     0
474          *   +------------------------------------------------------+
475          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
476          *   | Checksum   Ident  |   |           |    | Type | Type |
477          *   +------------------------------------------------------+
478          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
479          *   +------------------------------------------------------+
480          *   63       48 47    32 31            20 19               0
481          */
482
483         for (n = 0; n < adapter->num_rx_queues; n++) {
484                 rx_ring = adapter->rx_ring[n];
485                 printk(KERN_INFO "------------------------------------\n");
486                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
487                 printk(KERN_INFO "------------------------------------\n");
488                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
489                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
490                         "<-- Adv Rx Read format\n");
491                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
492                         "[vl er S cks ln] ---------------- [bi->skb] "
493                         "<-- Adv Rx Write-Back format\n");
494
495                 for (i = 0; i < rx_ring->count; i++) {
496                         buffer_info = &rx_ring->buffer_info[i];
497                         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
498                         u0 = (struct my_u0 *)rx_desc;
499                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
500                         if (staterr & E1000_RXD_STAT_DD) {
501                                 /* Descriptor Done */
502                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
503                                         "%016llX ---------------- %p", i,
504                                         le64_to_cpu(u0->a),
505                                         le64_to_cpu(u0->b),
506                                         buffer_info->skb);
507                         } else {
508                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
509                                         "%016llX %016llX %p", i,
510                                         le64_to_cpu(u0->a),
511                                         le64_to_cpu(u0->b),
512                                         (u64)buffer_info->dma,
513                                         buffer_info->skb);
514
515                                 if (netif_msg_pktdata(adapter)) {
516                                         print_hex_dump(KERN_INFO, "",
517                                                 DUMP_PREFIX_ADDRESS,
518                                                 16, 1,
519                                                 phys_to_virt(buffer_info->dma),
520                                                 rx_ring->rx_buffer_len, true);
521                                         if (rx_ring->rx_buffer_len
522                                                 < IGB_RXBUFFER_1024)
523                                                 print_hex_dump(KERN_INFO, "",
524                                                   DUMP_PREFIX_ADDRESS,
525                                                   16, 1,
526                                                   phys_to_virt(
527                                                     buffer_info->page_dma +
528                                                     buffer_info->page_offset),
529                                                   PAGE_SIZE/2, true);
530                                 }
531                         }
532
533                         if (i == rx_ring->next_to_use)
534                                 printk(KERN_CONT " NTU\n");
535                         else if (i == rx_ring->next_to_clean)
536                                 printk(KERN_CONT " NTC\n");
537                         else
538                                 printk(KERN_CONT "\n");
539
540                 }
541         }
542
543 exit:
544         return;
545 }
546
547
548 /**
549  * igb_read_clock - read raw cycle counter (to be used by time counter)
550  */
551 static cycle_t igb_read_clock(const struct cyclecounter *tc)
552 {
553         struct igb_adapter *adapter =
554                 container_of(tc, struct igb_adapter, cycles);
555         struct e1000_hw *hw = &adapter->hw;
556         u64 stamp = 0;
557         int shift = 0;
558
559         /*
560          * The timestamp latches on lowest register read. For the 82580
561          * the lowest register is SYSTIMR instead of SYSTIML.  However we never
562          * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
563          */
564         if (hw->mac.type == e1000_82580) {
565                 stamp = rd32(E1000_SYSTIMR) >> 8;
566                 shift = IGB_82580_TSYNC_SHIFT;
567         }
568
569         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
570         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
571         return stamp;
572 }
573
574 /**
575  * igb_get_hw_dev - return device
576  * used by hardware layer to print debugging information
577  **/
578 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
579 {
580         struct igb_adapter *adapter = hw->back;
581         return adapter->netdev;
582 }
583
584 /**
585  * igb_init_module - Driver Registration Routine
586  *
587  * igb_init_module is the first routine called when the driver is
588  * loaded. All it does is register with the PCI subsystem.
589  **/
590 static int __init igb_init_module(void)
591 {
592         int ret;
593         printk(KERN_INFO "%s - version %s\n",
594                igb_driver_string, igb_driver_version);
595
596         printk(KERN_INFO "%s\n", igb_copyright);
597
598 #ifdef CONFIG_IGB_DCA
599         dca_register_notify(&dca_notifier);
600 #endif
601         ret = pci_register_driver(&igb_driver);
602         return ret;
603 }
604
605 module_init(igb_init_module);
606
607 /**
608  * igb_exit_module - Driver Exit Cleanup Routine
609  *
610  * igb_exit_module is called just before the driver is removed
611  * from memory.
612  **/
613 static void __exit igb_exit_module(void)
614 {
615 #ifdef CONFIG_IGB_DCA
616         dca_unregister_notify(&dca_notifier);
617 #endif
618         pci_unregister_driver(&igb_driver);
619 }
620
621 module_exit(igb_exit_module);
622
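/*
 * Q_IDX_82576() maps a ring index to its 82576 hardware queue in the
 * interleaved order 0, 8, 1, 9, 2, 10, ... (e.g. Q_IDX_82576(0) = 0,
 * Q_IDX_82576(1) = 8, Q_IDX_82576(2) = 1), matching the per-pool queue
 * pairing described in igb_cache_ring_register() below.
 */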
623 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
624 /**
625  * igb_cache_ring_register - Descriptor ring to register mapping
626  * @adapter: board private structure to initialize
627  *
628  * Once we know the feature-set enabled for the device, we'll cache
629  * the register offset the descriptor ring is assigned to.
630  **/
631 static void igb_cache_ring_register(struct igb_adapter *adapter)
632 {
633         int i = 0, j = 0;
634         u32 rbase_offset = adapter->vfs_allocated_count;
635
636         switch (adapter->hw.mac.type) {
637         case e1000_82576:
638                 /* The queues are allocated for virtualization such that VF 0
639                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
640                  * In order to avoid collision we start at the first free queue
641                  * and continue consuming queues in the same sequence
642                  */
643                 if (adapter->vfs_allocated_count) {
644                         for (; i < adapter->rss_queues; i++)
645                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
646                                                                Q_IDX_82576(i);
647                 }
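                /* Fall through */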
648         case e1000_82575:
649         case e1000_82580:
650         case e1000_i350:
651         default:
652                 for (; i < adapter->num_rx_queues; i++)
653                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
654                 for (; j < adapter->num_tx_queues; j++)
655                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
656                 break;
657         }
658 }
659
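/**
 * igb_free_queues - Free memory for all rings
 * @adapter: board private structure to clear
 *
 * Undo igb_alloc_queues: free each ring structure and reset the
 * rx/tx queue counts to zero.
 **/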
660 static void igb_free_queues(struct igb_adapter *adapter)
661 {
662         int i;
663
664         for (i = 0; i < adapter->num_tx_queues; i++) {
665                 kfree(adapter->tx_ring[i]);
666                 adapter->tx_ring[i] = NULL;
667         }
668         for (i = 0; i < adapter->num_rx_queues; i++) {
669                 kfree(adapter->rx_ring[i]);
670                 adapter->rx_ring[i] = NULL;
671         }
672         adapter->num_rx_queues = 0;
673         adapter->num_tx_queues = 0;
674 }
675
676 /**
677  * igb_alloc_queues - Allocate memory for all rings
678  * @adapter: board private structure to initialize
679  *
680  * We allocate one ring per queue at run-time since we don't know the
681  * number of queues at compile-time.
682  **/
683 static int igb_alloc_queues(struct igb_adapter *adapter)
684 {
685         struct igb_ring *ring;
686         int i;
687
688         for (i = 0; i < adapter->num_tx_queues; i++) {
689                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
690                 if (!ring)
691                         goto err;
692                 ring->count = adapter->tx_ring_count;
693                 ring->queue_index = i;
694                 ring->dev = &adapter->pdev->dev;
695                 ring->netdev = adapter->netdev;
696                 /* For 82575, context index must be unique per ring. */
697                 if (adapter->hw.mac.type == e1000_82575)
698                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
699                 adapter->tx_ring[i] = ring;
700         }
701
702         for (i = 0; i < adapter->num_rx_queues; i++) {
703                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
704                 if (!ring)
705                         goto err;
706                 ring->count = adapter->rx_ring_count;
707                 ring->queue_index = i;
708                 ring->dev = &adapter->pdev->dev;
709                 ring->netdev = adapter->netdev;
710                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
711                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
712                 /* set flag indicating ring supports SCTP checksum offload */
713                 if (adapter->hw.mac.type >= e1000_82576)
714                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
715                 adapter->rx_ring[i] = ring;
716         }
717
718         igb_cache_ring_register(adapter);
719
720         return 0;
721
722 err:
723         igb_free_queues(adapter);
724
725         return -ENOMEM;
726 }
727
728 #define IGB_N0_QUEUE -1
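/**
 * igb_assign_vector - map a q_vector's rings to an MSI-X vector in hardware
 * @q_vector: queue vector to configure
 * @msix_vector: MSI-X vector number to assign
 *
 * Programs the MSIXBM (82575) or IVAR (82576 and later) registers so that
 * the rx/tx rings owned by this q_vector raise the given MSI-X vector, and
 * records the vector's bit in eims_value / eims_enable_mask.
 **/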
729 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
730 {
731         u32 msixbm = 0;
732         struct igb_adapter *adapter = q_vector->adapter;
733         struct e1000_hw *hw = &adapter->hw;
734         u32 ivar, index;
735         int rx_queue = IGB_N0_QUEUE;
736         int tx_queue = IGB_N0_QUEUE;
737
738         if (q_vector->rx_ring)
739                 rx_queue = q_vector->rx_ring->reg_idx;
740         if (q_vector->tx_ring)
741                 tx_queue = q_vector->tx_ring->reg_idx;
742
743         switch (hw->mac.type) {
744         case e1000_82575:
745                 /* The 82575 assigns vectors using a bitmask, which matches the
746                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
747                    or more queues to a vector, we write the appropriate bits
748                    into the MSIXBM register for that vector. */
749                 if (rx_queue > IGB_N0_QUEUE)
750                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
751                 if (tx_queue > IGB_N0_QUEUE)
752                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
753                 if (!adapter->msix_entries && msix_vector == 0)
754                         msixbm |= E1000_EIMS_OTHER;
755                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
756                 q_vector->eims_value = msixbm;
757                 break;
758         case e1000_82576:
759                 /* 82576 uses a table-based method for assigning vectors.
760                    Each queue has a single entry in the table to which we write
761                    a vector number along with a "valid" bit.  Sadly, the layout
762                    of the table is somewhat counterintuitive. */
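                /* For example, rx queue 2 uses the low byte of IVAR0[2] and
                   rx queue 10 the third byte of the same entry; tx queues 2
                   and 10 use the second and high bytes respectively. */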
763                 if (rx_queue > IGB_N0_QUEUE) {
764                         index = (rx_queue & 0x7);
765                         ivar = array_rd32(E1000_IVAR0, index);
766                         if (rx_queue < 8) {
767                                 /* vector goes into low byte of register */
768                                 ivar = ivar & 0xFFFFFF00;
769                                 ivar |= msix_vector | E1000_IVAR_VALID;
770                         } else {
771                                 /* vector goes into third byte of register */
772                                 ivar = ivar & 0xFF00FFFF;
773                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
774                         }
775                         array_wr32(E1000_IVAR0, index, ivar);
776                 }
777                 if (tx_queue > IGB_N0_QUEUE) {
778                         index = (tx_queue & 0x7);
779                         ivar = array_rd32(E1000_IVAR0, index);
780                         if (tx_queue < 8) {
781                                 /* vector goes into second byte of register */
782                                 ivar = ivar & 0xFFFF00FF;
783                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
784                         } else {
785                                 /* vector goes into high byte of register */
786                                 ivar = ivar & 0x00FFFFFF;
787                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
788                         }
789                         array_wr32(E1000_IVAR0, index, ivar);
790                 }
791                 q_vector->eims_value = 1 << msix_vector;
792                 break;
793         case e1000_82580:
794         case e1000_i350:
795                 /* The 82580 and i350 use the same table-based approach as the 82576,
796                    but with fewer IVAR entries, so adjacent queues share an entry. */
797                 if (rx_queue > IGB_N0_QUEUE) {
798                         index = (rx_queue >> 1);
799                         ivar = array_rd32(E1000_IVAR0, index);
800                         if (rx_queue & 0x1) {
801                                 /* vector goes into third byte of register */
802                                 ivar = ivar & 0xFF00FFFF;
803                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
804                         } else {
805                                 /* vector goes into low byte of register */
806                                 ivar = ivar & 0xFFFFFF00;
807                                 ivar |= msix_vector | E1000_IVAR_VALID;
808                         }
809                         array_wr32(E1000_IVAR0, index, ivar);
810                 }
811                 if (tx_queue > IGB_N0_QUEUE) {
812                         index = (tx_queue >> 1);
813                         ivar = array_rd32(E1000_IVAR0, index);
814                         if (tx_queue & 0x1) {
815                                 /* vector goes into high byte of register */
816                                 ivar = ivar & 0x00FFFFFF;
817                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
818                         } else {
819                                 /* vector goes into second byte of register */
820                                 ivar = ivar & 0xFFFF00FF;
821                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
822                         }
823                         array_wr32(E1000_IVAR0, index, ivar);
824                 }
825                 q_vector->eims_value = 1 << msix_vector;
826                 break;
827         default:
828                 BUG();
829                 break;
830         }
831
832         /* add q_vector eims value to global eims_enable_mask */
833         adapter->eims_enable_mask |= q_vector->eims_value;
834
835         /* configure q_vector to set itr on first interrupt */
836         q_vector->set_itr = 1;
837 }
838
839 /**
840  * igb_configure_msix - Configure MSI-X hardware
841  *
842  * igb_configure_msix sets up the hardware to properly
843  * generate MSI-X interrupts.
844  **/
845 static void igb_configure_msix(struct igb_adapter *adapter)
846 {
847         u32 tmp;
848         int i, vector = 0;
849         struct e1000_hw *hw = &adapter->hw;
850
851         adapter->eims_enable_mask = 0;
852
853         /* set vector for other causes, i.e. link changes */
854         switch (hw->mac.type) {
855         case e1000_82575:
856                 tmp = rd32(E1000_CTRL_EXT);
857                 /* enable MSI-X PBA support*/
858                 tmp |= E1000_CTRL_EXT_PBA_CLR;
859
860                 /* Auto-Mask interrupts upon ICR read. */
861                 tmp |= E1000_CTRL_EXT_EIAME;
862                 tmp |= E1000_CTRL_EXT_IRCA;
863
864                 wr32(E1000_CTRL_EXT, tmp);
865
866                 /* enable msix_other interrupt */
867                 array_wr32(E1000_MSIXBM(0), vector++,
868                                       E1000_EIMS_OTHER);
869                 adapter->eims_other = E1000_EIMS_OTHER;
870
871                 break;
872
873         case e1000_82576:
874         case e1000_82580:
875         case e1000_i350:
876                 /* Turn on MSI-X capability first, or our settings
877                  * won't stick.  And it will take days to debug. */
878                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
879                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
880                                 E1000_GPIE_NSICR);
881
882                 /* enable msix_other interrupt */
883                 adapter->eims_other = 1 << vector;
884                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
885
886                 wr32(E1000_IVAR_MISC, tmp);
887                 break;
888         default:
889                 /* do nothing, since nothing else supports MSI-X */
890                 break;
891         } /* switch (hw->mac.type) */
892
893         adapter->eims_enable_mask |= adapter->eims_other;
894
895         for (i = 0; i < adapter->num_q_vectors; i++)
896                 igb_assign_vector(adapter->q_vector[i], vector++);
897
898         wrfl();
899 }
900
901 /**
902  * igb_request_msix - Initialize MSI-X interrupts
903  *
904  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
905  * kernel.
906  **/
907 static int igb_request_msix(struct igb_adapter *adapter)
908 {
909         struct net_device *netdev = adapter->netdev;
910         struct e1000_hw *hw = &adapter->hw;
911         int i, err = 0, vector = 0;
912
913         err = request_irq(adapter->msix_entries[vector].vector,
914                           igb_msix_other, 0, netdev->name, adapter);
915         if (err)
916                 goto out;
917         vector++;
918
919         for (i = 0; i < adapter->num_q_vectors; i++) {
920                 struct igb_q_vector *q_vector = adapter->q_vector[i];
921
922                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
923
924                 if (q_vector->rx_ring && q_vector->tx_ring)
925                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
926                                 q_vector->rx_ring->queue_index);
927                 else if (q_vector->tx_ring)
928                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
929                                 q_vector->tx_ring->queue_index);
930                 else if (q_vector->rx_ring)
931                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
932                                 q_vector->rx_ring->queue_index);
933                 else
934                         sprintf(q_vector->name, "%s-unused", netdev->name);
935
936                 err = request_irq(adapter->msix_entries[vector].vector,
937                                   igb_msix_ring, 0, q_vector->name,
938                                   q_vector);
939                 if (err)
940                         goto out;
941                 vector++;
942         }
943
944         igb_configure_msix(adapter);
945         return 0;
946 out:
947         return err;
948 }
949
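/**
 * igb_reset_interrupt_capability - disable MSI-X or MSI
 * @adapter: board private structure
 *
 * Disables whichever of MSI-X or MSI is currently enabled and frees the
 * MSI-X entry table if one was allocated.
 **/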
950 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
951 {
952         if (adapter->msix_entries) {
953                 pci_disable_msix(adapter->pdev);
954                 kfree(adapter->msix_entries);
955                 adapter->msix_entries = NULL;
956         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
957                 pci_disable_msi(adapter->pdev);
958         }
959 }
960
961 /**
962  * igb_free_q_vectors - Free memory allocated for interrupt vectors
963  * @adapter: board private structure to initialize
964  *
965  * This function frees the memory allocated to the q_vectors.  In addition if
966  * NAPI is enabled it will delete any references to the NAPI struct prior
967  * to freeing the q_vector.
968  **/
969 static void igb_free_q_vectors(struct igb_adapter *adapter)
970 {
971         int v_idx;
972
973         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
974                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
975                 adapter->q_vector[v_idx] = NULL;
976                 if (!q_vector)
977                         continue;
978                 netif_napi_del(&q_vector->napi);
979                 kfree(q_vector);
980         }
981         adapter->num_q_vectors = 0;
982 }
983
984 /**
985  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
986  *
987  * This function resets the device so that it has 0 rx queues, tx queues, and
988  * MSI-X interrupts allocated.
989  */
990 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
991 {
992         igb_free_queues(adapter);
993         igb_free_q_vectors(adapter);
994         igb_reset_interrupt_capability(adapter);
995 }
996
997 /**
998  * igb_set_interrupt_capability - set MSI or MSI-X if supported
999  *
1000  * Attempt to configure interrupts using the best available
1001  * capabilities of the hardware and kernel.
1002  **/
1003 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1004 {
1005         int err;
1006         int numvecs, i;
1007
1008         /* Number of supported queues. */
1009         adapter->num_rx_queues = adapter->rss_queues;
1010         if (adapter->vfs_allocated_count)
1011                 adapter->num_tx_queues = 1;
1012         else
1013                 adapter->num_tx_queues = adapter->rss_queues;
1014
1015         /* start with one vector for every rx queue */
1016         numvecs = adapter->num_rx_queues;
1017
1018         /* if tx handler is separate add 1 for every tx queue */
1019         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1020                 numvecs += adapter->num_tx_queues;
1021
1022         /* store the number of vectors reserved for queues */
1023         adapter->num_q_vectors = numvecs;
1024
1025         /* add 1 vector for link status interrupts */
1026         numvecs++;
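        /* e.g. with rss_queues = 4, no VFs and queue pairing disabled this
         * requests 4 rx + 4 tx + 1 link/other = 9 MSI-X vectors */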
1027         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1028                                         GFP_KERNEL);
1029         if (!adapter->msix_entries)
1030                 goto msi_only;
1031
1032         for (i = 0; i < numvecs; i++)
1033                 adapter->msix_entries[i].entry = i;
1034
1035         err = pci_enable_msix(adapter->pdev,
1036                               adapter->msix_entries,
1037                               numvecs);
1038         if (err == 0)
1039                 goto out;
1040
1041         igb_reset_interrupt_capability(adapter);
1042
1043         /* If we can't do MSI-X, try MSI */
1044 msi_only:
1045 #ifdef CONFIG_PCI_IOV
1046         /* disable SR-IOV for non MSI-X configurations */
1047         if (adapter->vf_data) {
1048                 struct e1000_hw *hw = &adapter->hw;
1049                 /* disable iov and allow time for transactions to clear */
1050                 pci_disable_sriov(adapter->pdev);
1051                 msleep(500);
1052
1053                 kfree(adapter->vf_data);
1054                 adapter->vf_data = NULL;
1055                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1056                 msleep(100);
1057                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1058         }
1059 #endif
1060         adapter->vfs_allocated_count = 0;
1061         adapter->rss_queues = 1;
1062         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1063         adapter->num_rx_queues = 1;
1064         adapter->num_tx_queues = 1;
1065         adapter->num_q_vectors = 1;
1066         if (!pci_enable_msi(adapter->pdev))
1067                 adapter->flags |= IGB_FLAG_HAS_MSI;
1068 out:
1069         /* Notify the stack of the (possibly) reduced queue counts. */
1070         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1071         return netif_set_real_num_rx_queues(adapter->netdev,
1072                                             adapter->num_rx_queues);
1073 }
1074
1075 /**
1076  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1077  * @adapter: board private structure to initialize
1078  *
1079  * We allocate one q_vector per queue interrupt.  If allocation fails we
1080  * return -ENOMEM.
1081  **/
1082 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1083 {
1084         struct igb_q_vector *q_vector;
1085         struct e1000_hw *hw = &adapter->hw;
1086         int v_idx;
1087
1088         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1089                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1090                 if (!q_vector)
1091                         goto err_out;
1092                 q_vector->adapter = adapter;
1093                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1094                 q_vector->itr_val = IGB_START_ITR;
1095                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1096                 adapter->q_vector[v_idx] = q_vector;
1097         }
1098         return 0;
1099
1100 err_out:
1101         igb_free_q_vectors(adapter);
1102         return -ENOMEM;
1103 }
1104
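/**
 * igb_map_rx_ring_to_vector - attach an rx ring to a q_vector
 * @adapter: board private structure
 * @ring_idx: index of the rx ring to map
 * @v_idx: index of the q_vector that will service the ring
 **/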
1105 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1106                                       int ring_idx, int v_idx)
1107 {
1108         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1109
1110         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1111         q_vector->rx_ring->q_vector = q_vector;
1112         q_vector->itr_val = adapter->rx_itr_setting;
1113         if (q_vector->itr_val && q_vector->itr_val <= 3)
1114                 q_vector->itr_val = IGB_START_ITR;
1115 }
1116
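/**
 * igb_map_tx_ring_to_vector - attach a tx ring to a q_vector
 * @adapter: board private structure
 * @ring_idx: index of the tx ring to map
 * @v_idx: index of the q_vector that will service the ring
 **/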
1117 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1118                                       int ring_idx, int v_idx)
1119 {
1120         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1121
1122         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1123         q_vector->tx_ring->q_vector = q_vector;
1124         q_vector->itr_val = adapter->tx_itr_setting;
1125         if (q_vector->itr_val && q_vector->itr_val <= 3)
1126                 q_vector->itr_val = IGB_START_ITR;
1127 }
1128
1129 /**
1130  * igb_map_ring_to_vector - maps allocated queues to vectors
1131  *
1132  * This function maps the recently allocated queues to vectors.
1133  **/
1134 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1135 {
1136         int i;
1137         int v_idx = 0;
1138
1139         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1140             (adapter->num_q_vectors < adapter->num_tx_queues))
1141                 return -ENOMEM;
1142
1143         if (adapter->num_q_vectors >=
1144             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1145                 for (i = 0; i < adapter->num_rx_queues; i++)
1146                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1147                 for (i = 0; i < adapter->num_tx_queues; i++)
1148                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1149         } else {
1150                 for (i = 0; i < adapter->num_rx_queues; i++) {
1151                         if (i < adapter->num_tx_queues)
1152                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1153                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1154                 }
1155                 for (; i < adapter->num_tx_queues; i++)
1156                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1157         }
1158         return 0;
1159 }
1160
1161 /**
1162  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1163  *
1164  * This function initializes the interrupts and allocates all of the queues.
1165  **/
1166 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1167 {
1168         struct pci_dev *pdev = adapter->pdev;
1169         int err;
1170
1171         err = igb_set_interrupt_capability(adapter);
1172         if (err)
1173                 return err;
1174
1175         err = igb_alloc_q_vectors(adapter);
1176         if (err) {
1177                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1178                 goto err_alloc_q_vectors;
1179         }
1180
1181         err = igb_alloc_queues(adapter);
1182         if (err) {
1183                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1184                 goto err_alloc_queues;
1185         }
1186
1187         err = igb_map_ring_to_vector(adapter);
1188         if (err) {
1189                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1190                 goto err_map_queues;
1191         }
1192
1194         return 0;
1195 err_map_queues:
1196         igb_free_queues(adapter);
1197 err_alloc_queues:
1198         igb_free_q_vectors(adapter);
1199 err_alloc_q_vectors:
1200         igb_reset_interrupt_capability(adapter);
1201         return err;
1202 }
1203
1204 /**
1205  * igb_request_irq - initialize interrupts
1206  *
1207  * Attempts to configure interrupts using the best available
1208  * capabilities of the hardware and kernel.
1209  **/
1210 static int igb_request_irq(struct igb_adapter *adapter)
1211 {
1212         struct net_device *netdev = adapter->netdev;
1213         struct pci_dev *pdev = adapter->pdev;
1214         int err = 0;
1215
1216         if (adapter->msix_entries) {
1217                 err = igb_request_msix(adapter);
1218                 if (!err)
1219                         goto request_done;
1220                 /* fall back to MSI */
1221                 igb_clear_interrupt_scheme(adapter);
1222                 if (!pci_enable_msi(adapter->pdev))
1223                         adapter->flags |= IGB_FLAG_HAS_MSI;
1224                 igb_free_all_tx_resources(adapter);
1225                 igb_free_all_rx_resources(adapter);
1226                 adapter->num_tx_queues = 1;
1227                 adapter->num_rx_queues = 1;
1228                 adapter->num_q_vectors = 1;
1229                 err = igb_alloc_q_vectors(adapter);
1230                 if (err) {
1231                         dev_err(&pdev->dev,
1232                                 "Unable to allocate memory for vectors\n");
1233                         goto request_done;
1234                 }
1235                 err = igb_alloc_queues(adapter);
1236                 if (err) {
1237                         dev_err(&pdev->dev,
1238                                 "Unable to allocate memory for queues\n");
1239                         igb_free_q_vectors(adapter);
1240                         goto request_done;
1241                 }
1242                 igb_setup_all_tx_resources(adapter);
1243                 igb_setup_all_rx_resources(adapter);
1244         } else {
1245                 igb_assign_vector(adapter->q_vector[0], 0);
1246         }
1247
1248         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1249                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1250                                   netdev->name, adapter);
1251                 if (!err)
1252                         goto request_done;
1253
1254                 /* fall back to legacy interrupts */
1255                 igb_reset_interrupt_capability(adapter);
1256                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1257         }
1258
1259         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1260                           netdev->name, adapter);
1261
1262         if (err)
1263                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1264                         err);
1265
1266 request_done:
1267         return err;
1268 }
1269
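/**
 * igb_free_irq - release the interrupt(s) requested by igb_request_irq
 * @adapter: board private structure
 **/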
1270 static void igb_free_irq(struct igb_adapter *adapter)
1271 {
1272         if (adapter->msix_entries) {
1273                 int vector = 0, i;
1274
1275                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1276
1277                 for (i = 0; i < adapter->num_q_vectors; i++) {
1278                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1279                         free_irq(adapter->msix_entries[vector++].vector,
1280                                  q_vector);
1281                 }
1282         } else {
1283                 free_irq(adapter->pdev->irq, adapter);
1284         }
1285 }
1286
1287 /**
1288  * igb_irq_disable - Mask off interrupt generation on the NIC
1289  * @adapter: board private structure
1290  **/
1291 static void igb_irq_disable(struct igb_adapter *adapter)
1292 {
1293         struct e1000_hw *hw = &adapter->hw;
1294
1295         /*
1296          * We need to be careful when disabling interrupts.  The VFs are also
1297          * mapped into these registers, so clearing bits indiscriminately can
1298          * disturb the VF drivers; only clear the bits that this driver set.
1299          */
1300         if (adapter->msix_entries) {
1301                 u32 regval = rd32(E1000_EIAM);
1302                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1303                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1304                 regval = rd32(E1000_EIAC);
1305                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1306         }
1307
1308         wr32(E1000_IAM, 0);
1309         wr32(E1000_IMC, ~0);
1310         wrfl();
1311         if (adapter->msix_entries) {
1312                 int i;
1313                 for (i = 0; i < adapter->num_q_vectors; i++)
1314                         synchronize_irq(adapter->msix_entries[i].vector);
1315         } else {
1316                 synchronize_irq(adapter->pdev->irq);
1317         }
1318 }
1319
1320 /**
1321  * igb_irq_enable - Enable default interrupt generation settings
1322  * @adapter: board private structure
1323  **/
1324 static void igb_irq_enable(struct igb_adapter *adapter)
1325 {
1326         struct e1000_hw *hw = &adapter->hw;
1327
1328         if (adapter->msix_entries) {
1329                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1330                 u32 regval = rd32(E1000_EIAC);
1331                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1332                 regval = rd32(E1000_EIAM);
1333                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1334                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1335                 if (adapter->vfs_allocated_count) {
1336                         wr32(E1000_MBVFIMR, 0xFF);
1337                         ims |= E1000_IMS_VMMB;
1338                 }
1339                 if (adapter->hw.mac.type == e1000_82580)
1340                         ims |= E1000_IMS_DRSTA;
1341
1342                 wr32(E1000_IMS, ims);
1343         } else {
1344                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1345                                 E1000_IMS_DRSTA);
1346                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1347                                 E1000_IMS_DRSTA);
1348         }
1349 }
1350
1351 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1352 {
1353         struct e1000_hw *hw = &adapter->hw;
1354         u16 vid = adapter->hw.mng_cookie.vlan_id;
1355         u16 old_vid = adapter->mng_vlan_id;
1356
1357         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1358                 /* add VID to filter table */
1359                 igb_vfta_set(hw, vid, true);
1360                 adapter->mng_vlan_id = vid;
1361         } else {
1362                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1363         }
1364
1365         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1366             (vid != old_vid) &&
1367             !test_bit(old_vid, adapter->active_vlans)) {
1368                 /* remove VID from filter table */
1369                 igb_vfta_set(hw, old_vid, false);
1370         }
1371 }
1372
1373 /**
1374  * igb_release_hw_control - release control of the h/w to f/w
1375  * @adapter: address of board private structure
1376  *
1377  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1378  * For ASF and Pass Through versions of f/w this means that the
1379  * driver is no longer loaded.
1380  *
1381  **/
1382 static void igb_release_hw_control(struct igb_adapter *adapter)
1383 {
1384         struct e1000_hw *hw = &adapter->hw;
1385         u32 ctrl_ext;
1386
1387         /* Let firmware take over control of h/w */
1388         ctrl_ext = rd32(E1000_CTRL_EXT);
1389         wr32(E1000_CTRL_EXT,
1390                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1391 }
1392
1393 /**
1394  * igb_get_hw_control - get control of the h/w from f/w
1395  * @adapter: address of board private structure
1396  *
1397  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1398  * For ASF and Pass Through versions of f/w this means that
1399  * the driver is loaded.
1400  *
1401  **/
1402 static void igb_get_hw_control(struct igb_adapter *adapter)
1403 {
1404         struct e1000_hw *hw = &adapter->hw;
1405         u32 ctrl_ext;
1406
1407         /* Let firmware know the driver has taken over */
1408         ctrl_ext = rd32(E1000_CTRL_EXT);
1409         wr32(E1000_CTRL_EXT,
1410                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1411 }
1412
1413 /**
1414  * igb_configure - configure the hardware for RX and TX
1415  * @adapter: private board structure
1416  **/
1417 static void igb_configure(struct igb_adapter *adapter)
1418 {
1419         struct net_device *netdev = adapter->netdev;
1420         int i;
1421
1422         igb_get_hw_control(adapter);
1423         igb_set_rx_mode(netdev);
1424
1425         igb_restore_vlan(adapter);
1426
1427         igb_setup_tctl(adapter);
1428         igb_setup_mrqc(adapter);
1429         igb_setup_rctl(adapter);
1430
1431         igb_configure_tx(adapter);
1432         igb_configure_rx(adapter);
1433
1434         igb_rx_fifo_flush_82575(&adapter->hw);
1435
1436         /* call igb_desc_unused which always leaves
1437          * at least 1 descriptor unused to make sure
1438          * next_to_use != next_to_clean */
1439         for (i = 0; i < adapter->num_rx_queues; i++) {
1440                 struct igb_ring *ring = adapter->rx_ring[i];
1441                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1442         }
1443 }
1444
1445 /**
1446  * igb_power_up_link - Power up the phy/serdes link
1447  * @adapter: address of board private structure
1448  **/
1449 void igb_power_up_link(struct igb_adapter *adapter)
1450 {
1451         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1452                 igb_power_up_phy_copper(&adapter->hw);
1453         else
1454                 igb_power_up_serdes_link_82575(&adapter->hw);
1455 }
1456
1457 /**
1458  * igb_power_down_link - Power down the phy/serdes link
1459  * @adapter: address of board private structure
1460  */
1461 static void igb_power_down_link(struct igb_adapter *adapter)
1462 {
1463         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1464                 igb_power_down_phy_copper_82575(&adapter->hw);
1465         else
1466                 igb_shutdown_serdes_link_82575(&adapter->hw);
1467 }
1468
1469 /**
1470  * igb_up - Open the interface and prepare it to handle traffic
1471  * @adapter: board private structure
1472  **/
1473 int igb_up(struct igb_adapter *adapter)
1474 {
1475         struct e1000_hw *hw = &adapter->hw;
1476         int i;
1477
1478         /* hardware has been reset, we need to reload some things */
1479         igb_configure(adapter);
1480
1481         clear_bit(__IGB_DOWN, &adapter->state);
1482
1483         for (i = 0; i < adapter->num_q_vectors; i++) {
1484                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1485                 napi_enable(&q_vector->napi);
1486         }
1487         if (adapter->msix_entries)
1488                 igb_configure_msix(adapter);
1489         else
1490                 igb_assign_vector(adapter->q_vector[0], 0);
1491
1492         /* Clear any pending interrupts. */
1493         rd32(E1000_ICR);
1494         igb_irq_enable(adapter);
1495
1496         /* notify VFs that reset has been completed */
1497         if (adapter->vfs_allocated_count) {
1498                 u32 reg_data = rd32(E1000_CTRL_EXT);
1499                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1500                 wr32(E1000_CTRL_EXT, reg_data);
1501         }
1502
1503         netif_tx_start_all_queues(adapter->netdev);
1504
1505         /* start the watchdog. */
1506         hw->mac.get_link_status = 1;
1507         schedule_work(&adapter->watchdog_task);
1508
1509         return 0;
1510 }
1511
1512 void igb_down(struct igb_adapter *adapter)
1513 {
1514         struct net_device *netdev = adapter->netdev;
1515         struct e1000_hw *hw = &adapter->hw;
1516         u32 tctl, rctl;
1517         int i;
1518
1519         /* signal that we're down so the interrupt handler does not
1520          * reschedule our watchdog timer */
1521         set_bit(__IGB_DOWN, &adapter->state);
1522
1523         /* disable receives in the hardware */
1524         rctl = rd32(E1000_RCTL);
1525         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1526         /* flush and sleep below */
1527
1528         netif_tx_stop_all_queues(netdev);
1529
1530         /* disable transmits in the hardware */
1531         tctl = rd32(E1000_TCTL);
1532         tctl &= ~E1000_TCTL_EN;
1533         wr32(E1000_TCTL, tctl);
1534         /* flush both disables and wait for them to finish */
1535         wrfl();
1536         msleep(10);
1537
1538         for (i = 0; i < adapter->num_q_vectors; i++) {
1539                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1540                 napi_disable(&q_vector->napi);
1541         }
1542
1543         igb_irq_disable(adapter);
1544
1545         del_timer_sync(&adapter->watchdog_timer);
1546         del_timer_sync(&adapter->phy_info_timer);
1547
1548         netif_carrier_off(netdev);
1549
1550         /* record the stats before reset */
1551         spin_lock(&adapter->stats64_lock);
1552         igb_update_stats(adapter, &adapter->stats64);
1553         spin_unlock(&adapter->stats64_lock);
1554
1555         adapter->link_speed = 0;
1556         adapter->link_duplex = 0;
1557
1558         if (!pci_channel_offline(adapter->pdev))
1559                 igb_reset(adapter);
1560         igb_clean_all_tx_rings(adapter);
1561         igb_clean_all_rx_rings(adapter);
1562 #ifdef CONFIG_IGB_DCA
1563
1564         /* since we reset the hardware, DCA settings were cleared */
1565         igb_setup_dca(adapter);
1566 #endif
1567 }
1568
1569 void igb_reinit_locked(struct igb_adapter *adapter)
1570 {
1571         WARN_ON(in_interrupt());
1572         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1573                 msleep(1);
1574         igb_down(adapter);
1575         igb_up(adapter);
1576         clear_bit(__IGB_RESETTING, &adapter->state);
1577 }
1578
1579 void igb_reset(struct igb_adapter *adapter)
1580 {
1581         struct pci_dev *pdev = adapter->pdev;
1582         struct e1000_hw *hw = &adapter->hw;
1583         struct e1000_mac_info *mac = &hw->mac;
1584         struct e1000_fc_info *fc = &hw->fc;
1585         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1586         u16 hwm;
1587
1588         /* Repartition PBA for an MTU greater than 9k.
1589          * CTRL.RST is required for the change to take effect.
1590          */
1591         switch (mac->type) {
1592         case e1000_i350:
1593         case e1000_82580:
1594                 pba = rd32(E1000_RXPBS);
1595                 pba = igb_rxpbs_adjust_82580(pba);
1596                 break;
1597         case e1000_82576:
1598                 pba = rd32(E1000_RXPBS);
1599                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1600                 break;
1601         case e1000_82575:
1602         default:
1603                 pba = E1000_PBA_34K;
1604                 break;
1605         }
1606
1607         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1608             (mac->type < e1000_82576)) {
1609                 /* adjust PBA for jumbo frames */
1610                 wr32(E1000_PBA, pba);
1611
1612                 /* To maintain wire speed transmits, the Tx FIFO should be
1613                  * large enough to accommodate two full transmit packets,
1614                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1615                  * the Rx FIFO should be large enough to accommodate at least
1616                  * one full receive packet and is similarly rounded up and
1617                  * expressed in KB. */
1618                 pba = rd32(E1000_PBA);
1619                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1620                 tx_space = pba >> 16;
1621                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1622                 pba &= 0xffff;
1623                 /* the Tx FIFO also stores 16 bytes of information about the Tx
1624                  * packet, but don't include the Ethernet FCS because hardware appends it */
1625                 min_tx_space = (adapter->max_frame_size +
1626                                 sizeof(union e1000_adv_tx_desc) -
1627                                 ETH_FCS_LEN) * 2;
1628                 min_tx_space = ALIGN(min_tx_space, 1024);
1629                 min_tx_space >>= 10;
1630                 /* software strips receive CRC, so leave room for it */
1631                 min_rx_space = adapter->max_frame_size;
1632                 min_rx_space = ALIGN(min_rx_space, 1024);
1633                 min_rx_space >>= 10;
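                /*
                 * Illustrative example (assumed values, not read from hardware):
                 * with a 9018-byte max frame, min_tx_space works out to
                 * ALIGN((9018 + 16 - 4) * 2, 1024) >> 10 = 18 KB, and
                 * min_rx_space to ALIGN(9018, 1024) >> 10 = 9 KB.
                 */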
1634
1635                 /* If current Tx allocation is less than the min Tx FIFO size,
1636                  * and the min Tx FIFO size is less than the current Rx FIFO
1637                  * allocation, take space away from current Rx allocation */
1638                 if (tx_space < min_tx_space &&
1639                     ((min_tx_space - tx_space) < pba)) {
1640                         pba = pba - (min_tx_space - tx_space);
1641
1642                         /* if short on rx space, rx wins and must trump tx
1643                          * adjustment */
1644                         if (pba < min_rx_space)
1645                                 pba = min_rx_space;
1646                 }
1647                 wr32(E1000_PBA, pba);
1648         }
1649
1650         /* flow control settings */
1651         /* The high water mark must be low enough to fit one full frame
1652          * (or the size used for early receive) above it in the Rx FIFO.
1653          * Set it to the lower of:
1654          * - 90% of the Rx FIFO size, or
1655          * - the full Rx FIFO size minus two full frames */
1656         hwm = min(((pba << 10) * 9 / 10),
1657                         ((pba << 10) - 2 * adapter->max_frame_size));
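        /*
         * Illustrative example (assumed defaults): with pba = 34 KB and a
         * 1518-byte max frame, hwm = min(34816 * 9 / 10, 34816 - 2 * 1518)
         * = 31334, which the assignment below rounds down to 31328 bytes
         * (16-byte granularity).
         */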
1658
1659         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1660         fc->low_water = fc->high_water - 16;
1661         fc->pause_time = 0xFFFF;
1662         fc->send_xon = 1;
1663         fc->current_mode = fc->requested_mode;
1664
1665         /* disable receive for all VFs and wait one second */
1666         if (adapter->vfs_allocated_count) {
1667                 int i;
1668                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1669                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1670
1671                 /* ping all the active vfs to let them know we are going down */
1672                 igb_ping_all_vfs(adapter);
1673
1674                 /* disable transmits and receives */
1675                 wr32(E1000_VFRE, 0);
1676                 wr32(E1000_VFTE, 0);
1677         }
1678
1679         /* Allow time for pending master requests to run */
1680         hw->mac.ops.reset_hw(hw);
1681         wr32(E1000_WUC, 0);
1682
1683         if (hw->mac.ops.init_hw(hw))
1684                 dev_err(&pdev->dev, "Hardware Error\n");
1685         if (hw->mac.type > e1000_82580) {
1686                 if (adapter->flags & IGB_FLAG_DMAC) {
1687                         u32 reg;
1688
1689                         /*
1690                          * The DMA Coalescing high water mark needs to be
1691                          * higher than the Rx threshold.  The Rx threshold is
1692                          * currently pba - 6, so use a high water mark of
1693                          * pba - 4. */
1694                         hwm = (pba - 4) << 10;
1695
1696                         reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1697                                & E1000_DMACR_DMACTHR_MASK);
1698
1699                         /* transition to L0s or L1 if available */
1700                         reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1701
1702                         /* watchdog timer ~= 1000 usec, in 32 usec intervals (1000 >> 5) */
1703                         reg |= (1000 >> 5);
1704                         wr32(E1000_DMACR, reg);
1705
1706                         /* no lower threshold to disable coalescing (smart FIFO);
1707                          * UTRESH = 0 */
1708                         wr32(E1000_DMCRTRH, 0);
1709
1710                         /* program the DMA Coalescing high water mark computed above */
1711                         wr32(E1000_FCRTC, hwm);
1712
1713                         /*
1714                          * This sets the time to wait before requesting a
1715                          * transition to low power state to the number of usecs
1716                          * needed to receive one 512-byte frame at gigabit line rate.
1717                          */
1718                         reg = rd32(E1000_DMCTLX);
1719                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
1720
1721                         /* Delay 255 usec before entering Lx state. */
1722                         reg |= 0xFF;
1723                         wr32(E1000_DMCTLX, reg);
1724
1725                         /* free space in Tx packet buffer to wake from DMAC */
1726                         wr32(E1000_DMCTXTH,
1727                              (IGB_MIN_TXPBSIZE -
1728                              (IGB_TX_BUF_4096 + adapter->max_frame_size))
1729                              >> 6);
1730
1731                         /* make low power state decision controlled by DMAC */
1732                         reg = rd32(E1000_PCIEMISC);
1733                         reg |= E1000_PCIEMISC_LX_DECISION;
1734                         wr32(E1000_PCIEMISC, reg);
1735                 } /* end if IGB_FLAG_DMAC set */
1736         }
1737         if (hw->mac.type == e1000_82580) {
1738                 u32 reg = rd32(E1000_PCIEMISC);
1739                 wr32(E1000_PCIEMISC,
1740                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1741         }
1742         if (!netif_running(adapter->netdev))
1743                 igb_power_down_link(adapter);
1744
1745         igb_update_mng_vlan(adapter);
1746
1747         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1748         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1749
1750         igb_get_phy_info(hw);
1751 }
1752
1753 static u32 igb_fix_features(struct net_device *netdev, u32 features)
1754 {
1755         /*
1756          * Since there is no support for separate Rx/Tx VLAN accel
1757          * enable/disable, make sure the Tx flag is always in the same state as Rx.
1758          */
1759         if (features & NETIF_F_HW_VLAN_RX)
1760                 features |= NETIF_F_HW_VLAN_TX;
1761         else
1762                 features &= ~NETIF_F_HW_VLAN_TX;
1763
1764         return features;
1765 }
1766
1767 static int igb_set_features(struct net_device *netdev, u32 features)
1768 {
1769         struct igb_adapter *adapter = netdev_priv(netdev);
1770         int i;
1771         u32 changed = netdev->features ^ features;
1772
1773         for (i = 0; i < adapter->num_rx_queues; i++) {
1774                 if (features & NETIF_F_RXCSUM)
1775                         adapter->rx_ring[i]->flags |= IGB_RING_FLAG_RX_CSUM;
1776                 else
1777                         adapter->rx_ring[i]->flags &= ~IGB_RING_FLAG_RX_CSUM;
1778         }
1779
1780         if (changed & NETIF_F_HW_VLAN_RX)
1781                 igb_vlan_mode(netdev, features);
1782
1783         return 0;
1784 }
1785
1786 static const struct net_device_ops igb_netdev_ops = {
1787         .ndo_open               = igb_open,
1788         .ndo_stop               = igb_close,
1789         .ndo_start_xmit         = igb_xmit_frame_adv,
1790         .ndo_get_stats64        = igb_get_stats64,
1791         .ndo_set_rx_mode        = igb_set_rx_mode,
1792         .ndo_set_multicast_list = igb_set_rx_mode,
1793         .ndo_set_mac_address    = igb_set_mac,
1794         .ndo_change_mtu         = igb_change_mtu,
1795         .ndo_do_ioctl           = igb_ioctl,
1796         .ndo_tx_timeout         = igb_tx_timeout,
1797         .ndo_validate_addr      = eth_validate_addr,
1798         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1799         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1800         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1801         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1802         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1803         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1804 #ifdef CONFIG_NET_POLL_CONTROLLER
1805         .ndo_poll_controller    = igb_netpoll,
1806 #endif
1807         .ndo_fix_features       = igb_fix_features,
1808         .ndo_set_features       = igb_set_features,
1809 };
1810
1811 /**
1812  * igb_probe - Device Initialization Routine
1813  * @pdev: PCI device information struct
1814  * @ent: entry in igb_pci_tbl
1815  *
1816  * Returns 0 on success, negative on failure
1817  *
1818  * igb_probe initializes an adapter identified by a pci_dev structure.
1819  * The OS initialization, configuring of the adapter private structure,
1820  * and a hardware reset occur.
1821  **/
1822 static int __devinit igb_probe(struct pci_dev *pdev,
1823                                const struct pci_device_id *ent)
1824 {
1825         struct net_device *netdev;
1826         struct igb_adapter *adapter;
1827         struct e1000_hw *hw;
1828         u16 eeprom_data = 0;
1829         s32 ret_val;
1830         static int global_quad_port_a; /* global quad port a indication */
1831         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1832         unsigned long mmio_start, mmio_len;
1833         int err, pci_using_dac;
1834         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1835         u8 part_str[E1000_PBANUM_LENGTH];
1836
1837         /* Catch broken hardware that put the wrong VF device ID in
1838          * the PCIe SR-IOV capability.
1839          */
1840         if (pdev->is_virtfn) {
1841                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1842                      pci_name(pdev), pdev->vendor, pdev->device);
1843                 return -EINVAL;
1844         }
1845
1846         err = pci_enable_device_mem(pdev);
1847         if (err)
1848                 return err;
1849
1850         pci_using_dac = 0;
1851         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1852         if (!err) {
1853                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1854                 if (!err)
1855                         pci_using_dac = 1;
1856         } else {
1857                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1858                 if (err) {
1859                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1860                         if (err) {
1861                                 dev_err(&pdev->dev, "No usable DMA "
1862                                         "configuration, aborting\n");
1863                                 goto err_dma;
1864                         }
1865                 }
1866         }
1867
1868         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1869                                            IORESOURCE_MEM),
1870                                            igb_driver_name);
1871         if (err)
1872                 goto err_pci_reg;
1873
1874         pci_enable_pcie_error_reporting(pdev);
1875
1876         pci_set_master(pdev);
1877         pci_save_state(pdev);
1878
1879         err = -ENOMEM;
1880         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1881                                    IGB_ABS_MAX_TX_QUEUES);
1882         if (!netdev)
1883                 goto err_alloc_etherdev;
1884
1885         SET_NETDEV_DEV(netdev, &pdev->dev);
1886
1887         pci_set_drvdata(pdev, netdev);
1888         adapter = netdev_priv(netdev);
1889         adapter->netdev = netdev;
1890         adapter->pdev = pdev;
1891         hw = &adapter->hw;
1892         hw->back = adapter;
1893         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1894
1895         mmio_start = pci_resource_start(pdev, 0);
1896         mmio_len = pci_resource_len(pdev, 0);
1897
1898         err = -EIO;
1899         hw->hw_addr = ioremap(mmio_start, mmio_len);
1900         if (!hw->hw_addr)
1901                 goto err_ioremap;
1902
1903         netdev->netdev_ops = &igb_netdev_ops;
1904         igb_set_ethtool_ops(netdev);
1905         netdev->watchdog_timeo = 5 * HZ;
1906
1907         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1908
1909         netdev->mem_start = mmio_start;
1910         netdev->mem_end = mmio_start + mmio_len;
1911
1912         /* PCI config space info */
1913         hw->vendor_id = pdev->vendor;
1914         hw->device_id = pdev->device;
1915         hw->revision_id = pdev->revision;
1916         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1917         hw->subsystem_device_id = pdev->subsystem_device;
1918
1919         /* Copy the default MAC, PHY and NVM function pointers */
1920         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1921         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1922         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1923         /* Initialize skew-specific constants */
1924         err = ei->get_invariants(hw);
1925         if (err)
1926                 goto err_sw_init;
1927
1928         /* setup the private structure */
1929         err = igb_sw_init(adapter);
1930         if (err)
1931                 goto err_sw_init;
1932
1933         igb_get_bus_info_pcie(hw);
1934
1935         hw->phy.autoneg_wait_to_complete = false;
1936
1937         /* Copper options */
1938         if (hw->phy.media_type == e1000_media_type_copper) {
1939                 hw->phy.mdix = AUTO_ALL_MODES;
1940                 hw->phy.disable_polarity_correction = false;
1941                 hw->phy.ms_type = e1000_ms_hw_default;
1942         }
1943
1944         if (igb_check_reset_block(hw))
1945                 dev_info(&pdev->dev,
1946                         "PHY reset is blocked due to SOL/IDER session.\n");
1947
1948         netdev->hw_features = NETIF_F_SG |
1949                            NETIF_F_IP_CSUM |
1950                            NETIF_F_IPV6_CSUM |
1951                            NETIF_F_TSO |
1952                            NETIF_F_TSO6 |
1953                            NETIF_F_RXCSUM |
1954                            NETIF_F_HW_VLAN_RX;
1955
1956         netdev->features = netdev->hw_features |
1957                            NETIF_F_HW_VLAN_TX |
1958                            NETIF_F_HW_VLAN_FILTER;
1959
1960         netdev->vlan_features |= NETIF_F_TSO;
1961         netdev->vlan_features |= NETIF_F_TSO6;
1962         netdev->vlan_features |= NETIF_F_IP_CSUM;
1963         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1964         netdev->vlan_features |= NETIF_F_SG;
1965
1966         if (pci_using_dac) {
1967                 netdev->features |= NETIF_F_HIGHDMA;
1968                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1969         }
1970
1971         if (hw->mac.type >= e1000_82576) {
1972                 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1973                 netdev->features |= NETIF_F_SCTP_CSUM;
1974         }
1975
1976         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1977
1978         /* before reading the NVM, reset the controller to put the device in a
1979          * known good starting state */
1980         hw->mac.ops.reset_hw(hw);
1981
1982         /* make sure the NVM is good */
1983         if (hw->nvm.ops.validate(hw) < 0) {
1984                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1985                 err = -EIO;
1986                 goto err_eeprom;
1987         }
1988
1989         /* copy the MAC address out of the NVM */
1990         if (hw->mac.ops.read_mac_addr(hw))
1991                 dev_err(&pdev->dev, "NVM Read Error\n");
1992
1993         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1994         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1995
1996         if (!is_valid_ether_addr(netdev->perm_addr)) {
1997                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1998                 err = -EIO;
1999                 goto err_eeprom;
2000         }
2001
2002         setup_timer(&adapter->watchdog_timer, igb_watchdog,
2003                     (unsigned long) adapter);
2004         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2005                     (unsigned long) adapter);
2006
2007         INIT_WORK(&adapter->reset_task, igb_reset_task);
2008         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2009
2010         /* Initialize link properties that are user-changeable */
2011         adapter->fc_autoneg = true;
2012         hw->mac.autoneg = true;
2013         hw->phy.autoneg_advertised = 0x2f;
2014
2015         hw->fc.requested_mode = e1000_fc_default;
2016         hw->fc.current_mode = e1000_fc_default;
2017
2018         igb_validate_mdi_setting(hw);
2019
2020         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2021          * enable the ACPI Magic Packet filter.
2022          */
2023
2024         if (hw->bus.func == 0)
2025                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2026         else if (hw->mac.type == e1000_82580)
2027                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2028                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2029                                  &eeprom_data);
2030         else if (hw->bus.func == 1)
2031                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2032
2033         if (eeprom_data & eeprom_apme_mask)
2034                 adapter->eeprom_wol |= E1000_WUFC_MAG;
2035
2036         /* now that we have the eeprom settings, apply the special cases where
2037          * the eeprom may be wrong or the board simply won't support wake on
2038          * lan on a particular port */
2039         switch (pdev->device) {
2040         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2041                 adapter->eeprom_wol = 0;
2042                 break;
2043         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2044         case E1000_DEV_ID_82576_FIBER:
2045         case E1000_DEV_ID_82576_SERDES:
2046                 /* Wake events only supported on port A for dual fiber
2047                  * regardless of eeprom setting */
2048                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2049                         adapter->eeprom_wol = 0;
2050                 break;
2051         case E1000_DEV_ID_82576_QUAD_COPPER:
2052         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2053                 /* if quad port adapter, disable WoL on all but port A */
2054                 if (global_quad_port_a != 0)
2055                         adapter->eeprom_wol = 0;
2056                 else
2057                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2058                 /* Reset for multiple quad port adapters */
2059                 if (++global_quad_port_a == 4)
2060                         global_quad_port_a = 0;
2061                 break;
2062         }
2063
2064         /* initialize the wol settings based on the eeprom settings */
2065         adapter->wol = adapter->eeprom_wol;
2066         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2067
2068         /* reset the hardware with the new settings */
2069         igb_reset(adapter);
2070
2071         /* let the f/w know that the h/w is now under the control of the
2072          * driver. */
2073         igb_get_hw_control(adapter);
2074
2075         strcpy(netdev->name, "eth%d");
2076         err = register_netdev(netdev);
2077         if (err)
2078                 goto err_register;
2079
2080         igb_vlan_mode(netdev, netdev->features);
2081
2082         /* carrier off reporting is important to ethtool even BEFORE open */
2083         netif_carrier_off(netdev);
2084
2085 #ifdef CONFIG_IGB_DCA
2086         if (dca_add_requester(&pdev->dev) == 0) {
2087                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2088                 dev_info(&pdev->dev, "DCA enabled\n");
2089                 igb_setup_dca(adapter);
2090         }
2091
2092 #endif
2093         /* do hw tstamp init after resetting */
2094         igb_init_hw_timer(adapter);
2095
2096         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2097         /* print bus type/speed/width info */
2098         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2099                  netdev->name,
2100                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2101                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2102                                                             "unknown"),
2103                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2104                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2105                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2106                    "unknown"),
2107                  netdev->dev_addr);
2108
2109         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2110         if (ret_val)
2111                 strcpy(part_str, "Unknown");
2112         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2113         dev_info(&pdev->dev,
2114                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2115                 adapter->msix_entries ? "MSI-X" :
2116                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2117                 adapter->num_rx_queues, adapter->num_tx_queues);
2118         switch (hw->mac.type) {
2119         case e1000_i350:
2120                 igb_set_eee_i350(hw);
2121                 break;
2122         default:
2123                 break;
2124         }
2125         return 0;
2126
2127 err_register:
2128         igb_release_hw_control(adapter);
2129 err_eeprom:
2130         if (!igb_check_reset_block(hw))
2131                 igb_reset_phy(hw);
2132
2133         if (hw->flash_address)
2134                 iounmap(hw->flash_address);
2135 err_sw_init:
2136         igb_clear_interrupt_scheme(adapter);
2137         iounmap(hw->hw_addr);
2138 err_ioremap:
2139         free_netdev(netdev);
2140 err_alloc_etherdev:
2141         pci_release_selected_regions(pdev,
2142                                      pci_select_bars(pdev, IORESOURCE_MEM));
2143 err_pci_reg:
2144 err_dma:
2145         pci_disable_device(pdev);
2146         return err;
2147 }
2148
2149 /**
2150  * igb_remove - Device Removal Routine
2151  * @pdev: PCI device information struct
2152  *
2153  * igb_remove is called by the PCI subsystem to alert the driver
2154  * that it should release a PCI device.  This could be caused by a
2155  * Hot-Plug event, or because the driver is going to be removed from
2156  * memory.
2157  **/
2158 static void __devexit igb_remove(struct pci_dev *pdev)
2159 {
2160         struct net_device *netdev = pci_get_drvdata(pdev);
2161         struct igb_adapter *adapter = netdev_priv(netdev);
2162         struct e1000_hw *hw = &adapter->hw;
2163
2164         /*
2165          * The watchdog timer may be rescheduled, so explicitly
2166          * disable it from being rescheduled.
2167          */
2168         set_bit(__IGB_DOWN, &adapter->state);
2169         del_timer_sync(&adapter->watchdog_timer);
2170         del_timer_sync(&adapter->phy_info_timer);
2171
2172         cancel_work_sync(&adapter->reset_task);
2173         cancel_work_sync(&adapter->watchdog_task);
2174
2175 #ifdef CONFIG_IGB_DCA
2176         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2177                 dev_info(&pdev->dev, "DCA disabled\n");
2178                 dca_remove_requester(&pdev->dev);
2179                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2180                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2181         }
2182 #endif
2183
2184         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2185          * would have already happened in close and is redundant. */
2186         igb_release_hw_control(adapter);
2187
2188         unregister_netdev(netdev);
2189
2190         igb_clear_interrupt_scheme(adapter);
2191
2192 #ifdef CONFIG_PCI_IOV
2193         /* reclaim resources allocated to VFs */
2194         if (adapter->vf_data) {
2195                 /* disable iov and allow time for transactions to clear */
2196                 pci_disable_sriov(pdev);
2197                 msleep(500);
2198
2199                 kfree(adapter->vf_data);
2200                 adapter->vf_data = NULL;
2201                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2202                 msleep(100);
2203                 dev_info(&pdev->dev, "IOV Disabled\n");
2204         }
2205 #endif
2206
2207         iounmap(hw->hw_addr);
2208         if (hw->flash_address)
2209                 iounmap(hw->flash_address);
2210         pci_release_selected_regions(pdev,
2211                                      pci_select_bars(pdev, IORESOURCE_MEM));
2212
2213         free_netdev(netdev);
2214
2215         pci_disable_pcie_error_reporting(pdev);
2216
2217         pci_disable_device(pdev);
2218 }
2219
2220 /**
2221  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2222  * @adapter: board private structure to initialize
2223  *
2224  * This function initializes the vf specific data storage and then attempts to
2225  * allocate the VFs.  The reason for this ordering is that it is much
2226  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2227  * the memory for the VFs.
2228  **/
2229 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2230 {
2231 #ifdef CONFIG_PCI_IOV
2232         struct pci_dev *pdev = adapter->pdev;
2233
2234         if (adapter->vfs_allocated_count) {
2235                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2236                                            sizeof(struct vf_data_storage),
2237                                            GFP_KERNEL);
2238                 /* if allocation failed then we do not support SR-IOV */
2239                 if (!adapter->vf_data) {
2240                         adapter->vfs_allocated_count = 0;
2241                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2242                                 "Data Storage\n");
2243                 }
2244         }
2245
2246         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2247                 kfree(adapter->vf_data);
2248                 adapter->vf_data = NULL;
2249 #endif /* CONFIG_PCI_IOV */
2250                 adapter->vfs_allocated_count = 0;
2251 #ifdef CONFIG_PCI_IOV
2252         } else {
2253                 unsigned char mac_addr[ETH_ALEN];
2254                 int i;
2255                 dev_info(&pdev->dev, "%d vfs allocated\n",
2256                          adapter->vfs_allocated_count);
2257                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2258                         random_ether_addr(mac_addr);
2259                         igb_set_vf_mac(adapter, i, mac_addr);
2260                 }
2261                 /* DMA Coalescing is not supported in IOV mode. */
2262                 if (adapter->flags & IGB_FLAG_DMAC)
2263                         adapter->flags &= ~IGB_FLAG_DMAC;
2264         }
2265 #endif /* CONFIG_PCI_IOV */
2266 }
2267
2268
2269 /**
2270  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2271  * @adapter: board private structure to initialize
2272  *
2273  * igb_init_hw_timer initializes the function pointer and values for the hw
2274  * timer found in hardware.
2275  **/
2276 static void igb_init_hw_timer(struct igb_adapter *adapter)
2277 {
2278         struct e1000_hw *hw = &adapter->hw;
2279
2280         switch (hw->mac.type) {
2281         case e1000_i350:
2282         case e1000_82580:
2283                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2284                 adapter->cycles.read = igb_read_clock;
2285                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2286                 adapter->cycles.mult = 1;
2287                 /*
2288                  * The 82580 timesync updates the system timer in 8ns increments,
2289                  * and the value cannot be shifted.  Instead we need to shift
2290                  * the registers to generate a 64bit timer value.  As a result
2291                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2292                  * 24 in order to generate a larger value for synchronization.
2293                  */
2294                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2295                 /* disable system timer temporarily by setting bit 31 */
2296                 wr32(E1000_TSAUXC, 0x80000000);
2297                 wrfl();
2298
2299                 /* Set registers so that rollover occurs soon to test this. */
2300                 wr32(E1000_SYSTIMR, 0x00000000);
2301                 wr32(E1000_SYSTIML, 0x80000000);
2302                 wr32(E1000_SYSTIMH, 0x000000FF);
2303                 wrfl();
2304
2305                 /* enable system timer by clearing bit 31 */
2306                 wr32(E1000_TSAUXC, 0x0);
2307                 wrfl();
2308
2309                 timecounter_init(&adapter->clock,
2310                                  &adapter->cycles,
2311                                  ktime_to_ns(ktime_get_real()));
2312                 /*
2313                  * Synchronize our NIC clock against system wall clock. NIC
2314                  * time stamp reading requires ~3us per sample, each sample
2315                  * was pretty stable even under load => only require 10
2316                  * samples for each offset comparison.
2317                  */
2318                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2319                 adapter->compare.source = &adapter->clock;
2320                 adapter->compare.target = ktime_get_real;
2321                 adapter->compare.num_samples = 10;
2322                 timecompare_update(&adapter->compare, 0);
2323                 break;
2324         case e1000_82576:
2325                 /*
2326                  * Initialize hardware timer: we keep it running just in case
2327                  * that some program needs it later on.
2328                  */
2329                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2330                 adapter->cycles.read = igb_read_clock;
2331                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2332                 adapter->cycles.mult = 1;
2333                 /*
2334                  * Scale the NIC clock cycle by a large factor so that
2335                  * relatively small clock corrections can be added or
2336                  * subtracted at each clock tick. The drawbacks of a large
2337                  * factor are a) that the clock register overflows more quickly
2338                  * (not such a big deal) and b) that the increment per tick has
2339                  * to fit into 24 bits.  As a result we need to use a shift of
2340                  * 19 so we can fit a value of 16 into the TIMINCA register.
2341                  */
2342                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2343                 wr32(E1000_TIMINCA,
2344                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2345                                 (16 << IGB_82576_TSYNC_SHIFT));
2346
2347                 /* Set registers so that rollover occurs soon to test this. */
2348                 wr32(E1000_SYSTIML, 0x00000000);
2349                 wr32(E1000_SYSTIMH, 0xFF800000);
2350                 wrfl();
2351
2352                 timecounter_init(&adapter->clock,
2353                                  &adapter->cycles,
2354                                  ktime_to_ns(ktime_get_real()));
2355                 /*
2356                  * Synchronize our NIC clock against system wall clock. NIC
2357                  * time stamp reading requires ~3us per sample, each sample
2358                  * was pretty stable even under load => only require 10
2359                  * samples for each offset comparison.
2360                  */
2361                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2362                 adapter->compare.source = &adapter->clock;
2363                 adapter->compare.target = ktime_get_real;
2364                 adapter->compare.num_samples = 10;
2365                 timecompare_update(&adapter->compare, 0);
2366                 break;
2367         case e1000_82575:
2368                 /* 82575 does not support timesync */
2369         default:
2370                 break;
2371         }
2372
2373 }
2374
2375 /**
2376  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2377  * @adapter: board private structure to initialize
2378  *
2379  * igb_sw_init initializes the Adapter private data structure.
2380  * Fields are initialized based on PCI device information and
2381  * OS network device settings (MTU size).
2382  **/
2383 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2384 {
2385         struct e1000_hw *hw = &adapter->hw;
2386         struct net_device *netdev = adapter->netdev;
2387         struct pci_dev *pdev = adapter->pdev;
2388
2389         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2390
2391         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2392         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2393         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2394         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2395
2396         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2397         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2398
2399         spin_lock_init(&adapter->stats64_lock);
2400 #ifdef CONFIG_PCI_IOV
2401         switch (hw->mac.type) {
2402         case e1000_82576:
2403         case e1000_i350:
2404                 if (max_vfs > 7) {
2405                         dev_warn(&pdev->dev,
2406                                  "Maximum of 7 VFs per PF, using max\n");
2407                         adapter->vfs_allocated_count = 7;
2408                 } else
2409                         adapter->vfs_allocated_count = max_vfs;
2410                 break;
2411         default:
2412                 break;
2413         }
2414 #endif /* CONFIG_PCI_IOV */
2415         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2416         /* i350 cannot do RSS and SR-IOV at the same time */
2417         if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2418                 adapter->rss_queues = 1;
2419
2420         /*
2421          * If rss_queues > 4, or if VFs are allocated while more than one
2422          * rss_queue is in use, combine the queues into queue pairs in order
2423          * to conserve interrupts due to the limited supply.
2424          */
2425         if ((adapter->rss_queues > 4) ||
2426             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2427                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2428
2429         /* This call may decrease the number of queues */
2430         if (igb_init_interrupt_scheme(adapter)) {
2431                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2432                 return -ENOMEM;
2433         }
2434
2435         igb_probe_vfs(adapter);
2436
2437         /* Explicitly disable IRQ since the NIC can be in any state. */
2438         igb_irq_disable(adapter);
2439
2440         if (hw->mac.type == e1000_i350)
2441                 adapter->flags &= ~IGB_FLAG_DMAC;
2442
2443         set_bit(__IGB_DOWN, &adapter->state);
2444         return 0;
2445 }
2446
2447 /**
2448  * igb_open - Called when a network interface is made active
2449  * @netdev: network interface device structure
2450  *
2451  * Returns 0 on success, negative value on failure
2452  *
2453  * The open entry point is called when a network interface is made
2454  * active by the system (IFF_UP).  At this point all resources needed
2455  * for transmit and receive operations are allocated, the interrupt
2456  * handler is registered with the OS, the watchdog timer is started,
2457  * and the stack is notified that the interface is ready.
2458  **/
2459 static int igb_open(struct net_device *netdev)
2460 {
2461         struct igb_adapter *adapter = netdev_priv(netdev);
2462         struct e1000_hw *hw = &adapter->hw;
2463         int err;
2464         int i;
2465
2466         /* disallow open during test */
2467         if (test_bit(__IGB_TESTING, &adapter->state))
2468                 return -EBUSY;
2469
2470         netif_carrier_off(netdev);
2471
2472         /* allocate transmit descriptors */
2473         err = igb_setup_all_tx_resources(adapter);
2474         if (err)
2475                 goto err_setup_tx;
2476
2477         /* allocate receive descriptors */
2478         err = igb_setup_all_rx_resources(adapter);
2479         if (err)
2480                 goto err_setup_rx;
2481
2482         igb_power_up_link(adapter);
2483
2484         /* before we allocate an interrupt, we must be ready to handle it.
2485          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2486          * as soon as we request the irq, so we have to set up our
2487          * clean_rx handler before we do so.  */
2488         igb_configure(adapter);
2489
2490         err = igb_request_irq(adapter);
2491         if (err)
2492                 goto err_req_irq;
2493
2494         /* From here on the code is the same as igb_up() */
2495         clear_bit(__IGB_DOWN, &adapter->state);
2496
2497         for (i = 0; i < adapter->num_q_vectors; i++) {
2498                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2499                 napi_enable(&q_vector->napi);
2500         }
2501
2502         /* Clear any pending interrupts. */
2503         rd32(E1000_ICR);
2504
2505         igb_irq_enable(adapter);
2506
2507         /* notify VFs that reset has been completed */
2508         if (adapter->vfs_allocated_count) {
2509                 u32 reg_data = rd32(E1000_CTRL_EXT);
2510                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2511                 wr32(E1000_CTRL_EXT, reg_data);
2512         }
2513
2514         netif_tx_start_all_queues(netdev);
2515
2516         /* start the watchdog. */
2517         hw->mac.get_link_status = 1;
2518         schedule_work(&adapter->watchdog_task);
2519
2520         return 0;
2521
2522 err_req_irq:
2523         igb_release_hw_control(adapter);
2524         igb_power_down_link(adapter);
2525         igb_free_all_rx_resources(adapter);
2526 err_setup_rx:
2527         igb_free_all_tx_resources(adapter);
2528 err_setup_tx:
2529         igb_reset(adapter);
2530
2531         return err;
2532 }
2533
2534 /**
2535  * igb_close - Disables a network interface
2536  * @netdev: network interface device structure
2537  *
2538  * Returns 0, this is not allowed to fail
2539  *
2540  * The close entry point is called when an interface is de-activated
2541  * by the OS.  The hardware is still under the driver's control, but
2542  * needs to be disabled.  A global MAC reset is issued to stop the
2543  * hardware, and all transmit and receive resources are freed.
2544  **/
2545 static int igb_close(struct net_device *netdev)
2546 {
2547         struct igb_adapter *adapter = netdev_priv(netdev);
2548
2549         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2550         igb_down(adapter);
2551
2552         igb_free_irq(adapter);
2553
2554         igb_free_all_tx_resources(adapter);
2555         igb_free_all_rx_resources(adapter);
2556
2557         return 0;
2558 }
2559
2560 /**
2561  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2562  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2563  *
2564  * Return 0 on success, negative on failure
2565  **/
2566 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2567 {
2568         struct device *dev = tx_ring->dev;
2569         int size;
2570
2571         size = sizeof(struct igb_buffer) * tx_ring->count;
2572         tx_ring->buffer_info = vzalloc(size);
2573         if (!tx_ring->buffer_info)
2574                 goto err;
2575
2576         /* round up to nearest 4K */
2577         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2578         tx_ring->size = ALIGN(tx_ring->size, 4096);
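        /*
         * Illustrative sizing example (assuming the default ring size of 256
         * descriptors): 256 * sizeof(union e1000_adv_tx_desc) = 256 * 16 =
         * 4096 bytes, which is already a whole 4K page.
         */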
2579
2580         tx_ring->desc = dma_alloc_coherent(dev,
2581                                            tx_ring->size,
2582                                            &tx_ring->dma,
2583                                            GFP_KERNEL);
2584
2585         if (!tx_ring->desc)
2586                 goto err;
2587
2588         tx_ring->next_to_use = 0;
2589         tx_ring->next_to_clean = 0;
2590         return 0;
2591
2592 err:
2593         vfree(tx_ring->buffer_info);
2594         dev_err(dev,
2595                 "Unable to allocate memory for the transmit descriptor ring\n");
2596         return -ENOMEM;
2597 }
2598
2599 /**
2600  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2601  *                                (Descriptors) for all queues
2602  * @adapter: board private structure
2603  *
2604  * Return 0 on success, negative on failure
2605  **/
2606 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2607 {
2608         struct pci_dev *pdev = adapter->pdev;
2609         int i, err = 0;
2610
2611         for (i = 0; i < adapter->num_tx_queues; i++) {
2612                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2613                 if (err) {
2614                         dev_err(&pdev->dev,
2615                                 "Allocation for Tx Queue %u failed\n", i);
2616                         for (i--; i >= 0; i--)
2617                                 igb_free_tx_resources(adapter->tx_ring[i]);
2618                         break;
2619                 }
2620         }
2621
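        /*
         * Map every slot of the absolute-maximum Tx queue table onto one of
         * the rings that were actually allocated; surplus slots simply wrap
         * round-robin onto the existing rings.
         */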
2622         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2623                 int r_idx = i % adapter->num_tx_queues;
2624                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2625         }
2626         return err;
2627 }
2628
2629 /**
2630  * igb_setup_tctl - configure the transmit control registers
2631  * @adapter: Board private structure
2632  **/
2633 void igb_setup_tctl(struct igb_adapter *adapter)
2634 {
2635         struct e1000_hw *hw = &adapter->hw;
2636         u32 tctl;
2637
2638         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2639         wr32(E1000_TXDCTL(0), 0);
2640
2641         /* Program the Transmit Control Register */
2642         tctl = rd32(E1000_TCTL);
2643         tctl &= ~E1000_TCTL_CT;
2644         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2645                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2646
2647         igb_config_collision_dist(hw);
2648
2649         /* Enable transmits */
2650         tctl |= E1000_TCTL_EN;
2651
2652         wr32(E1000_TCTL, tctl);
2653 }
2654
2655 /**
2656  * igb_configure_tx_ring - Configure transmit ring after Reset
2657  * @adapter: board private structure
2658  * @ring: tx ring to configure
2659  *
2660  * Configure a transmit ring after a reset.
2661  **/
2662 void igb_configure_tx_ring(struct igb_adapter *adapter,
2663                            struct igb_ring *ring)
2664 {
2665         struct e1000_hw *hw = &adapter->hw;
2666         u32 txdctl;
2667         u64 tdba = ring->dma;
2668         int reg_idx = ring->reg_idx;
2669
2670         /* disable the queue */
2671         txdctl = rd32(E1000_TXDCTL(reg_idx));
2672         wr32(E1000_TXDCTL(reg_idx),
2673                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2674         wrfl();
2675         mdelay(10);
2676
2677         wr32(E1000_TDLEN(reg_idx),
2678                         ring->count * sizeof(union e1000_adv_tx_desc));
2679         wr32(E1000_TDBAL(reg_idx),
2680                         tdba & 0x00000000ffffffffULL);
2681         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2682
2683         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2684         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2685         writel(0, ring->head);
2686         writel(0, ring->tail);
2687
2688         txdctl |= IGB_TX_PTHRESH;
2689         txdctl |= IGB_TX_HTHRESH << 8;
2690         txdctl |= IGB_TX_WTHRESH << 16;
2691
2692         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2693         wr32(E1000_TXDCTL(reg_idx), txdctl);
2694 }
2695
2696 /**
2697  * igb_configure_tx - Configure transmit Unit after Reset
2698  * @adapter: board private structure
2699  *
2700  * Configure the Tx unit of the MAC after a reset.
2701  **/
2702 static void igb_configure_tx(struct igb_adapter *adapter)
2703 {
2704         int i;
2705
2706         for (i = 0; i < adapter->num_tx_queues; i++)
2707                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2708 }
2709
2710 /**
2711  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2712  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2713  *
2714  * Returns 0 on success, negative on failure
2715  **/
2716 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2717 {
2718         struct device *dev = rx_ring->dev;
2719         int size, desc_len;
2720
2721         size = sizeof(struct igb_buffer) * rx_ring->count;
2722         rx_ring->buffer_info = vzalloc(size);
2723         if (!rx_ring->buffer_info)
2724                 goto err;
2725
2726         desc_len = sizeof(union e1000_adv_rx_desc);
2727
2728         /* Round up to nearest 4K */
2729         rx_ring->size = rx_ring->count * desc_len;
2730         rx_ring->size = ALIGN(rx_ring->size, 4096);
2731
2732         rx_ring->desc = dma_alloc_coherent(dev,
2733                                            rx_ring->size,
2734                                            &rx_ring->dma,
2735                                            GFP_KERNEL);
2736
2737         if (!rx_ring->desc)
2738                 goto err;
2739
2740         rx_ring->next_to_clean = 0;
2741         rx_ring->next_to_use = 0;
2742
2743         return 0;
2744
2745 err:
2746         vfree(rx_ring->buffer_info);
2747         rx_ring->buffer_info = NULL;
2748         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2749                 " ring\n");
2750         return -ENOMEM;
2751 }
2752
2753 /**
2754  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2755  *                                (Descriptors) for all queues
2756  * @adapter: board private structure
2757  *
2758  * Return 0 on success, negative on failure
2759  **/
2760 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2761 {
2762         struct pci_dev *pdev = adapter->pdev;
2763         int i, err = 0;
2764
2765         for (i = 0; i < adapter->num_rx_queues; i++) {
2766                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2767                 if (err) {
2768                         dev_err(&pdev->dev,
2769                                 "Allocation for Rx Queue %u failed\n", i);
2770                         for (i--; i >= 0; i--)
2771                                 igb_free_rx_resources(adapter->rx_ring[i]);
2772                         break;
2773                 }
2774         }
2775
2776         return err;
2777 }
2778
2779 /**
2780  * igb_setup_mrqc - configure the multiple receive queue control registers
2781  * @adapter: Board private structure
2782  **/
2783 static void igb_setup_mrqc(struct igb_adapter *adapter)
2784 {
2785         struct e1000_hw *hw = &adapter->hw;
2786         u32 mrqc, rxcsum;
2787         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2788         union e1000_reta {
2789                 u32 dword;
2790                 u8  bytes[4];
2791         } reta;
2792         static const u8 rsshash[40] = {
2793                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2794                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2795                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2796                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2797
2798         /* Fill out hash function seeds */
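        /* worked example: for j == 0 the key bytes 0x6d, 0x5a, 0x56, 0xda are
         * packed with byte 0 in the low-order bits, giving the 32-bit value
         * 0xda565a6d written to the first RSSRK register */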
2799         for (j = 0; j < 10; j++) {
2800                 u32 rsskey = rsshash[(j * 4)];
2801                 rsskey |= rsshash[(j * 4) + 1] << 8;
2802                 rsskey |= rsshash[(j * 4) + 2] << 16;
2803                 rsskey |= rsshash[(j * 4) + 3] << 24;
2804                 array_wr32(E1000_RSSRK(0), j, rsskey);
2805         }
2806
2807         num_rx_queues = adapter->rss_queues;
2808
2809         if (adapter->vfs_allocated_count) {
2810                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2811                 switch (hw->mac.type) {
2812                 case e1000_i350:
2813                 case e1000_82580:
2814                         num_rx_queues = 1;
2815                         shift = 0;
2816                         break;
2817                 case e1000_82576:
2818                         shift = 3;
2819                         num_rx_queues = 2;
2820                         break;
2821                 case e1000_82575:
2822                         shift = 2;
2823                         shift2 = 6;
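                        /* fall through */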
2824                 default:
2825                         break;
2826                 }
2827         } else {
2828                 if (hw->mac.type == e1000_82575)
2829                         shift = 6;
2830         }
2831
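        /* illustrative note: each byte of the 128-entry redirection table
         * selects the RX queue for one hash bucket, so with num_rx_queues == 4
         * and shift == 0 the entries repeat the pattern 0, 1, 2, 3 */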
2832         for (j = 0; j < (32 * 4); j++) {
2833                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2834                 if (shift2)
2835                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2836                 if ((j & 3) == 3)
2837                         wr32(E1000_RETA(j >> 2), reta.dword);
2838         }
2839
2840         /*
2841          * Disable raw packet checksumming so that RSS hash is placed in
2842          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2843          * offloads as they are enabled by default
2844          */
2845         rxcsum = rd32(E1000_RXCSUM);
2846         rxcsum |= E1000_RXCSUM_PCSD;
2847
2848         if (adapter->hw.mac.type >= e1000_82576)
2849                 /* Enable Receive Checksum Offload for SCTP */
2850                 rxcsum |= E1000_RXCSUM_CRCOFL;
2851
2852         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2853         wr32(E1000_RXCSUM, rxcsum);
2854
2855         /* If VMDq is enabled then we set the appropriate mode for that, else
2856          * we default to RSS so that an RSS hash is calculated per packet even
2857          * if we are only using one queue */
2858         if (adapter->vfs_allocated_count) {
2859                 if (hw->mac.type > e1000_82575) {
2860                         /* Set the default pool for the PF's first queue */
2861                         u32 vtctl = rd32(E1000_VT_CTL);
2862                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2863                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2864                         vtctl |= adapter->vfs_allocated_count <<
2865                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2866                         wr32(E1000_VT_CTL, vtctl);
2867                 }
2868                 if (adapter->rss_queues > 1)
2869                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2870                 else
2871                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2872         } else {
2873                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2874         }
2875         igb_vmm_control(adapter);
2876
2877         /*
2878          * Generate RSS hash based on TCP port numbers and/or
2879          * IPv4/v6 src and dst addresses since UDP cannot be
2880          * hashed reliably due to IP fragmentation
2881          */
2882         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2883                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2884                 E1000_MRQC_RSS_FIELD_IPV6 |
2885                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2886                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2887
2888         wr32(E1000_MRQC, mrqc);
2889 }
2890
2891 /**
2892  * igb_setup_rctl - configure the receive control registers
2893  * @adapter: Board private structure
2894  **/
2895 void igb_setup_rctl(struct igb_adapter *adapter)
2896 {
2897         struct e1000_hw *hw = &adapter->hw;
2898         u32 rctl;
2899
2900         rctl = rd32(E1000_RCTL);
2901
2902         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2903         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2904
2905         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2906                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2907
2908         /*
2909          * enable stripping of CRC. It's unlikely this will break BMC
2910          * redirection as it did with e1000. Newer features require
2911          * that the HW strips the CRC.
2912          */
2913         rctl |= E1000_RCTL_SECRC;
2914
2915         /* disable store bad packets and clear size bits. */
2916         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2917
2918         /* enable LPE to prevent packets larger than max_frame_size */
2919         rctl |= E1000_RCTL_LPE;
2920
2921         /* disable queue 0 to prevent tail write w/o re-config */
2922         wr32(E1000_RXDCTL(0), 0);
2923
2924         /* Attention!!!  For SR-IOV PF driver operations you must enable
2925          * queue drop for all VF and PF queues to prevent head of line blocking
2926          * if an un-trusted VF does not provide descriptors to hardware.
2927          */
2928         if (adapter->vfs_allocated_count) {
2929                 /* set all queue drop enable bits */
2930                 wr32(E1000_QDE, ALL_QUEUES);
2931         }
2932
2933         wr32(E1000_RCTL, rctl);
2934 }
2935
2936 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2937                                    int vfn)
2938 {
2939         struct e1000_hw *hw = &adapter->hw;
2940         u32 vmolr;
2941
2942         /* if it isn't the PF, check whether the VF has VLANs enabled and
2943          * increase the size to allow for the vlan tag */
2944         if (vfn < adapter->vfs_allocated_count &&
2945             adapter->vf_data[vfn].vlans_enabled)
2946                 size += VLAN_TAG_SIZE;
2947
2948         vmolr = rd32(E1000_VMOLR(vfn));
2949         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2950         vmolr |= size | E1000_VMOLR_LPE;
2951         wr32(E1000_VMOLR(vfn), vmolr);
2952
2953         return 0;
2954 }
2955
2956 /**
2957  * igb_rlpml_set - set maximum receive packet size
2958  * @adapter: board private structure
2959  *
2960  * Configure maximum receivable packet size.
2961  **/
2962 static void igb_rlpml_set(struct igb_adapter *adapter)
2963 {
2964         u32 max_frame_size;
2965         struct e1000_hw *hw = &adapter->hw;
2966         u16 pf_id = adapter->vfs_allocated_count;
2967
2968         max_frame_size = adapter->max_frame_size + VLAN_TAG_SIZE;
2969
2970         /* if vfs are enabled we set RLPML to the largest possible request
2971          * size and set the VMOLR RLPML to the size we need */
2972         if (pf_id) {
2973                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2974                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2975         }
2976
2977         wr32(E1000_RLPML, max_frame_size);
2978 }
2979
2980 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2981                                  int vfn, bool aupe)
2982 {
2983         struct e1000_hw *hw = &adapter->hw;
2984         u32 vmolr;
2985
2986         /*
2987          * This register exists only on 82576 and newer, so if the MAC is
2988          * older there is nothing to do here
2989          */
2990         if (hw->mac.type < e1000_82576)
2991                 return;
2992
2993         vmolr = rd32(E1000_VMOLR(vfn));
2994         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2995         if (aupe)
2996                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2997         else
2998                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2999
3000         /* clear all bits that might not be set */
3001         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3002
3003         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3004                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3005         /*
3006          * for VMDq only allow the VFs and pool 0 to accept broadcast and
3007          * multicast packets
3008          */
3009         if (vfn <= adapter->vfs_allocated_count)
3010                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
3011
3012         wr32(E1000_VMOLR(vfn), vmolr);
3013 }
3014
3015 /**
3016  * igb_configure_rx_ring - Configure a receive ring after Reset
3017  * @adapter: board private structure
3018  * @ring: receive ring to be configured
3019  *
3020  * Configure a receive ring after a reset.
3021  **/
3022 void igb_configure_rx_ring(struct igb_adapter *adapter,
3023                            struct igb_ring *ring)
3024 {
3025         struct e1000_hw *hw = &adapter->hw;
3026         u64 rdba = ring->dma;
3027         int reg_idx = ring->reg_idx;
3028         u32 srrctl, rxdctl;
3029
3030         /* disable the queue */
3031         rxdctl = rd32(E1000_RXDCTL(reg_idx));
3032         wr32(E1000_RXDCTL(reg_idx),
3033                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
3034
3035         /* Set DMA base address registers */
3036         wr32(E1000_RDBAL(reg_idx),
3037              rdba & 0x00000000ffffffffULL);
3038         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3039         wr32(E1000_RDLEN(reg_idx),
3040                        ring->count * sizeof(union e1000_adv_rx_desc));
3041
3042         /* initialize head and tail */
3043         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
3044         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3045         writel(0, ring->head);
3046         writel(0, ring->tail);
3047
3048         /* set descriptor configuration */
3049         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
3050                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
3051                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3052 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3053                 srrctl |= IGB_RXBUFFER_16384 >>
3054                           E1000_SRRCTL_BSIZEPKT_SHIFT;
3055 #else
3056                 srrctl |= (PAGE_SIZE / 2) >>
3057                           E1000_SRRCTL_BSIZEPKT_SHIFT;
3058 #endif
3059                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3060         } else {
3061                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
3062                          E1000_SRRCTL_BSIZEPKT_SHIFT;
3063                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3064         }
3065         if (hw->mac.type == e1000_82580)
3066                 srrctl |= E1000_SRRCTL_TIMESTAMP;
3067         /* Only set Drop Enable if we are supporting multiple queues */
3068         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3069                 srrctl |= E1000_SRRCTL_DROP_EN;
3070
3071         wr32(E1000_SRRCTL(reg_idx), srrctl);
3072
3073         /* set filtering for VMDQ pools */
3074         igb_set_vmolr(adapter, reg_idx & 0x7, true);
3075
3076         /* enable receive descriptor fetching */
3077         rxdctl = rd32(E1000_RXDCTL(reg_idx));
3078         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3079         rxdctl &= 0xFFF00000;
3080         rxdctl |= IGB_RX_PTHRESH;
3081         rxdctl |= IGB_RX_HTHRESH << 8;
3082         rxdctl |= IGB_RX_WTHRESH << 16;
3083         wr32(E1000_RXDCTL(reg_idx), rxdctl);
3084 }
3085
3086 /**
3087  * igb_configure_rx - Configure receive Unit after Reset
3088  * @adapter: board private structure
3089  *
3090  * Configure the Rx unit of the MAC after a reset.
3091  **/
3092 static void igb_configure_rx(struct igb_adapter *adapter)
3093 {
3094         int i;
3095
3096         /* set UTA to appropriate mode */
3097         igb_set_uta(adapter);
3098
3099         /* set the correct pool for the PF default MAC address in entry 0 */
3100         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3101                          adapter->vfs_allocated_count);
3102
3103         /* Setup the HW Rx Head and Tail Descriptor Pointers and
3104          * the Base and Length of the Rx Descriptor Ring */
3105         for (i = 0; i < adapter->num_rx_queues; i++)
3106                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3107 }
3108
3109 /**
3110  * igb_free_tx_resources - Free Tx Resources per Queue
3111  * @tx_ring: Tx descriptor ring for a specific queue
3112  *
3113  * Free all transmit software resources
3114  **/
3115 void igb_free_tx_resources(struct igb_ring *tx_ring)
3116 {
3117         igb_clean_tx_ring(tx_ring);
3118
3119         vfree(tx_ring->buffer_info);
3120         tx_ring->buffer_info = NULL;
3121
3122         /* if not set, then don't free */
3123         if (!tx_ring->desc)
3124                 return;
3125
3126         dma_free_coherent(tx_ring->dev, tx_ring->size,
3127                           tx_ring->desc, tx_ring->dma);
3128
3129         tx_ring->desc = NULL;
3130 }
3131
3132 /**
3133  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3134  * @adapter: board private structure
3135  *
3136  * Free all transmit software resources
3137  **/
3138 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3139 {
3140         int i;
3141
3142         for (i = 0; i < adapter->num_tx_queues; i++)
3143                 igb_free_tx_resources(adapter->tx_ring[i]);
3144 }
3145
3146 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3147                                     struct igb_buffer *buffer_info)
3148 {
3149         if (buffer_info->dma) {
3150                 if (buffer_info->mapped_as_page)
3151                         dma_unmap_page(tx_ring->dev,
3152                                         buffer_info->dma,
3153                                         buffer_info->length,
3154                                         DMA_TO_DEVICE);
3155                 else
3156                         dma_unmap_single(tx_ring->dev,
3157                                         buffer_info->dma,
3158                                         buffer_info->length,
3159                                         DMA_TO_DEVICE);
3160                 buffer_info->dma = 0;
3161         }
3162         if (buffer_info->skb) {
3163                 dev_kfree_skb_any(buffer_info->skb);
3164                 buffer_info->skb = NULL;
3165         }
3166         buffer_info->time_stamp = 0;
3167         buffer_info->length = 0;
3168         buffer_info->next_to_watch = 0;
3169         buffer_info->mapped_as_page = false;
3170 }
3171
3172 /**
3173  * igb_clean_tx_ring - Free Tx Buffers
3174  * @tx_ring: ring to be cleaned
3175  **/
3176 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3177 {
3178         struct igb_buffer *buffer_info;
3179         unsigned long size;
3180         unsigned int i;
3181
3182         if (!tx_ring->buffer_info)
3183                 return;
3184         /* Free all the Tx ring sk_buffs */
3185
3186         for (i = 0; i < tx_ring->count; i++) {
3187                 buffer_info = &tx_ring->buffer_info[i];
3188                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3189         }
3190
3191         size = sizeof(struct igb_buffer) * tx_ring->count;
3192         memset(tx_ring->buffer_info, 0, size);
3193
3194         /* Zero out the descriptor ring */
3195         memset(tx_ring->desc, 0, tx_ring->size);
3196
3197         tx_ring->next_to_use = 0;
3198         tx_ring->next_to_clean = 0;
3199 }
3200
3201 /**
3202  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3203  * @adapter: board private structure
3204  **/
3205 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3206 {
3207         int i;
3208
3209         for (i = 0; i < adapter->num_tx_queues; i++)
3210                 igb_clean_tx_ring(adapter->tx_ring[i]);
3211 }
3212
3213 /**
3214  * igb_free_rx_resources - Free Rx Resources
3215  * @rx_ring: ring to clean the resources from
3216  *
3217  * Free all receive software resources
3218  **/
3219 void igb_free_rx_resources(struct igb_ring *rx_ring)
3220 {
3221         igb_clean_rx_ring(rx_ring);
3222
3223         vfree(rx_ring->buffer_info);
3224         rx_ring->buffer_info = NULL;
3225
3226         /* if not set, then don't free */
3227         if (!rx_ring->desc)
3228                 return;
3229
3230         dma_free_coherent(rx_ring->dev, rx_ring->size,
3231                           rx_ring->desc, rx_ring->dma);
3232
3233         rx_ring->desc = NULL;
3234 }
3235
3236 /**
3237  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3238  * @adapter: board private structure
3239  *
3240  * Free all receive software resources
3241  **/
3242 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3243 {
3244         int i;
3245
3246         for (i = 0; i < adapter->num_rx_queues; i++)
3247                 igb_free_rx_resources(adapter->rx_ring[i]);
3248 }
3249
3250 /**
3251  * igb_clean_rx_ring - Free Rx Buffers per Queue
3252  * @rx_ring: ring to free buffers from
3253  **/
3254 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3255 {
3256         struct igb_buffer *buffer_info;
3257         unsigned long size;
3258         unsigned int i;
3259
3260         if (!rx_ring->buffer_info)
3261                 return;
3262
3263         /* Free all the Rx ring sk_buffs */
3264         for (i = 0; i < rx_ring->count; i++) {
3265                 buffer_info = &rx_ring->buffer_info[i];
3266                 if (buffer_info->dma) {
3267                         dma_unmap_single(rx_ring->dev,
3268                                          buffer_info->dma,
3269                                          rx_ring->rx_buffer_len,
3270                                          DMA_FROM_DEVICE);
3271                         buffer_info->dma = 0;
3272                 }
3273
3274                 if (buffer_info->skb) {
3275                         dev_kfree_skb(buffer_info->skb);
3276                         buffer_info->skb = NULL;
3277                 }
3278                 if (buffer_info->page_dma) {
3279                         dma_unmap_page(rx_ring->dev,
3280                                        buffer_info->page_dma,
3281                                        PAGE_SIZE / 2,
3282                                        DMA_FROM_DEVICE);
3283                         buffer_info->page_dma = 0;
3284                 }
3285                 if (buffer_info->page) {
3286                         put_page(buffer_info->page);
3287                         buffer_info->page = NULL;
3288                         buffer_info->page_offset = 0;
3289                 }
3290         }
3291
3292         size = sizeof(struct igb_buffer) * rx_ring->count;
3293         memset(rx_ring->buffer_info, 0, size);
3294
3295         /* Zero out the descriptor ring */
3296         memset(rx_ring->desc, 0, rx_ring->size);
3297
3298         rx_ring->next_to_clean = 0;
3299         rx_ring->next_to_use = 0;
3300 }
3301
3302 /**
3303  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3304  * @adapter: board private structure
3305  **/
3306 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3307 {
3308         int i;
3309
3310         for (i = 0; i < adapter->num_rx_queues; i++)
3311                 igb_clean_rx_ring(adapter->rx_ring[i]);
3312 }
3313
3314 /**
3315  * igb_set_mac - Change the Ethernet Address of the NIC
3316  * @netdev: network interface device structure
3317  * @p: pointer to an address structure
3318  *
3319  * Returns 0 on success, negative on failure
3320  **/
3321 static int igb_set_mac(struct net_device *netdev, void *p)
3322 {
3323         struct igb_adapter *adapter = netdev_priv(netdev);
3324         struct e1000_hw *hw = &adapter->hw;
3325         struct sockaddr *addr = p;
3326
3327         if (!is_valid_ether_addr(addr->sa_data))
3328                 return -EADDRNOTAVAIL;
3329
3330         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3331         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3332
3333         /* set the correct pool for the new PF MAC address in entry 0 */
3334         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3335                          adapter->vfs_allocated_count);
3336
3337         return 0;
3338 }
3339
3340 /**
3341  * igb_write_mc_addr_list - write multicast addresses to MTA
3342  * @netdev: network interface device structure
3343  *
3344  * Writes multicast address list to the MTA hash table.
3345  * Returns: -ENOMEM on failure
3346  *                0 on no addresses written
3347  *                X on writing X addresses to MTA
3348  **/
3349 static int igb_write_mc_addr_list(struct net_device *netdev)
3350 {
3351         struct igb_adapter *adapter = netdev_priv(netdev);
3352         struct e1000_hw *hw = &adapter->hw;
3353         struct netdev_hw_addr *ha;
3354         u8  *mta_list;
3355         int i;
3356
3357         if (netdev_mc_empty(netdev)) {
3358                 /* nothing to program, so clear mc list */
3359                 igb_update_mc_addr_list(hw, NULL, 0);
3360                 igb_restore_vf_multicasts(adapter);
3361                 return 0;
3362         }
3363
3364         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3365         if (!mta_list)
3366                 return -ENOMEM;
3367
3368         /* The shared function expects a packed array of only addresses. */
3369         i = 0;
3370         netdev_for_each_mc_addr(ha, netdev)
3371                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3372
3373         igb_update_mc_addr_list(hw, mta_list, i);
3374         kfree(mta_list);
3375
3376         return netdev_mc_count(netdev);
3377 }
3378
3379 /**
3380  * igb_write_uc_addr_list - write unicast addresses to RAR table
3381  * @netdev: network interface device structure
3382  *
3383  * Writes unicast address list to the RAR table.
3384  * Returns: -ENOMEM on failure/insufficient address space
3385  *                0 on no addresses written
3386  *                X on writing X addresses to the RAR table
3387  **/
3388 static int igb_write_uc_addr_list(struct net_device *netdev)
3389 {
3390         struct igb_adapter *adapter = netdev_priv(netdev);
3391         struct e1000_hw *hw = &adapter->hw;
3392         unsigned int vfn = adapter->vfs_allocated_count;
3393         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3394         int count = 0;
3395
3396         /* return ENOMEM indicating insufficient memory for addresses */
3397         if (netdev_uc_count(netdev) > rar_entries)
3398                 return -ENOMEM;
3399
3400         if (!netdev_uc_empty(netdev) && rar_entries) {
3401                 struct netdev_hw_addr *ha;
3402
3403                 netdev_for_each_uc_addr(ha, netdev) {
3404                         if (!rar_entries)
3405                                 break;
3406                         igb_rar_set_qsel(adapter, ha->addr,
3407                                          rar_entries--,
3408                                          vfn);
3409                         count++;
3410                 }
3411         }
3412         /* write the addresses in reverse order to avoid write combining */
3413         for (; rar_entries > 0 ; rar_entries--) {
3414                 wr32(E1000_RAH(rar_entries), 0);
3415                 wr32(E1000_RAL(rar_entries), 0);
3416         }
3417         wrfl();
3418
3419         return count;
3420 }
3421
3422 /**
3423  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3424  * @netdev: network interface device structure
3425  *
3426  * The set_rx_mode entry point is called whenever the unicast or multicast
3427  * address lists or the network interface flags are updated.  This routine is
3428  * responsible for configuring the hardware for proper unicast, multicast,
3429  * promiscuous mode, and all-multi behavior.
3430  **/
3431 static void igb_set_rx_mode(struct net_device *netdev)
3432 {
3433         struct igb_adapter *adapter = netdev_priv(netdev);
3434         struct e1000_hw *hw = &adapter->hw;
3435         unsigned int vfn = adapter->vfs_allocated_count;
3436         u32 rctl, vmolr = 0;
3437         int count;
3438
3439         /* Check for Promiscuous and All Multicast modes */
3440         rctl = rd32(E1000_RCTL);
3441
3442         /* clear the affected bits */
3443         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3444
3445         if (netdev->flags & IFF_PROMISC) {
3446                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3447                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3448         } else {
3449                 if (netdev->flags & IFF_ALLMULTI) {
3450                         rctl |= E1000_RCTL_MPE;
3451                         vmolr |= E1000_VMOLR_MPME;
3452                 } else {
3453                         /*
3454                          * Write addresses to the MTA, if the attempt fails
3455                          * then we should just turn on promiscuous mode so
3456                          * that we can at least receive multicast traffic
3457                          */
3458                         count = igb_write_mc_addr_list(netdev);
3459                         if (count < 0) {
3460                                 rctl |= E1000_RCTL_MPE;
3461                                 vmolr |= E1000_VMOLR_MPME;
3462                         } else if (count) {
3463                                 vmolr |= E1000_VMOLR_ROMPE;
3464                         }
3465                 }
3466                 /*
3467                  * Write addresses to available RAR registers, if there is not
3468                  * sufficient space to store all the addresses then enable
3469                  * unicast promiscuous mode
3470                  */
3471                 count = igb_write_uc_addr_list(netdev);
3472                 if (count < 0) {
3473                         rctl |= E1000_RCTL_UPE;
3474                         vmolr |= E1000_VMOLR_ROPE;
3475                 }
3476                 rctl |= E1000_RCTL_VFE;
3477         }
3478         wr32(E1000_RCTL, rctl);
3479
3480         /*
3481          * In order to support SR-IOV and eventually VMDq it is necessary to set
3482          * the VMOLR to enable the appropriate modes.  Without this workaround
3483          * we will have issues with VLAN tag stripping not being done for frames
3484          * that are only arriving because we are the default pool
3485          */
3486         if (hw->mac.type < e1000_82576)
3487                 return;
3488
3489         vmolr |= rd32(E1000_VMOLR(vfn)) &
3490                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3491         wr32(E1000_VMOLR(vfn), vmolr);
3492         igb_restore_vf_multicasts(adapter);
3493 }
3494
3495 static void igb_check_wvbr(struct igb_adapter *adapter)
3496 {
3497         struct e1000_hw *hw = &adapter->hw;
3498         u32 wvbr = 0;
3499
3500         switch (hw->mac.type) {
3501         case e1000_82576:
3502         case e1000_i350:
3503                 if (!(wvbr = rd32(E1000_WVBR)))
3504                         return;
3505                 break;
3506         default:
3507                 break;
3508         }
3509
3510         adapter->wvbr |= wvbr;
3511 }
3512
3513 #define IGB_STAGGERED_QUEUE_OFFSET 8
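/* The WVBR register appears to report spoof events with one bit per VF queue,
 * a VF's two queues sitting 8 bit positions apart; igb_spoof_check() below
 * tests and clears both bits for each VF. */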
3514
3515 static void igb_spoof_check(struct igb_adapter *adapter)
3516 {
3517         int j;
3518
3519         if (!adapter->wvbr)
3520                 return;
3521
3522         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3523                 if (adapter->wvbr & (1 << j) ||
3524                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3525                         dev_warn(&adapter->pdev->dev,
3526                                 "Spoof event(s) detected on VF %d\n", j);
3527                         adapter->wvbr &=
3528                                 ~((1 << j) |
3529                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3530                 }
3531         }
3532 }
3533
3534 /* Need to wait a few seconds after link up to get diagnostic information from
3535  * the phy */
3536 static void igb_update_phy_info(unsigned long data)
3537 {
3538         struct igb_adapter *adapter = (struct igb_adapter *) data;
3539         igb_get_phy_info(&adapter->hw);
3540 }
3541
3542 /**
3543  * igb_has_link - check shared code for link and determine up/down
3544  * @adapter: pointer to driver private info
3545  **/
3546 bool igb_has_link(struct igb_adapter *adapter)
3547 {
3548         struct e1000_hw *hw = &adapter->hw;
3549         bool link_active = false;
3550         s32 ret_val = 0;
3551
3552         /* get_link_status is set on LSC (link status) interrupt or
3553          * rx sequence error interrupt.  get_link_status will stay
3554          * true until the e1000_check_for_link establishes link
3555          * for copper adapters ONLY
3556          */
3557         switch (hw->phy.media_type) {
3558         case e1000_media_type_copper:
3559                 if (hw->mac.get_link_status) {
3560                         ret_val = hw->mac.ops.check_for_link(hw);
3561                         link_active = !hw->mac.get_link_status;
3562                 } else {
3563                         link_active = true;
3564                 }
3565                 break;
3566         case e1000_media_type_internal_serdes:
3567                 ret_val = hw->mac.ops.check_for_link(hw);
3568                 link_active = hw->mac.serdes_has_link;
3569                 break;
3570         default:
3571         case e1000_media_type_unknown:
3572                 break;
3573         }
3574
3575         return link_active;
3576 }
3577
3578 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3579 {
3580         bool ret = false;
3581         u32 ctrl_ext, thstat;
3582
3583         /* check for thermal sensor event on i350, copper only */
3584         if (hw->mac.type == e1000_i350) {
3585                 thstat = rd32(E1000_THSTAT);
3586                 ctrl_ext = rd32(E1000_CTRL_EXT);
3587
3588                 if ((hw->phy.media_type == e1000_media_type_copper) &&
3589                     !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3590                         ret = !!(thstat & event);
3591                 }
3592         }
3593
3594         return ret;
3595 }
3596
3597 /**
3598  * igb_watchdog - Timer Call-back
3599  * @data: pointer to adapter cast into an unsigned long
3600  **/
3601 static void igb_watchdog(unsigned long data)
3602 {
3603         struct igb_adapter *adapter = (struct igb_adapter *)data;
3604         /* Do the rest outside of interrupt context */
3605         schedule_work(&adapter->watchdog_task);
3606 }
3607
3608 static void igb_watchdog_task(struct work_struct *work)
3609 {
3610         struct igb_adapter *adapter = container_of(work,
3611                                                    struct igb_adapter,
3612                                                    watchdog_task);
3613         struct e1000_hw *hw = &adapter->hw;
3614         struct net_device *netdev = adapter->netdev;
3615         u32 link;
3616         int i;
3617
3618         link = igb_has_link(adapter);
3619         if (link) {
3620                 if (!netif_carrier_ok(netdev)) {
3621                         u32 ctrl;
3622                         hw->mac.ops.get_speed_and_duplex(hw,
3623                                                          &adapter->link_speed,
3624                                                          &adapter->link_duplex);
3625
3626                         ctrl = rd32(E1000_CTRL);
3627                         /* Link status message must follow this format */
3628                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3629                                  "Flow Control: %s\n",
3630                                netdev->name,
3631                                adapter->link_speed,
3632                                adapter->link_duplex == FULL_DUPLEX ?
3633                                  "Full Duplex" : "Half Duplex",
3634                                ((ctrl & E1000_CTRL_TFCE) &&
3635                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3636                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3637                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3638
3639                         /* check for thermal sensor event */
3640                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3641                                 printk(KERN_INFO "igb: %s The network adapter "
3642                                                  "link speed was downshifted "
3643                                                  "because it overheated.\n",
3644                                                  netdev->name);
3645                         }
3646
3647                         /* adjust timeout factor according to speed/duplex */
3648                         adapter->tx_timeout_factor = 1;
3649                         switch (adapter->link_speed) {
3650                         case SPEED_10:
3651                                 adapter->tx_timeout_factor = 14;
3652                                 break;
3653                         case SPEED_100:
3654                                 /* maybe add some timeout factor ? */
3655                                 break;
3656                         }
3657
3658                         netif_carrier_on(netdev);
3659
3660                         igb_ping_all_vfs(adapter);
3661                         igb_check_vf_rate_limit(adapter);
3662
3663                         /* link state has changed, schedule phy info update */
3664                         if (!test_bit(__IGB_DOWN, &adapter->state))
3665                                 mod_timer(&adapter->phy_info_timer,
3666                                           round_jiffies(jiffies + 2 * HZ));
3667                 }
3668         } else {
3669                 if (netif_carrier_ok(netdev)) {
3670                         adapter->link_speed = 0;
3671                         adapter->link_duplex = 0;
3672
3673                         /* check for thermal sensor event */
3674                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3675                                 printk(KERN_ERR "igb: %s The network adapter "
3676                                                 "was stopped because it "
3677                                                 "overheated.\n",
3678                                                 netdev->name);
3679                         }
3680
3681                         /* Link status message must follow this format */
3682                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3683                                netdev->name);
3684                         netif_carrier_off(netdev);
3685
3686                         igb_ping_all_vfs(adapter);
3687
3688                         /* link state has changed, schedule phy info update */
3689                         if (!test_bit(__IGB_DOWN, &adapter->state))
3690                                 mod_timer(&adapter->phy_info_timer,
3691                                           round_jiffies(jiffies + 2 * HZ));
3692                 }
3693         }
3694
3695         spin_lock(&adapter->stats64_lock);
3696         igb_update_stats(adapter, &adapter->stats64);
3697         spin_unlock(&adapter->stats64_lock);
3698
3699         for (i = 0; i < adapter->num_tx_queues; i++) {
3700                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3701                 if (!netif_carrier_ok(netdev)) {
3702                         /* We've lost link, so the controller stops DMA,
3703                          * but we've got queued Tx work that's never going
3704                          * to get done, so reset controller to flush Tx.
3705                          * (Do the reset outside of interrupt context). */
3706                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3707                                 adapter->tx_timeout_count++;
3708                                 schedule_work(&adapter->reset_task);
3709                                 /* return immediately since reset is imminent */
3710                                 return;
3711                         }
3712                 }
3713
3714                 /* Force detection of hung controller every watchdog period */
3715                 tx_ring->detect_tx_hung = true;
3716         }
3717
3718         /* Cause software interrupt to ensure rx ring is cleaned */
3719         if (adapter->msix_entries) {
3720                 u32 eics = 0;
3721                 for (i = 0; i < adapter->num_q_vectors; i++) {
3722                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3723                         eics |= q_vector->eims_value;
3724                 }
3725                 wr32(E1000_EICS, eics);
3726         } else {
3727                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3728         }
3729
3730         igb_spoof_check(adapter);
3731
3732         /* Reset the timer */
3733         if (!test_bit(__IGB_DOWN, &adapter->state))
3734                 mod_timer(&adapter->watchdog_timer,
3735                           round_jiffies(jiffies + 2 * HZ));
3736 }
3737
3738 enum latency_range {
3739         lowest_latency = 0,
3740         low_latency = 1,
3741         bulk_latency = 2,
3742         latency_invalid = 255
3743 };
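
/* Informational: the itr_val numbers used below appear to be in ~256 ns units,
 * which matches the "aka" comments in igb_set_itr(): 196 ~= 50 us ~= 20,000
 * ints/sec and 980 ~= 250 us ~= 4,000 ints/sec. */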
3744
3745 /**
3746  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3747  *
3748  *      Stores a new ITR value based strictly on packet size.  This
3749  *      algorithm is less sophisticated than that used in igb_update_itr,
3750  *      due to the difficulty of synchronizing statistics across multiple
3751  *      receive rings.  The divisors and thresholds used by this function
3752  *      were determined based on theoretical maximum wire speed and testing
3753  *      data, in order to minimize response time while increasing bulk
3754  *      throughput.
3755  *      This functionality is controlled by the InterruptThrottleRate module
3756  *      parameter (see igb_param.c)
3757  *      NOTE:  This function is called only when operating in a multiqueue
3758  *             receive environment.
3759  * @q_vector: pointer to q_vector
3760  **/
3761 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3762 {
3763         int new_val = q_vector->itr_val;
3764         int avg_wire_size = 0;
3765         struct igb_adapter *adapter = q_vector->adapter;
3766         struct igb_ring *ring;
3767         unsigned int packets;
3768
3769         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3770          * ints/sec - an ITR value of 976.
3771          */
3772         if (adapter->link_speed != SPEED_1000) {
3773                 new_val = 976;
3774                 goto set_itr_val;
3775         }
3776
3777         ring = q_vector->rx_ring;
3778         if (ring) {
3779                 packets = ACCESS_ONCE(ring->total_packets);
3780
3781                 if (packets)
3782                         avg_wire_size = ring->total_bytes / packets;
3783         }
3784
3785         ring = q_vector->tx_ring;
3786         if (ring) {
3787                 packets = ACCESS_ONCE(ring->total_packets);
3788
3789                 if (packets)
3790                         avg_wire_size = max_t(u32, avg_wire_size,
3791                                               ring->total_bytes / packets);
3792         }
3793
3794         /* if avg_wire_size isn't set no work was done */
3795         if (!avg_wire_size)
3796                 goto clear_counts;
3797
3798         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3799         avg_wire_size += 24;
3800
3801         /* Don't starve jumbo frames */
3802         avg_wire_size = min(avg_wire_size, 3000);
3803
3804         /* Give a little boost to mid-size frames */
3805         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3806                 new_val = avg_wire_size / 3;
3807         else
3808                 new_val = avg_wire_size / 2;
3809
3810         /* when in itr mode 3 do not exceed 20K ints/sec */
3811         if (adapter->rx_itr_setting == 3 && new_val < 196)
3812                 new_val = 196;
3813
3814 set_itr_val:
3815         if (new_val != q_vector->itr_val) {
3816                 q_vector->itr_val = new_val;
3817                 q_vector->set_itr = 1;
3818         }
3819 clear_counts:
3820         if (q_vector->rx_ring) {
3821                 q_vector->rx_ring->total_bytes = 0;
3822                 q_vector->rx_ring->total_packets = 0;
3823         }
3824         if (q_vector->tx_ring) {
3825                 q_vector->tx_ring->total_bytes = 0;
3826                 q_vector->tx_ring->total_packets = 0;
3827         }
3828 }
3829
3830 /**
3831  * igb_update_itr - update the dynamic ITR value based on statistics
3832  *      Stores a new ITR value based on packets and byte
3833  *      counts during the last interrupt.  The advantage of per interrupt
3834  *      computation is faster updates and more accurate ITR for the current
3835  *      traffic pattern.  Constants in this function were computed
3836  *      based on theoretical maximum wire speed and thresholds were set based
3837  *      on testing data as well as attempting to minimize response time
3838  *      while increasing bulk throughput.
3839  *      this functionality is controlled by the InterruptThrottleRate module
3840  *      parameter (see igb_param.c)
3841  *      NOTE:  These calculations are only valid when operating in a single-
3842  *             queue environment.
3843  * @adapter: pointer to adapter
3844  * @itr_setting: current q_vector->itr_val
3845  * @packets: the number of packets during this measurement interval
3846  * @bytes: the number of bytes during this measurement interval
3847  **/
3848 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3849                                    int packets, int bytes)
3850 {
3851         unsigned int retval = itr_setting;
3852
3853         if (packets == 0)
3854                 goto update_itr_done;
3855
3856         switch (itr_setting) {
3857         case lowest_latency:
3858                 /* handle TSO and jumbo frames */
3859                 if (bytes/packets > 8000)
3860                         retval = bulk_latency;
3861                 else if ((packets < 5) && (bytes > 512))
3862                         retval = low_latency;
3863                 break;
3864         case low_latency:  /* 50 usec aka 20000 ints/s */
3865                 if (bytes > 10000) {
3866                         /* this if handles the TSO accounting */
3867                         if (bytes/packets > 8000) {
3868                                 retval = bulk_latency;
3869                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3870                                 retval = bulk_latency;
3871                         } else if (packets > 35) {
3872                                 retval = lowest_latency;
3873                         }
3874                 } else if (bytes/packets > 2000) {
3875                         retval = bulk_latency;
3876                 } else if (packets <= 2 && bytes < 512) {
3877                         retval = lowest_latency;
3878                 }
3879                 break;
3880         case bulk_latency: /* 250 usec aka 4000 ints/s */
3881                 if (bytes > 25000) {
3882                         if (packets > 35)
3883                                 retval = low_latency;
3884                 } else if (bytes < 1500) {
3885                         retval = low_latency;
3886                 }
3887                 break;
3888         }
3889
3890 update_itr_done:
3891         return retval;
3892 }
3893
3894 static void igb_set_itr(struct igb_adapter *adapter)
3895 {
3896         struct igb_q_vector *q_vector = adapter->q_vector[0];
3897         u16 current_itr;
3898         u32 new_itr = q_vector->itr_val;
3899
3900         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3901         if (adapter->link_speed != SPEED_1000) {
3902                 current_itr = 0;
3903                 new_itr = 4000;
3904                 goto set_itr_now;
3905         }
3906
3907         adapter->rx_itr = igb_update_itr(adapter,
3908                                     adapter->rx_itr,
3909                                     q_vector->rx_ring->total_packets,
3910                                     q_vector->rx_ring->total_bytes);
3911
3912         adapter->tx_itr = igb_update_itr(adapter,
3913                                     adapter->tx_itr,
3914                                     q_vector->tx_ring->total_packets,
3915                                     q_vector->tx_ring->total_bytes);
3916         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3917
3918         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3919         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3920                 current_itr = low_latency;
3921
3922         switch (current_itr) {
3923         /* counts and packets in update_itr are dependent on these numbers */
3924         case lowest_latency:
3925                 new_itr = 56;  /* aka 70,000 ints/sec */
3926                 break;
3927         case low_latency:
3928                 new_itr = 196; /* aka 20,000 ints/sec */
3929                 break;
3930         case bulk_latency:
3931                 new_itr = 980; /* aka 4,000 ints/sec */
3932                 break;
3933         default:
3934                 break;
3935         }
3936
3937 set_itr_now:
3938         q_vector->rx_ring->total_bytes = 0;
3939         q_vector->rx_ring->total_packets = 0;
3940         q_vector->tx_ring->total_bytes = 0;
3941         q_vector->tx_ring->total_packets = 0;
3942
3943         if (new_itr != q_vector->itr_val) {
3944                 /* this attempts to bias the interrupt rate towards Bulk
3945                  * by adding intermediate steps when interrupt rate is
3946                  * increasing */
3947                 new_itr = new_itr > q_vector->itr_val ?
3948                              max((new_itr * q_vector->itr_val) /
3949                                  (new_itr + (q_vector->itr_val >> 2)),
3950                                  new_itr) :
3951                              new_itr;
3952                 /* Don't write the value here; it resets the adapter's
3953                  * internal timer, and causes us to delay far longer than
3954                  * we should between interrupts.  Instead, we write the ITR
3955                  * value at the beginning of the next interrupt so the timing
3956                  * ends up being correct.
3957                  */
3958                 q_vector->itr_val = new_itr;
3959                 q_vector->set_itr = 1;
3960         }
3961 }
3962
3963 #define IGB_TX_FLAGS_CSUM               0x00000001
3964 #define IGB_TX_FLAGS_VLAN               0x00000002
3965 #define IGB_TX_FLAGS_TSO                0x00000004
3966 #define IGB_TX_FLAGS_IPV4               0x00000008
3967 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3968 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3969 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
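
/* Example (illustrative): a VLAN tag of 0x0123 travels in the upper 16 bits of
 * tx_flags as (0x0123 << IGB_TX_FLAGS_VLAN_SHIFT) and is recovered with
 * (tx_flags & IGB_TX_FLAGS_VLAN_MASK) when the context descriptor is built. */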
3970
3971 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3972                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3973 {
3974         struct e1000_adv_tx_context_desc *context_desc;
3975         unsigned int i;
3976         int err;
3977         struct igb_buffer *buffer_info;
3978         u32 info = 0, tu_cmd = 0;
3979         u32 mss_l4len_idx;
3980         u8 l4len;
3981
3982         if (skb_header_cloned(skb)) {
3983                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3984                 if (err)
3985                         return err;
3986         }
3987
3988         l4len = tcp_hdrlen(skb);
3989         *hdr_len += l4len;
3990
3991         if (skb->protocol == htons(ETH_P_IP)) {
3992                 struct iphdr *iph = ip_hdr(skb);
3993                 iph->tot_len = 0;
3994                 iph->check = 0;
3995                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3996                                                          iph->daddr, 0,
3997                                                          IPPROTO_TCP,
3998                                                          0);
3999         } else if (skb_is_gso_v6(skb)) {
4000                 ipv6_hdr(skb)->payload_len = 0;
4001                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4002                                                        &ipv6_hdr(skb)->daddr,
4003                                                        0, IPPROTO_TCP, 0);
4004         }
4005
4006         i = tx_ring->next_to_use;
4007
4008         buffer_info = &tx_ring->buffer_info[i];
4009         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4010         /* VLAN MACLEN IPLEN */
4011         if (tx_flags & IGB_TX_FLAGS_VLAN)
4012                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4013         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4014         *hdr_len += skb_network_offset(skb);
4015         info |= skb_network_header_len(skb);
4016         *hdr_len += skb_network_header_len(skb);
4017         context_desc->vlan_macip_lens = cpu_to_le32(info);
4018
4019         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4020         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4021
4022         if (skb->protocol == htons(ETH_P_IP))
4023                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4024         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4025
4026         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4027
4028         /* MSS L4LEN IDX */
4029         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
4030         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
4031
4032         /* For 82575, context index must be unique per ring. */
4033         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4034                 mss_l4len_idx |= tx_ring->reg_idx << 4;
4035
4036         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4037         context_desc->seqnum_seed = 0;
4038
4039         buffer_info->time_stamp = jiffies;
4040         buffer_info->next_to_watch = i;
4041         buffer_info->dma = 0;
4042         i++;
4043         if (i == tx_ring->count)
4044                 i = 0;
4045
4046         tx_ring->next_to_use = i;
4047
4048         return true;
4049 }
4050
4051 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
4052                                    struct sk_buff *skb, u32 tx_flags)
4053 {
4054         struct e1000_adv_tx_context_desc *context_desc;
4055         struct device *dev = tx_ring->dev;
4056         struct igb_buffer *buffer_info;
4057         u32 info = 0, tu_cmd = 0;
4058         unsigned int i;
4059
4060         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4061             (tx_flags & IGB_TX_FLAGS_VLAN)) {
4062                 i = tx_ring->next_to_use;
4063                 buffer_info = &tx_ring->buffer_info[i];
4064                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4065
4066                 if (tx_flags & IGB_TX_FLAGS_VLAN)
4067                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4068
4069                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4070                 if (skb->ip_summed == CHECKSUM_PARTIAL)
4071                         info |= skb_network_header_len(skb);
4072
4073                 context_desc->vlan_macip_lens = cpu_to_le32(info);
4074
4075                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4076
4077                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
4078                         __be16 protocol;
4079
4080                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4081                                 const struct vlan_ethhdr *vhdr =
4082                                           (const struct vlan_ethhdr*)skb->data;
4083
4084                                 protocol = vhdr->h_vlan_encapsulated_proto;
4085                         } else {
4086                                 protocol = skb->protocol;
4087                         }
4088
4089                         switch (protocol) {
4090                         case cpu_to_be16(ETH_P_IP):
4091                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4092                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4093                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4094                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4095                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4096                                 break;
4097                         case cpu_to_be16(ETH_P_IPV6):
4098                                 /* XXX what about other V6 headers?? */
4099                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4100                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4101                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4102                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4103                                 break;
4104                         default:
4105                                 if (unlikely(net_ratelimit()))
4106                                         dev_warn(dev,
4107                                             "partial checksum but proto=%x!\n",
4108                                             skb->protocol);
4109                                 break;
4110                         }
4111                 }
4112
4113                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4114                 context_desc->seqnum_seed = 0;
4115                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4116                         context_desc->mss_l4len_idx =
4117                                 cpu_to_le32(tx_ring->reg_idx << 4);
4118
4119                 buffer_info->time_stamp = jiffies;
4120                 buffer_info->next_to_watch = i;
4121                 buffer_info->dma = 0;
4122
4123                 i++;
4124                 if (i == tx_ring->count)
4125                         i = 0;
4126                 tx_ring->next_to_use = i;
4127
4128                 return true;
4129         }
4130         return false;
4131 }
4132
4133 #define IGB_MAX_TXD_PWR 16
4134 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
4135
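/**
 * igb_tx_map_adv - map skb data and fragments for transmit
 * @tx_ring: ring on which the packet will be sent
 * @skb: packet whose head and page fragments are DMA mapped
 * @first: index of the first buffer_info used for this packet
 *
 * Maps skb->data and each page fragment into the ring's buffer_info
 * entries and records the length, timestamp and next_to_watch values
 * needed to clean the packet later.  Returns the number of descriptors
 * used, or 0 if a DMA mapping failed and the mappings were unwound.
 **/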
4136 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
4137                                  unsigned int first)
4138 {
4139         struct igb_buffer *buffer_info;
4140         struct device *dev = tx_ring->dev;
4141         unsigned int hlen = skb_headlen(skb);
4142         unsigned int count = 0, i;
4143         unsigned int f;
4144         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4145
4146         i = tx_ring->next_to_use;
4147
4148         buffer_info = &tx_ring->buffer_info[i];
4149         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4150         buffer_info->length = hlen;
4151         /* set time_stamp *before* dma to help avoid a possible race */
4152         buffer_info->time_stamp = jiffies;
4153         buffer_info->next_to_watch = i;
4154         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4155                                           DMA_TO_DEVICE);
4156         if (dma_mapping_error(dev, buffer_info->dma))
4157                 goto dma_error;
4158
4159         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4160                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4161                 unsigned int len = frag->size;
4162
4163                 count++;
4164                 i++;
4165                 if (i == tx_ring->count)
4166                         i = 0;
4167
4168                 buffer_info = &tx_ring->buffer_info[i];
4169                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4170                 buffer_info->length = len;
4171                 buffer_info->time_stamp = jiffies;
4172                 buffer_info->next_to_watch = i;
4173                 buffer_info->mapped_as_page = true;
4174                 buffer_info->dma = dma_map_page(dev,
4175                                                 frag->page,
4176                                                 frag->page_offset,
4177                                                 len,
4178                                                 DMA_TO_DEVICE);
4179                 if (dma_mapping_error(dev, buffer_info->dma))
4180                         goto dma_error;
4181
4182         }
4183
4184         tx_ring->buffer_info[i].skb = skb;
4185         tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4186         /* multiply data chunks by size of headers */
4187         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4188         tx_ring->buffer_info[i].gso_segs = gso_segs;
4189         tx_ring->buffer_info[first].next_to_watch = i;
4190
4191         return ++count;
4192
4193 dma_error:
4194         dev_err(dev, "TX DMA map failed\n");
4195
4196         /* clear timestamp and dma mappings for failed buffer_info mapping */
4197         buffer_info->dma = 0;
4198         buffer_info->time_stamp = 0;
4199         buffer_info->length = 0;
4200         buffer_info->next_to_watch = 0;
4201         buffer_info->mapped_as_page = false;
4202
4203         /* clear timestamp and dma mappings for remaining portion of packet */
4204         while (count--) {
4205                 if (i == 0)
4206                         i = tx_ring->count;
4207                 i--;
4208                 buffer_info = &tx_ring->buffer_info[i];
4209                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4210         }
4211
4212         return 0;
4213 }
4214
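/**
 * igb_tx_queue_adv - write descriptors for a mapped packet and bump tail
 * @tx_ring: ring on which the packet was mapped
 * @tx_flags: IGB_TX_FLAGS_* describing offloads for this packet
 * @count: number of descriptors returned by igb_tx_map_adv
 * @paylen: total length of the packet in bytes
 * @hdr_len: length of the TSO header, 0 if not doing TSO
 *
 * Fills in one advanced data descriptor per mapped buffer, applying the
 * VLAN, timestamp, TSO and checksum options from tx_flags, sets
 * IGB_ADVTXD_DCMD on the final descriptor and writes the new tail to
 * hardware.
 **/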
4215 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4216                                     u32 tx_flags, int count, u32 paylen,
4217                                     u8 hdr_len)
4218 {
4219         union e1000_adv_tx_desc *tx_desc;
4220         struct igb_buffer *buffer_info;
4221         u32 olinfo_status = 0, cmd_type_len;
4222         unsigned int i = tx_ring->next_to_use;
4223
4224         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4225                         E1000_ADVTXD_DCMD_DEXT);
4226
4227         if (tx_flags & IGB_TX_FLAGS_VLAN)
4228                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4229
4230         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4231                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4232
4233         if (tx_flags & IGB_TX_FLAGS_TSO) {
4234                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4235
4236                 /* insert tcp checksum */
4237                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4238
4239                 /* insert ip checksum */
4240                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4241                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4242
4243         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4244                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4245         }
4246
4247         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4248             (tx_flags & (IGB_TX_FLAGS_CSUM |
4249                          IGB_TX_FLAGS_TSO |
4250                          IGB_TX_FLAGS_VLAN)))
4251                 olinfo_status |= tx_ring->reg_idx << 4;
4252
4253         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4254
4255         do {
4256                 buffer_info = &tx_ring->buffer_info[i];
4257                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4258                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4259                 tx_desc->read.cmd_type_len =
4260                         cpu_to_le32(cmd_type_len | buffer_info->length);
4261                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4262                 count--;
4263                 i++;
4264                 if (i == tx_ring->count)
4265                         i = 0;
4266         } while (count > 0);
4267
4268         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4269         /* Force memory writes to complete before letting h/w
4270          * know there are new descriptors to fetch.  (Only
4271          * applicable for weak-ordered memory model archs,
4272          * such as IA-64). */
4273         wmb();
4274
4275         tx_ring->next_to_use = i;
4276         writel(i, tx_ring->tail);
4277         /* we need this if more than one processor can write to our tail
4278          * at a time; it synchronizes IO on IA64/Altix systems */
4279         mmiowb();
4280 }
4281
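/**
 * __igb_maybe_stop_tx - stop the queue and re-check for free descriptors
 * @tx_ring: ring that is running out of descriptors
 * @size: number of descriptors needed
 *
 * Slow path of igb_maybe_stop_tx: stops the subqueue, then re-checks
 * after a memory barrier in case another CPU freed descriptors in the
 * meantime.  Returns -EBUSY if the ring is still full, otherwise wakes
 * the subqueue again and returns 0.
 **/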
4282 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4283 {
4284         struct net_device *netdev = tx_ring->netdev;
4285
4286         netif_stop_subqueue(netdev, tx_ring->queue_index);
4287
4288         /* Herbert's original patch had:
4289          *  smp_mb__after_netif_stop_queue();
4290          * but since that doesn't exist yet, just open code it. */
4291         smp_mb();
4292
4293         /* We need to check again in case another CPU has just
4294          * made room available. */
4295         if (igb_desc_unused(tx_ring) < size)
4296                 return -EBUSY;
4297
4298         /* A reprieve! */
4299         netif_wake_subqueue(netdev, tx_ring->queue_index);
4300
4301         u64_stats_update_begin(&tx_ring->tx_syncp2);
4302         tx_ring->tx_stats.restart_queue2++;
4303         u64_stats_update_end(&tx_ring->tx_syncp2);
4304
4305         return 0;
4306 }
4307
4308 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4309 {
4310         if (igb_desc_unused(tx_ring) >= size)
4311                 return 0;
4312         return __igb_maybe_stop_tx(tx_ring, size);
4313 }
4314
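/**
 * igb_xmit_frame_ring_adv - transmit one skb on a specific ring
 * @skb: packet to send
 * @tx_ring: ring to place the packet on
 *
 * Reserves descriptors, records hardware timestamp and VLAN flags,
 * sets up TSO or checksum offload, maps the packet and posts the
 * descriptors to hardware.  Returns NETDEV_TX_BUSY if the ring does
 * not have enough free descriptors, NETDEV_TX_OK otherwise.
 **/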
4315 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4316                                     struct igb_ring *tx_ring)
4317 {
4318         int tso = 0, count;
4319         u32 tx_flags = 0;
4320         u16 first;
4321         u8 hdr_len = 0;
4322
4323         /* need: 1 descriptor per page,
4324          *       + 2 desc gap to keep tail from touching head,
4325          *       + 1 desc for skb->data,
4326          *       + 1 desc for context descriptor,
4327          * otherwise try next time */
4328         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4329                 /* this is a hard error */
4330                 return NETDEV_TX_BUSY;
4331         }
4332
4333         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4334                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4335                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4336         }
4337
4338         if (vlan_tx_tag_present(skb)) {
4339                 tx_flags |= IGB_TX_FLAGS_VLAN;
4340                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4341         }
4342
4343         if (skb->protocol == htons(ETH_P_IP))
4344                 tx_flags |= IGB_TX_FLAGS_IPV4;
4345
4346         first = tx_ring->next_to_use;
4347         if (skb_is_gso(skb)) {
4348                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4349
4350                 if (tso < 0) {
4351                         dev_kfree_skb_any(skb);
4352                         return NETDEV_TX_OK;
4353                 }
4354         }
4355
4356         if (tso)
4357                 tx_flags |= IGB_TX_FLAGS_TSO;
4358         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4359                  (skb->ip_summed == CHECKSUM_PARTIAL))
4360                 tx_flags |= IGB_TX_FLAGS_CSUM;
4361
4362         /*
4363          * count reflects descriptors mapped; if 0 or less, a mapping error
4364          * has occurred and we need to rewind the descriptor queue
4365          */
4366         count = igb_tx_map_adv(tx_ring, skb, first);
4367         if (!count) {
4368                 dev_kfree_skb_any(skb);
4369                 tx_ring->buffer_info[first].time_stamp = 0;
4370                 tx_ring->next_to_use = first;
4371                 return NETDEV_TX_OK;
4372         }
4373
4374         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4375
4376         /* Make sure there is space in the ring for the next send. */
4377         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4378
4379         return NETDEV_TX_OK;
4380 }
4381
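/**
 * igb_xmit_frame_adv - ndo_start_xmit entry point
 * @skb: packet handed down by the stack
 * @netdev: network interface device structure
 *
 * Drops the packet if the adapter is going down or the skb is empty,
 * selects a transmit ring from the skb's queue mapping and hands the
 * packet to igb_xmit_frame_ring_adv.
 **/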
4382 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4383                                       struct net_device *netdev)
4384 {
4385         struct igb_adapter *adapter = netdev_priv(netdev);
4386         struct igb_ring *tx_ring;
4387         int r_idx = 0;
4388
4389         if (test_bit(__IGB_DOWN, &adapter->state)) {
4390                 dev_kfree_skb_any(skb);
4391                 return NETDEV_TX_OK;
4392         }
4393
4394         if (skb->len <= 0) {
4395                 dev_kfree_skb_any(skb);
4396                 return NETDEV_TX_OK;
4397         }
4398
4399         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4400         tx_ring = adapter->multi_tx_table[r_idx];
4401
4402         /* This goes back to the question of how to logically map a tx queue
4403          * to a flow.  Right now, performance is impacted slightly negatively
4404          * if using multiple tx queues.  If the stack breaks away from a
4405          * single qdisc implementation, we can look at this again. */
4406         return igb_xmit_frame_ring_adv(skb, tx_ring);
4407 }
4408
4409 /**
4410  * igb_tx_timeout - Respond to a Tx Hang
4411  * @netdev: network interface device structure
4412  **/
4413 static void igb_tx_timeout(struct net_device *netdev)
4414 {
4415         struct igb_adapter *adapter = netdev_priv(netdev);
4416         struct e1000_hw *hw = &adapter->hw;
4417
4418         /* Do the reset outside of interrupt context */
4419         adapter->tx_timeout_count++;
4420
4421         if (hw->mac.type == e1000_82580)
4422                 hw->dev_spec._82575.global_device_reset = true;
4423
4424         schedule_work(&adapter->reset_task);
4425         wr32(E1000_EICS,
4426              (adapter->eims_enable_mask & ~adapter->eims_other));
4427 }
4428
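/**
 * igb_reset_task - reset the adapter outside of interrupt context
 * @work: work_struct embedded in the adapter structure
 *
 * Scheduled from igb_tx_timeout and the interrupt handlers; dumps the
 * adapter state for debugging and reinitializes the hardware.
 **/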
4429 static void igb_reset_task(struct work_struct *work)
4430 {
4431         struct igb_adapter *adapter;
4432         adapter = container_of(work, struct igb_adapter, reset_task);
4433
4434         igb_dump(adapter);
4435         netdev_err(adapter->netdev, "Reset adapter\n");
4436         igb_reinit_locked(adapter);
4437 }
4438
4439 /**
4440  * igb_get_stats64 - Get System Network Statistics
4441  * @netdev: network interface device structure
4442  * @stats: rtnl_link_stats64 pointer
4443  *
4444  **/
4445 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4446                                                  struct rtnl_link_stats64 *stats)
4447 {
4448         struct igb_adapter *adapter = netdev_priv(netdev);
4449
4450         spin_lock(&adapter->stats64_lock);
4451         igb_update_stats(adapter, &adapter->stats64);
4452         memcpy(stats, &adapter->stats64, sizeof(*stats));
4453         spin_unlock(&adapter->stats64_lock);
4454
4455         return stats;
4456 }
4457
4458 /**
4459  * igb_change_mtu - Change the Maximum Transfer Unit
4460  * @netdev: network interface device structure
4461  * @new_mtu: new value for maximum frame size
4462  *
4463  * Returns 0 on success, negative on failure
4464  **/
4465 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4466 {
4467         struct igb_adapter *adapter = netdev_priv(netdev);
4468         struct pci_dev *pdev = adapter->pdev;
4469         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4470         u32 rx_buffer_len, i;
4471
4472         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4473                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4474                 return -EINVAL;
4475         }
4476
4477         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4478                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4479                 return -EINVAL;
4480         }
4481
4482         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4483                 msleep(1);
4484
4485         /* igb_down has a dependency on max_frame_size */
4486         adapter->max_frame_size = max_frame;
4487
4488         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4489          * means we reserve 2 more; this pushes us to allocate from the next
4490          * larger slab size.
4491          * i.e. RXBUFFER_2048 --> size-4096 slab
4492          */
4493
4494         if (adapter->hw.mac.type == e1000_82580)
4495                 max_frame += IGB_TS_HDR_LEN;
4496
4497         if (max_frame <= IGB_RXBUFFER_1024)
4498                 rx_buffer_len = IGB_RXBUFFER_1024;
4499         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4500                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4501         else
4502                 rx_buffer_len = IGB_RXBUFFER_128;
4503
4504         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4505              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4506                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4507
4508         if ((adapter->hw.mac.type == e1000_82580) &&
4509             (rx_buffer_len == IGB_RXBUFFER_128))
4510                 rx_buffer_len += IGB_RXBUFFER_64;
4511
4512         if (netif_running(netdev))
4513                 igb_down(adapter);
4514
4515         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4516                  netdev->mtu, new_mtu);
4517         netdev->mtu = new_mtu;
4518
4519         for (i = 0; i < adapter->num_rx_queues; i++)
4520                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4521
4522         if (netif_running(netdev))
4523                 igb_up(adapter);
4524         else
4525                 igb_reset(adapter);
4526
4527         clear_bit(__IGB_RESETTING, &adapter->state);
4528
4529         return 0;
4530 }
4531
4532 /**
4533  * igb_update_stats - Update the board statistics counters
4534  * @adapter: board private structure
4535  **/
4536
4537 void igb_update_stats(struct igb_adapter *adapter,
4538                       struct rtnl_link_stats64 *net_stats)
4539 {
4540         struct e1000_hw *hw = &adapter->hw;
4541         struct pci_dev *pdev = adapter->pdev;
4542         u32 reg, mpc;
4543         u16 phy_tmp;
4544         int i;
4545         u64 bytes, packets;
4546         unsigned int start;
4547         u64 _bytes, _packets;
4548
4549 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4550
4551         /*
4552          * Prevent stats update while adapter is being reset, or if the pci
4553          * connection is down.
4554          */
4555         if (adapter->link_speed == 0)
4556                 return;
4557         if (pci_channel_offline(pdev))
4558                 return;
4559
4560         bytes = 0;
4561         packets = 0;
4562         for (i = 0; i < adapter->num_rx_queues; i++) {
4563                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4564                 struct igb_ring *ring = adapter->rx_ring[i];
4565
4566                 ring->rx_stats.drops += rqdpc_tmp;
4567                 net_stats->rx_fifo_errors += rqdpc_tmp;
4568
4569                 do {
4570                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4571                         _bytes = ring->rx_stats.bytes;
4572                         _packets = ring->rx_stats.packets;
4573                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4574                 bytes += _bytes;
4575                 packets += _packets;
4576         }
4577
4578         net_stats->rx_bytes = bytes;
4579         net_stats->rx_packets = packets;
4580
4581         bytes = 0;
4582         packets = 0;
4583         for (i = 0; i < adapter->num_tx_queues; i++) {
4584                 struct igb_ring *ring = adapter->tx_ring[i];
4585                 do {
4586                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4587                         _bytes = ring->tx_stats.bytes;
4588                         _packets = ring->tx_stats.packets;
4589                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4590                 bytes += _bytes;
4591                 packets += _packets;
4592         }
4593         net_stats->tx_bytes = bytes;
4594         net_stats->tx_packets = packets;
4595
4596         /* read stats registers */
4597         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4598         adapter->stats.gprc += rd32(E1000_GPRC);
4599         adapter->stats.gorc += rd32(E1000_GORCL);
4600         rd32(E1000_GORCH); /* clear GORCL */
4601         adapter->stats.bprc += rd32(E1000_BPRC);
4602         adapter->stats.mprc += rd32(E1000_MPRC);
4603         adapter->stats.roc += rd32(E1000_ROC);
4604
4605         adapter->stats.prc64 += rd32(E1000_PRC64);
4606         adapter->stats.prc127 += rd32(E1000_PRC127);
4607         adapter->stats.prc255 += rd32(E1000_PRC255);
4608         adapter->stats.prc511 += rd32(E1000_PRC511);
4609         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4610         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4611         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4612         adapter->stats.sec += rd32(E1000_SEC);
4613
4614         mpc = rd32(E1000_MPC);
4615         adapter->stats.mpc += mpc;
4616         net_stats->rx_fifo_errors += mpc;
4617         adapter->stats.scc += rd32(E1000_SCC);
4618         adapter->stats.ecol += rd32(E1000_ECOL);
4619         adapter->stats.mcc += rd32(E1000_MCC);
4620         adapter->stats.latecol += rd32(E1000_LATECOL);
4621         adapter->stats.dc += rd32(E1000_DC);
4622         adapter->stats.rlec += rd32(E1000_RLEC);
4623         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4624         adapter->stats.xontxc += rd32(E1000_XONTXC);
4625         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4626         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4627         adapter->stats.fcruc += rd32(E1000_FCRUC);
4628         adapter->stats.gptc += rd32(E1000_GPTC);
4629         adapter->stats.gotc += rd32(E1000_GOTCL);
4630         rd32(E1000_GOTCH); /* clear GOTCL */
4631         adapter->stats.rnbc += rd32(E1000_RNBC);
4632         adapter->stats.ruc += rd32(E1000_RUC);
4633         adapter->stats.rfc += rd32(E1000_RFC);
4634         adapter->stats.rjc += rd32(E1000_RJC);
4635         adapter->stats.tor += rd32(E1000_TORH);
4636         adapter->stats.tot += rd32(E1000_TOTH);
4637         adapter->stats.tpr += rd32(E1000_TPR);
4638
4639         adapter->stats.ptc64 += rd32(E1000_PTC64);
4640         adapter->stats.ptc127 += rd32(E1000_PTC127);
4641         adapter->stats.ptc255 += rd32(E1000_PTC255);
4642         adapter->stats.ptc511 += rd32(E1000_PTC511);
4643         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4644         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4645
4646         adapter->stats.mptc += rd32(E1000_MPTC);
4647         adapter->stats.bptc += rd32(E1000_BPTC);
4648
4649         adapter->stats.tpt += rd32(E1000_TPT);
4650         adapter->stats.colc += rd32(E1000_COLC);
4651
4652         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4653         /* read internal phy specific stats */
4654         reg = rd32(E1000_CTRL_EXT);
4655         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4656                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4657                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4658         }
4659
4660         adapter->stats.tsctc += rd32(E1000_TSCTC);
4661         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4662
4663         adapter->stats.iac += rd32(E1000_IAC);
4664         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4665         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4666         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4667         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4668         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4669         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4670         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4671         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4672
4673         /* Fill out the OS statistics structure */
4674         net_stats->multicast = adapter->stats.mprc;
4675         net_stats->collisions = adapter->stats.colc;
4676
4677         /* Rx Errors */
4678
4679         /* RLEC on some newer hardware can be incorrect so build
4680          * our own version based on RUC and ROC */
4681         net_stats->rx_errors = adapter->stats.rxerrc +
4682                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4683                 adapter->stats.ruc + adapter->stats.roc +
4684                 adapter->stats.cexterr;
4685         net_stats->rx_length_errors = adapter->stats.ruc +
4686                                       adapter->stats.roc;
4687         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4688         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4689         net_stats->rx_missed_errors = adapter->stats.mpc;
4690
4691         /* Tx Errors */
4692         net_stats->tx_errors = adapter->stats.ecol +
4693                                adapter->stats.latecol;
4694         net_stats->tx_aborted_errors = adapter->stats.ecol;
4695         net_stats->tx_window_errors = adapter->stats.latecol;
4696         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4697
4698         /* Tx Dropped needs to be maintained elsewhere */
4699
4700         /* Phy Stats */
4701         if (hw->phy.media_type == e1000_media_type_copper) {
4702                 if ((adapter->link_speed == SPEED_1000) &&
4703                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4704                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4705                         adapter->phy_stats.idle_errors += phy_tmp;
4706                 }
4707         }
4708
4709         /* Management Stats */
4710         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4711         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4712         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4713
4714         /* OS2BMC Stats */
4715         reg = rd32(E1000_MANC);
4716         if (reg & E1000_MANC_EN_BMC2OS) {
4717                 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4718                 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4719                 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4720                 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4721         }
4722 }
4723
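/**
 * igb_msix_other - MSI-X handler for causes not tied to a queue vector
 * @irq: interrupt number
 * @data: pointer to the adapter structure
 *
 * Handles device reset requests, DMA out-of-sync events (which may
 * indicate VF spoofing), VF mailbox messages and link status changes,
 * then re-arms the corresponding interrupt causes.
 **/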
4724 static irqreturn_t igb_msix_other(int irq, void *data)
4725 {
4726         struct igb_adapter *adapter = data;
4727         struct e1000_hw *hw = &adapter->hw;
4728         u32 icr = rd32(E1000_ICR);
4729         /* reading ICR causes bit 31 of EICR to be cleared */
4730
4731         if (icr & E1000_ICR_DRSTA)
4732                 schedule_work(&adapter->reset_task);
4733
4734         if (icr & E1000_ICR_DOUTSYNC) {
4735                 /* HW is reporting DMA is out of sync */
4736                 adapter->stats.doosync++;
4737                 /* The DMA Out of Sync is also an indication of a spoof event
4738                  * in IOV mode. Check the Wrong VM Behavior register to
4739                  * see if it is really a spoof event. */
4740                 igb_check_wvbr(adapter);
4741         }
4742
4743         /* Check for a mailbox event */
4744         if (icr & E1000_ICR_VMMB)
4745                 igb_msg_task(adapter);
4746
4747         if (icr & E1000_ICR_LSC) {
4748                 hw->mac.get_link_status = 1;
4749                 /* guard against interrupt when we're going down */
4750                 if (!test_bit(__IGB_DOWN, &adapter->state))
4751                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4752         }
4753
4754         if (adapter->vfs_allocated_count)
4755                 wr32(E1000_IMS, E1000_IMS_LSC |
4756                                 E1000_IMS_VMMB |
4757                                 E1000_IMS_DOUTSYNC);
4758         else
4759                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4760         wr32(E1000_EIMS, adapter->eims_other);
4761
4762         return IRQ_HANDLED;
4763 }
4764
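/**
 * igb_write_itr - write a pending ITR value to the hardware
 * @q_vector: queue vector whose interrupt throttle rate is updated
 *
 * Writes the ITR value calculated on the previous interrupt to the
 * vector's ITR register, using the register layout appropriate for the
 * MAC type (82575 vs. later devices).
 **/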
4765 static void igb_write_itr(struct igb_q_vector *q_vector)
4766 {
4767         struct igb_adapter *adapter = q_vector->adapter;
4768         u32 itr_val = q_vector->itr_val & 0x7FFC;
4769
4770         if (!q_vector->set_itr)
4771                 return;
4772
4773         if (!itr_val)
4774                 itr_val = 0x4;
4775
4776         if (adapter->hw.mac.type == e1000_82575)
4777                 itr_val |= itr_val << 16;
4778         else
4779                 itr_val |= 0x8000000;
4780
4781         writel(itr_val, q_vector->itr_register);
4782         q_vector->set_itr = 0;
4783 }
4784
4785 static irqreturn_t igb_msix_ring(int irq, void *data)
4786 {
4787         struct igb_q_vector *q_vector = data;
4788
4789         /* Write the ITR value calculated from the previous interrupt. */
4790         igb_write_itr(q_vector);
4791
4792         napi_schedule(&q_vector->napi);
4793
4794         return IRQ_HANDLED;
4795 }
4796
4797 #ifdef CONFIG_IGB_DCA
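/**
 * igb_update_dca - point DCA at the CPU currently servicing a vector
 * @q_vector: queue vector being serviced
 *
 * Programs the Tx and Rx DCA control registers for the vector's rings
 * so descriptor (and Rx header/payload) writes target the cache of the
 * CPU running this vector.  Does nothing if the CPU has not changed.
 **/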
4798 static void igb_update_dca(struct igb_q_vector *q_vector)
4799 {
4800         struct igb_adapter *adapter = q_vector->adapter;
4801         struct e1000_hw *hw = &adapter->hw;
4802         int cpu = get_cpu();
4803
4804         if (q_vector->cpu == cpu)
4805                 goto out_no_update;
4806
4807         if (q_vector->tx_ring) {
4808                 int q = q_vector->tx_ring->reg_idx;
4809                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4810                 if (hw->mac.type == e1000_82575) {
4811                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4812                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4813                 } else {
4814                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4815                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4816                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4817                 }
4818                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4819                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4820         }
4821         if (q_vector->rx_ring) {
4822                 int q = q_vector->rx_ring->reg_idx;
4823                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4824                 if (hw->mac.type == e1000_82575) {
4825                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4826                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4827                 } else {
4828                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4829                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4830                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4831                 }
4832                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4833                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4834                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4835                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4836         }
4837         q_vector->cpu = cpu;
4838 out_no_update:
4839         put_cpu();
4840 }
4841
4842 static void igb_setup_dca(struct igb_adapter *adapter)
4843 {
4844         struct e1000_hw *hw = &adapter->hw;
4845         int i;
4846
4847         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4848                 return;
4849
4850         /* Always use CB2 mode, difference is masked in the CB driver. */
4851         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4852
4853         for (i = 0; i < adapter->num_q_vectors; i++) {
4854                 adapter->q_vector[i]->cpu = -1;
4855                 igb_update_dca(adapter->q_vector[i]);
4856         }
4857 }
4858
4859 static int __igb_notify_dca(struct device *dev, void *data)
4860 {
4861         struct net_device *netdev = dev_get_drvdata(dev);
4862         struct igb_adapter *adapter = netdev_priv(netdev);
4863         struct pci_dev *pdev = adapter->pdev;
4864         struct e1000_hw *hw = &adapter->hw;
4865         unsigned long event = *(unsigned long *)data;
4866
4867         switch (event) {
4868         case DCA_PROVIDER_ADD:
4869                 /* if already enabled, don't do it again */
4870                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4871                         break;
4872                 if (dca_add_requester(dev) == 0) {
4873                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4874                         dev_info(&pdev->dev, "DCA enabled\n");
4875                         igb_setup_dca(adapter);
4876                         break;
4877                 }
4878                 /* Fall Through since DCA is disabled. */
4879         case DCA_PROVIDER_REMOVE:
4880                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4881                         /* without this a class_device is left
4882                          * hanging around in the sysfs model */
4883                         dca_remove_requester(dev);
4884                         dev_info(&pdev->dev, "DCA disabled\n");
4885                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4886                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4887                 }
4888                 break;
4889         }
4890
4891         return 0;
4892 }
4893
4894 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4895                           void *p)
4896 {
4897         int ret_val;
4898
4899         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4900                                          __igb_notify_dca);
4901
4902         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4903 }
4904 #endif /* CONFIG_IGB_DCA */
4905
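/**
 * igb_ping_all_vfs - notify all VFs via the mailbox
 * @adapter: board private structure
 *
 * Sends a PF control message to every allocated VF, adding the CTS
 * (clear to send) bit for VFs that have completed their reset.
 **/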
4906 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4907 {
4908         struct e1000_hw *hw = &adapter->hw;
4909         u32 ping;
4910         int i;
4911
4912         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4913                 ping = E1000_PF_CONTROL_MSG;
4914                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4915                         ping |= E1000_VT_MSGTYPE_CTS;
4916                 igb_write_mbx(hw, &ping, 1, i);
4917         }
4918 }
4919
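/**
 * igb_set_vf_promisc - handle a VF multicast promiscuous request
 * @adapter: board private structure
 * @msgbuf: mailbox message from the VF
 * @vf: VF the request came from
 *
 * Updates the VF's VMOLR register to enable or disable multicast
 * promiscuous mode; when clearing it, re-writes the VF's stored
 * multicast hashes to the MTA.  Returns -EINVAL if unsupported flags
 * remain in the message.
 **/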
4920 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4921 {
4922         struct e1000_hw *hw = &adapter->hw;
4923         u32 vmolr = rd32(E1000_VMOLR(vf));
4924         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4925
4926         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4927                             IGB_VF_FLAG_MULTI_PROMISC);
4928         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4929
4930         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4931                 vmolr |= E1000_VMOLR_MPME;
4932                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4933                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4934         } else {
4935                 /*
4936                  * if we have hashes and we are clearing a multicast promisc
4937                  * flag we need to write the hashes to the MTA as this step
4938                  * was previously skipped
4939                  */
4940                 if (vf_data->num_vf_mc_hashes > 30) {
4941                         vmolr |= E1000_VMOLR_MPME;
4942                 } else if (vf_data->num_vf_mc_hashes) {
4943                         int j;
4944                         vmolr |= E1000_VMOLR_ROMPE;
4945                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4946                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4947                 }
4948         }
4949
4950         wr32(E1000_VMOLR(vf), vmolr);
4951
4952         /* there are flags left unprocessed, likely not supported */
4953         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4954                 return -EINVAL;
4955
4956         return 0;
4957
4958 }
4959
4960 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4961                                   u32 *msgbuf, u32 vf)
4962 {
4963         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4964         u16 *hash_list = (u16 *)&msgbuf[1];
4965         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4966         int i;
4967
4968         /* salt away the number of multicast addresses assigned
4969          * to this VF for later use, to restore when the PF multicast
4970          * list changes
4971          */
4972         vf_data->num_vf_mc_hashes = n;
4973
4974         /* only up to 30 hash values supported */
4975         if (n > 30)
4976                 n = 30;
4977
4978         /* store the hashes for later use */
4979         for (i = 0; i < n; i++)
4980                 vf_data->vf_mc_hashes[i] = hash_list[i];
4981
4982         /* Flush and reset the mta with the new values */
4983         igb_set_rx_mode(adapter->netdev);
4984
4985         return 0;
4986 }
4987
4988 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4989 {
4990         struct e1000_hw *hw = &adapter->hw;
4991         struct vf_data_storage *vf_data;
4992         int i, j;
4993
4994         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4995                 u32 vmolr = rd32(E1000_VMOLR(i));
4996                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4997
4998                 vf_data = &adapter->vf_data[i];
4999
5000                 if ((vf_data->num_vf_mc_hashes > 30) ||
5001                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5002                         vmolr |= E1000_VMOLR_MPME;
5003                 } else if (vf_data->num_vf_mc_hashes) {
5004                         vmolr |= E1000_VMOLR_ROMPE;
5005                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5006                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5007                 }
5008                 wr32(E1000_VMOLR(i), vmolr);
5009         }
5010 }
5011
5012 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5013 {
5014         struct e1000_hw *hw = &adapter->hw;
5015         u32 pool_mask, reg, vid;
5016         int i;
5017
5018         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5019
5020         /* Find the vlan filter for this id */
5021         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5022                 reg = rd32(E1000_VLVF(i));
5023
5024                 /* remove the vf from the pool */
5025                 reg &= ~pool_mask;
5026
5027                 /* if pool is empty then remove entry from vfta */
5028                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5029                     (reg & E1000_VLVF_VLANID_ENABLE)) {
5030                         vid = reg & E1000_VLVF_VLANID_MASK;
5031                         igb_vfta_set(hw, vid, false);
5032                         reg = 0;
5033                 }
5034
5035                 wr32(E1000_VLVF(i), reg);
5036         }
5037
5038         adapter->vf_data[vf].vlans_enabled = 0;
5039 }
5040
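/**
 * igb_vlvf_set - add or remove a VF pool from a VLAN filter entry
 * @adapter: board private structure
 * @vid: VLAN id to add or remove
 * @add: true to add the VF to the filter, false to remove it
 * @vf: VF (pool) number being updated
 *
 * Maintains the VLVF array and VFTA for the given VLAN, allocating a
 * free VLVF entry when needed, and adjusts the VF's VMOLR RLPML value
 * to account for the VLAN tag.  Only applies on 82576 and newer
 * hardware with VFs allocated; returns -1 otherwise.
 **/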
5041 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5042 {
5043         struct e1000_hw *hw = &adapter->hw;
5044         u32 reg, i;
5045
5046         /* The vlvf table only exists on 82576 hardware and newer */
5047         if (hw->mac.type < e1000_82576)
5048                 return -1;
5049
5050         /* we only need to do this if VMDq is enabled */
5051         if (!adapter->vfs_allocated_count)
5052                 return -1;
5053
5054         /* Find the vlan filter for this id */
5055         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5056                 reg = rd32(E1000_VLVF(i));
5057                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5058                     vid == (reg & E1000_VLVF_VLANID_MASK))
5059                         break;
5060         }
5061
5062         if (add) {
5063                 if (i == E1000_VLVF_ARRAY_SIZE) {
5064                         /* Did not find a matching VLAN ID entry that was
5065                          * enabled.  Search for a free filter entry, i.e.
5066                          * one without the enable bit set
5067                          */
5068                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5069                                 reg = rd32(E1000_VLVF(i));
5070                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5071                                         break;
5072                         }
5073                 }
5074                 if (i < E1000_VLVF_ARRAY_SIZE) {
5075                         /* Found an enabled/available entry */
5076                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5077
5078                         /* if !enabled we need to set this up in vfta */
5079                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5080                                 /* add VID to filter table */
5081                                 igb_vfta_set(hw, vid, true);
5082                                 reg |= E1000_VLVF_VLANID_ENABLE;
5083                         }
5084                         reg &= ~E1000_VLVF_VLANID_MASK;
5085                         reg |= vid;
5086                         wr32(E1000_VLVF(i), reg);
5087
5088                         /* do not modify RLPML for PF devices */
5089                         if (vf >= adapter->vfs_allocated_count)
5090                                 return 0;
5091
5092                         if (!adapter->vf_data[vf].vlans_enabled) {
5093                                 u32 size;
5094                                 reg = rd32(E1000_VMOLR(vf));
5095                                 size = reg & E1000_VMOLR_RLPML_MASK;
5096                                 size += 4;
5097                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5098                                 reg |= size;
5099                                 wr32(E1000_VMOLR(vf), reg);
5100                         }
5101
5102                         adapter->vf_data[vf].vlans_enabled++;
5103                         return 0;
5104                 }
5105         } else {
5106                 if (i < E1000_VLVF_ARRAY_SIZE) {
5107                         /* remove vf from the pool */
5108                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5109                         /* if pool is empty then remove entry from vfta */
5110                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5111                                 reg = 0;
5112                                 igb_vfta_set(hw, vid, false);
5113                         }
5114                         wr32(E1000_VLVF(i), reg);
5115
5116                         /* do not modify RLPML for PF devices */
5117                         if (vf >= adapter->vfs_allocated_count)
5118                                 return 0;
5119
5120                         adapter->vf_data[vf].vlans_enabled--;
5121                         if (!adapter->vf_data[vf].vlans_enabled) {
5122                                 u32 size;
5123                                 reg = rd32(E1000_VMOLR(vf));
5124                                 size = reg & E1000_VMOLR_RLPML_MASK;
5125                                 size -= 4;
5126                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5127                                 reg |= size;
5128                                 wr32(E1000_VMOLR(vf), reg);
5129                         }
5130                 }
5131         }
5132         return 0;
5133 }
5134
5135 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5136 {
5137         struct e1000_hw *hw = &adapter->hw;
5138
5139         if (vid)
5140                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5141         else
5142                 wr32(E1000_VMVIR(vf), 0);
5143 }
5144
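/**
 * igb_ndo_set_vf_vlan - set an administrative VLAN and QoS for a VF
 * @netdev: network interface device structure
 * @vf: VF being configured
 * @vlan: VLAN id to assign, 0 to clear the administrative VLAN
 * @qos: 802.1p priority to assign
 *
 * Programs the port VLAN (VMVIR) and VLAN filter for the VF and
 * records the setting so it is re-applied across VF resets.  Returns 0
 * on success or a negative error code.
 **/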
5145 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5146                                int vf, u16 vlan, u8 qos)
5147 {
5148         int err = 0;
5149         struct igb_adapter *adapter = netdev_priv(netdev);
5150
5151         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5152                 return -EINVAL;
5153         if (vlan || qos) {
5154                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5155                 if (err)
5156                         goto out;
5157                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5158                 igb_set_vmolr(adapter, vf, !vlan);
5159                 adapter->vf_data[vf].pf_vlan = vlan;
5160                 adapter->vf_data[vf].pf_qos = qos;
5161                 dev_info(&adapter->pdev->dev,
5162                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5163                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5164                         dev_warn(&adapter->pdev->dev,
5165                                  "The VF VLAN has been set,"
5166                                  " but the PF device is not up.\n");
5167                         dev_warn(&adapter->pdev->dev,
5168                                  "Bring the PF device up before"
5169                                  " attempting to use the VF device.\n");
5170                 }
5171         } else {
5172                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5173                                    false, vf);
5174                 igb_set_vmvir(adapter, vlan, vf);
5175                 igb_set_vmolr(adapter, vf, true);
5176                 adapter->vf_data[vf].pf_vlan = 0;
5177                 adapter->vf_data[vf].pf_qos = 0;
5178         }
5179 out:
5180         return err;
5181 }
5182
5183 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5184 {
5185         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5186         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5187
5188         return igb_vlvf_set(adapter, vid, add, vf);
5189 }
5190
5191 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5192 {
5193         /* clear flags - except flag that indicates PF has set the MAC */
5194         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5195         adapter->vf_data[vf].last_nack = jiffies;
5196
5197         /* reset offloads to defaults */
5198         igb_set_vmolr(adapter, vf, true);
5199
5200         /* reset vlans for device */
5201         igb_clear_vf_vfta(adapter, vf);
5202         if (adapter->vf_data[vf].pf_vlan)
5203                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5204                                     adapter->vf_data[vf].pf_vlan,
5205                                     adapter->vf_data[vf].pf_qos);
5206         else
5207                 igb_clear_vf_vfta(adapter, vf);
5208
5209         /* reset multicast table array for vf */
5210         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5211
5212         /* Flush and reset the mta with the new values */
5213         igb_set_rx_mode(adapter->netdev);
5214 }
5215
5216 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5217 {
5218         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5219
5220         /* generate a new mac address as we were hotplug removed/added */
5221         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5222                 random_ether_addr(vf_mac);
5223
5224         /* process remaining reset events */
5225         igb_vf_reset(adapter, vf);
5226 }
5227
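/**
 * igb_vf_reset_msg - complete a VF-requested reset
 * @adapter: board private structure
 * @vf: VF that requested the reset
 *
 * Runs the common VF reset path, programs the VF MAC address into a
 * receive address register, enables VF transmit and receive, and
 * replies over the mailbox with an ACK and the MAC address.
 **/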
5228 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5229 {
5230         struct e1000_hw *hw = &adapter->hw;
5231         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5232         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5233         u32 reg, msgbuf[3];
5234         u8 *addr = (u8 *)(&msgbuf[1]);
5235
5236         /* process all the same items cleared in a function level reset */
5237         igb_vf_reset(adapter, vf);
5238
5239         /* set vf mac address */
5240         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5241
5242         /* enable transmit and receive for vf */
5243         reg = rd32(E1000_VFTE);
5244         wr32(E1000_VFTE, reg | (1 << vf));
5245         reg = rd32(E1000_VFRE);
5246         wr32(E1000_VFRE, reg | (1 << vf));
5247
5248         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5249
5250         /* reply to reset with ack and vf mac address */
5251         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5252         memcpy(addr, vf_mac, 6);
5253         igb_write_mbx(hw, msgbuf, 3, vf);
5254 }
5255
5256 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5257 {
5258         /*
5259          * The VF MAC Address is stored in a packed array of bytes
5260          * starting at the second 32 bit word of the msg array
5261          */
5262         unsigned char *addr = (unsigned char *)&msg[1];
5263         int err = -1;
5264
5265         if (is_valid_ether_addr(addr))
5266                 err = igb_set_vf_mac(adapter, vf, addr);
5267
5268         return err;
5269 }
5270
5271 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5272 {
5273         struct e1000_hw *hw = &adapter->hw;
5274         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5275         u32 msg = E1000_VT_MSGTYPE_NACK;
5276
5277         /* if device isn't clear to send it shouldn't be reading either */
5278         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5279             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5280                 igb_write_mbx(hw, &msg, 1, vf);
5281                 vf_data->last_nack = jiffies;
5282         }
5283 }
5284
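/**
 * igb_rcv_msg_from_vf - read and dispatch a VF mailbox message
 * @adapter: board private structure
 * @vf: VF the message came from
 *
 * Reads the pending mailbox message and dispatches reset, MAC address,
 * promiscuous, multicast, LPE and VLAN requests to their handlers,
 * then replies to the VF with an ACK or NACK carrying the result.
 **/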
5285 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5286 {
5287         struct pci_dev *pdev = adapter->pdev;
5288         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5289         struct e1000_hw *hw = &adapter->hw;
5290         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5291         s32 retval;
5292
5293         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5294
5295         if (retval) {
5296                 /* if receive failed, revoke the VF's CTS status and restart init */
5297                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5298                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5299                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5300                         return;
5301                 goto out;
5302         }
5303
5304         /* this is a message we already processed, do nothing */
5305         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5306                 return;
5307
5308         /*
5309          * until the vf completes a reset it should not be
5310          * allowed to start any configuration.
5311          */
5312
5313         if (msgbuf[0] == E1000_VF_RESET) {
5314                 igb_vf_reset_msg(adapter, vf);
5315                 return;
5316         }
5317
5318         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5319                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5320                         return;
5321                 retval = -1;
5322                 goto out;
5323         }
5324
5325         switch ((msgbuf[0] & 0xFFFF)) {
5326         case E1000_VF_SET_MAC_ADDR:
5327                 retval = -EINVAL;
5328                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5329                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5330                 else
5331                         dev_warn(&pdev->dev,
5332                                  "VF %d attempted to override administratively "
5333                                  "set MAC address\nReload the VF driver to "
5334                                  "resume operations\n", vf);
5335                 break;
5336         case E1000_VF_SET_PROMISC:
5337                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5338                 break;
5339         case E1000_VF_SET_MULTICAST:
5340                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5341                 break;
5342         case E1000_VF_SET_LPE:
5343                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5344                 break;
5345         case E1000_VF_SET_VLAN:
5346                 retval = -1;
5347                 if (vf_data->pf_vlan)
5348                         dev_warn(&pdev->dev,
5349                                  "VF %d attempted to override administratively "
5350                                  "set VLAN tag\nReload the VF driver to "
5351                                  "resume operations\n", vf);
5352                 else
5353                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5354                 break;
5355         default:
5356                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5357                 retval = -1;
5358                 break;
5359         }
5360
5361         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5362 out:
5363         /* notify the VF of the results of what it sent us */
5364         if (retval)
5365                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5366         else
5367                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5368
5369         igb_write_mbx(hw, msgbuf, 1, vf);
5370 }
5371
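/**
 * igb_msg_task - service the mailboxes of all VFs
 * @adapter: board private structure
 *
 * Polls each allocated VF for pending reset requests, messages and
 * acks, and dispatches them to the appropriate handlers.
 **/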
5372 static void igb_msg_task(struct igb_adapter *adapter)
5373 {
5374         struct e1000_hw *hw = &adapter->hw;
5375         u32 vf;
5376
5377         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5378                 /* process any reset requests */
5379                 if (!igb_check_for_rst(hw, vf))
5380                         igb_vf_reset_event(adapter, vf);
5381
5382                 /* process any messages pending */
5383                 if (!igb_check_for_msg(hw, vf))
5384                         igb_rcv_msg_from_vf(adapter, vf);
5385
5386                 /* process any acks */
5387                 if (!igb_check_for_ack(hw, vf))
5388                         igb_rcv_ack_from_vf(adapter, vf);
5389         }
5390 }
5391
5392 /**
5393  *  igb_set_uta - Set unicast filter table address
5394  *  @adapter: board private structure
5395  *
5396  *  The unicast table address is a register array of 32-bit registers.
5397  *  The table is meant to be used in a way similar to how the MTA is used;
5398  *  however, due to certain limitations in the hardware it is necessary to
5399  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5400  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled.
5401  **/
5402 static void igb_set_uta(struct igb_adapter *adapter)
5403 {
5404         struct e1000_hw *hw = &adapter->hw;
5405         int i;
5406
5407         /* The UTA table only exists on 82576 hardware and newer */
5408         if (hw->mac.type < e1000_82576)
5409                 return;
5410
5411         /* we only need to do this if VMDq is enabled */
5412         if (!adapter->vfs_allocated_count)
5413                 return;
5414
5415         for (i = 0; i < hw->mac.uta_reg_count; i++)
5416                 array_wr32(E1000_UTA, i, ~0);
5417 }
5418
5419 /**
5420  * igb_intr_msi - Interrupt Handler
5421  * @irq: interrupt number
5422  * @data: pointer to a network interface device structure
5423  **/
5424 static irqreturn_t igb_intr_msi(int irq, void *data)
5425 {
5426         struct igb_adapter *adapter = data;
5427         struct igb_q_vector *q_vector = adapter->q_vector[0];
5428         struct e1000_hw *hw = &adapter->hw;
5429         /* read ICR disables interrupts using IAM */
5430         u32 icr = rd32(E1000_ICR);
5431
5432         igb_write_itr(q_vector);
5433
5434         if (icr & E1000_ICR_DRSTA)
5435                 schedule_work(&adapter->reset_task);
5436
5437         if (icr & E1000_ICR_DOUTSYNC) {
5438                 /* HW is reporting DMA is out of sync */
5439                 adapter->stats.doosync++;
5440         }
5441
5442         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5443                 hw->mac.get_link_status = 1;
5444                 if (!test_bit(__IGB_DOWN, &adapter->state))
5445                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5446         }
5447
5448         napi_schedule(&q_vector->napi);
5449
5450         return IRQ_HANDLED;
5451 }
5452
5453 /**
5454  * igb_intr - Legacy Interrupt Handler
5455  * @irq: interrupt number
5456  * @data: pointer to a network interface device structure
5457  **/
5458 static irqreturn_t igb_intr(int irq, void *data)
5459 {
5460         struct igb_adapter *adapter = data;
5461         struct igb_q_vector *q_vector = adapter->q_vector[0];
5462         struct e1000_hw *hw = &adapter->hw;
5463         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5464          * need for the IMC write */
5465         u32 icr = rd32(E1000_ICR);
5466         if (!icr)
5467                 return IRQ_NONE;  /* Not our interrupt */
5468
5469         igb_write_itr(q_vector);
5470
5471         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5472          * not set, then the adapter didn't send an interrupt */
5473         if (!(icr & E1000_ICR_INT_ASSERTED))
5474                 return IRQ_NONE;
5475
5476         if (icr & E1000_ICR_DRSTA)
5477                 schedule_work(&adapter->reset_task);
5478
5479         if (icr & E1000_ICR_DOUTSYNC) {
5480                 /* HW is reporting DMA is out of sync */
5481                 adapter->stats.doosync++;
5482         }
5483
5484         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5485                 hw->mac.get_link_status = 1;
5486                 /* guard against interrupt when we're going down */
5487                 if (!test_bit(__IGB_DOWN, &adapter->state))
5488                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5489         }
5490
5491         napi_schedule(&q_vector->napi);
5492
5493         return IRQ_HANDLED;
5494 }
5495
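/**
 * igb_ring_irq_enable - re-enable interrupts for a queue vector
 * @q_vector: vector that has finished NAPI polling
 *
 * Updates the interrupt throttle rate if dynamic moderation is
 * configured and, unless the adapter is going down, re-enables the
 * vector's MSI-X interrupt (or the device interrupt in legacy/MSI
 * mode).
 **/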
5496 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5497 {
5498         struct igb_adapter *adapter = q_vector->adapter;
5499         struct e1000_hw *hw = &adapter->hw;
5500
5501         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5502             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5503                 if (!adapter->msix_entries)
5504                         igb_set_itr(adapter);
5505                 else
5506                         igb_update_ring_itr(q_vector);
5507         }
5508
5509         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5510                 if (adapter->msix_entries)
5511                         wr32(E1000_EIMS, q_vector->eims_value);
5512                 else
5513                         igb_irq_enable(adapter);
5514         }
5515 }
5516
5517 /**
5518  * igb_poll - NAPI Rx polling callback
5519  * @napi: napi polling structure
5520  * @budget: count of how many packets we should handle
5521  **/
5522 static int igb_poll(struct napi_struct *napi, int budget)
5523 {
5524         struct igb_q_vector *q_vector = container_of(napi,
5525                                                      struct igb_q_vector,
5526                                                      napi);
5527         int tx_clean_complete = 1, work_done = 0;
5528
5529 #ifdef CONFIG_IGB_DCA
5530         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5531                 igb_update_dca(q_vector);
5532 #endif
5533         if (q_vector->tx_ring)
5534                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5535
5536         if (q_vector->rx_ring)
5537                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5538
5539         if (!tx_clean_complete)
5540                 work_done = budget;
5541
5542         /* If not enough Rx work done, exit the polling mode */
5543         if (work_done < budget) {
5544                 napi_complete(napi);
5545                 igb_ring_irq_enable(q_vector);
5546         }
5547
5548         return work_done;
5549 }
5550
5551 /**
5552  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5553  * @adapter: board private structure
5554  * @shhwtstamps: timestamp structure to update
5555  * @regval: unsigned 64bit system time value.
5556  *
5557  * We need to convert the system time value stored in the RX/TXSTMP registers
5558  * into a hwtstamp which can be used by the upper level timestamping functions
5559  */
5560 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5561                                    struct skb_shared_hwtstamps *shhwtstamps,
5562                                    u64 regval)
5563 {
5564         u64 ns;
5565
5566         /*
5567          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up by
5568          * 24 bits to match the clock shift we set up earlier.
5569          */
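        /*
         * Illustrative example (assuming IGB_82580_TSYNC_SHIFT is 24, as the
         * comment above implies): a raw SYSTIM value of 1 becomes 1 << 24
         * cycle units, which timecounter_cyc2time() converts back to
         * nanoseconds using the mult/shift chosen at timecounter setup.
         */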
5570         if (adapter->hw.mac.type == e1000_82580)
5571                 regval <<= IGB_82580_TSYNC_SHIFT;
5572
5573         ns = timecounter_cyc2time(&adapter->clock, regval);
5574         timecompare_update(&adapter->compare, ns);
5575         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5576         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5577         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5578 }
5579
5580 /**
5581  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5582  * @q_vector: pointer to q_vector containing needed info
5583  * @buffer_info: pointer to igb_buffer structure
5584  *
5585  * If we were asked to do hardware stamping and such a time stamp is
5586  * available, then it must have been for this skb here because we
5587  * allow only one such packet into the queue.
5588  */
5589 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5590 {
5591         struct igb_adapter *adapter = q_vector->adapter;
5592         struct e1000_hw *hw = &adapter->hw;
5593         struct skb_shared_hwtstamps shhwtstamps;
5594         u64 regval;
5595
5596         /* if skb does not support hw timestamp or TX stamp not valid exit */
5597         if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5598             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5599                 return;
5600
5601         regval = rd32(E1000_TXSTMPL);
5602         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5603
5604         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5605         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5606 }
5607
5608 /**
5609  * igb_clean_tx_irq - Reclaim resources after transmit completes
5610  * @q_vector: pointer to q_vector containing needed info
5611  * returns true if ring is completely cleaned
5612  **/
5613 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5614 {
5615         struct igb_adapter *adapter = q_vector->adapter;
5616         struct igb_ring *tx_ring = q_vector->tx_ring;
5617         struct net_device *netdev = tx_ring->netdev;
5618         struct e1000_hw *hw = &adapter->hw;
5619         struct igb_buffer *buffer_info;
5620         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5621         unsigned int total_bytes = 0, total_packets = 0;
5622         unsigned int i, eop, count = 0;
5623         bool cleaned = false;
5624
5625         i = tx_ring->next_to_clean;
5626         eop = tx_ring->buffer_info[i].next_to_watch;
5627         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5628
5629         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5630                (count < tx_ring->count)) {
5631                 rmb();  /* read buffer_info after eop_desc status */
5632                 for (cleaned = false; !cleaned; count++) {
5633                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5634                         buffer_info = &tx_ring->buffer_info[i];
5635                         cleaned = (i == eop);
5636
5637                         if (buffer_info->skb) {
5638                                 total_bytes += buffer_info->bytecount;
5639                                 /* gso_segs is currently only valid for tcp */
5640                                 total_packets += buffer_info->gso_segs;
5641                                 igb_tx_hwtstamp(q_vector, buffer_info);
5642                         }
5643
5644                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5645                         tx_desc->wb.status = 0;
5646
5647                         i++;
5648                         if (i == tx_ring->count)
5649                                 i = 0;
5650                 }
5651                 eop = tx_ring->buffer_info[i].next_to_watch;
5652                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5653         }
5654
5655         tx_ring->next_to_clean = i;
5656
5657         if (unlikely(count &&
5658                      netif_carrier_ok(netdev) &&
5659                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5660                 /* Make sure that anybody stopping the queue after this
5661                  * sees the new next_to_clean.
5662                  */
5663                 smp_mb();
5664                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5665                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5666                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5667
5668                         u64_stats_update_begin(&tx_ring->tx_syncp);
5669                         tx_ring->tx_stats.restart_queue++;
5670                         u64_stats_update_end(&tx_ring->tx_syncp);
5671                 }
5672         }
5673
5674         if (tx_ring->detect_tx_hung) {
5675                 /* Detect a transmit hang in hardware; this serializes the
5676                  * check with the clearing of time_stamp and movement of i */
5677                 tx_ring->detect_tx_hung = false;
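                /* a hang is flagged only when the oldest pending buffer has
                 * been outstanding for more than tx_timeout_factor * HZ
                 * jiffies and the MAC is not paused by flow control (TXOFF
                 * clear in the STATUS register) */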
5678                 if (tx_ring->buffer_info[i].time_stamp &&
5679                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5680                                (adapter->tx_timeout_factor * HZ)) &&
5681                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5682
5683                         /* detected Tx unit hang */
5684                         dev_err(tx_ring->dev,
5685                                 "Detected Tx Unit Hang\n"
5686                                 "  Tx Queue             <%d>\n"
5687                                 "  TDH                  <%x>\n"
5688                                 "  TDT                  <%x>\n"
5689                                 "  next_to_use          <%x>\n"
5690                                 "  next_to_clean        <%x>\n"
5691                                 "buffer_info[next_to_clean]\n"
5692                                 "  time_stamp           <%lx>\n"
5693                                 "  next_to_watch        <%x>\n"
5694                                 "  jiffies              <%lx>\n"
5695                                 "  desc.status          <%x>\n",
5696                                 tx_ring->queue_index,
5697                                 readl(tx_ring->head),
5698                                 readl(tx_ring->tail),
5699                                 tx_ring->next_to_use,
5700                                 tx_ring->next_to_clean,
5701                                 tx_ring->buffer_info[eop].time_stamp,
5702                                 eop,
5703                                 jiffies,
5704                                 eop_desc->wb.status);
5705                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5706                 }
5707         }
5708         tx_ring->total_bytes += total_bytes;
5709         tx_ring->total_packets += total_packets;
5710         u64_stats_update_begin(&tx_ring->tx_syncp);
5711         tx_ring->tx_stats.bytes += total_bytes;
5712         tx_ring->tx_stats.packets += total_packets;
5713         u64_stats_update_end(&tx_ring->tx_syncp);
5714         return count < tx_ring->count;
5715 }
5716
5717 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5718                                        u32 status_err, struct sk_buff *skb)
5719 {
5720         skb_checksum_none_assert(skb);
5721
5722         /* bail if the Ignore Checksum bit is set or RX checksum is disabled through ethtool */
5723         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5724              (status_err & E1000_RXD_STAT_IXSM))
5725                 return;
5726
5727         /* TCP/UDP checksum error bit is set */
5728         if (status_err &
5729             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5730                 /*
5731                  * work around errata with SCTP packets where the TCPE (aka
5732                  * L4E) bit is set incorrectly on 64 byte (60 byte w/o CRC)
5733                  * packets; let the stack verify the crc32c instead
5734                  */
5735                 if ((skb->len == 60) &&
5736                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5737                         u64_stats_update_begin(&ring->rx_syncp);
5738                         ring->rx_stats.csum_err++;
5739                         u64_stats_update_end(&ring->rx_syncp);
5740                 }
5741                 /* let the stack verify checksum errors */
5742                 return;
5743         }
5744         /* It must be a TCP or UDP packet with a valid checksum */
5745         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5746                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5747
5748         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5749 }
5750
5751 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5752                                    struct sk_buff *skb)
5753 {
5754         struct igb_adapter *adapter = q_vector->adapter;
5755         struct e1000_hw *hw = &adapter->hw;
5756         u64 regval;
5757
5758         /*
5759          * If this bit is set, then the RX registers contain the time stamp. No
5760          * other packet will be time stamped until we read these registers, so
5761          * read the registers to make them available again. Because only one
5762          * packet can be time stamped at a time, we know that the register
5763          * values must belong to this one here and therefore we don't need to
5764          * compare any of the additional attributes stored for it.
5765          *
5766          * If nothing went wrong, then it should have a shared tx_flags that we
5767          * can turn into a skb_shared_hwtstamps.
5768          */
5769         if (staterr & E1000_RXDADV_STAT_TSIP) {
5770                 u32 *stamp = (u32 *)skb->data;
5771                 regval = le32_to_cpu(*(stamp + 2));
5772                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5773                 skb_pull(skb, IGB_TS_HDR_LEN);
5774         } else {
5775                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5776                         return;
5777
5778                 regval = rd32(E1000_RXSTMPL);
5779                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5780         }
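        /*
         * The TSIP path above covers packets where the hardware placed the
         * timestamp in the packet buffer itself (the IGB_TS_HDR_LEN bytes
         * stripped by skb_pull); otherwise the shared RXSTMPL/RXSTMPH
         * registers are read, guarded by the VALID bit in TSYNCRXCTL.
         */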
5781
5782         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5783 }
5784 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5785                                union e1000_adv_rx_desc *rx_desc)
5786 {
5787         /* HW will not DMA in data larger than the given buffer, even if it
5788          * parses the (NFS, of course) header to be larger.  In that case, it
5789          * fills the header buffer and spills the rest into the page.
5790          */
5791         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5792                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5793         if (hlen > rx_ring->rx_buffer_len)
5794                 hlen = rx_ring->rx_buffer_len;
5795         return hlen;
5796 }
5797
5798 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5799                                  int *work_done, int budget)
5800 {
5801         struct igb_ring *rx_ring = q_vector->rx_ring;
5802         struct net_device *netdev = rx_ring->netdev;
5803         struct device *dev = rx_ring->dev;
5804         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5805         struct igb_buffer *buffer_info, *next_buffer;
5806         struct sk_buff *skb;
5807         bool cleaned = false;
5808         int cleaned_count = 0;
5809         int current_node = numa_node_id();
5810         unsigned int total_bytes = 0, total_packets = 0;
5811         unsigned int i;
5812         u32 staterr;
5813         u16 length;
5814
5815         i = rx_ring->next_to_clean;
5816         buffer_info = &rx_ring->buffer_info[i];
5817         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5818         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5819
5820         while (staterr & E1000_RXD_STAT_DD) {
5821                 if (*work_done >= budget)
5822                         break;
5823                 (*work_done)++;
5824                 rmb(); /* read descriptor and rx_buffer_info after status DD */
5825
5826                 skb = buffer_info->skb;
5827                 prefetch(skb->data - NET_IP_ALIGN);
5828                 buffer_info->skb = NULL;
5829
5830                 i++;
5831                 if (i == rx_ring->count)
5832                         i = 0;
5833
5834                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5835                 prefetch(next_rxd);
5836                 next_buffer = &rx_ring->buffer_info[i];
5837
5838                 length = le16_to_cpu(rx_desc->wb.upper.length);
5839                 cleaned = true;
5840                 cleaned_count++;
5841
5842                 if (buffer_info->dma) {
5843                         dma_unmap_single(dev, buffer_info->dma,
5844                                          rx_ring->rx_buffer_len,
5845                                          DMA_FROM_DEVICE);
5846                         buffer_info->dma = 0;
5847                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5848                                 skb_put(skb, length);
5849                                 goto send_up;
5850                         }
5851                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5852                 }
5853
5854                 if (length) {
5855                         dma_unmap_page(dev, buffer_info->page_dma,
5856                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5857                         buffer_info->page_dma = 0;
5858
5859                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5860                                                 buffer_info->page,
5861                                                 buffer_info->page_offset,
5862                                                 length);
5863
5864                         if ((page_count(buffer_info->page) != 1) ||
5865                             (page_to_nid(buffer_info->page) != current_node))
5866                                 buffer_info->page = NULL;
5867                         else
5868                                 get_page(buffer_info->page);
5869
5870                         skb->len += length;
5871                         skb->data_len += length;
5872                         skb->truesize += length;
5873                 }
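                /*
                 * Page reuse note for the block above: each RX page is split
                 * into two halves; a half is kept for reuse only when the
                 * stack has dropped its reference (page_count == 1) and the
                 * page is local to this NUMA node, otherwise it is forgotten
                 * and igb_alloc_rx_buffers_adv() allocates a fresh page.
                 */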
5874
5875                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5876                         buffer_info->skb = next_buffer->skb;
5877                         buffer_info->dma = next_buffer->dma;
5878                         next_buffer->skb = skb;
5879                         next_buffer->dma = 0;
5880                         goto next_desc;
5881                 }
5882 send_up:
5883                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5884                         dev_kfree_skb_irq(skb);
5885                         goto next_desc;
5886                 }
5887
5888                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5889                         igb_rx_hwtstamp(q_vector, staterr, skb);
5890                 total_bytes += skb->len;
5891                 total_packets++;
5892
5893                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5894
5895                 skb->protocol = eth_type_trans(skb, netdev);
5896                 skb_record_rx_queue(skb, rx_ring->queue_index);
5897
5898                 if (staterr & E1000_RXD_STAT_VP) {
5899                         u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5900
5901                         __vlan_hwaccel_put_tag(skb, vid);
5902                 }
5903                 napi_gro_receive(&q_vector->napi, skb);
5904
5905 next_desc:
5906                 rx_desc->wb.upper.status_error = 0;
5907
5908                 /* return some buffers to hardware, one at a time is too slow */
5909                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5910                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5911                         cleaned_count = 0;
5912                 }
5913
5914                 /* use prefetched values */
5915                 rx_desc = next_rxd;
5916                 buffer_info = next_buffer;
5917                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5918         }
5919
5920         rx_ring->next_to_clean = i;
5921         cleaned_count = igb_desc_unused(rx_ring);
5922
5923         if (cleaned_count)
5924                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5925
5926         rx_ring->total_packets += total_packets;
5927         rx_ring->total_bytes += total_bytes;
5928         u64_stats_update_begin(&rx_ring->rx_syncp);
5929         rx_ring->rx_stats.packets += total_packets;
5930         rx_ring->rx_stats.bytes += total_bytes;
5931         u64_stats_update_end(&rx_ring->rx_syncp);
5932         return cleaned;
5933 }
5934
5935 /**
5936  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5937  * @rx_ring: pointer to the receive ring to refill
       * @cleaned_count: number of receive buffers to allocate
5938  **/
5939 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5940 {
5941         struct net_device *netdev = rx_ring->netdev;
5942         union e1000_adv_rx_desc *rx_desc;
5943         struct igb_buffer *buffer_info;
5944         struct sk_buff *skb;
5945         unsigned int i;
5946         int bufsz;
5947
5948         i = rx_ring->next_to_use;
5949         buffer_info = &rx_ring->buffer_info[i];
5950
5951         bufsz = rx_ring->rx_buffer_len;
5952
5953         while (cleaned_count--) {
5954                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5955
5956                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5957                         if (!buffer_info->page) {
5958                                 buffer_info->page = netdev_alloc_page(netdev);
5959                                 if (unlikely(!buffer_info->page)) {
5960                                         u64_stats_update_begin(&rx_ring->rx_syncp);
5961                                         rx_ring->rx_stats.alloc_failed++;
5962                                         u64_stats_update_end(&rx_ring->rx_syncp);
5963                                         goto no_buffers;
5964                                 }
5965                                 buffer_info->page_offset = 0;
5966                         } else {
5967                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5968                         }
5969                         buffer_info->page_dma =
5970                                 dma_map_page(rx_ring->dev, buffer_info->page,
5971                                              buffer_info->page_offset,
5972                                              PAGE_SIZE / 2,
5973                                              DMA_FROM_DEVICE);
5974                         if (dma_mapping_error(rx_ring->dev,
5975                                               buffer_info->page_dma)) {
5976                                 buffer_info->page_dma = 0;
5977                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5978                                 rx_ring->rx_stats.alloc_failed++;
5979                                 u64_stats_update_end(&rx_ring->rx_syncp);
5980                                 goto no_buffers;
5981                         }
5982                 }
5983
5984                 skb = buffer_info->skb;
5985                 if (!skb) {
5986                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5987                         if (unlikely(!skb)) {
5988                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5989                                 rx_ring->rx_stats.alloc_failed++;
5990                                 u64_stats_update_end(&rx_ring->rx_syncp);
5991                                 goto no_buffers;
5992                         }
5993
5994                         buffer_info->skb = skb;
5995                 }
5996                 if (!buffer_info->dma) {
5997                         buffer_info->dma = dma_map_single(rx_ring->dev,
5998                                                           skb->data,
5999                                                           bufsz,
6000                                                           DMA_FROM_DEVICE);
6001                         if (dma_mapping_error(rx_ring->dev,
6002                                               buffer_info->dma)) {
6003                                 buffer_info->dma = 0;
6004                                 u64_stats_update_begin(&rx_ring->rx_syncp);
6005                                 rx_ring->rx_stats.alloc_failed++;
6006                                 u64_stats_update_end(&rx_ring->rx_syncp);
6007                                 goto no_buffers;
6008                         }
6009                 }
6010                 /* Refresh the desc even if buffer_addrs didn't change because
6011                  * each write-back erases this info. */
6012                 if (bufsz < IGB_RXBUFFER_1024) {
6013                         rx_desc->read.pkt_addr =
6014                              cpu_to_le64(buffer_info->page_dma);
6015                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
6016                 } else {
6017                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
6018                         rx_desc->read.hdr_addr = 0;
6019                 }
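                /*
                 * In packet-split mode (bufsz < IGB_RXBUFFER_1024) the small
                 * skb buffer is advertised as the header buffer (hdr_addr)
                 * and the half page as the packet buffer (pkt_addr); with a
                 * single large buffer only pkt_addr is used.
                 */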
6020
6021                 i++;
6022                 if (i == rx_ring->count)
6023                         i = 0;
6024                 buffer_info = &rx_ring->buffer_info[i];
6025         }
6026
6027 no_buffers:
6028         if (rx_ring->next_to_use != i) {
6029                 rx_ring->next_to_use = i;
6030                 if (i == 0)
6031                         i = (rx_ring->count - 1);
6032                 else
6033                         i--;
6034
6035                 /* Force memory writes to complete before letting h/w
6036                  * know there are new descriptors to fetch.  (Only
6037                  * applicable for weak-ordered memory model archs,
6038                  * such as IA-64). */
6039                 wmb();
6040                 writel(i, rx_ring->tail);
6041         }
6042 }
6043
6044 /**
6045  * igb_mii_ioctl - handle MII ioctls on copper PHYs
6046  * @netdev: network interface device structure
6047  * @ifr: pointer to the ioctl request data
6048  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6049  **/
6050 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6051 {
6052         struct igb_adapter *adapter = netdev_priv(netdev);
6053         struct mii_ioctl_data *data = if_mii(ifr);
6054
6055         if (adapter->hw.phy.media_type != e1000_media_type_copper)
6056                 return -EOPNOTSUPP;
6057
6058         switch (cmd) {
6059         case SIOCGMIIPHY:
6060                 data->phy_id = adapter->hw.phy.addr;
6061                 break;
6062         case SIOCGMIIREG:
6063                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6064                                      &data->val_out))
6065                         return -EIO;
6066                 break;
6067         case SIOCSMIIREG:
6068         default:
6069                 return -EOPNOTSUPP;
6070         }
6071         return 0;
6072 }
6073
6074 /**
6075  * igb_hwtstamp_ioctl - control hardware time stamping
6076  * @netdev: network interface device structure
6077  * @ifr: pointer to the ioctl request data
6078  * @cmd: ioctl command (SIOCSHWTSTAMP)
6079  *
6080  * Outgoing time stamping can be enabled and disabled. Play nice and
6081  * disable it when requested, although it shouldn't cause any overhead
6082  * when no packet needs it. At most one packet in the queue may be
6083  * marked for time stamping, otherwise it would be impossible to tell
6084  * for sure to which packet the hardware time stamp belongs.
6085  *
6086  * Incoming time stamping has to be configured via the hardware
6087  * filters. Not all combinations are supported, in particular event
6088  * type has to be specified. Matching the kind of event packet is
6089  * not supported, with the exception of "all V2 events regardless of
6090  * level 2 or 4".
6091  *
6092  **/
6093 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6094                               struct ifreq *ifr, int cmd)
6095 {
6096         struct igb_adapter *adapter = netdev_priv(netdev);
6097         struct e1000_hw *hw = &adapter->hw;
6098         struct hwtstamp_config config;
6099         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6100         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6101         u32 tsync_rx_cfg = 0;
6102         bool is_l4 = false;
6103         bool is_l2 = false;
6104         u32 regval;
6105
6106         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6107                 return -EFAULT;
6108
6109         /* reserved for future extensions */
6110         if (config.flags)
6111                 return -EINVAL;
6112
6113         switch (config.tx_type) {
6114         case HWTSTAMP_TX_OFF:
6115                 tsync_tx_ctl = 0;
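                /* fall through - TX_OFF only clears the enable bit */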
6116         case HWTSTAMP_TX_ON:
6117                 break;
6118         default:
6119                 return -ERANGE;
6120         }
6121
6122         switch (config.rx_filter) {
6123         case HWTSTAMP_FILTER_NONE:
6124                 tsync_rx_ctl = 0;
6125                 break;
6126         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6127         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6128         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6129         case HWTSTAMP_FILTER_ALL:
6130                 /*
6131                  * register TSYNCRXCFG must be set, therefore it is not
6132                  * possible to time stamp both Sync and Delay_Req messages
6133                  * => fall back to time stamping all packets
6134                  */
6135                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6136                 config.rx_filter = HWTSTAMP_FILTER_ALL;
6137                 break;
6138         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6139                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6140                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6141                 is_l4 = true;
6142                 break;
6143         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6144                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6145                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6146                 is_l4 = true;
6147                 break;
6148         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6149         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6150                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6151                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6152                 is_l2 = true;
6153                 is_l4 = true;
6154                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6155                 break;
6156         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6157         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6158                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6159                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6160                 is_l2 = true;
6161                 is_l4 = true;
6162                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6163                 break;
6164         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6165         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6166         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6167                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6168                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6169                 is_l2 = true;
6170                 break;
6171         default:
6172                 return -ERANGE;
6173         }
6174
6175         if (hw->mac.type == e1000_82575) {
6176                 if (tsync_rx_ctl | tsync_tx_ctl)
6177                         return -EINVAL;
6178                 return 0;
6179         }
6180
6181         /*
6182          * Per-packet timestamping only works if all packets are
6183          * timestamped, so enable timestamping in all packets as
6184          * long as one rx filter was configured.
6185          */
6186         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6187                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6188                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6189         }
6190
6191         /* enable/disable TX */
6192         regval = rd32(E1000_TSYNCTXCTL);
6193         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6194         regval |= tsync_tx_ctl;
6195         wr32(E1000_TSYNCTXCTL, regval);
6196
6197         /* enable/disable RX */
6198         regval = rd32(E1000_TSYNCRXCTL);
6199         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6200         regval |= tsync_rx_ctl;
6201         wr32(E1000_TSYNCRXCTL, regval);
6202
6203         /* define which PTP packets are time stamped */
6204         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6205
6206         /* define ethertype filter for timestamped packets */
6207         if (is_l2)
6208                 wr32(E1000_ETQF(3),
6209                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6210                                  E1000_ETQF_1588 | /* enable timestamping */
6211                                  ETH_P_1588));     /* 1588 eth protocol type */
6212         else
6213                 wr32(E1000_ETQF(3), 0);
6214
6215 #define PTP_PORT 319
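        /* UDP port 319 carries PTP event messages (Sync, Delay_Req, etc.);
         * general messages use port 320 and are not timestamped here. */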
6216         /* L4 Queue Filter[3]: filter by destination port and protocol */
6217         if (is_l4) {
6218                 u32 ftqf = (IPPROTO_UDP /* UDP */
6219                         | E1000_FTQF_VF_BP /* VF not compared */
6220                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6221                         | E1000_FTQF_MASK); /* mask all inputs */
6222                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6223
6224                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6225                 wr32(E1000_IMIREXT(3),
6226                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6227                 if (hw->mac.type == e1000_82576) {
6228                         /* enable source port check */
6229                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6230                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6231                 }
6232                 wr32(E1000_FTQF(3), ftqf);
6233         } else {
6234                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6235         }
6236         wrfl();
6237
6238         adapter->hwtstamp_config = config;
6239
6240         /* clear TX/RX time stamp registers, just to be sure */
6241         regval = rd32(E1000_TXSTMPH);
6242         regval = rd32(E1000_RXSTMPH);
6243
6244         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6245                 -EFAULT : 0;
6246 }
6247
6248 /**
6249  * igb_ioctl - dispatch device-specific ioctls
6250  * @netdev: network interface device structure
6251  * @ifr: pointer to the ioctl request data
6252  * @cmd: ioctl command
6253  **/
6254 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6255 {
6256         switch (cmd) {
6257         case SIOCGMIIPHY:
6258         case SIOCGMIIREG:
6259         case SIOCSMIIREG:
6260                 return igb_mii_ioctl(netdev, ifr, cmd);
6261         case SIOCSHWTSTAMP:
6262                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6263         default:
6264                 return -EOPNOTSUPP;
6265         }
6266 }
6267
6268 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6269 {
6270         struct igb_adapter *adapter = hw->back;
6271         u16 cap_offset;
6272
6273         cap_offset = adapter->pdev->pcie_cap;
6274         if (!cap_offset)
6275                 return -E1000_ERR_CONFIG;
6276
6277         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6278
6279         return 0;
6280 }
6281
6282 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6283 {
6284         struct igb_adapter *adapter = hw->back;
6285         u16 cap_offset;
6286
6287         cap_offset = adapter->pdev->pcie_cap;
6288         if (!cap_offset)
6289                 return -E1000_ERR_CONFIG;
6290
6291         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6292
6293         return 0;
6294 }
6295
6296 static void igb_vlan_mode(struct net_device *netdev, u32 features)
6297 {
6298         struct igb_adapter *adapter = netdev_priv(netdev);
6299         struct e1000_hw *hw = &adapter->hw;
6300         u32 ctrl, rctl;
6301
6302         igb_irq_disable(adapter);
6303
6304         if (features & NETIF_F_HW_VLAN_RX) {
6305                 /* enable VLAN tag insert/strip */
6306                 ctrl = rd32(E1000_CTRL);
6307                 ctrl |= E1000_CTRL_VME;
6308                 wr32(E1000_CTRL, ctrl);
6309
6310                 /* Disable CFI check */
6311                 rctl = rd32(E1000_RCTL);
6312                 rctl &= ~E1000_RCTL_CFIEN;
6313                 wr32(E1000_RCTL, rctl);
6314         } else {
6315                 /* disable VLAN tag insert/strip */
6316                 ctrl = rd32(E1000_CTRL);
6317                 ctrl &= ~E1000_CTRL_VME;
6318                 wr32(E1000_CTRL, ctrl);
6319         }
6320
6321         igb_rlpml_set(adapter);
6322
6323         if (!test_bit(__IGB_DOWN, &adapter->state))
6324                 igb_irq_enable(adapter);
6325 }
6326
6327 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6328 {
6329         struct igb_adapter *adapter = netdev_priv(netdev);
6330         struct e1000_hw *hw = &adapter->hw;
6331         int pf_id = adapter->vfs_allocated_count;
6332
6333         /* attempt to add filter to vlvf array */
6334         igb_vlvf_set(adapter, vid, true, pf_id);
6335
6336         /* add the filter since PF can receive vlans w/o entry in vlvf */
6337         igb_vfta_set(hw, vid, true);
6338
6339         set_bit(vid, adapter->active_vlans);
6340 }
6341
6342 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6343 {
6344         struct igb_adapter *adapter = netdev_priv(netdev);
6345         struct e1000_hw *hw = &adapter->hw;
6346         int pf_id = adapter->vfs_allocated_count;
6347         s32 err;
6348
6349         igb_irq_disable(adapter);
6350
6351         if (!test_bit(__IGB_DOWN, &adapter->state))
6352                 igb_irq_enable(adapter);
6353
6354         /* remove vlan from VLVF table array */
6355         err = igb_vlvf_set(adapter, vid, false, pf_id);
6356
6357         /* if vid was not present in VLVF just remove it from table */
6358         if (err)
6359                 igb_vfta_set(hw, vid, false);
6360
6361         clear_bit(vid, adapter->active_vlans);
6362 }
6363
6364 static void igb_restore_vlan(struct igb_adapter *adapter)
6365 {
6366         u16 vid;
6367
6368         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6369                 igb_vlan_rx_add_vid(adapter->netdev, vid);
6370 }
6371
6372 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6373 {
6374         struct pci_dev *pdev = adapter->pdev;
6375         struct e1000_mac_info *mac = &adapter->hw.mac;
6376
6377         mac->autoneg = 0;
6378
6379         /* Make sure dplx is at most 1 bit and lsb of speed is not set
6380          * for the switch() below to work */
6381         if ((spd & 1) || (dplx & ~1))
6382                 goto err_inval;
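        /* with dplx limited to one bit and all speeds even, spd + dplx below
         * uniquely encodes the pair, e.g. SPEED_100 + DUPLEX_FULL == 101 */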
6383
6384         /* Fiber NICs only allow 1000 Mbps full duplex */
6385         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6386             spd != SPEED_1000 &&
6387             dplx != DUPLEX_FULL)
6388                 goto err_inval;
6389
6390         switch (spd + dplx) {
6391         case SPEED_10 + DUPLEX_HALF:
6392                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6393                 break;
6394         case SPEED_10 + DUPLEX_FULL:
6395                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6396                 break;
6397         case SPEED_100 + DUPLEX_HALF:
6398                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6399                 break;
6400         case SPEED_100 + DUPLEX_FULL:
6401                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6402                 break;
6403         case SPEED_1000 + DUPLEX_FULL:
6404                 mac->autoneg = 1;
6405                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6406                 break;
6407         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6408         default:
6409                 goto err_inval;
6410         }
6411         return 0;
6412
6413 err_inval:
6414         dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6415         return -EINVAL;
6416 }
6417
6418 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6419 {
6420         struct net_device *netdev = pci_get_drvdata(pdev);
6421         struct igb_adapter *adapter = netdev_priv(netdev);
6422         struct e1000_hw *hw = &adapter->hw;
6423         u32 ctrl, rctl, status;
6424         u32 wufc = adapter->wol;
6425 #ifdef CONFIG_PM
6426         int retval = 0;
6427 #endif
6428
6429         netif_device_detach(netdev);
6430
6431         if (netif_running(netdev))
6432                 igb_close(netdev);
6433
6434         igb_clear_interrupt_scheme(adapter);
6435
6436 #ifdef CONFIG_PM
6437         retval = pci_save_state(pdev);
6438         if (retval)
6439                 return retval;
6440 #endif
6441
6442         status = rd32(E1000_STATUS);
6443         if (status & E1000_STATUS_LU)
6444                 wufc &= ~E1000_WUFC_LNKC;
6445
6446         if (wufc) {
6447                 igb_setup_rctl(adapter);
6448                 igb_set_rx_mode(netdev);
6449
6450                 /* turn on all-multi mode if wake on multicast is enabled */
6451                 if (wufc & E1000_WUFC_MC) {
6452                         rctl = rd32(E1000_RCTL);
6453                         rctl |= E1000_RCTL_MPE;
6454                         wr32(E1000_RCTL, rctl);
6455                 }
6456
6457                 ctrl = rd32(E1000_CTRL);
6458                 /* advertise wake from D3Cold */
6459                 #define E1000_CTRL_ADVD3WUC 0x00100000
6460                 /* phy power management enable */
6461                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6462                 ctrl |= E1000_CTRL_ADVD3WUC;
6463                 wr32(E1000_CTRL, ctrl);
6464
6465                 /* Allow time for pending master requests to run */
6466                 igb_disable_pcie_master(hw);
6467
6468                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6469                 wr32(E1000_WUFC, wufc);
6470         } else {
6471                 wr32(E1000_WUC, 0);
6472                 wr32(E1000_WUFC, 0);
6473         }
6474
6475         *enable_wake = wufc || adapter->en_mng_pt;
6476         if (!*enable_wake)
6477                 igb_power_down_link(adapter);
6478         else
6479                 igb_power_up_link(adapter);
6480
6481         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6482          * would have already happened in close and is redundant. */
6483         igb_release_hw_control(adapter);
6484
6485         pci_disable_device(pdev);
6486
6487         return 0;
6488 }
6489
6490 #ifdef CONFIG_PM
6491 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6492 {
6493         int retval;
6494         bool wake;
6495
6496         retval = __igb_shutdown(pdev, &wake);
6497         if (retval)
6498                 return retval;
6499
6500         if (wake) {
6501                 pci_prepare_to_sleep(pdev);
6502         } else {
6503                 pci_wake_from_d3(pdev, false);
6504                 pci_set_power_state(pdev, PCI_D3hot);
6505         }
6506
6507         return 0;
6508 }
6509
6510 static int igb_resume(struct pci_dev *pdev)
6511 {
6512         struct net_device *netdev = pci_get_drvdata(pdev);
6513         struct igb_adapter *adapter = netdev_priv(netdev);
6514         struct e1000_hw *hw = &adapter->hw;
6515         u32 err;
6516
6517         pci_set_power_state(pdev, PCI_D0);
6518         pci_restore_state(pdev);
6519         pci_save_state(pdev);
6520
6521         err = pci_enable_device_mem(pdev);
6522         if (err) {
6523                 dev_err(&pdev->dev,
6524                         "igb: Cannot enable PCI device from suspend\n");
6525                 return err;
6526         }
6527         pci_set_master(pdev);
6528
6529         pci_enable_wake(pdev, PCI_D3hot, 0);
6530         pci_enable_wake(pdev, PCI_D3cold, 0);
6531
6532         if (igb_init_interrupt_scheme(adapter)) {
6533                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6534                 return -ENOMEM;
6535         }
6536
6537         igb_reset(adapter);
6538
6539         /* let the f/w know that the h/w is now under the control of the
6540          * driver. */
6541         igb_get_hw_control(adapter);
6542
6543         wr32(E1000_WUS, ~0);
6544
6545         if (netif_running(netdev)) {
6546                 err = igb_open(netdev);
6547                 if (err)
6548                         return err;
6549         }
6550
6551         netif_device_attach(netdev);
6552
6553         return 0;
6554 }
6555 #endif
6556
6557 static void igb_shutdown(struct pci_dev *pdev)
6558 {
6559         bool wake;
6560
6561         __igb_shutdown(pdev, &wake);
6562
6563         if (system_state == SYSTEM_POWER_OFF) {
6564                 pci_wake_from_d3(pdev, wake);
6565                 pci_set_power_state(pdev, PCI_D3hot);
6566         }
6567 }
6568
6569 #ifdef CONFIG_NET_POLL_CONTROLLER
6570 /*
6571  * Polling 'interrupt' - used by things like netconsole to send skbs
6572  * without having to re-enable interrupts. It's not called while
6573  * the interrupt routine is executing.
6574  */
6575 static void igb_netpoll(struct net_device *netdev)
6576 {
6577         struct igb_adapter *adapter = netdev_priv(netdev);
6578         struct e1000_hw *hw = &adapter->hw;
6579         int i;
6580
6581         if (!adapter->msix_entries) {
6582                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6583                 igb_irq_disable(adapter);
6584                 napi_schedule(&q_vector->napi);
6585                 return;
6586         }
6587
6588         for (i = 0; i < adapter->num_q_vectors; i++) {
6589                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6590                 wr32(E1000_EIMC, q_vector->eims_value);
6591                 napi_schedule(&q_vector->napi);
6592         }
6593 }
6594 #endif /* CONFIG_NET_POLL_CONTROLLER */
6595
6596 /**
6597  * igb_io_error_detected - called when PCI error is detected
6598  * @pdev: Pointer to PCI device
6599  * @state: The current pci connection state
6600  *
6601  * This function is called after a PCI bus error affecting
6602  * this device has been detected.
6603  */
6604 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6605                                               pci_channel_state_t state)
6606 {
6607         struct net_device *netdev = pci_get_drvdata(pdev);
6608         struct igb_adapter *adapter = netdev_priv(netdev);
6609
6610         netif_device_detach(netdev);
6611
6612         if (state == pci_channel_io_perm_failure)
6613                 return PCI_ERS_RESULT_DISCONNECT;
6614
6615         if (netif_running(netdev))
6616                 igb_down(adapter);
6617         pci_disable_device(pdev);
6618
6619         /* Request a slot reset. */
6620         return PCI_ERS_RESULT_NEED_RESET;
6621 }
6622
6623 /**
6624  * igb_io_slot_reset - called after the pci bus has been reset.
6625  * @pdev: Pointer to PCI device
6626  *
6627  * Restart the card from scratch, as if from a cold-boot. Implementation
6628  * resembles the first-half of the igb_resume routine.
6629  */
6630 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6631 {
6632         struct net_device *netdev = pci_get_drvdata(pdev);
6633         struct igb_adapter *adapter = netdev_priv(netdev);
6634         struct e1000_hw *hw = &adapter->hw;
6635         pci_ers_result_t result;
6636         int err;
6637
6638         if (pci_enable_device_mem(pdev)) {
6639                 dev_err(&pdev->dev,
6640                         "Cannot re-enable PCI device after reset.\n");
6641                 result = PCI_ERS_RESULT_DISCONNECT;
6642         } else {
6643                 pci_set_master(pdev);
6644                 pci_restore_state(pdev);
6645                 pci_save_state(pdev);
6646
6647                 pci_enable_wake(pdev, PCI_D3hot, 0);
6648                 pci_enable_wake(pdev, PCI_D3cold, 0);
6649
6650                 igb_reset(adapter);
6651                 wr32(E1000_WUS, ~0);
6652                 result = PCI_ERS_RESULT_RECOVERED;
6653         }
6654
6655         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6656         if (err) {
6657                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6658                         "failed 0x%0x\n", err);
6659                 /* non-fatal, continue */
6660         }
6661
6662         return result;
6663 }
6664
6665 /**
6666  * igb_io_resume - called when traffic can start flowing again.
6667  * @pdev: Pointer to PCI device
6668  *
6669  * This callback is called when the error recovery driver tells us that
6670  * its OK to resume normal operation. Implementation resembles the
6671  * second-half of the igb_resume routine.
6672  */
6673 static void igb_io_resume(struct pci_dev *pdev)
6674 {
6675         struct net_device *netdev = pci_get_drvdata(pdev);
6676         struct igb_adapter *adapter = netdev_priv(netdev);
6677
6678         if (netif_running(netdev)) {
6679                 if (igb_up(adapter)) {
6680                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6681                         return;
6682                 }
6683         }
6684
6685         netif_device_attach(netdev);
6686
6687         /* let the f/w know that the h/w is now under the control of the
6688          * driver. */
6689         igb_get_hw_control(adapter);
6690 }
6691
6692 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6693                              u8 qsel)
6694 {
6695         u32 rar_low, rar_high;
6696         struct e1000_hw *hw = &adapter->hw;
6697
6698         /* HW expects these in little endian so we reverse the byte order
6699          * from network order (big endian) to little endian
6700          */
6701         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6702                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6703         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6704
6705         /* Indicate to hardware the Address is Valid. */
6706         rar_high |= E1000_RAH_AV;
6707
6708         if (hw->mac.type == e1000_82575)
6709                 rar_high |= E1000_RAH_POOL_1 * qsel;
6710         else
6711                 rar_high |= E1000_RAH_POOL_1 << qsel;
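        /* the 82575 form encodes qsel as a numeric value in the RAH pool
         * field (POOL_1 * qsel); later MACs appear to use a one-bit-per-pool
         * select mask, hence the shift */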
6712
6713         wr32(E1000_RAL(index), rar_low);
6714         wrfl();
6715         wr32(E1000_RAH(index), rar_high);
6716         wrfl();
6717 }
6718
6719 static int igb_set_vf_mac(struct igb_adapter *adapter,
6720                           int vf, unsigned char *mac_addr)
6721 {
6722         struct e1000_hw *hw = &adapter->hw;
6723         /* VF MAC addresses start at the end of the receive address registers
6724          * and move towards the first, so a collision should not be possible */
6725         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6726
6727         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6728
6729         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6730
6731         return 0;
6732 }
6733
6734 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6735 {
6736         struct igb_adapter *adapter = netdev_priv(netdev);
6737         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6738                 return -EINVAL;
6739         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6740         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6741         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6742                                       " change effective.");
6743         if (test_bit(__IGB_DOWN, &adapter->state)) {
6744                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6745                          " but the PF device is not up.\n");
6746                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6747                          " attempting to use the VF device.\n");
6748         }
6749         return igb_set_vf_mac(adapter, vf, mac);
6750 }
6751
6752 static int igb_link_mbps(int internal_link_speed)
6753 {
6754         switch (internal_link_speed) {
6755         case SPEED_100:
6756                 return 100;
6757         case SPEED_1000:
6758                 return 1000;
6759         default:
6760                 return 0;
6761         }
6762 }
6763
6764 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6765                                   int link_speed)
6766 {
6767         int rf_dec, rf_int;
6768         u32 bcnrc_val;
6769
6770         if (tx_rate != 0) {
6771                 /* Calculate the rate factor values to set */
6772                 rf_int = link_speed / tx_rate;
6773                 rf_dec = (link_speed - (rf_int * tx_rate));
6774                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6775
6776                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6777                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6778                                E1000_RTTBCNRC_RF_INT_MASK);
6779                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6780         } else {
6781                 bcnrc_val = 0;
6782         }
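        /*
         * Worked example: with link_speed = 1000 and tx_rate = 300 (Mbps),
         * rf_int = 3 and rf_dec = (1000 - 3 * 300) * 2^RF_INT_SHIFT / 300,
         * so the rate factor encodes 1000/300 = 3.33 as a fixed-point ratio
         * of link speed to the requested VF transmit rate.
         */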
6783
6784         wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6785         wr32(E1000_RTTBCNRC, bcnrc_val);
6786 }
6787
6788 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6789 {
6790         int actual_link_speed, i;
6791         bool reset_rate = false;
6792
6793         /* VF TX rate limit was not set or not supported */
6794         if ((adapter->vf_rate_link_speed == 0) ||
6795             (adapter->hw.mac.type != e1000_82576))
6796                 return;
6797
6798         actual_link_speed = igb_link_mbps(adapter->link_speed);
6799         if (actual_link_speed != adapter->vf_rate_link_speed) {
6800                 reset_rate = true;
6801                 adapter->vf_rate_link_speed = 0;
6802                 dev_info(&adapter->pdev->dev,
6803                          "Link speed has been changed. VF Transmit "
6804                          "rate is disabled\n");
6805         }
6806
6807         for (i = 0; i < adapter->vfs_allocated_count; i++) {
6808                 if (reset_rate)
6809                         adapter->vf_data[i].tx_rate = 0;
6810
6811                 igb_set_vf_rate_limit(&adapter->hw, i,
6812                                       adapter->vf_data[i].tx_rate,
6813                                       actual_link_speed);
6814         }
6815 }
6816
6817 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6818 {
6819         struct igb_adapter *adapter = netdev_priv(netdev);
6820         struct e1000_hw *hw = &adapter->hw;
6821         int actual_link_speed;
6822
6823         if (hw->mac.type != e1000_82576)
6824                 return -EOPNOTSUPP;
6825
6826         actual_link_speed = igb_link_mbps(adapter->link_speed);
6827         if ((vf >= adapter->vfs_allocated_count) ||
6828             (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6829             (tx_rate < 0) || (tx_rate > actual_link_speed))
6830                 return -EINVAL;
6831
6832         adapter->vf_rate_link_speed = actual_link_speed;
6833         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6834         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6835
6836         return 0;
6837 }
6838
6839 static int igb_ndo_get_vf_config(struct net_device *netdev,
6840                                  int vf, struct ifla_vf_info *ivi)
6841 {
6842         struct igb_adapter *adapter = netdev_priv(netdev);
6843         if (vf >= adapter->vfs_allocated_count)
6844                 return -EINVAL;
6845         ivi->vf = vf;
6846         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6847         ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6848         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6849         ivi->qos = adapter->vf_data[vf].pf_qos;
6850         return 0;
6851 }
6852
6853 static void igb_vmm_control(struct igb_adapter *adapter)
6854 {
6855         struct e1000_hw *hw = &adapter->hw;
6856         u32 reg;
6857
6858         switch (hw->mac.type) {
6859         case e1000_82575:
6860         default:
6861                 /* replication is not supported for 82575 */
6862                 return;
6863         case e1000_82576:
6864                 /* notify HW that the MAC is adding vlan tags */
6865                 reg = rd32(E1000_DTXCTL);
6866                 reg |= E1000_DTXCTL_VLAN_ADDED;
6867                 wr32(E1000_DTXCTL, reg);
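                /* fall through - 82576 also needs the replication/VLAN
                 * stripping setup done for 82580 below */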
6868         case e1000_82580:
6869                 /* enable replication vlan tag stripping */
6870                 reg = rd32(E1000_RPLOLR);
6871                 reg |= E1000_RPLOLR_STRVLAN;
6872                 wr32(E1000_RPLOLR, reg);
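                /* fall through */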
6873         case e1000_i350:
6874                 /* none of the above registers are supported by i350 */
6875                 break;
6876         }
6877
6878         if (adapter->vfs_allocated_count) {
6879                 igb_vmdq_set_loopback_pf(hw, true);
6880                 igb_vmdq_set_replication_pf(hw, true);
6881                 igb_vmdq_set_anti_spoofing_pf(hw, true,
6882                                                 adapter->vfs_allocated_count);
6883         } else {
6884                 igb_vmdq_set_loopback_pf(hw, false);
6885                 igb_vmdq_set_replication_pf(hw, false);
6886         }
6887 }
6888
6889 /* igb_main.c */