1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #include <linux/prefetch.h>
49 #ifdef CONFIG_IGB_DCA
50 #include <linux/dca.h>
51 #endif
52 #include "igb.h"
53
54 #define MAJ 3
55 #define MIN 0
56 #define BUILD 6
57 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
58 __stringify(BUILD) "-k"
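/* With the MAJ/MIN/BUILD values above, DRV_VERSION expands to "3.0.6-k". */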
59 char igb_driver_name[] = "igb";
60 char igb_driver_version[] = DRV_VERSION;
61 static const char igb_driver_string[] =
62                                 "Intel(R) Gigabit Ethernet Network Driver";
63 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
64
65 static const struct e1000_info *igb_info_tbl[] = {
66         [board_82575] = &e1000_82575_info,
67 };
68
69 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
87         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
88         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
89         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
90         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
91         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
92         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
93         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
94         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
95         /* required last entry */
96         {0, }
97 };
98
99 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
100
101 void igb_reset(struct igb_adapter *);
102 static int igb_setup_all_tx_resources(struct igb_adapter *);
103 static int igb_setup_all_rx_resources(struct igb_adapter *);
104 static void igb_free_all_tx_resources(struct igb_adapter *);
105 static void igb_free_all_rx_resources(struct igb_adapter *);
106 static void igb_setup_mrqc(struct igb_adapter *);
107 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
108 static void __devexit igb_remove(struct pci_dev *pdev);
109 static void igb_init_hw_timer(struct igb_adapter *adapter);
110 static int igb_sw_init(struct igb_adapter *);
111 static int igb_open(struct net_device *);
112 static int igb_close(struct net_device *);
113 static void igb_configure_tx(struct igb_adapter *);
114 static void igb_configure_rx(struct igb_adapter *);
115 static void igb_clean_all_tx_rings(struct igb_adapter *);
116 static void igb_clean_all_rx_rings(struct igb_adapter *);
117 static void igb_clean_tx_ring(struct igb_ring *);
118 static void igb_clean_rx_ring(struct igb_ring *);
119 static void igb_set_rx_mode(struct net_device *);
120 static void igb_update_phy_info(unsigned long);
121 static void igb_watchdog(unsigned long);
122 static void igb_watchdog_task(struct work_struct *);
123 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
124 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
125                                                  struct rtnl_link_stats64 *stats);
126 static int igb_change_mtu(struct net_device *, int);
127 static int igb_set_mac(struct net_device *, void *);
128 static void igb_set_uta(struct igb_adapter *adapter);
129 static irqreturn_t igb_intr(int irq, void *);
130 static irqreturn_t igb_intr_msi(int irq, void *);
131 static irqreturn_t igb_msix_other(int irq, void *);
132 static irqreturn_t igb_msix_ring(int irq, void *);
133 #ifdef CONFIG_IGB_DCA
134 static void igb_update_dca(struct igb_q_vector *);
135 static void igb_setup_dca(struct igb_adapter *);
136 #endif /* CONFIG_IGB_DCA */
137 static bool igb_clean_tx_irq(struct igb_q_vector *);
138 static int igb_poll(struct napi_struct *, int);
139 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
140 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
141 static void igb_tx_timeout(struct net_device *);
142 static void igb_reset_task(struct work_struct *);
143 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
144 static void igb_vlan_rx_add_vid(struct net_device *, u16);
145 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
146 static void igb_restore_vlan(struct igb_adapter *);
147 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
148 static void igb_ping_all_vfs(struct igb_adapter *);
149 static void igb_msg_task(struct igb_adapter *);
150 static void igb_vmm_control(struct igb_adapter *);
151 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
152 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
153 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
154 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
155                                int vf, u16 vlan, u8 qos);
156 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
157 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
158                                  struct ifla_vf_info *ivi);
159 static void igb_check_vf_rate_limit(struct igb_adapter *);
160
161 #ifdef CONFIG_PM
162 static int igb_suspend(struct pci_dev *, pm_message_t);
163 static int igb_resume(struct pci_dev *);
164 #endif
165 static void igb_shutdown(struct pci_dev *);
166 #ifdef CONFIG_IGB_DCA
167 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
168 static struct notifier_block dca_notifier = {
169         .notifier_call  = igb_notify_dca,
170         .next           = NULL,
171         .priority       = 0
172 };
173 #endif
174 #ifdef CONFIG_NET_POLL_CONTROLLER
175 /* for netdump / net console */
176 static void igb_netpoll(struct net_device *);
177 #endif
178 #ifdef CONFIG_PCI_IOV
179 static unsigned int max_vfs = 0;
180 module_param(max_vfs, uint, 0);
181 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
182                  "per physical function");
183 #endif /* CONFIG_PCI_IOV */
184
185 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
186                      pci_channel_state_t);
187 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
188 static void igb_io_resume(struct pci_dev *);
189
190 static struct pci_error_handlers igb_err_handler = {
191         .error_detected = igb_io_error_detected,
192         .slot_reset = igb_io_slot_reset,
193         .resume = igb_io_resume,
194 };
195
196
197 static struct pci_driver igb_driver = {
198         .name     = igb_driver_name,
199         .id_table = igb_pci_tbl,
200         .probe    = igb_probe,
201         .remove   = __devexit_p(igb_remove),
202 #ifdef CONFIG_PM
203         /* Power Management Hooks */
204         .suspend  = igb_suspend,
205         .resume   = igb_resume,
206 #endif
207         .shutdown = igb_shutdown,
208         .err_handler = &igb_err_handler
209 };
210
211 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
212 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
213 MODULE_LICENSE("GPL");
214 MODULE_VERSION(DRV_VERSION);
215
216 struct igb_reg_info {
217         u32 ofs;
218         char *name;
219 };
220
221 static const struct igb_reg_info igb_reg_info_tbl[] = {
222
223         /* General Registers */
224         {E1000_CTRL, "CTRL"},
225         {E1000_STATUS, "STATUS"},
226         {E1000_CTRL_EXT, "CTRL_EXT"},
227
228         /* Interrupt Registers */
229         {E1000_ICR, "ICR"},
230
231         /* RX Registers */
232         {E1000_RCTL, "RCTL"},
233         {E1000_RDLEN(0), "RDLEN"},
234         {E1000_RDH(0), "RDH"},
235         {E1000_RDT(0), "RDT"},
236         {E1000_RXDCTL(0), "RXDCTL"},
237         {E1000_RDBAL(0), "RDBAL"},
238         {E1000_RDBAH(0), "RDBAH"},
239
240         /* TX Registers */
241         {E1000_TCTL, "TCTL"},
242         {E1000_TDBAL(0), "TDBAL"},
243         {E1000_TDBAH(0), "TDBAH"},
244         {E1000_TDLEN(0), "TDLEN"},
245         {E1000_TDH(0), "TDH"},
246         {E1000_TDT(0), "TDT"},
247         {E1000_TXDCTL(0), "TXDCTL"},
248         {E1000_TDFH, "TDFH"},
249         {E1000_TDFT, "TDFT"},
250         {E1000_TDFHS, "TDFHS"},
251         {E1000_TDFPC, "TDFPC"},
252
253         /* List Terminator */
254         {}
255 };
256
257 /*
258  * igb_regdump - register printout routine
259  */
260 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
261 {
262         int n = 0;
263         char rname[16];
264         u32 regs[8];
265
266         switch (reginfo->ofs) {
267         case E1000_RDLEN(0):
268                 for (n = 0; n < 4; n++)
269                         regs[n] = rd32(E1000_RDLEN(n));
270                 break;
271         case E1000_RDH(0):
272                 for (n = 0; n < 4; n++)
273                         regs[n] = rd32(E1000_RDH(n));
274                 break;
275         case E1000_RDT(0):
276                 for (n = 0; n < 4; n++)
277                         regs[n] = rd32(E1000_RDT(n));
278                 break;
279         case E1000_RXDCTL(0):
280                 for (n = 0; n < 4; n++)
281                         regs[n] = rd32(E1000_RXDCTL(n));
282                 break;
283         case E1000_RDBAL(0):
284                 for (n = 0; n < 4; n++)
285                         regs[n] = rd32(E1000_RDBAL(n));
286                 break;
287         case E1000_RDBAH(0):
288                 for (n = 0; n < 4; n++)
289                         regs[n] = rd32(E1000_RDBAH(n));
290                 break;
291         case E1000_TDBAL(0):
292                 for (n = 0; n < 4; n++)
293                         regs[n] = rd32(E1000_TDBAL(n));
294                 break;
295         case E1000_TDBAH(0):
296                 for (n = 0; n < 4; n++)
297                         regs[n] = rd32(E1000_TDBAH(n));
298                 break;
299         case E1000_TDLEN(0):
300                 for (n = 0; n < 4; n++)
301                         regs[n] = rd32(E1000_TDLEN(n));
302                 break;
303         case E1000_TDH(0):
304                 for (n = 0; n < 4; n++)
305                         regs[n] = rd32(E1000_TDH(n));
306                 break;
307         case E1000_TDT(0):
308                 for (n = 0; n < 4; n++)
309                         regs[n] = rd32(E1000_TDT(n));
310                 break;
311         case E1000_TXDCTL(0):
312                 for (n = 0; n < 4; n++)
313                         regs[n] = rd32(E1000_TXDCTL(n));
314                 break;
315         default:
316                 printk(KERN_INFO "%-15s %08x\n",
317                         reginfo->name, rd32(reginfo->ofs));
318                 return;
319         }
320
321         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
322         printk(KERN_INFO "%-15s ", rname);
323         for (n = 0; n < 4; n++)
324                 printk(KERN_CONT "%08x ", regs[n]);
325         printk(KERN_CONT "\n");
326 }
327
328 /*
329  * igb_dump - Print registers, tx-rings and rx-rings
330  */
331 static void igb_dump(struct igb_adapter *adapter)
332 {
333         struct net_device *netdev = adapter->netdev;
334         struct e1000_hw *hw = &adapter->hw;
335         struct igb_reg_info *reginfo;
336         int n = 0;
337         struct igb_ring *tx_ring;
338         union e1000_adv_tx_desc *tx_desc;
339         struct my_u0 { u64 a; u64 b; } *u0;
340         struct igb_buffer *buffer_info;
341         struct igb_ring *rx_ring;
342         union e1000_adv_rx_desc *rx_desc;
343         u32 staterr;
344         int i = 0;
345
346         if (!netif_msg_hw(adapter))
347                 return;
348
349         /* Print netdevice Info */
350         if (netdev) {
351                 dev_info(&adapter->pdev->dev, "Net device Info\n");
352                 printk(KERN_INFO "Device Name     state            "
353                         "trans_start      last_rx\n");
354                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
355                 netdev->name,
356                 netdev->state,
357                 netdev->trans_start,
358                 netdev->last_rx);
359         }
360
361         /* Print Registers */
362         dev_info(&adapter->pdev->dev, "Register Dump\n");
363         printk(KERN_INFO " Register Name   Value\n");
364         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
365              reginfo->name; reginfo++) {
366                 igb_regdump(hw, reginfo);
367         }
368
369         /* Print TX Ring Summary */
370         if (!netdev || !netif_running(netdev))
371                 goto exit;
372
373         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
374         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
375                 " leng ntw timestamp\n");
376         for (n = 0; n < adapter->num_tx_queues; n++) {
377                 tx_ring = adapter->tx_ring[n];
378                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
379                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
380                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
381                            (u64)buffer_info->dma,
382                            buffer_info->length,
383                            buffer_info->next_to_watch,
384                            (u64)buffer_info->time_stamp);
385         }
386
387         /* Print TX Rings */
388         if (!netif_msg_tx_done(adapter))
389                 goto rx_ring_summary;
390
391         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
392
393         /* Transmit Descriptor Formats
394          *
395          * Advanced Transmit Descriptor
396          *   +--------------------------------------------------------------+
397          * 0 |         Buffer Address [63:0]                                |
398          *   +--------------------------------------------------------------+
399          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
400          *   +--------------------------------------------------------------+
401          *   63      46 45    40 39 38 36 35 32 31   24             15       0
402          */
403
404         for (n = 0; n < adapter->num_tx_queues; n++) {
405                 tx_ring = adapter->tx_ring[n];
406                 printk(KERN_INFO "------------------------------------\n");
407                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
408                 printk(KERN_INFO "------------------------------------\n");
409                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
410                         "[PlPOCIStDDM Ln] [bi->dma       ] "
411                         "leng  ntw timestamp        bi->skb\n");
412
413                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
414                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
415                         buffer_info = &tx_ring->buffer_info[i];
416                         u0 = (struct my_u0 *)tx_desc;
417                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
418                                 " %04X  %3X %016llX %p", i,
419                                 le64_to_cpu(u0->a),
420                                 le64_to_cpu(u0->b),
421                                 (u64)buffer_info->dma,
422                                 buffer_info->length,
423                                 buffer_info->next_to_watch,
424                                 (u64)buffer_info->time_stamp,
425                                 buffer_info->skb);
426                         if (i == tx_ring->next_to_use &&
427                                 i == tx_ring->next_to_clean)
428                                 printk(KERN_CONT " NTC/U\n");
429                         else if (i == tx_ring->next_to_use)
430                                 printk(KERN_CONT " NTU\n");
431                         else if (i == tx_ring->next_to_clean)
432                                 printk(KERN_CONT " NTC\n");
433                         else
434                                 printk(KERN_CONT "\n");
435
436                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
437                                 print_hex_dump(KERN_INFO, "",
438                                         DUMP_PREFIX_ADDRESS,
439                                         16, 1, phys_to_virt(buffer_info->dma),
440                                         buffer_info->length, true);
441                 }
442         }
443
444         /* Print RX Rings Summary */
445 rx_ring_summary:
446         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
447         printk(KERN_INFO "Queue [NTU] [NTC]\n");
448         for (n = 0; n < adapter->num_rx_queues; n++) {
449                 rx_ring = adapter->rx_ring[n];
450                 printk(KERN_INFO " %5d %5X %5X\n", n,
451                            rx_ring->next_to_use, rx_ring->next_to_clean);
452         }
453
454         /* Print RX Rings */
455         if (!netif_msg_rx_status(adapter))
456                 goto exit;
457
458         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
459
460         /* Advanced Receive Descriptor (Read) Format
461          *    63                                           1        0
462          *    +-----------------------------------------------------+
463          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
464          *    +----------------------------------------------+------+
465          *  8 |       Header Buffer Address [63:1]           |  DD  |
466          *    +-----------------------------------------------------+
467          *
468          *
469          * Advanced Receive Descriptor (Write-Back) Format
470          *
471          *   63       48 47    32 31  30      21 20 17 16   4 3     0
472          *   +------------------------------------------------------+
473          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
474          *   | Checksum   Ident  |   |           |    | Type | Type |
475          *   +------------------------------------------------------+
476          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
477          *   +------------------------------------------------------+
478          *   63       48 47    32 31            20 19               0
479          */
480
481         for (n = 0; n < adapter->num_rx_queues; n++) {
482                 rx_ring = adapter->rx_ring[n];
483                 printk(KERN_INFO "------------------------------------\n");
484                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
485                 printk(KERN_INFO "------------------------------------\n");
486                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
487                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
488                         "<-- Adv Rx Read format\n");
489                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
490                         "[vl er S cks ln] ---------------- [bi->skb] "
491                         "<-- Adv Rx Write-Back format\n");
492
493                 for (i = 0; i < rx_ring->count; i++) {
494                         buffer_info = &rx_ring->buffer_info[i];
495                         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
496                         u0 = (struct my_u0 *)rx_desc;
497                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
498                         if (staterr & E1000_RXD_STAT_DD) {
499                                 /* Descriptor Done */
500                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
501                                         "%016llX ---------------- %p", i,
502                                         le64_to_cpu(u0->a),
503                                         le64_to_cpu(u0->b),
504                                         buffer_info->skb);
505                         } else {
506                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
507                                         "%016llX %016llX %p", i,
508                                         le64_to_cpu(u0->a),
509                                         le64_to_cpu(u0->b),
510                                         (u64)buffer_info->dma,
511                                         buffer_info->skb);
512
513                                 if (netif_msg_pktdata(adapter)) {
514                                         print_hex_dump(KERN_INFO, "",
515                                                 DUMP_PREFIX_ADDRESS,
516                                                 16, 1,
517                                                 phys_to_virt(buffer_info->dma),
518                                                 rx_ring->rx_buffer_len, true);
519                                         if (rx_ring->rx_buffer_len
520                                                 < IGB_RXBUFFER_1024)
521                                                 print_hex_dump(KERN_INFO, "",
522                                                   DUMP_PREFIX_ADDRESS,
523                                                   16, 1,
524                                                   phys_to_virt(
525                                                     buffer_info->page_dma +
526                                                     buffer_info->page_offset),
527                                                   PAGE_SIZE/2, true);
528                                 }
529                         }
530
531                         if (i == rx_ring->next_to_use)
532                                 printk(KERN_CONT " NTU\n");
533                         else if (i == rx_ring->next_to_clean)
534                                 printk(KERN_CONT " NTC\n");
535                         else
536                                 printk(KERN_CONT "\n");
537
538                 }
539         }
540
541 exit:
542         return;
543 }
544
545
546 /**
547  * igb_read_clock - read raw cycle counter (to be used by time counter)
548  */
549 static cycle_t igb_read_clock(const struct cyclecounter *tc)
550 {
551         struct igb_adapter *adapter =
552                 container_of(tc, struct igb_adapter, cycles);
553         struct e1000_hw *hw = &adapter->hw;
554         u64 stamp = 0;
555         int shift = 0;
556
557         /*
558          * The timestamp latches on lowest register read. For the 82580
559          * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
560          * adjusted TIMINCA, so SYSTIMR will just read as all 0s and can be ignored.
561          */
562         if (hw->mac.type == e1000_82580) {
563                 stamp = rd32(E1000_SYSTIMR) >> 8;
564                 shift = IGB_82580_TSYNC_SHIFT;
565         }
566
567         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
568         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
569         return stamp;
570 }
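/*
 * For illustration: on pre-82580 parts shift stays 0, so the value returned
 * above is simply SYSTIML | ((u64)SYSTIMH << 32); on the 82580 the SYSTIMR
 * bits are folded into the low bits first and SYSTIML/SYSTIMH are shifted up
 * by IGB_82580_TSYNC_SHIFT (and IGB_82580_TSYNC_SHIFT + 32) bits.
 */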
571
572 /**
573  * igb_get_hw_dev - return device
574  * used by hardware layer to print debugging information
575  **/
576 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
577 {
578         struct igb_adapter *adapter = hw->back;
579         return adapter->netdev;
580 }
581
582 /**
583  * igb_init_module - Driver Registration Routine
584  *
585  * igb_init_module is the first routine called when the driver is
586  * loaded. All it does is register with the PCI subsystem.
587  **/
588 static int __init igb_init_module(void)
589 {
590         int ret;
591         printk(KERN_INFO "%s - version %s\n",
592                igb_driver_string, igb_driver_version);
593
594         printk(KERN_INFO "%s\n", igb_copyright);
595
596 #ifdef CONFIG_IGB_DCA
597         dca_register_notify(&dca_notifier);
598 #endif
599         ret = pci_register_driver(&igb_driver);
600         return ret;
601 }
602
603 module_init(igb_init_module);
604
605 /**
606  * igb_exit_module - Driver Exit Cleanup Routine
607  *
608  * igb_exit_module is called just before the driver is removed
609  * from memory.
610  **/
611 static void __exit igb_exit_module(void)
612 {
613 #ifdef CONFIG_IGB_DCA
614         dca_unregister_notify(&dca_notifier);
615 #endif
616         pci_unregister_driver(&igb_driver);
617 }
618
619 module_exit(igb_exit_module);
620
621 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
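/*
 * Worked example of the mapping above: Q_IDX_82576(0) = 0, Q_IDX_82576(1) = 8,
 * Q_IDX_82576(2) = 1, Q_IDX_82576(3) = 9, and so on, so queue pair (n, n + 8)
 * ends up with VF n, matching the comment in igb_cache_ring_register() below.
 */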
622 /**
623  * igb_cache_ring_register - Descriptor ring to register mapping
624  * @adapter: board private structure to initialize
625  *
626  * Once we know the feature-set enabled for the device, we'll cache
627  * the register offset the descriptor ring is assigned to.
628  **/
629 static void igb_cache_ring_register(struct igb_adapter *adapter)
630 {
631         int i = 0, j = 0;
632         u32 rbase_offset = adapter->vfs_allocated_count;
633
634         switch (adapter->hw.mac.type) {
635         case e1000_82576:
636                 /* The queues are allocated for virtualization such that VF 0
637                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
638                  * In order to avoid collision we start at the first free queue
639                  * and continue consuming queues in the same sequence
640                  */
641                 if (adapter->vfs_allocated_count) {
642                         for (; i < adapter->rss_queues; i++)
643                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
644                                                                Q_IDX_82576(i);
645                 }
646         case e1000_82575:
647         case e1000_82580:
648         case e1000_i350:
649         default:
650                 for (; i < adapter->num_rx_queues; i++)
651                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
652                 for (; j < adapter->num_tx_queues; j++)
653                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
654                 break;
655         }
656 }
657
658 static void igb_free_queues(struct igb_adapter *adapter)
659 {
660         int i;
661
662         for (i = 0; i < adapter->num_tx_queues; i++) {
663                 kfree(adapter->tx_ring[i]);
664                 adapter->tx_ring[i] = NULL;
665         }
666         for (i = 0; i < adapter->num_rx_queues; i++) {
667                 kfree(adapter->rx_ring[i]);
668                 adapter->rx_ring[i] = NULL;
669         }
670         adapter->num_rx_queues = 0;
671         adapter->num_tx_queues = 0;
672 }
673
674 /**
675  * igb_alloc_queues - Allocate memory for all rings
676  * @adapter: board private structure to initialize
677  *
678  * We allocate one ring per queue at run-time since we don't know the
679  * number of queues at compile-time.
680  **/
681 static int igb_alloc_queues(struct igb_adapter *adapter)
682 {
683         struct igb_ring *ring;
684         int i;
685
686         for (i = 0; i < adapter->num_tx_queues; i++) {
687                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
688                 if (!ring)
689                         goto err;
690                 ring->count = adapter->tx_ring_count;
691                 ring->queue_index = i;
692                 ring->dev = &adapter->pdev->dev;
693                 ring->netdev = adapter->netdev;
694                 /* For 82575, context index must be unique per ring. */
695                 if (adapter->hw.mac.type == e1000_82575)
696                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
697                 adapter->tx_ring[i] = ring;
698         }
699
700         for (i = 0; i < adapter->num_rx_queues; i++) {
701                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
702                 if (!ring)
703                         goto err;
704                 ring->count = adapter->rx_ring_count;
705                 ring->queue_index = i;
706                 ring->dev = &adapter->pdev->dev;
707                 ring->netdev = adapter->netdev;
708                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
709                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
710                 /* set flag indicating ring supports SCTP checksum offload */
711                 if (adapter->hw.mac.type >= e1000_82576)
712                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
713                 adapter->rx_ring[i] = ring;
714         }
715
716         igb_cache_ring_register(adapter);
717
718         return 0;
719
720 err:
721         igb_free_queues(adapter);
722
723         return -ENOMEM;
724 }
725
726 #define IGB_N0_QUEUE -1
727 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
728 {
729         u32 msixbm = 0;
730         struct igb_adapter *adapter = q_vector->adapter;
731         struct e1000_hw *hw = &adapter->hw;
732         u32 ivar, index;
733         int rx_queue = IGB_N0_QUEUE;
734         int tx_queue = IGB_N0_QUEUE;
735
736         if (q_vector->rx_ring)
737                 rx_queue = q_vector->rx_ring->reg_idx;
738         if (q_vector->tx_ring)
739                 tx_queue = q_vector->tx_ring->reg_idx;
740
741         switch (hw->mac.type) {
742         case e1000_82575:
743                 /* The 82575 assigns vectors using a bitmask, which matches the
744                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
745                    or more queues to a vector, we write the appropriate bits
746                    into the MSIXBM register for that vector. */
747                 if (rx_queue > IGB_N0_QUEUE)
748                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
749                 if (tx_queue > IGB_N0_QUEUE)
750                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
751                 if (!adapter->msix_entries && msix_vector == 0)
752                         msixbm |= E1000_EIMS_OTHER;
753                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
754                 q_vector->eims_value = msixbm;
755                 break;
756         case e1000_82576:
757                 /* 82576 uses a table-based method for assigning vectors.
758                    Each queue has a single entry in the table to which we write
759                    a vector number along with a "valid" bit.  Sadly, the layout
760                    of the table is somewhat counterintuitive. */
761                 if (rx_queue > IGB_N0_QUEUE) {
762                         index = (rx_queue & 0x7);
763                         ivar = array_rd32(E1000_IVAR0, index);
764                         if (rx_queue < 8) {
765                                 /* vector goes into low byte of register */
766                                 ivar = ivar & 0xFFFFFF00;
767                                 ivar |= msix_vector | E1000_IVAR_VALID;
768                         } else {
769                                 /* vector goes into third byte of register */
770                                 ivar = ivar & 0xFF00FFFF;
771                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
772                         }
773                         array_wr32(E1000_IVAR0, index, ivar);
774                 }
775                 if (tx_queue > IGB_N0_QUEUE) {
776                         index = (tx_queue & 0x7);
777                         ivar = array_rd32(E1000_IVAR0, index);
778                         if (tx_queue < 8) {
779                                 /* vector goes into second byte of register */
780                                 ivar = ivar & 0xFFFF00FF;
781                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
782                         } else {
783                                 /* vector goes into high byte of register */
784                                 ivar = ivar & 0x00FFFFFF;
785                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
786                         }
787                         array_wr32(E1000_IVAR0, index, ivar);
788                 }
789                 q_vector->eims_value = 1 << msix_vector;
790                 break;
791         case e1000_82580:
792         case e1000_i350:
793                 /* 82580 uses the same table-based approach as 82576 but has fewer
794                    entries; as a result we carry over for queues greater than 4. */
795                 if (rx_queue > IGB_N0_QUEUE) {
796                         index = (rx_queue >> 1);
797                         ivar = array_rd32(E1000_IVAR0, index);
798                         if (rx_queue & 0x1) {
799                                 /* vector goes into third byte of register */
800                                 ivar = ivar & 0xFF00FFFF;
801                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
802                         } else {
803                                 /* vector goes into low byte of register */
804                                 ivar = ivar & 0xFFFFFF00;
805                                 ivar |= msix_vector | E1000_IVAR_VALID;
806                         }
807                         array_wr32(E1000_IVAR0, index, ivar);
808                 }
809                 if (tx_queue > IGB_N0_QUEUE) {
810                         index = (tx_queue >> 1);
811                         ivar = array_rd32(E1000_IVAR0, index);
812                         if (tx_queue & 0x1) {
813                                 /* vector goes into high byte of register */
814                                 ivar = ivar & 0x00FFFFFF;
815                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
816                         } else {
817                                 /* vector goes into second byte of register */
818                                 ivar = ivar & 0xFFFF00FF;
819                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
820                         }
821                         array_wr32(E1000_IVAR0, index, ivar);
822                 }
823                 q_vector->eims_value = 1 << msix_vector;
824                 break;
825         default:
826                 BUG();
827                 break;
828         }
829
830         /* add q_vector eims value to global eims_enable_mask */
831         adapter->eims_enable_mask |= q_vector->eims_value;
832
833         /* configure q_vector to set itr on first interrupt */
834         q_vector->set_itr = 1;
835 }
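/*
 * Worked example of the IVAR layout handled above: on the 82576, rx queue 2
 * is programmed into the low byte of IVAR0[2] and rx queue 10 into byte 2 of
 * the same entry, while tx queues 2 and 10 use bytes 1 and 3 respectively.
 * On the 82580/i350 the entry index is queue >> 1 instead, with odd-numbered
 * queues occupying the two upper bytes.
 */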
836
837 /**
838  * igb_configure_msix - Configure MSI-X hardware
839  *
840  * igb_configure_msix sets up the hardware to properly
841  * generate MSI-X interrupts.
842  **/
843 static void igb_configure_msix(struct igb_adapter *adapter)
844 {
845         u32 tmp;
846         int i, vector = 0;
847         struct e1000_hw *hw = &adapter->hw;
848
849         adapter->eims_enable_mask = 0;
850
851         /* set vector for other causes, i.e. link changes */
852         switch (hw->mac.type) {
853         case e1000_82575:
854                 tmp = rd32(E1000_CTRL_EXT);
855                 /* enable MSI-X PBA support */
856                 tmp |= E1000_CTRL_EXT_PBA_CLR;
857
858                 /* Auto-Mask interrupts upon ICR read. */
859                 tmp |= E1000_CTRL_EXT_EIAME;
860                 tmp |= E1000_CTRL_EXT_IRCA;
861
862                 wr32(E1000_CTRL_EXT, tmp);
863
864                 /* enable msix_other interrupt */
865                 array_wr32(E1000_MSIXBM(0), vector++,
866                                       E1000_EIMS_OTHER);
867                 adapter->eims_other = E1000_EIMS_OTHER;
868
869                 break;
870
871         case e1000_82576:
872         case e1000_82580:
873         case e1000_i350:
874                 /* Turn on MSI-X capability first, or our settings
875                  * won't stick.  And it will take days to debug. */
876                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
877                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
878                                 E1000_GPIE_NSICR);
879
880                 /* enable msix_other interrupt */
881                 adapter->eims_other = 1 << vector;
882                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
883
884                 wr32(E1000_IVAR_MISC, tmp);
885                 break;
886         default:
887                 /* do nothing, since nothing else supports MSI-X */
888                 break;
889         } /* switch (hw->mac.type) */
890
891         adapter->eims_enable_mask |= adapter->eims_other;
892
893         for (i = 0; i < adapter->num_q_vectors; i++)
894                 igb_assign_vector(adapter->q_vector[i], vector++);
895
896         wrfl();
897 }
898
899 /**
900  * igb_request_msix - Initialize MSI-X interrupts
901  *
902  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
903  * kernel.
904  **/
905 static int igb_request_msix(struct igb_adapter *adapter)
906 {
907         struct net_device *netdev = adapter->netdev;
908         struct e1000_hw *hw = &adapter->hw;
909         int i, err = 0, vector = 0;
910
911         err = request_irq(adapter->msix_entries[vector].vector,
912                           igb_msix_other, 0, netdev->name, adapter);
913         if (err)
914                 goto out;
915         vector++;
916
917         for (i = 0; i < adapter->num_q_vectors; i++) {
918                 struct igb_q_vector *q_vector = adapter->q_vector[i];
919
920                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
921
922                 if (q_vector->rx_ring && q_vector->tx_ring)
923                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
924                                 q_vector->rx_ring->queue_index);
925                 else if (q_vector->tx_ring)
926                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
927                                 q_vector->tx_ring->queue_index);
928                 else if (q_vector->rx_ring)
929                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
930                                 q_vector->rx_ring->queue_index);
931                 else
932                         sprintf(q_vector->name, "%s-unused", netdev->name);
933
934                 err = request_irq(adapter->msix_entries[vector].vector,
935                                   igb_msix_ring, 0, q_vector->name,
936                                   q_vector);
937                 if (err)
938                         goto out;
939                 vector++;
940         }
941
942         igb_configure_msix(adapter);
943         return 0;
944 out:
945         return err;
946 }
947
948 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
949 {
950         if (adapter->msix_entries) {
951                 pci_disable_msix(adapter->pdev);
952                 kfree(adapter->msix_entries);
953                 adapter->msix_entries = NULL;
954         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
955                 pci_disable_msi(adapter->pdev);
956         }
957 }
958
959 /**
960  * igb_free_q_vectors - Free memory allocated for interrupt vectors
961  * @adapter: board private structure to initialize
962  *
963  * This function frees the memory allocated to the q_vectors.  In addition if
964  * NAPI is enabled it will delete any references to the NAPI struct prior
965  * to freeing the q_vector.
966  **/
967 static void igb_free_q_vectors(struct igb_adapter *adapter)
968 {
969         int v_idx;
970
971         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
972                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
973                 adapter->q_vector[v_idx] = NULL;
974                 if (!q_vector)
975                         continue;
976                 netif_napi_del(&q_vector->napi);
977                 kfree(q_vector);
978         }
979         adapter->num_q_vectors = 0;
980 }
981
982 /**
983  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
984  *
985  * This function resets the device so that it has 0 rx queues, tx queues, and
986  * MSI-X interrupts allocated.
987  */
988 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
989 {
990         igb_free_queues(adapter);
991         igb_free_q_vectors(adapter);
992         igb_reset_interrupt_capability(adapter);
993 }
994
995 /**
996  * igb_set_interrupt_capability - set MSI or MSI-X if supported
997  *
998  * Attempt to configure interrupts using the best available
999  * capabilities of the hardware and kernel.
1000  **/
1001 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1002 {
1003         int err;
1004         int numvecs, i;
1005
1006         /* Number of supported queues. */
1007         adapter->num_rx_queues = adapter->rss_queues;
1008         if (adapter->vfs_allocated_count)
1009                 adapter->num_tx_queues = 1;
1010         else
1011                 adapter->num_tx_queues = adapter->rss_queues;
1012
1013         /* start with one vector for every rx queue */
1014         numvecs = adapter->num_rx_queues;
1015
1016         /* if tx handler is separate add 1 for every tx queue */
1017         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1018                 numvecs += adapter->num_tx_queues;
1019
1020         /* store the number of vectors reserved for queues */
1021         adapter->num_q_vectors = numvecs;
1022
1023         /* add 1 vector for link status interrupts */
1024         numvecs++;
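        /*
         * Example: with rss_queues = 4, no VFs and IGB_FLAG_QUEUE_PAIRS clear,
         * this asks for 4 Rx + 4 Tx + 1 link/other = 9 MSI-X vectors; with
         * queue pairing enabled it would be 4 + 1 = 5.
         */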
1025         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1026                                         GFP_KERNEL);
1027         if (!adapter->msix_entries)
1028                 goto msi_only;
1029
1030         for (i = 0; i < numvecs; i++)
1031                 adapter->msix_entries[i].entry = i;
1032
1033         err = pci_enable_msix(adapter->pdev,
1034                               adapter->msix_entries,
1035                               numvecs);
1036         if (err == 0)
1037                 goto out;
1038
1039         igb_reset_interrupt_capability(adapter);
1040
1041         /* If we can't do MSI-X, try MSI */
1042 msi_only:
1043 #ifdef CONFIG_PCI_IOV
1044         /* disable SR-IOV for non MSI-X configurations */
1045         if (adapter->vf_data) {
1046                 struct e1000_hw *hw = &adapter->hw;
1047                 /* disable iov and allow time for transactions to clear */
1048                 pci_disable_sriov(adapter->pdev);
1049                 msleep(500);
1050
1051                 kfree(adapter->vf_data);
1052                 adapter->vf_data = NULL;
1053                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1054                 msleep(100);
1055                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1056         }
1057 #endif
1058         adapter->vfs_allocated_count = 0;
1059         adapter->rss_queues = 1;
1060         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1061         adapter->num_rx_queues = 1;
1062         adapter->num_tx_queues = 1;
1063         adapter->num_q_vectors = 1;
1064         if (!pci_enable_msi(adapter->pdev))
1065                 adapter->flags |= IGB_FLAG_HAS_MSI;
1066 out:
1067         /* Notify the stack of the (possibly) reduced queue counts. */
1068         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1069         return netif_set_real_num_rx_queues(adapter->netdev,
1070                                             adapter->num_rx_queues);
1071 }
1072
1073 /**
1074  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1075  * @adapter: board private structure to initialize
1076  *
1077  * We allocate one q_vector per queue interrupt.  If allocation fails we
1078  * return -ENOMEM.
1079  **/
1080 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1081 {
1082         struct igb_q_vector *q_vector;
1083         struct e1000_hw *hw = &adapter->hw;
1084         int v_idx;
1085
1086         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1087                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1088                 if (!q_vector)
1089                         goto err_out;
1090                 q_vector->adapter = adapter;
1091                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1092                 q_vector->itr_val = IGB_START_ITR;
1093                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1094                 adapter->q_vector[v_idx] = q_vector;
1095         }
1096         return 0;
1097
1098 err_out:
1099         igb_free_q_vectors(adapter);
1100         return -ENOMEM;
1101 }
1102
1103 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1104                                       int ring_idx, int v_idx)
1105 {
1106         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1107
1108         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1109         q_vector->rx_ring->q_vector = q_vector;
1110         q_vector->itr_val = adapter->rx_itr_setting;
1111         if (q_vector->itr_val && q_vector->itr_val <= 3)
1112                 q_vector->itr_val = IGB_START_ITR;
1113 }
1114
1115 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1116                                       int ring_idx, int v_idx)
1117 {
1118         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1119
1120         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1121         q_vector->tx_ring->q_vector = q_vector;
1122         q_vector->itr_val = adapter->tx_itr_setting;
1123         if (q_vector->itr_val && q_vector->itr_val <= 3)
1124                 q_vector->itr_val = IGB_START_ITR;
1125 }
1126
1127 /**
1128  * igb_map_ring_to_vector - maps allocated queues to vectors
1129  *
1130  * This function maps the recently allocated queues to vectors.
1131  **/
1132 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1133 {
1134         int i;
1135         int v_idx = 0;
1136
1137         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1138             (adapter->num_q_vectors < adapter->num_tx_queues))
1139                 return -ENOMEM;
1140
1141         if (adapter->num_q_vectors >=
1142             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1143                 for (i = 0; i < adapter->num_rx_queues; i++)
1144                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1145                 for (i = 0; i < adapter->num_tx_queues; i++)
1146                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1147         } else {
1148                 for (i = 0; i < adapter->num_rx_queues; i++) {
1149                         if (i < adapter->num_tx_queues)
1150                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1151                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1152                 }
1153                 for (; i < adapter->num_tx_queues; i++)
1154                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1155         }
1156         return 0;
1157 }
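/*
 * Example: with 4 Rx and 4 Tx queues and 8 q_vectors, every ring gets its own
 * vector; with only 4 q_vectors the rings are paired so that vector i
 * services both tx_ring[i] and rx_ring[i].
 */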
1158
1159 /**
1160  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1161  *
1162  * This function initializes the interrupts and allocates all of the queues.
1163  **/
1164 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1165 {
1166         struct pci_dev *pdev = adapter->pdev;
1167         int err;
1168
1169         err = igb_set_interrupt_capability(adapter);
1170         if (err)
1171                 return err;
1172
1173         err = igb_alloc_q_vectors(adapter);
1174         if (err) {
1175                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1176                 goto err_alloc_q_vectors;
1177         }
1178
1179         err = igb_alloc_queues(adapter);
1180         if (err) {
1181                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1182                 goto err_alloc_queues;
1183         }
1184
1185         err = igb_map_ring_to_vector(adapter);
1186         if (err) {
1187                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1188                 goto err_map_queues;
1189         }
1190
1191
1192         return 0;
1193 err_map_queues:
1194         igb_free_queues(adapter);
1195 err_alloc_queues:
1196         igb_free_q_vectors(adapter);
1197 err_alloc_q_vectors:
1198         igb_reset_interrupt_capability(adapter);
1199         return err;
1200 }
1201
1202 /**
1203  * igb_request_irq - initialize interrupts
1204  *
1205  * Attempts to configure interrupts using the best available
1206  * capabilities of the hardware and kernel.
1207  **/
1208 static int igb_request_irq(struct igb_adapter *adapter)
1209 {
1210         struct net_device *netdev = adapter->netdev;
1211         struct pci_dev *pdev = adapter->pdev;
1212         int err = 0;
1213
1214         if (adapter->msix_entries) {
1215                 err = igb_request_msix(adapter);
1216                 if (!err)
1217                         goto request_done;
1218                 /* fall back to MSI */
1219                 igb_clear_interrupt_scheme(adapter);
1220                 if (!pci_enable_msi(adapter->pdev))
1221                         adapter->flags |= IGB_FLAG_HAS_MSI;
1222                 igb_free_all_tx_resources(adapter);
1223                 igb_free_all_rx_resources(adapter);
1224                 adapter->num_tx_queues = 1;
1225                 adapter->num_rx_queues = 1;
1226                 adapter->num_q_vectors = 1;
1227                 err = igb_alloc_q_vectors(adapter);
1228                 if (err) {
1229                         dev_err(&pdev->dev,
1230                                 "Unable to allocate memory for vectors\n");
1231                         goto request_done;
1232                 }
1233                 err = igb_alloc_queues(adapter);
1234                 if (err) {
1235                         dev_err(&pdev->dev,
1236                                 "Unable to allocate memory for queues\n");
1237                         igb_free_q_vectors(adapter);
1238                         goto request_done;
1239                 }
1240                 igb_setup_all_tx_resources(adapter);
1241                 igb_setup_all_rx_resources(adapter);
1242         } else {
1243                 igb_assign_vector(adapter->q_vector[0], 0);
1244         }
1245
1246         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1247                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1248                                   netdev->name, adapter);
1249                 if (!err)
1250                         goto request_done;
1251
1252                 /* fall back to legacy interrupts */
1253                 igb_reset_interrupt_capability(adapter);
1254                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1255         }
1256
1257         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1258                           netdev->name, adapter);
1259
1260         if (err)
1261                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1262                         err);
1263
1264 request_done:
1265         return err;
1266 }
1267
1268 static void igb_free_irq(struct igb_adapter *adapter)
1269 {
1270         if (adapter->msix_entries) {
1271                 int vector = 0, i;
1272
1273                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1274
1275                 for (i = 0; i < adapter->num_q_vectors; i++) {
1276                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1277                         free_irq(adapter->msix_entries[vector++].vector,
1278                                  q_vector);
1279                 }
1280         } else {
1281                 free_irq(adapter->pdev->irq, adapter);
1282         }
1283 }
1284
1285 /**
1286  * igb_irq_disable - Mask off interrupt generation on the NIC
1287  * @adapter: board private structure
1288  **/
1289 static void igb_irq_disable(struct igb_adapter *adapter)
1290 {
1291         struct e1000_hw *hw = &adapter->hw;
1292
1293         /*
1294          * we need to be careful when disabling interrupts.  The VFs are also
1295          * mapped into these registers and so clearing the bits can cause
1296          * issues on the VF drivers so we only need to clear what we set
1297          */
1298         if (adapter->msix_entries) {
1299                 u32 regval = rd32(E1000_EIAM);
1300                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1301                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1302                 regval = rd32(E1000_EIAC);
1303                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1304         }
1305
1306         wr32(E1000_IAM, 0);
1307         wr32(E1000_IMC, ~0);
1308         wrfl();
1309         if (adapter->msix_entries) {
1310                 int i;
1311                 for (i = 0; i < adapter->num_q_vectors; i++)
1312                         synchronize_irq(adapter->msix_entries[i].vector);
1313         } else {
1314                 synchronize_irq(adapter->pdev->irq);
1315         }
1316 }
1317
1318 /**
1319  * igb_irq_enable - Enable default interrupt generation settings
1320  * @adapter: board private structure
1321  **/
1322 static void igb_irq_enable(struct igb_adapter *adapter)
1323 {
1324         struct e1000_hw *hw = &adapter->hw;
1325
1326         if (adapter->msix_entries) {
1327                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1328                 u32 regval = rd32(E1000_EIAC);
1329                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1330                 regval = rd32(E1000_EIAM);
1331                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1332                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1333                 if (adapter->vfs_allocated_count) {
1334                         wr32(E1000_MBVFIMR, 0xFF);
1335                         ims |= E1000_IMS_VMMB;
1336                 }
1337                 if (adapter->hw.mac.type == e1000_82580)
1338                         ims |= E1000_IMS_DRSTA;
1339
1340                 wr32(E1000_IMS, ims);
1341         } else {
1342                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1343                                 E1000_IMS_DRSTA);
1344                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1345                                 E1000_IMS_DRSTA);
1346         }
1347 }
1348
1349 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1350 {
1351         struct e1000_hw *hw = &adapter->hw;
1352         u16 vid = adapter->hw.mng_cookie.vlan_id;
1353         u16 old_vid = adapter->mng_vlan_id;
1354
1355         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1356                 /* add VID to filter table */
1357                 igb_vfta_set(hw, vid, true);
1358                 adapter->mng_vlan_id = vid;
1359         } else {
1360                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1361         }
1362
1363         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1364             (vid != old_vid) &&
1365             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1366                 /* remove VID from filter table */
1367                 igb_vfta_set(hw, old_vid, false);
1368         }
1369 }
1370
1371 /**
1372  * igb_release_hw_control - release control of the h/w to f/w
1373  * @adapter: address of board private structure
1374  *
1375  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1376  * For ASF and Pass Through versions of f/w this means that the
1377  * driver is no longer loaded.
1378  *
1379  **/
1380 static void igb_release_hw_control(struct igb_adapter *adapter)
1381 {
1382         struct e1000_hw *hw = &adapter->hw;
1383         u32 ctrl_ext;
1384
1385         /* Let firmware take over control of h/w */
1386         ctrl_ext = rd32(E1000_CTRL_EXT);
1387         wr32(E1000_CTRL_EXT,
1388                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1389 }
1390
1391 /**
1392  * igb_get_hw_control - get control of the h/w from f/w
1393  * @adapter: address of board private structure
1394  *
1395  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1396  * For ASF and Pass Through versions of f/w this means that
1397  * the driver is loaded.
1398  *
1399  **/
1400 static void igb_get_hw_control(struct igb_adapter *adapter)
1401 {
1402         struct e1000_hw *hw = &adapter->hw;
1403         u32 ctrl_ext;
1404
1405         /* Let firmware know the driver has taken over */
1406         ctrl_ext = rd32(E1000_CTRL_EXT);
1407         wr32(E1000_CTRL_EXT,
1408                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1409 }
1410
1411 /**
1412  * igb_configure - configure the hardware for RX and TX
1413  * @adapter: private board structure
1414  **/
1415 static void igb_configure(struct igb_adapter *adapter)
1416 {
1417         struct net_device *netdev = adapter->netdev;
1418         int i;
1419
1420         igb_get_hw_control(adapter);
1421         igb_set_rx_mode(netdev);
1422
1423         igb_restore_vlan(adapter);
1424
1425         igb_setup_tctl(adapter);
1426         igb_setup_mrqc(adapter);
1427         igb_setup_rctl(adapter);
1428
1429         igb_configure_tx(adapter);
1430         igb_configure_rx(adapter);
1431
1432         igb_rx_fifo_flush_82575(&adapter->hw);
1433
1434         /* call igb_desc_unused which always leaves
1435          * at least 1 descriptor unused to make sure
1436          * next_to_use != next_to_clean */
1437         for (i = 0; i < adapter->num_rx_queues; i++) {
1438                 struct igb_ring *ring = adapter->rx_ring[i];
1439                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1440         }
1441 }
1442
1443 /**
1444  * igb_power_up_link - Power up the phy/serdes link
1445  * @adapter: address of board private structure
1446  **/
1447 void igb_power_up_link(struct igb_adapter *adapter)
1448 {
1449         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1450                 igb_power_up_phy_copper(&adapter->hw);
1451         else
1452                 igb_power_up_serdes_link_82575(&adapter->hw);
1453 }
1454
1455 /**
1456  * igb_power_down_link - Power down the phy/serdes link
1457  * @adapter: address of board private structure
1458  */
1459 static void igb_power_down_link(struct igb_adapter *adapter)
1460 {
1461         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1462                 igb_power_down_phy_copper_82575(&adapter->hw);
1463         else
1464                 igb_shutdown_serdes_link_82575(&adapter->hw);
1465 }
1466
1467 /**
1468  * igb_up - Open the interface and prepare it to handle traffic
1469  * @adapter: board private structure
1470  **/
1471 int igb_up(struct igb_adapter *adapter)
1472 {
1473         struct e1000_hw *hw = &adapter->hw;
1474         int i;
1475
1476         /* hardware has been reset, we need to reload some things */
1477         igb_configure(adapter);
1478
1479         clear_bit(__IGB_DOWN, &adapter->state);
1480
1481         for (i = 0; i < adapter->num_q_vectors; i++) {
1482                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1483                 napi_enable(&q_vector->napi);
1484         }
1485         if (adapter->msix_entries)
1486                 igb_configure_msix(adapter);
1487         else
1488                 igb_assign_vector(adapter->q_vector[0], 0);
1489
1490         /* Clear any pending interrupts. */
1491         rd32(E1000_ICR);
1492         igb_irq_enable(adapter);
1493
1494         /* notify VFs that reset has been completed */
1495         if (adapter->vfs_allocated_count) {
1496                 u32 reg_data = rd32(E1000_CTRL_EXT);
1497                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1498                 wr32(E1000_CTRL_EXT, reg_data);
1499         }
1500
1501         netif_tx_start_all_queues(adapter->netdev);
1502
1503         /* start the watchdog. */
1504         hw->mac.get_link_status = 1;
1505         schedule_work(&adapter->watchdog_task);
1506
1507         return 0;
1508 }
1509
1510 void igb_down(struct igb_adapter *adapter)
1511 {
1512         struct net_device *netdev = adapter->netdev;
1513         struct e1000_hw *hw = &adapter->hw;
1514         u32 tctl, rctl;
1515         int i;
1516
1517         /* signal that we're down so the interrupt handler does not
1518          * reschedule our watchdog timer */
1519         set_bit(__IGB_DOWN, &adapter->state);
1520
1521         /* disable receives in the hardware */
1522         rctl = rd32(E1000_RCTL);
1523         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1524         /* flush and sleep below */
1525
1526         netif_tx_stop_all_queues(netdev);
1527
1528         /* disable transmits in the hardware */
1529         tctl = rd32(E1000_TCTL);
1530         tctl &= ~E1000_TCTL_EN;
1531         wr32(E1000_TCTL, tctl);
1532         /* flush both disables and wait for them to finish */
1533         wrfl();
1534         msleep(10);
1535
1536         for (i = 0; i < adapter->num_q_vectors; i++) {
1537                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1538                 napi_disable(&q_vector->napi);
1539         }
1540
1541         igb_irq_disable(adapter);
1542
1543         del_timer_sync(&adapter->watchdog_timer);
1544         del_timer_sync(&adapter->phy_info_timer);
1545
1546         netif_carrier_off(netdev);
1547
1548         /* record the stats before reset */
1549         spin_lock(&adapter->stats64_lock);
1550         igb_update_stats(adapter, &adapter->stats64);
1551         spin_unlock(&adapter->stats64_lock);
1552
1553         adapter->link_speed = 0;
1554         adapter->link_duplex = 0;
1555
1556         if (!pci_channel_offline(adapter->pdev))
1557                 igb_reset(adapter);
1558         igb_clean_all_tx_rings(adapter);
1559         igb_clean_all_rx_rings(adapter);
1560 #ifdef CONFIG_IGB_DCA
1561
1562         /* since we reset the hardware, DCA settings were cleared */
1563         igb_setup_dca(adapter);
1564 #endif
1565 }
1566
1567 void igb_reinit_locked(struct igb_adapter *adapter)
1568 {
1569         WARN_ON(in_interrupt());
1570         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1571                 msleep(1);
1572         igb_down(adapter);
1573         igb_up(adapter);
1574         clear_bit(__IGB_RESETTING, &adapter->state);
1575 }
1576
1577 void igb_reset(struct igb_adapter *adapter)
1578 {
1579         struct pci_dev *pdev = adapter->pdev;
1580         struct e1000_hw *hw = &adapter->hw;
1581         struct e1000_mac_info *mac = &hw->mac;
1582         struct e1000_fc_info *fc = &hw->fc;
1583         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1584         u16 hwm;
1585
1586         /* Repartition the PBA for MTUs greater than 9k.
1587          * CTRL.RST is required for the change to take effect.
1588          */
1589         switch (mac->type) {
1590         case e1000_i350:
1591         case e1000_82580:
1592                 pba = rd32(E1000_RXPBS);
1593                 pba = igb_rxpbs_adjust_82580(pba);
1594                 break;
1595         case e1000_82576:
1596                 pba = rd32(E1000_RXPBS);
1597                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1598                 break;
1599         case e1000_82575:
1600         default:
1601                 pba = E1000_PBA_34K;
1602                 break;
1603         }
1604
1605         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1606             (mac->type < e1000_82576)) {
1607                 /* adjust PBA for jumbo frames */
1608                 wr32(E1000_PBA, pba);
1609
1610                 /* To maintain wire speed transmits, the Tx FIFO should be
1611                  * large enough to accommodate two full transmit packets,
1612                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1613                  * the Rx FIFO should be large enough to accommodate at least
1614                  * one full receive packet and is similarly rounded up and
1615                  * expressed in KB. */
1616                 pba = rd32(E1000_PBA);
1617                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1618                 tx_space = pba >> 16;
1619                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1620                 pba &= 0xffff;
1621                 /* the Tx FIFO also stores 16 bytes of information about the Tx
1622                  * packet; don't include the Ethernet FCS because hardware appends it */
1623                 min_tx_space = (adapter->max_frame_size +
1624                                 sizeof(union e1000_adv_tx_desc) -
1625                                 ETH_FCS_LEN) * 2;
1626                 min_tx_space = ALIGN(min_tx_space, 1024);
1627                 min_tx_space >>= 10;
1628                 /* software strips receive CRC, so leave room for it */
1629                 min_rx_space = adapter->max_frame_size;
1630                 min_rx_space = ALIGN(min_rx_space, 1024);
1631                 min_rx_space >>= 10;
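                     /* Illustrative numbers for a 9018 byte jumbo frame:
                      * min_tx_space = (9018 + 16 - 4) * 2 = 18060 -> 18 KB and
                      * min_rx_space = 9018 -> 9 KB after rounding up to 1 KB */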
1632
1633                 /* If current Tx allocation is less than the min Tx FIFO size,
1634                  * and the min Tx FIFO size is less than the current Rx FIFO
1635                  * allocation, take space away from current Rx allocation */
1636                 if (tx_space < min_tx_space &&
1637                     ((min_tx_space - tx_space) < pba)) {
1638                         pba = pba - (min_tx_space - tx_space);
1639
1640                         /* if short on rx space, rx wins and must trump tx
1641                          * adjustment */
1642                         if (pba < min_rx_space)
1643                                 pba = min_rx_space;
1644                 }
1645                 wr32(E1000_PBA, pba);
1646         }
1647
1648         /* flow control settings */
1649         /* The high water mark must be low enough to fit one full frame
1650          * (or the size used for early receive) above it in the Rx FIFO.
1651          * Set it to the lower of:
1652          * - 90% of the Rx FIFO size, or
1653          * - the full Rx FIFO size minus one full frame */
1654         hwm = min(((pba << 10) * 9 / 10),
1655                         ((pba << 10) - 2 * adapter->max_frame_size));
1656
1657         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1658         fc->low_water = fc->high_water - 16;
1659         fc->pause_time = 0xFFFF;
1660         fc->send_xon = 1;
1661         fc->current_mode = fc->requested_mode;
1662
1663         /* quiesce the VFs: clear their flags, ping them, then disable VF Rx/Tx */
1664         if (adapter->vfs_allocated_count) {
1665                 int i;
1666                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1667                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1668
1669                 /* ping all the active vfs to let them know we are going down */
1670                 igb_ping_all_vfs(adapter);
1671
1672                 /* disable transmits and receives */
1673                 wr32(E1000_VFRE, 0);
1674                 wr32(E1000_VFTE, 0);
1675         }
1676
1677         /* Allow time for pending master requests to run */
1678         hw->mac.ops.reset_hw(hw);
1679         wr32(E1000_WUC, 0);
1680
1681         if (hw->mac.ops.init_hw(hw))
1682                 dev_err(&pdev->dev, "Hardware Error\n");
1683         if (hw->mac.type > e1000_82580) {
1684                 if (adapter->flags & IGB_FLAG_DMAC) {
1685                         u32 reg;
1686
1687                         /*
1688                          * DMA Coalescing high water mark needs to be higher
1689                          * than the Rx threshold.  The Rx threshold is
1690                          * currently pba - 6, so we should use a high
1691                          * water mark of pba - 4. */
1692                         hwm = (pba - 4) << 10;
1693
1694                         reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1695                                & E1000_DMACR_DMACTHR_MASK);
1696
1697                         /* transition to L0s or L1 if available */
1698                         reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1699
1700                         /* watchdog timer = 1000 usec, in 32 usec units (1000 >> 5) */
1701                         reg |= (1000 >> 5);
1702                         wr32(E1000_DMACR, reg);
1703
1704                         /* no lower threshold to disable coalescing (smart FIFO)
1705                          * - UTRESH=0 */
1706                         wr32(E1000_DMCRTRH, 0);
1707
1708                         /* write the DMA Coalescing high water mark calculated above */
1709                         wr32(E1000_FCRTC, hwm);
1710
1711                         /*
1712                          * This sets the time to wait before requesting a
1713                          * transition to a low power state to the number of usecs
1714                          * needed to receive one 512 byte frame at gigabit line rate.
1715                          */
1716                         reg = rd32(E1000_DMCTLX);
1717                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
1718
1719                         /* Delay 255 usec before entering Lx state. */
1720                         reg |= 0xFF;
1721                         wr32(E1000_DMCTLX, reg);
1722
1723                         /* free space in Tx packet buffer to wake from DMAC */
1724                         wr32(E1000_DMCTXTH,
1725                              (IGB_MIN_TXPBSIZE -
1726                              (IGB_TX_BUF_4096 + adapter->max_frame_size))
1727                              >> 6);
1728
1729                         /* make low power state decision controlled by DMAC */
1730                         reg = rd32(E1000_PCIEMISC);
1731                         reg |= E1000_PCIEMISC_LX_DECISION;
1732                         wr32(E1000_PCIEMISC, reg);
1733                 } /* end if IGB_FLAG_DMAC set */
1734         }
1735         if (hw->mac.type == e1000_82580) {
1736                 u32 reg = rd32(E1000_PCIEMISC);
1737                 wr32(E1000_PCIEMISC,
1738                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1739         }
1740         if (!netif_running(adapter->netdev))
1741                 igb_power_down_link(adapter);
1742
1743         igb_update_mng_vlan(adapter);
1744
1745         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1746         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1747
1748         igb_get_phy_info(hw);
1749 }
1750
1751 static const struct net_device_ops igb_netdev_ops = {
1752         .ndo_open               = igb_open,
1753         .ndo_stop               = igb_close,
1754         .ndo_start_xmit         = igb_xmit_frame_adv,
1755         .ndo_get_stats64        = igb_get_stats64,
1756         .ndo_set_rx_mode        = igb_set_rx_mode,
1757         .ndo_set_multicast_list = igb_set_rx_mode,
1758         .ndo_set_mac_address    = igb_set_mac,
1759         .ndo_change_mtu         = igb_change_mtu,
1760         .ndo_do_ioctl           = igb_ioctl,
1761         .ndo_tx_timeout         = igb_tx_timeout,
1762         .ndo_validate_addr      = eth_validate_addr,
1763         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1764         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1765         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1766         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1767         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1768         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1769         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1770 #ifdef CONFIG_NET_POLL_CONTROLLER
1771         .ndo_poll_controller    = igb_netpoll,
1772 #endif
1773 };
1774
1775 /**
1776  * igb_probe - Device Initialization Routine
1777  * @pdev: PCI device information struct
1778  * @ent: entry in igb_pci_tbl
1779  *
1780  * Returns 0 on success, negative on failure
1781  *
1782  * igb_probe initializes an adapter identified by a pci_dev structure.
1783  * The OS initialization, configuring of the adapter private structure,
1784  * and a hardware reset occur.
1785  **/
1786 static int __devinit igb_probe(struct pci_dev *pdev,
1787                                const struct pci_device_id *ent)
1788 {
1789         struct net_device *netdev;
1790         struct igb_adapter *adapter;
1791         struct e1000_hw *hw;
1792         u16 eeprom_data = 0;
1793         s32 ret_val;
1794         static int global_quad_port_a; /* global quad port a indication */
1795         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1796         unsigned long mmio_start, mmio_len;
1797         int err, pci_using_dac;
1798         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1799         u8 part_str[E1000_PBANUM_LENGTH];
1800
1801         /* Catch broken hardware that put the wrong VF device ID in
1802          * the PCIe SR-IOV capability.
1803          */
1804         if (pdev->is_virtfn) {
1805                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1806                      pci_name(pdev), pdev->vendor, pdev->device);
1807                 return -EINVAL;
1808         }
1809
1810         err = pci_enable_device_mem(pdev);
1811         if (err)
1812                 return err;
1813
1814         pci_using_dac = 0;
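             /* prefer 64-bit DMA and fall back to a 32-bit mask if the
              * platform cannot provide it; pci_using_dac later gates
              * NETIF_F_HIGHDMA */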
1815         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1816         if (!err) {
1817                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1818                 if (!err)
1819                         pci_using_dac = 1;
1820         } else {
1821                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1822                 if (err) {
1823                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1824                         if (err) {
1825                                 dev_err(&pdev->dev, "No usable DMA "
1826                                         "configuration, aborting\n");
1827                                 goto err_dma;
1828                         }
1829                 }
1830         }
1831
1832         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1833                                            IORESOURCE_MEM),
1834                                            igb_driver_name);
1835         if (err)
1836                 goto err_pci_reg;
1837
1838         pci_enable_pcie_error_reporting(pdev);
1839
1840         pci_set_master(pdev);
1841         pci_save_state(pdev);
1842
1843         err = -ENOMEM;
1844         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1845                                    IGB_ABS_MAX_TX_QUEUES);
1846         if (!netdev)
1847                 goto err_alloc_etherdev;
1848
1849         SET_NETDEV_DEV(netdev, &pdev->dev);
1850
1851         pci_set_drvdata(pdev, netdev);
1852         adapter = netdev_priv(netdev);
1853         adapter->netdev = netdev;
1854         adapter->pdev = pdev;
1855         hw = &adapter->hw;
1856         hw->back = adapter;
1857         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1858
1859         mmio_start = pci_resource_start(pdev, 0);
1860         mmio_len = pci_resource_len(pdev, 0);
1861
1862         err = -EIO;
1863         hw->hw_addr = ioremap(mmio_start, mmio_len);
1864         if (!hw->hw_addr)
1865                 goto err_ioremap;
1866
1867         netdev->netdev_ops = &igb_netdev_ops;
1868         igb_set_ethtool_ops(netdev);
1869         netdev->watchdog_timeo = 5 * HZ;
1870
1871         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1872
1873         netdev->mem_start = mmio_start;
1874         netdev->mem_end = mmio_start + mmio_len;
1875
1876         /* PCI config space info */
1877         hw->vendor_id = pdev->vendor;
1878         hw->device_id = pdev->device;
1879         hw->revision_id = pdev->revision;
1880         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1881         hw->subsystem_device_id = pdev->subsystem_device;
1882
1883         /* Copy the default MAC, PHY and NVM function pointers */
1884         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1885         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1886         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1887         /* Initialize skew-specific constants */
1888         err = ei->get_invariants(hw);
1889         if (err)
1890                 goto err_sw_init;
1891
1892         /* setup the private structure */
1893         err = igb_sw_init(adapter);
1894         if (err)
1895                 goto err_sw_init;
1896
1897         igb_get_bus_info_pcie(hw);
1898
1899         hw->phy.autoneg_wait_to_complete = false;
1900
1901         /* Copper options */
1902         if (hw->phy.media_type == e1000_media_type_copper) {
1903                 hw->phy.mdix = AUTO_ALL_MODES;
1904                 hw->phy.disable_polarity_correction = false;
1905                 hw->phy.ms_type = e1000_ms_hw_default;
1906         }
1907
1908         if (igb_check_reset_block(hw))
1909                 dev_info(&pdev->dev,
1910                         "PHY reset is blocked due to SOL/IDER session.\n");
1911
1912         netdev->features = NETIF_F_SG |
1913                            NETIF_F_IP_CSUM |
1914                            NETIF_F_HW_VLAN_TX |
1915                            NETIF_F_HW_VLAN_RX |
1916                            NETIF_F_HW_VLAN_FILTER;
1917
1918         netdev->features |= NETIF_F_IPV6_CSUM;
1919         netdev->features |= NETIF_F_TSO;
1920         netdev->features |= NETIF_F_TSO6;
1921         netdev->features |= NETIF_F_GRO;
1922
1923         netdev->vlan_features |= NETIF_F_TSO;
1924         netdev->vlan_features |= NETIF_F_TSO6;
1925         netdev->vlan_features |= NETIF_F_IP_CSUM;
1926         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1927         netdev->vlan_features |= NETIF_F_SG;
1928
1929         if (pci_using_dac) {
1930                 netdev->features |= NETIF_F_HIGHDMA;
1931                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1932         }
1933
1934         if (hw->mac.type >= e1000_82576)
1935                 netdev->features |= NETIF_F_SCTP_CSUM;
1936
1937         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1938
1939         /* before reading the NVM, reset the controller to put the device in a
1940          * known good starting state */
1941         hw->mac.ops.reset_hw(hw);
1942
1943         /* make sure the NVM is good */
1944         if (hw->nvm.ops.validate(hw) < 0) {
1945                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1946                 err = -EIO;
1947                 goto err_eeprom;
1948         }
1949
1950         /* copy the MAC address out of the NVM */
1951         if (hw->mac.ops.read_mac_addr(hw))
1952                 dev_err(&pdev->dev, "NVM Read Error\n");
1953
1954         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1955         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1956
1957         if (!is_valid_ether_addr(netdev->perm_addr)) {
1958                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1959                 err = -EIO;
1960                 goto err_eeprom;
1961         }
1962
1963         setup_timer(&adapter->watchdog_timer, igb_watchdog,
1964                     (unsigned long) adapter);
1965         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1966                     (unsigned long) adapter);
1967
1968         INIT_WORK(&adapter->reset_task, igb_reset_task);
1969         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1970
1971         /* Initialize link properties that are user-changeable */
1972         adapter->fc_autoneg = true;
1973         hw->mac.autoneg = true;
1974         hw->phy.autoneg_advertised = 0x2f;
1975
1976         hw->fc.requested_mode = e1000_fc_default;
1977         hw->fc.current_mode = e1000_fc_default;
1978
1979         igb_validate_mdi_setting(hw);
1980
1981         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1982          * enable the ACPI Magic Packet filter.
1983          */
1984
1985         if (hw->bus.func == 0)
1986                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1987         else if (hw->mac.type == e1000_82580)
1988                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1989                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1990                                  &eeprom_data);
1991         else if (hw->bus.func == 1)
1992                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1993
1994         if (eeprom_data & eeprom_apme_mask)
1995                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1996
1997         /* now that we have the eeprom settings, apply the special cases where
1998          * the eeprom may be wrong or the board simply won't support wake on
1999          * lan on a particular port */
2000         switch (pdev->device) {
2001         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2002                 adapter->eeprom_wol = 0;
2003                 break;
2004         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2005         case E1000_DEV_ID_82576_FIBER:
2006         case E1000_DEV_ID_82576_SERDES:
2007                 /* Wake events only supported on port A for dual fiber
2008                  * regardless of eeprom setting */
2009                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2010                         adapter->eeprom_wol = 0;
2011                 break;
2012         case E1000_DEV_ID_82576_QUAD_COPPER:
2013         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2014                 /* if quad port adapter, disable WoL on all but port A */
2015                 if (global_quad_port_a != 0)
2016                         adapter->eeprom_wol = 0;
2017                 else
2018                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2019                 /* Reset for multiple quad port adapters */
2020                 if (++global_quad_port_a == 4)
2021                         global_quad_port_a = 0;
2022                 break;
2023         }
2024
2025         /* initialize the wol settings based on the eeprom settings */
2026         adapter->wol = adapter->eeprom_wol;
2027         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2028
2029         /* reset the hardware with the new settings */
2030         igb_reset(adapter);
2031
2032         /* let the f/w know that the h/w is now under the control of the
2033          * driver. */
2034         igb_get_hw_control(adapter);
2035
2036         strcpy(netdev->name, "eth%d");
2037         err = register_netdev(netdev);
2038         if (err)
2039                 goto err_register;
2040
2041         /* carrier off reporting is important to ethtool even BEFORE open */
2042         netif_carrier_off(netdev);
2043
2044 #ifdef CONFIG_IGB_DCA
2045         if (dca_add_requester(&pdev->dev) == 0) {
2046                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2047                 dev_info(&pdev->dev, "DCA enabled\n");
2048                 igb_setup_dca(adapter);
2049         }
2050
2051 #endif
2052         /* do hw tstamp init after resetting */
2053         igb_init_hw_timer(adapter);
2054
2055         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2056         /* print bus type/speed/width info */
2057         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2058                  netdev->name,
2059                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2060                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2061                                                             "unknown"),
2062                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2063                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2064                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2065                    "unknown"),
2066                  netdev->dev_addr);
2067
2068         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2069         if (ret_val)
2070                 strcpy(part_str, "Unknown");
2071         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2072         dev_info(&pdev->dev,
2073                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2074                 adapter->msix_entries ? "MSI-X" :
2075                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2076                 adapter->num_rx_queues, adapter->num_tx_queues);
2077         switch (hw->mac.type) {
2078         case e1000_i350:
2079                 igb_set_eee_i350(hw);
2080                 break;
2081         default:
2082                 break;
2083         }
2084         return 0;
2085
2086 err_register:
2087         igb_release_hw_control(adapter);
2088 err_eeprom:
2089         if (!igb_check_reset_block(hw))
2090                 igb_reset_phy(hw);
2091
2092         if (hw->flash_address)
2093                 iounmap(hw->flash_address);
2094 err_sw_init:
2095         igb_clear_interrupt_scheme(adapter);
2096         iounmap(hw->hw_addr);
2097 err_ioremap:
2098         free_netdev(netdev);
2099 err_alloc_etherdev:
2100         pci_release_selected_regions(pdev,
2101                                      pci_select_bars(pdev, IORESOURCE_MEM));
2102 err_pci_reg:
2103 err_dma:
2104         pci_disable_device(pdev);
2105         return err;
2106 }
2107
2108 /**
2109  * igb_remove - Device Removal Routine
2110  * @pdev: PCI device information struct
2111  *
2112  * igb_remove is called by the PCI subsystem to alert the driver
2113  * that it should release a PCI device.  This could be caused by a
2114  * Hot-Plug event, or because the driver is going to be removed from
2115  * memory.
2116  **/
2117 static void __devexit igb_remove(struct pci_dev *pdev)
2118 {
2119         struct net_device *netdev = pci_get_drvdata(pdev);
2120         struct igb_adapter *adapter = netdev_priv(netdev);
2121         struct e1000_hw *hw = &adapter->hw;
2122
2123         /*
2124          * The watchdog timer may be rescheduled, so explicitly
2125          * disable watchdog from being rescheduled.
2126          */
2127         set_bit(__IGB_DOWN, &adapter->state);
2128         del_timer_sync(&adapter->watchdog_timer);
2129         del_timer_sync(&adapter->phy_info_timer);
2130
2131         cancel_work_sync(&adapter->reset_task);
2132         cancel_work_sync(&adapter->watchdog_task);
2133
2134 #ifdef CONFIG_IGB_DCA
2135         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2136                 dev_info(&pdev->dev, "DCA disabled\n");
2137                 dca_remove_requester(&pdev->dev);
2138                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2139                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2140         }
2141 #endif
2142
2143         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2144          * would have already happened in close and is redundant. */
2145         igb_release_hw_control(adapter);
2146
2147         unregister_netdev(netdev);
2148
2149         igb_clear_interrupt_scheme(adapter);
2150
2151 #ifdef CONFIG_PCI_IOV
2152         /* reclaim resources allocated to VFs */
2153         if (adapter->vf_data) {
2154                 /* disable iov and allow time for transactions to clear */
2155                 pci_disable_sriov(pdev);
2156                 msleep(500);
2157
2158                 kfree(adapter->vf_data);
2159                 adapter->vf_data = NULL;
2160                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2161                 msleep(100);
2162                 dev_info(&pdev->dev, "IOV Disabled\n");
2163         }
2164 #endif
2165
2166         iounmap(hw->hw_addr);
2167         if (hw->flash_address)
2168                 iounmap(hw->flash_address);
2169         pci_release_selected_regions(pdev,
2170                                      pci_select_bars(pdev, IORESOURCE_MEM));
2171
2172         free_netdev(netdev);
2173
2174         pci_disable_pcie_error_reporting(pdev);
2175
2176         pci_disable_device(pdev);
2177 }
2178
2179 /**
2180  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2181  * @adapter: board private structure to initialize
2182  *
2183  * This function initializes the vf specific data storage and then attempts to
2184  * allocate the VFs.  The reason for ordering it this way is that it is much
2185  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2186  * the memory for the VFs.
2187  **/
2188 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2189 {
2190 #ifdef CONFIG_PCI_IOV
2191         struct pci_dev *pdev = adapter->pdev;
2192
2193         if (adapter->vfs_allocated_count) {
2194                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2195                                            sizeof(struct vf_data_storage),
2196                                            GFP_KERNEL);
2197                 /* if allocation failed then we do not support SR-IOV */
2198                 if (!adapter->vf_data) {
2199                         adapter->vfs_allocated_count = 0;
2200                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2201                                 "Data Storage\n");
2202                 }
2203         }
2204
2205         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2206                 kfree(adapter->vf_data);
2207                 adapter->vf_data = NULL;
2208 #endif /* CONFIG_PCI_IOV */
2209                 adapter->vfs_allocated_count = 0;
2210 #ifdef CONFIG_PCI_IOV
2211         } else {
2212                 unsigned char mac_addr[ETH_ALEN];
2213                 int i;
2214                 dev_info(&pdev->dev, "%d vfs allocated\n",
2215                          adapter->vfs_allocated_count);
2216                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2217                         random_ether_addr(mac_addr);
2218                         igb_set_vf_mac(adapter, i, mac_addr);
2219                 }
2220                 /* DMA Coalescing is not supported in IOV mode. */
2221                 if (adapter->flags & IGB_FLAG_DMAC)
2222                         adapter->flags &= ~IGB_FLAG_DMAC;
2223         }
2224 #endif /* CONFIG_PCI_IOV */
2225 }
2226
2227
2228 /**
2229  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2230  * @adapter: board private structure to initialize
2231  *
2232  * igb_init_hw_timer initializes the function pointer and values for the hw
2233  * timer found in hardware.
2234  **/
2235 static void igb_init_hw_timer(struct igb_adapter *adapter)
2236 {
2237         struct e1000_hw *hw = &adapter->hw;
2238
2239         switch (hw->mac.type) {
2240         case e1000_i350:
2241         case e1000_82580:
2242                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2243                 adapter->cycles.read = igb_read_clock;
2244                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2245                 adapter->cycles.mult = 1;
2246                 /*
2247                  * The 82580 timesync updates the system timer every 8ns by 8ns
2248                  * and the value cannot be shifted.  Instead we need to shift
2249                  * the registers to generate a 64bit timer value.  As a result
2250                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2251                  * 24 in order to generate a larger value for synchronization.
2252                  */
2253                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2254                 /* disable system timer temporarily by setting bit 31 */
2255                 wr32(E1000_TSAUXC, 0x80000000);
2256                 wrfl();
2257
2258                 /* Set registers so that rollover occurs soon to test this. */
2259                 wr32(E1000_SYSTIMR, 0x00000000);
2260                 wr32(E1000_SYSTIML, 0x80000000);
2261                 wr32(E1000_SYSTIMH, 0x000000FF);
2262                 wrfl();
2263
2264                 /* enable system timer by clearing bit 31 */
2265                 wr32(E1000_TSAUXC, 0x0);
2266                 wrfl();
2267
2268                 timecounter_init(&adapter->clock,
2269                                  &adapter->cycles,
2270                                  ktime_to_ns(ktime_get_real()));
2271                 /*
2272                  * Synchronize our NIC clock against system wall clock. NIC
2273                  * time stamp reading requires ~3us per sample, each sample
2274                  * was pretty stable even under load => only require 10
2275                  * samples for each offset comparison.
2276                  */
2277                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2278                 adapter->compare.source = &adapter->clock;
2279                 adapter->compare.target = ktime_get_real;
2280                 adapter->compare.num_samples = 10;
2281                 timecompare_update(&adapter->compare, 0);
2282                 break;
2283         case e1000_82576:
2284                 /*
2285                  * Initialize hardware timer: we keep it running just in case
2286                  * that some program needs it later on.
2287                  */
2288                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2289                 adapter->cycles.read = igb_read_clock;
2290                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2291                 adapter->cycles.mult = 1;
2292                 /*
2293                  * Scale the NIC clock cycle by a large factor so that
2294                  * relatively small clock corrections can be added or
2295                  * subtracted at each clock tick. The drawbacks of a large
2296                  * factor are a) that the clock register overflows more quickly
2297                  * (not such a big deal) and b) that the increment per tick has
2298                  * to fit into 24 bits.  As a result we need to use a shift of
2299                  * 19 so we can fit a value of 16 into the TIMINCA register.
2300                  */
2301                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2302                 wr32(E1000_TIMINCA,
2303                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2304                                 (16 << IGB_82576_TSYNC_SHIFT));
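                     /*
                      * Illustration: with mult = 1 and shift = 19 the
                      * timecounter computes ns = counter >> 19, so the
                      * 16 << 19 added by hardware every 16 ns tick advances
                      * the recovered time by exactly 16 ns.
                      */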
2305
2306                 /* Set registers so that rollover occurs soon to test this. */
2307                 wr32(E1000_SYSTIML, 0x00000000);
2308                 wr32(E1000_SYSTIMH, 0xFF800000);
2309                 wrfl();
2310
2311                 timecounter_init(&adapter->clock,
2312                                  &adapter->cycles,
2313                                  ktime_to_ns(ktime_get_real()));
2314                 /*
2315                  * Synchronize our NIC clock against system wall clock. NIC
2316                  * time stamp reading requires ~3us per sample, each sample
2317                  * was pretty stable even under load => only require 10
2318                  * samples for each offset comparison.
2319                  */
2320                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2321                 adapter->compare.source = &adapter->clock;
2322                 adapter->compare.target = ktime_get_real;
2323                 adapter->compare.num_samples = 10;
2324                 timecompare_update(&adapter->compare, 0);
2325                 break;
2326         case e1000_82575:
2327                 /* 82575 does not support timesync */
2328         default:
2329                 break;
2330         }
2331
2332 }
2333
2334 /**
2335  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2336  * @adapter: board private structure to initialize
2337  *
2338  * igb_sw_init initializes the Adapter private data structure.
2339  * Fields are initialized based on PCI device information and
2340  * OS network device settings (MTU size).
2341  **/
2342 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2343 {
2344         struct e1000_hw *hw = &adapter->hw;
2345         struct net_device *netdev = adapter->netdev;
2346         struct pci_dev *pdev = adapter->pdev;
2347
2348         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2349
2350         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2351         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2352         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2353         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2354
2355         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2356         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2357
2358         spin_lock_init(&adapter->stats64_lock);
2359 #ifdef CONFIG_PCI_IOV
2360         switch (hw->mac.type) {
2361         case e1000_82576:
2362         case e1000_i350:
2363                 if (max_vfs > 7) {
2364                         dev_warn(&pdev->dev,
2365                                  "Maximum of 7 VFs per PF, using max\n");
2366                         adapter->vfs_allocated_count = 7;
2367                 } else
2368                         adapter->vfs_allocated_count = max_vfs;
2369                 break;
2370         default:
2371                 break;
2372         }
2373 #endif /* CONFIG_PCI_IOV */
2374         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2375
2376         /*
2377          * if rss_queues > 4, or if more than 6 VFs are allocated alongside
2378          * multiple RSS queues, combine each Tx/Rx queue into a queue pair
2379          * to conserve the limited supply of interrupt vectors
2380          */
2381         if ((adapter->rss_queues > 4) ||
2382             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2383                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
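             /* e.g. 8 RSS queues unpaired would want 16 queue vectors plus one
              * for "other" causes; pairing each Tx/Rx queue onto a single
              * vector roughly halves the MSI-X vectors requested */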
2384
2385         /* This call may decrease the number of queues */
2386         if (igb_init_interrupt_scheme(adapter)) {
2387                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2388                 return -ENOMEM;
2389         }
2390
2391         igb_probe_vfs(adapter);
2392
2393         /* Explicitly disable IRQ since the NIC can be in any state. */
2394         igb_irq_disable(adapter);
2395
2396         if (hw->mac.type == e1000_i350)
2397                 adapter->flags &= ~IGB_FLAG_DMAC;
2398
2399         set_bit(__IGB_DOWN, &adapter->state);
2400         return 0;
2401 }
2402
2403 /**
2404  * igb_open - Called when a network interface is made active
2405  * @netdev: network interface device structure
2406  *
2407  * Returns 0 on success, negative value on failure
2408  *
2409  * The open entry point is called when a network interface is made
2410  * active by the system (IFF_UP).  At this point all resources needed
2411  * for transmit and receive operations are allocated, the interrupt
2412  * handler is registered with the OS, the watchdog timer is started,
2413  * and the stack is notified that the interface is ready.
2414  **/
2415 static int igb_open(struct net_device *netdev)
2416 {
2417         struct igb_adapter *adapter = netdev_priv(netdev);
2418         struct e1000_hw *hw = &adapter->hw;
2419         int err;
2420         int i;
2421
2422         /* disallow open during test */
2423         if (test_bit(__IGB_TESTING, &adapter->state))
2424                 return -EBUSY;
2425
2426         netif_carrier_off(netdev);
2427
2428         /* allocate transmit descriptors */
2429         err = igb_setup_all_tx_resources(adapter);
2430         if (err)
2431                 goto err_setup_tx;
2432
2433         /* allocate receive descriptors */
2434         err = igb_setup_all_rx_resources(adapter);
2435         if (err)
2436                 goto err_setup_rx;
2437
2438         igb_power_up_link(adapter);
2439
2440         /* before we allocate an interrupt, we must be ready to handle it.
2441          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2442          * as soon as we call pci_request_irq, so we have to set up our
2443          * clean_rx handler before we do so.  */
2444         igb_configure(adapter);
2445
2446         err = igb_request_irq(adapter);
2447         if (err)
2448                 goto err_req_irq;
2449
2450         /* From here on the code is the same as igb_up() */
2451         clear_bit(__IGB_DOWN, &adapter->state);
2452
2453         for (i = 0; i < adapter->num_q_vectors; i++) {
2454                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2455                 napi_enable(&q_vector->napi);
2456         }
2457
2458         /* Clear any pending interrupts. */
2459         rd32(E1000_ICR);
2460
2461         igb_irq_enable(adapter);
2462
2463         /* notify VFs that reset has been completed */
2464         if (adapter->vfs_allocated_count) {
2465                 u32 reg_data = rd32(E1000_CTRL_EXT);
2466                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2467                 wr32(E1000_CTRL_EXT, reg_data);
2468         }
2469
2470         netif_tx_start_all_queues(netdev);
2471
2472         /* start the watchdog. */
2473         hw->mac.get_link_status = 1;
2474         schedule_work(&adapter->watchdog_task);
2475
2476         return 0;
2477
2478 err_req_irq:
2479         igb_release_hw_control(adapter);
2480         igb_power_down_link(adapter);
2481         igb_free_all_rx_resources(adapter);
2482 err_setup_rx:
2483         igb_free_all_tx_resources(adapter);
2484 err_setup_tx:
2485         igb_reset(adapter);
2486
2487         return err;
2488 }
2489
2490 /**
2491  * igb_close - Disables a network interface
2492  * @netdev: network interface device structure
2493  *
2494  * Returns 0, this is not allowed to fail
2495  *
2496  * The close entry point is called when an interface is de-activated
2497  * by the OS.  The hardware is still under the driver's control, but
2498  * needs to be disabled.  A global MAC reset is issued to stop the
2499  * hardware, and all transmit and receive resources are freed.
2500  **/
2501 static int igb_close(struct net_device *netdev)
2502 {
2503         struct igb_adapter *adapter = netdev_priv(netdev);
2504
2505         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2506         igb_down(adapter);
2507
2508         igb_free_irq(adapter);
2509
2510         igb_free_all_tx_resources(adapter);
2511         igb_free_all_rx_resources(adapter);
2512
2513         return 0;
2514 }
2515
2516 /**
2517  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2518  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2519  *
2520  * Return 0 on success, negative on failure
2521  **/
2522 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2523 {
2524         struct device *dev = tx_ring->dev;
2525         int size;
2526
2527         size = sizeof(struct igb_buffer) * tx_ring->count;
2528         tx_ring->buffer_info = vzalloc(size);
2529         if (!tx_ring->buffer_info)
2530                 goto err;
2531
2532         /* round up to nearest 4K */
2533         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2534         tx_ring->size = ALIGN(tx_ring->size, 4096);
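             /* e.g. 256 descriptors * 16 bytes per advanced Tx descriptor
              * = 4096 bytes, which is already a whole 4K page */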
2535
2536         tx_ring->desc = dma_alloc_coherent(dev,
2537                                            tx_ring->size,
2538                                            &tx_ring->dma,
2539                                            GFP_KERNEL);
2540
2541         if (!tx_ring->desc)
2542                 goto err;
2543
2544         tx_ring->next_to_use = 0;
2545         tx_ring->next_to_clean = 0;
2546         return 0;
2547
2548 err:
2549         vfree(tx_ring->buffer_info);
2550         dev_err(dev,
2551                 "Unable to allocate memory for the transmit descriptor ring\n");
2552         return -ENOMEM;
2553 }
2554
2555 /**
2556  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2557  *                                (Descriptors) for all queues
2558  * @adapter: board private structure
2559  *
2560  * Return 0 on success, negative on failure
2561  **/
2562 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2563 {
2564         struct pci_dev *pdev = adapter->pdev;
2565         int i, err = 0;
2566
2567         for (i = 0; i < adapter->num_tx_queues; i++) {
2568                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2569                 if (err) {
2570                         dev_err(&pdev->dev,
2571                                 "Allocation for Tx Queue %u failed\n", i);
2572                         for (i--; i >= 0; i--)
2573                                 igb_free_tx_resources(adapter->tx_ring[i]);
2574                         break;
2575                 }
2576         }
2577
2578         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2579                 int r_idx = i % adapter->num_tx_queues;
2580                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2581         }
2582         return err;
2583 }
2584
2585 /**
2586  * igb_setup_tctl - configure the transmit control registers
2587  * @adapter: Board private structure
2588  **/
2589 void igb_setup_tctl(struct igb_adapter *adapter)
2590 {
2591         struct e1000_hw *hw = &adapter->hw;
2592         u32 tctl;
2593
2594         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2595         wr32(E1000_TXDCTL(0), 0);
2596
2597         /* Program the Transmit Control Register */
2598         tctl = rd32(E1000_TCTL);
2599         tctl &= ~E1000_TCTL_CT;
2600         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2601                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2602
2603         igb_config_collision_dist(hw);
2604
2605         /* Enable transmits */
2606         tctl |= E1000_TCTL_EN;
2607
2608         wr32(E1000_TCTL, tctl);
2609 }
2610
2611 /**
2612  * igb_configure_tx_ring - Configure transmit ring after Reset
2613  * @adapter: board private structure
2614  * @ring: tx ring to configure
2615  *
2616  * Configure a transmit ring after a reset.
2617  **/
2618 void igb_configure_tx_ring(struct igb_adapter *adapter,
2619                            struct igb_ring *ring)
2620 {
2621         struct e1000_hw *hw = &adapter->hw;
2622         u32 txdctl;
2623         u64 tdba = ring->dma;
2624         int reg_idx = ring->reg_idx;
2625
2626         /* disable the queue */
2627         txdctl = rd32(E1000_TXDCTL(reg_idx));
2628         wr32(E1000_TXDCTL(reg_idx),
2629                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2630         wrfl();
2631         mdelay(10);
2632
2633         wr32(E1000_TDLEN(reg_idx),
2634                         ring->count * sizeof(union e1000_adv_tx_desc));
2635         wr32(E1000_TDBAL(reg_idx),
2636                         tdba & 0x00000000ffffffffULL);
2637         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2638
2639         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2640         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2641         writel(0, ring->head);
2642         writel(0, ring->tail);
2643
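             /* prefetch, host and write-back thresholds are packed into
              * TXDCTL at bit offsets 0, 8 and 16 respectively */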
2644         txdctl |= IGB_TX_PTHRESH;
2645         txdctl |= IGB_TX_HTHRESH << 8;
2646         txdctl |= IGB_TX_WTHRESH << 16;
2647
2648         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2649         wr32(E1000_TXDCTL(reg_idx), txdctl);
2650 }
2651
2652 /**
2653  * igb_configure_tx - Configure transmit Unit after Reset
2654  * @adapter: board private structure
2655  *
2656  * Configure the Tx unit of the MAC after a reset.
2657  **/
2658 static void igb_configure_tx(struct igb_adapter *adapter)
2659 {
2660         int i;
2661
2662         for (i = 0; i < adapter->num_tx_queues; i++)
2663                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2664 }
2665
2666 /**
2667  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2668  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2669  *
2670  * Returns 0 on success, negative on failure
2671  **/
2672 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2673 {
2674         struct device *dev = rx_ring->dev;
2675         int size, desc_len;
2676
2677         size = sizeof(struct igb_buffer) * rx_ring->count;
2678         rx_ring->buffer_info = vzalloc(size);
2679         if (!rx_ring->buffer_info)
2680                 goto err;
2681
2682         desc_len = sizeof(union e1000_adv_rx_desc);
2683
2684         /* Round up to nearest 4K */
2685         rx_ring->size = rx_ring->count * desc_len;
2686         rx_ring->size = ALIGN(rx_ring->size, 4096);
2687
2688         rx_ring->desc = dma_alloc_coherent(dev,
2689                                            rx_ring->size,
2690                                            &rx_ring->dma,
2691                                            GFP_KERNEL);
2692
2693         if (!rx_ring->desc)
2694                 goto err;
2695
2696         rx_ring->next_to_clean = 0;
2697         rx_ring->next_to_use = 0;
2698
2699         return 0;
2700
2701 err:
2702         vfree(rx_ring->buffer_info);
2703         rx_ring->buffer_info = NULL;
2704         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2705                 " ring\n");
2706         return -ENOMEM;
2707 }
2708
2709 /**
2710  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2711  *                                (Descriptors) for all queues
2712  * @adapter: board private structure
2713  *
2714  * Return 0 on success, negative on failure
2715  **/
2716 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2717 {
2718         struct pci_dev *pdev = adapter->pdev;
2719         int i, err = 0;
2720
2721         for (i = 0; i < adapter->num_rx_queues; i++) {
2722                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2723                 if (err) {
2724                         dev_err(&pdev->dev,
2725                                 "Allocation for Rx Queue %u failed\n", i);
2726                         for (i--; i >= 0; i--)
2727                                 igb_free_rx_resources(adapter->rx_ring[i]);
2728                         break;
2729                 }
2730         }
2731
2732         return err;
2733 }
2734
2735 /**
2736  * igb_setup_mrqc - configure the multiple receive queue control registers
2737  * @adapter: Board private structure
2738  **/
2739 static void igb_setup_mrqc(struct igb_adapter *adapter)
2740 {
2741         struct e1000_hw *hw = &adapter->hw;
2742         u32 mrqc, rxcsum;
2743         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2744         union e1000_reta {
2745                 u32 dword;
2746                 u8  bytes[4];
2747         } reta;
2748         static const u8 rsshash[40] = {
2749                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2750                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2751                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2752                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2753
2754         /* Fill out hash function seeds */
2755         for (j = 0; j < 10; j++) {
2756                 u32 rsskey = rsshash[(j * 4)];
2757                 rsskey |= rsshash[(j * 4) + 1] << 8;
2758                 rsskey |= rsshash[(j * 4) + 2] << 16;
2759                 rsskey |= rsshash[(j * 4) + 3] << 24;
2760                 array_wr32(E1000_RSSRK(0), j, rsskey);
2761         }
2762
2763         num_rx_queues = adapter->rss_queues;
2764
2765         if (adapter->vfs_allocated_count) {
2766                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2767                 switch (hw->mac.type) {
2768                 case e1000_i350:
2769                 case e1000_82580:
2770                         num_rx_queues = 1;
2771                         shift = 0;
2772                         break;
2773                 case e1000_82576:
2774                         shift = 3;
2775                         num_rx_queues = 2;
2776                         break;
2777                 case e1000_82575:
2778                         shift = 2;
2779                         shift2 = 6;
2780                 default:
2781                         break;
2782                 }
2783         } else {
2784                 if (hw->mac.type == e1000_82575)
2785                         shift = 6;
2786         }
2787
2788         for (j = 0; j < (32 * 4); j++) {
2789                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2790                 if (shift2)
2791                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2792                 if ((j & 3) == 3)
2793                         wr32(E1000_RETA(j >> 2), reta.dword);
2794         }
2795
2796         /*
2797          * Disable raw packet checksumming so that RSS hash is placed in
2798          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2799          * offloads as they are enabled by default
2800          */
2801         rxcsum = rd32(E1000_RXCSUM);
2802         rxcsum |= E1000_RXCSUM_PCSD;
2803
2804         if (adapter->hw.mac.type >= e1000_82576)
2805                 /* Enable Receive Checksum Offload for SCTP */
2806                 rxcsum |= E1000_RXCSUM_CRCOFL;
2807
2808         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2809         wr32(E1000_RXCSUM, rxcsum);
2810
2811         /* If VMDq is enabled then we set the appropriate mode for that, else
2812          * we default to RSS so that an RSS hash is calculated per packet even
2813          * if we are only using one queue */
2814         if (adapter->vfs_allocated_count) {
2815                 if (hw->mac.type > e1000_82575) {
2816                         /* Set the default pool for the PF's first queue */
2817                         u32 vtctl = rd32(E1000_VT_CTL);
2818                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2819                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2820                         vtctl |= adapter->vfs_allocated_count <<
2821                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2822                         wr32(E1000_VT_CTL, vtctl);
2823                 }
2824                 if (adapter->rss_queues > 1)
2825                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2826                 else
2827                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2828         } else {
2829                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2830         }
2831         igb_vmm_control(adapter);
2832
2833         /*
2834          * Generate RSS hash based on TCP port numbers and/or
2835          * IPv4/v6 src and dst addresses since UDP cannot be
2836          * hashed reliably due to IP fragmentation
2837          */
2838         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2839                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2840                 E1000_MRQC_RSS_FIELD_IPV6 |
2841                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2842                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2843
2844         wr32(E1000_MRQC, mrqc);
2845 }
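
/*
 * Packing sketch (illustrative, not driver code): each of the ten RSSRK words
 * takes four key bytes, least-significant byte first, and each RETA register
 * holds four one-byte queue selectors, exactly as the loops above build them:
 *
 *	rsskey = key[4*j] | key[4*j + 1] << 8 |
 *		 key[4*j + 2] << 16 | key[4*j + 3] << 24;
 *
 * With rss_queues == 4 and shift == 0 the 128 RETA bytes simply repeat
 * 0, 1, 2, 3, 0, 1, ... so hashed flows are spread evenly over the four RSS
 * queues; the shift/shift2 values above remap those indices into the queue
 * layout the older MACs expect when VFs are enabled.
 */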
2846
2847 /**
2848  * igb_setup_rctl - configure the receive control registers
2849  * @adapter: Board private structure
2850  **/
2851 void igb_setup_rctl(struct igb_adapter *adapter)
2852 {
2853         struct e1000_hw *hw = &adapter->hw;
2854         u32 rctl;
2855
2856         rctl = rd32(E1000_RCTL);
2857
2858         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2859         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2860
2861         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2862                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2863
2864         /*
2865          * enable stripping of CRC. It's unlikely this will break BMC
2866          * redirection as it did with e1000. Newer features require
2867          * that the HW strips the CRC.
2868          */
2869         rctl |= E1000_RCTL_SECRC;
2870
2871         /* disable store bad packets and clear size bits. */
2872         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2873
2874         /* enable LPE to prevent packets larger than max_frame_size */
2875         rctl |= E1000_RCTL_LPE;
2876
2877         /* disable queue 0 to prevent tail write w/o re-config */
2878         wr32(E1000_RXDCTL(0), 0);
2879
2880         /* Attention!!!  For SR-IOV PF driver operations you must enable
2881          * queue drop for all VF and PF queues to prevent head of line blocking
2882          * if an un-trusted VF does not provide descriptors to hardware.
2883          */
2884         if (adapter->vfs_allocated_count) {
2885                 /* set all queue drop enable bits */
2886                 wr32(E1000_QDE, ALL_QUEUES);
2887         }
2888
2889         wr32(E1000_RCTL, rctl);
2890 }
2891
2892 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2893                                    int vfn)
2894 {
2895         struct e1000_hw *hw = &adapter->hw;
2896         u32 vmolr;
2897
2898         /* if this isn't the PF, check whether the VF has VLANs enabled
2899          * and increase the size to allow for the VLAN tag */
2900         if (vfn < adapter->vfs_allocated_count &&
2901             adapter->vf_data[vfn].vlans_enabled)
2902                 size += VLAN_TAG_SIZE;
2903
2904         vmolr = rd32(E1000_VMOLR(vfn));
2905         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2906         vmolr |= size | E1000_VMOLR_LPE;
2907         wr32(E1000_VMOLR(vfn), vmolr);
2908
2909         return 0;
2910 }
2911
2912 /**
2913  * igb_rlpml_set - set maximum receive packet size
2914  * @adapter: board private structure
2915  *
2916  * Configure maximum receivable packet size.
2917  **/
2918 static void igb_rlpml_set(struct igb_adapter *adapter)
2919 {
2920         u32 max_frame_size = adapter->max_frame_size;
2921         struct e1000_hw *hw = &adapter->hw;
2922         u16 pf_id = adapter->vfs_allocated_count;
2923
2924         if (adapter->vlgrp)
2925                 max_frame_size += VLAN_TAG_SIZE;
2926
2927         /* if vfs are enabled we set RLPML to the largest possible request
2928          * size and set the VMOLR RLPML to the size we need */
2929         if (pf_id) {
2930                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2931                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2932         }
2933
2934         wr32(E1000_RLPML, max_frame_size);
2935 }
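
/*
 * Worked example (illustrative): with a standard 1500-byte MTU the driver's
 * max_frame_size typically works out to 1500 + 14 (Ethernet header) + 4 (FCS)
 * = 1518 bytes, and an active VLAN group raises the limit to 1522.  When VFs
 * exist, the global RLPML is opened up to the largest jumbo frame instead and
 * the per-pool VMOLR.RLPML written by igb_set_vf_rlpml() enforces the tighter
 * 1518/1522 limit for the PF's own pool.
 */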
2936
2937 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2938                                  int vfn, bool aupe)
2939 {
2940         struct e1000_hw *hw = &adapter->hw;
2941         u32 vmolr;
2942
2943         /*
2944          * This register exists only on 82576 and newer, so on older hardware
2945          * exit and do nothing
2946          */
2947         if (hw->mac.type < e1000_82576)
2948                 return;
2949
2950         vmolr = rd32(E1000_VMOLR(vfn));
2951         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2952         if (aupe)
2953                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2954         else
2955                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2956
2957         /* clear all bits that might not be set */
2958         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2959
2960         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2961                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2962         /*
2963          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2964          * multicast packets
2965          */
2966         if (vfn <= adapter->vfs_allocated_count)
2967                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2968
2969         wr32(E1000_VMOLR(vfn), vmolr);
2970 }
2971
2972 /**
2973  * igb_configure_rx_ring - Configure a receive ring after Reset
2974  * @adapter: board private structure
2975  * @ring: receive ring to be configured
2976  *
2977  * Configure the Rx unit of the MAC after a reset.
2978  **/
2979 void igb_configure_rx_ring(struct igb_adapter *adapter,
2980                            struct igb_ring *ring)
2981 {
2982         struct e1000_hw *hw = &adapter->hw;
2983         u64 rdba = ring->dma;
2984         int reg_idx = ring->reg_idx;
2985         u32 srrctl, rxdctl;
2986
2987         /* disable the queue */
2988         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2989         wr32(E1000_RXDCTL(reg_idx),
2990                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2991
2992         /* Set DMA base address registers */
2993         wr32(E1000_RDBAL(reg_idx),
2994              rdba & 0x00000000ffffffffULL);
2995         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2996         wr32(E1000_RDLEN(reg_idx),
2997                        ring->count * sizeof(union e1000_adv_rx_desc));
2998
2999         /* initialize head and tail */
3000         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
3001         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3002         writel(0, ring->head);
3003         writel(0, ring->tail);
3004
3005         /* set descriptor configuration */
3006         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
3007                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
3008                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3009 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3010                 srrctl |= IGB_RXBUFFER_16384 >>
3011                           E1000_SRRCTL_BSIZEPKT_SHIFT;
3012 #else
3013                 srrctl |= (PAGE_SIZE / 2) >>
3014                           E1000_SRRCTL_BSIZEPKT_SHIFT;
3015 #endif
3016                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3017         } else {
3018                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
3019                          E1000_SRRCTL_BSIZEPKT_SHIFT;
3020                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3021         }
3022         if (hw->mac.type == e1000_82580)
3023                 srrctl |= E1000_SRRCTL_TIMESTAMP;
3024         /* Only set Drop Enable if we are supporting multiple queues */
3025         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3026                 srrctl |= E1000_SRRCTL_DROP_EN;
3027
3028         wr32(E1000_SRRCTL(reg_idx), srrctl);
3029
3030         /* set filtering for VMDQ pools */
3031         igb_set_vmolr(adapter, reg_idx & 0x7, true);
3032
3033         /* enable receive descriptor fetching */
3034         rxdctl = rd32(E1000_RXDCTL(reg_idx));
3035         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3036         rxdctl &= 0xFFF00000;
3037         rxdctl |= IGB_RX_PTHRESH;
3038         rxdctl |= IGB_RX_HTHRESH << 8;
3039         rxdctl |= IGB_RX_WTHRESH << 16;
3040         wr32(E1000_RXDCTL(reg_idx), rxdctl);
3041 }
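
/*
 * SRRCTL encoding sketch (illustrative): the packet-buffer-size field of
 * SRRCTL is expressed in 1 KB units, so the shift above simply divides the
 * byte count, e.g. for a 2048-byte buffer in one-buffer mode:
 *
 *	srrctl = ALIGN(2048, 1024) >> E1000_SRRCTL_BSIZEPKT_SHIFT;	(== 2)
 *
 * In the header-split branch the header size lands in a 64-byte-granular
 * field instead, which is why that path aligns to 64 before shifting.
 */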
3042
3043 /**
3044  * igb_configure_rx - Configure receive Unit after Reset
3045  * @adapter: board private structure
3046  *
3047  * Configure the Rx unit of the MAC after a reset.
3048  **/
3049 static void igb_configure_rx(struct igb_adapter *adapter)
3050 {
3051         int i;
3052
3053         /* set UTA to appropriate mode */
3054         igb_set_uta(adapter);
3055
3056         /* set the correct pool for the PF default MAC address in entry 0 */
3057         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3058                          adapter->vfs_allocated_count);
3059
3060         /* Setup the HW Rx Head and Tail Descriptor Pointers and
3061          * the Base and Length of the Rx Descriptor Ring */
3062         for (i = 0; i < adapter->num_rx_queues; i++)
3063                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3064 }
3065
3066 /**
3067  * igb_free_tx_resources - Free Tx Resources per Queue
3068  * @tx_ring: Tx descriptor ring for a specific queue
3069  *
3070  * Free all transmit software resources
3071  **/
3072 void igb_free_tx_resources(struct igb_ring *tx_ring)
3073 {
3074         igb_clean_tx_ring(tx_ring);
3075
3076         vfree(tx_ring->buffer_info);
3077         tx_ring->buffer_info = NULL;
3078
3079         /* if not set, then don't free */
3080         if (!tx_ring->desc)
3081                 return;
3082
3083         dma_free_coherent(tx_ring->dev, tx_ring->size,
3084                           tx_ring->desc, tx_ring->dma);
3085
3086         tx_ring->desc = NULL;
3087 }
3088
3089 /**
3090  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3091  * @adapter: board private structure
3092  *
3093  * Free all transmit software resources
3094  **/
3095 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3096 {
3097         int i;
3098
3099         for (i = 0; i < adapter->num_tx_queues; i++)
3100                 igb_free_tx_resources(adapter->tx_ring[i]);
3101 }
3102
3103 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3104                                     struct igb_buffer *buffer_info)
3105 {
3106         if (buffer_info->dma) {
3107                 if (buffer_info->mapped_as_page)
3108                         dma_unmap_page(tx_ring->dev,
3109                                         buffer_info->dma,
3110                                         buffer_info->length,
3111                                         DMA_TO_DEVICE);
3112                 else
3113                         dma_unmap_single(tx_ring->dev,
3114                                         buffer_info->dma,
3115                                         buffer_info->length,
3116                                         DMA_TO_DEVICE);
3117                 buffer_info->dma = 0;
3118         }
3119         if (buffer_info->skb) {
3120                 dev_kfree_skb_any(buffer_info->skb);
3121                 buffer_info->skb = NULL;
3122         }
3123         buffer_info->time_stamp = 0;
3124         buffer_info->length = 0;
3125         buffer_info->next_to_watch = 0;
3126         buffer_info->mapped_as_page = false;
3127 }
3128
3129 /**
3130  * igb_clean_tx_ring - Free Tx Buffers
3131  * @tx_ring: ring to be cleaned
3132  **/
3133 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3134 {
3135         struct igb_buffer *buffer_info;
3136         unsigned long size;
3137         unsigned int i;
3138
3139         if (!tx_ring->buffer_info)
3140                 return;
3141
3142         /* Free all the Tx ring sk_buffs */
3143         for (i = 0; i < tx_ring->count; i++) {
3144                 buffer_info = &tx_ring->buffer_info[i];
3145                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3146         }
3147
3148         size = sizeof(struct igb_buffer) * tx_ring->count;
3149         memset(tx_ring->buffer_info, 0, size);
3150
3151         /* Zero out the descriptor ring */
3152         memset(tx_ring->desc, 0, tx_ring->size);
3153
3154         tx_ring->next_to_use = 0;
3155         tx_ring->next_to_clean = 0;
3156 }
3157
3158 /**
3159  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3160  * @adapter: board private structure
3161  **/
3162 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3163 {
3164         int i;
3165
3166         for (i = 0; i < adapter->num_tx_queues; i++)
3167                 igb_clean_tx_ring(adapter->tx_ring[i]);
3168 }
3169
3170 /**
3171  * igb_free_rx_resources - Free Rx Resources
3172  * @rx_ring: ring to clean the resources from
3173  *
3174  * Free all receive software resources
3175  **/
3176 void igb_free_rx_resources(struct igb_ring *rx_ring)
3177 {
3178         igb_clean_rx_ring(rx_ring);
3179
3180         vfree(rx_ring->buffer_info);
3181         rx_ring->buffer_info = NULL;
3182
3183         /* if not set, then don't free */
3184         if (!rx_ring->desc)
3185                 return;
3186
3187         dma_free_coherent(rx_ring->dev, rx_ring->size,
3188                           rx_ring->desc, rx_ring->dma);
3189
3190         rx_ring->desc = NULL;
3191 }
3192
3193 /**
3194  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3195  * @adapter: board private structure
3196  *
3197  * Free all receive software resources
3198  **/
3199 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3200 {
3201         int i;
3202
3203         for (i = 0; i < adapter->num_rx_queues; i++)
3204                 igb_free_rx_resources(adapter->rx_ring[i]);
3205 }
3206
3207 /**
3208  * igb_clean_rx_ring - Free Rx Buffers per Queue
3209  * @rx_ring: ring to free buffers from
3210  **/
3211 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3212 {
3213         struct igb_buffer *buffer_info;
3214         unsigned long size;
3215         unsigned int i;
3216
3217         if (!rx_ring->buffer_info)
3218                 return;
3219
3220         /* Free all the Rx ring sk_buffs */
3221         for (i = 0; i < rx_ring->count; i++) {
3222                 buffer_info = &rx_ring->buffer_info[i];
3223                 if (buffer_info->dma) {
3224                         dma_unmap_single(rx_ring->dev,
3225                                          buffer_info->dma,
3226                                          rx_ring->rx_buffer_len,
3227                                          DMA_FROM_DEVICE);
3228                         buffer_info->dma = 0;
3229                 }
3230
3231                 if (buffer_info->skb) {
3232                         dev_kfree_skb(buffer_info->skb);
3233                         buffer_info->skb = NULL;
3234                 }
3235                 if (buffer_info->page_dma) {
3236                         dma_unmap_page(rx_ring->dev,
3237                                        buffer_info->page_dma,
3238                                        PAGE_SIZE / 2,
3239                                        DMA_FROM_DEVICE);
3240                         buffer_info->page_dma = 0;
3241                 }
3242                 if (buffer_info->page) {
3243                         put_page(buffer_info->page);
3244                         buffer_info->page = NULL;
3245                         buffer_info->page_offset = 0;
3246                 }
3247         }
3248
3249         size = sizeof(struct igb_buffer) * rx_ring->count;
3250         memset(rx_ring->buffer_info, 0, size);
3251
3252         /* Zero out the descriptor ring */
3253         memset(rx_ring->desc, 0, rx_ring->size);
3254
3255         rx_ring->next_to_clean = 0;
3256         rx_ring->next_to_use = 0;
3257 }
3258
3259 /**
3260  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3261  * @adapter: board private structure
3262  **/
3263 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3264 {
3265         int i;
3266
3267         for (i = 0; i < adapter->num_rx_queues; i++)
3268                 igb_clean_rx_ring(adapter->rx_ring[i]);
3269 }
3270
3271 /**
3272  * igb_set_mac - Change the Ethernet Address of the NIC
3273  * @netdev: network interface device structure
3274  * @p: pointer to an address structure
3275  *
3276  * Returns 0 on success, negative on failure
3277  **/
3278 static int igb_set_mac(struct net_device *netdev, void *p)
3279 {
3280         struct igb_adapter *adapter = netdev_priv(netdev);
3281         struct e1000_hw *hw = &adapter->hw;
3282         struct sockaddr *addr = p;
3283
3284         if (!is_valid_ether_addr(addr->sa_data))
3285                 return -EADDRNOTAVAIL;
3286
3287         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3288         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3289
3290         /* set the correct pool for the new PF MAC address in entry 0 */
3291         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3292                          adapter->vfs_allocated_count);
3293
3294         return 0;
3295 }
3296
3297 /**
3298  * igb_write_mc_addr_list - write multicast addresses to MTA
3299  * @netdev: network interface device structure
3300  *
3301  * Writes multicast address list to the MTA hash table.
3302  * Returns: -ENOMEM on failure
3303  *                0 on no addresses written
3304  *                X on writing X addresses to MTA
3305  **/
3306 static int igb_write_mc_addr_list(struct net_device *netdev)
3307 {
3308         struct igb_adapter *adapter = netdev_priv(netdev);
3309         struct e1000_hw *hw = &adapter->hw;
3310         struct netdev_hw_addr *ha;
3311         u8  *mta_list;
3312         int i;
3313
3314         if (netdev_mc_empty(netdev)) {
3315                 /* nothing to program, so clear mc list */
3316                 igb_update_mc_addr_list(hw, NULL, 0);
3317                 igb_restore_vf_multicasts(adapter);
3318                 return 0;
3319         }
3320
3321         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3322         if (!mta_list)
3323                 return -ENOMEM;
3324
3325         /* The shared function expects a packed array of only addresses. */
3326         i = 0;
3327         netdev_for_each_mc_addr(ha, netdev)
3328                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3329
3330         igb_update_mc_addr_list(hw, mta_list, i);
3331         kfree(mta_list);
3332
3333         return netdev_mc_count(netdev);
3334 }
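
/*
 * Layout sketch (illustrative): the shared code expects the multicast
 * addresses packed back to back with no per-entry padding, ETH_ALEN (6)
 * bytes each, so entry i lives at mta_list + i * ETH_ALEN:
 *
 *	[addr0: 6 bytes][addr1: 6 bytes] ... [addrN-1: 6 bytes]
 */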
3335
3336 /**
3337  * igb_write_uc_addr_list - write unicast addresses to RAR table
3338  * @netdev: network interface device structure
3339  *
3340  * Writes unicast address list to the RAR table.
3341  * Returns: -ENOMEM on failure/insufficient address space
3342  *                0 on no addresses written
3343  *                X on writing X addresses to the RAR table
3344  **/
3345 static int igb_write_uc_addr_list(struct net_device *netdev)
3346 {
3347         struct igb_adapter *adapter = netdev_priv(netdev);
3348         struct e1000_hw *hw = &adapter->hw;
3349         unsigned int vfn = adapter->vfs_allocated_count;
3350         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3351         int count = 0;
3352
3353         /* return -ENOMEM if there are not enough RAR entries for all the addresses */
3354         if (netdev_uc_count(netdev) > rar_entries)
3355                 return -ENOMEM;
3356
3357         if (!netdev_uc_empty(netdev) && rar_entries) {
3358                 struct netdev_hw_addr *ha;
3359
3360                 netdev_for_each_uc_addr(ha, netdev) {
3361                         if (!rar_entries)
3362                                 break;
3363                         igb_rar_set_qsel(adapter, ha->addr,
3364                                          rar_entries--,
3365                                          vfn);
3366                         count++;
3367                 }
3368         }
3369         /* clear the unused RAR entries, in reverse order to avoid write combining */
3370         for (; rar_entries > 0 ; rar_entries--) {
3371                 wr32(E1000_RAH(rar_entries), 0);
3372                 wr32(E1000_RAL(rar_entries), 0);
3373         }
3374         wrfl();
3375
3376         return count;
3377 }
3378
3379 /**
3380  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3381  * @netdev: network interface device structure
3382  *
3383  * The set_rx_mode entry point is called whenever the unicast or multicast
3384  * address lists or the network interface flags are updated.  This routine is
3385  * responsible for configuring the hardware for proper unicast, multicast,
3386  * promiscuous mode, and all-multi behavior.
3387  **/
3388 static void igb_set_rx_mode(struct net_device *netdev)
3389 {
3390         struct igb_adapter *adapter = netdev_priv(netdev);
3391         struct e1000_hw *hw = &adapter->hw;
3392         unsigned int vfn = adapter->vfs_allocated_count;
3393         u32 rctl, vmolr = 0;
3394         int count;
3395
3396         /* Check for Promiscuous and All Multicast modes */
3397         rctl = rd32(E1000_RCTL);
3398
3399         /* clear the affected bits */
3400         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3401
3402         if (netdev->flags & IFF_PROMISC) {
3403                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3404                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3405         } else {
3406                 if (netdev->flags & IFF_ALLMULTI) {
3407                         rctl |= E1000_RCTL_MPE;
3408                         vmolr |= E1000_VMOLR_MPME;
3409                 } else {
3410                         /*
3411                          * Write addresses to the MTA, if the attempt fails
3412                          * then we should just turn on promiscuous mode so
3413                          * that we can at least receive multicast traffic
3414                          */
3415                         count = igb_write_mc_addr_list(netdev);
3416                         if (count < 0) {
3417                                 rctl |= E1000_RCTL_MPE;
3418                                 vmolr |= E1000_VMOLR_MPME;
3419                         } else if (count) {
3420                                 vmolr |= E1000_VMOLR_ROMPE;
3421                         }
3422                 }
3423                 /*
3424                  * Write addresses to available RAR registers, if there is not
3425                  * sufficient space to store all the addresses then enable
3426                  * unicast promiscuous mode
3427                  */
3428                 count = igb_write_uc_addr_list(netdev);
3429                 if (count < 0) {
3430                         rctl |= E1000_RCTL_UPE;
3431                         vmolr |= E1000_VMOLR_ROPE;
3432                 }
3433                 rctl |= E1000_RCTL_VFE;
3434         }
3435         wr32(E1000_RCTL, rctl);
3436
3437         /*
3438          * In order to support SR-IOV and eventually VMDq it is necessary to set
3439          * the VMOLR to enable the appropriate modes.  Without this workaround
3440          * we will have issues with VLAN tag stripping not being done for frames
3441          * that are only arriving because we are the default pool
3442          */
3443         if (hw->mac.type < e1000_82576)
3444                 return;
3445
3446         vmolr |= rd32(E1000_VMOLR(vfn)) &
3447                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3448         wr32(E1000_VMOLR(vfn), vmolr);
3449         igb_restore_vf_multicasts(adapter);
3450 }
3451
3452 static void igb_check_wvbr(struct igb_adapter *adapter)
3453 {
3454         struct e1000_hw *hw = &adapter->hw;
3455         u32 wvbr = 0;
3456
3457         switch (hw->mac.type) {
3458         case e1000_82576:
3459         case e1000_i350:
3460                 if (!(wvbr = rd32(E1000_WVBR)))
3461                         return;
3462                 break;
3463         default:
3464                 break;
3465         }
3466
3467         adapter->wvbr |= wvbr;
3468 }
3469
3470 #define IGB_STAGGERED_QUEUE_OFFSET 8
3471
3472 static void igb_spoof_check(struct igb_adapter *adapter)
3473 {
3474         int j;
3475
3476         if (!adapter->wvbr)
3477                 return;
3478
3479         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3480                 if (adapter->wvbr & (1 << j) ||
3481                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3482                         dev_warn(&adapter->pdev->dev,
3483                                 "Spoof event(s) detected on VF %d\n", j);
3484                         adapter->wvbr &=
3485                                 ~((1 << j) |
3486                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3487                 }
3488         }
3489 }
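
/*
 * Bit-layout sketch (illustrative): WVBR flags spoofed MAC/VLAN events per
 * queue, and a VF's two queues sit 8 bits apart, so the bits tested and
 * cleared for VF j are:
 *
 *	mask = (1 << j) | (1 << (j + IGB_STAGGERED_QUEUE_OFFSET));
 *
 * e.g. VF 2 maps to mask 0x00000404.
 */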
3490
3491 /* Need to wait a few seconds after link up to get diagnostic information from
3492  * the phy */
3493 static void igb_update_phy_info(unsigned long data)
3494 {
3495         struct igb_adapter *adapter = (struct igb_adapter *) data;
3496         igb_get_phy_info(&adapter->hw);
3497 }
3498
3499 /**
3500  * igb_has_link - check shared code for link and determine up/down
3501  * @adapter: pointer to driver private info
3502  **/
3503 bool igb_has_link(struct igb_adapter *adapter)
3504 {
3505         struct e1000_hw *hw = &adapter->hw;
3506         bool link_active = false;
3507         s32 ret_val = 0;
3508
3509         /* get_link_status is set on LSC (link status) interrupt or
3510          * rx sequence error interrupt.  get_link_status will stay
3511          * false until the e1000_check_for_link establishes link
3512          * for copper adapters ONLY
3513          */
3514         switch (hw->phy.media_type) {
3515         case e1000_media_type_copper:
3516                 if (hw->mac.get_link_status) {
3517                         ret_val = hw->mac.ops.check_for_link(hw);
3518                         link_active = !hw->mac.get_link_status;
3519                 } else {
3520                         link_active = true;
3521                 }
3522                 break;
3523         case e1000_media_type_internal_serdes:
3524                 ret_val = hw->mac.ops.check_for_link(hw);
3525                 link_active = hw->mac.serdes_has_link;
3526                 break;
3527         default:
3528         case e1000_media_type_unknown:
3529                 break;
3530         }
3531
3532         return link_active;
3533 }
3534
3535 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3536 {
3537         bool ret = false;
3538         u32 ctrl_ext, thstat;
3539
3540         /* check for thermal sensor event on i350, copper only */
3541         if (hw->mac.type == e1000_i350) {
3542                 thstat = rd32(E1000_THSTAT);
3543                 ctrl_ext = rd32(E1000_CTRL_EXT);
3544
3545                 if ((hw->phy.media_type == e1000_media_type_copper) &&
3546                     !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3547                         ret = !!(thstat & event);
3548                 }
3549         }
3550
3551         return ret;
3552 }
3553
3554 /**
3555  * igb_watchdog - Timer Call-back
3556  * @data: pointer to adapter cast into an unsigned long
3557  **/
3558 static void igb_watchdog(unsigned long data)
3559 {
3560         struct igb_adapter *adapter = (struct igb_adapter *)data;
3561         /* Do the rest outside of interrupt context */
3562         schedule_work(&adapter->watchdog_task);
3563 }
3564
3565 static void igb_watchdog_task(struct work_struct *work)
3566 {
3567         struct igb_adapter *adapter = container_of(work,
3568                                                    struct igb_adapter,
3569                                                    watchdog_task);
3570         struct e1000_hw *hw = &adapter->hw;
3571         struct net_device *netdev = adapter->netdev;
3572         u32 link;
3573         int i;
3574
3575         link = igb_has_link(adapter);
3576         if (link) {
3577                 if (!netif_carrier_ok(netdev)) {
3578                         u32 ctrl;
3579                         hw->mac.ops.get_speed_and_duplex(hw,
3580                                                          &adapter->link_speed,
3581                                                          &adapter->link_duplex);
3582
3583                         ctrl = rd32(E1000_CTRL);
3584                         /* Link status message must follow this format */
3585                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3586                                  "Flow Control: %s\n",
3587                                netdev->name,
3588                                adapter->link_speed,
3589                                adapter->link_duplex == FULL_DUPLEX ?
3590                                  "Full Duplex" : "Half Duplex",
3591                                ((ctrl & E1000_CTRL_TFCE) &&
3592                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3593                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3594                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3595
3596                         /* check for thermal sensor event */
3597                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3598                                 printk(KERN_INFO "igb: %s The network adapter "
3599                                                  "link speed was downshifted "
3600                                                  "because it overheated.\n",
3601                                                  netdev->name);
3602                         }
3603
3604                         /* adjust timeout factor according to speed/duplex */
3605                         adapter->tx_timeout_factor = 1;
3606                         switch (adapter->link_speed) {
3607                         case SPEED_10:
3608                                 adapter->tx_timeout_factor = 14;
3609                                 break;
3610                         case SPEED_100:
3611                                 /* maybe add some timeout factor ? */
3612                                 break;
3613                         }
3614
3615                         netif_carrier_on(netdev);
3616
3617                         igb_ping_all_vfs(adapter);
3618                         igb_check_vf_rate_limit(adapter);
3619
3620                         /* link state has changed, schedule phy info update */
3621                         if (!test_bit(__IGB_DOWN, &adapter->state))
3622                                 mod_timer(&adapter->phy_info_timer,
3623                                           round_jiffies(jiffies + 2 * HZ));
3624                 }
3625         } else {
3626                 if (netif_carrier_ok(netdev)) {
3627                         adapter->link_speed = 0;
3628                         adapter->link_duplex = 0;
3629
3630                         /* check for thermal sensor event */
3631                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3632                                 printk(KERN_ERR "igb: %s The network adapter "
3633                                                 "was stopped because it "
3634                                                 "overheated.\n",
3635                                                 netdev->name);
3636                         }
3637
3638                         /* Link status message must follow this format */
3639                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3640                                netdev->name);
3641                         netif_carrier_off(netdev);
3642
3643                         igb_ping_all_vfs(adapter);
3644
3645                         /* link state has changed, schedule phy info update */
3646                         if (!test_bit(__IGB_DOWN, &adapter->state))
3647                                 mod_timer(&adapter->phy_info_timer,
3648                                           round_jiffies(jiffies + 2 * HZ));
3649                 }
3650         }
3651
3652         spin_lock(&adapter->stats64_lock);
3653         igb_update_stats(adapter, &adapter->stats64);
3654         spin_unlock(&adapter->stats64_lock);
3655
3656         for (i = 0; i < adapter->num_tx_queues; i++) {
3657                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3658                 if (!netif_carrier_ok(netdev)) {
3659                         /* We've lost link, so the controller stops DMA,
3660                          * but we've got queued Tx work that's never going
3661                          * to get done, so reset controller to flush Tx.
3662                          * (Do the reset outside of interrupt context). */
3663                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3664                                 adapter->tx_timeout_count++;
3665                                 schedule_work(&adapter->reset_task);
3666                                 /* return immediately since reset is imminent */
3667                                 return;
3668                         }
3669                 }
3670
3671                 /* Force detection of hung controller every watchdog period */
3672                 tx_ring->detect_tx_hung = true;
3673         }
3674
3675         /* Cause software interrupt to ensure rx ring is cleaned */
3676         if (adapter->msix_entries) {
3677                 u32 eics = 0;
3678                 for (i = 0; i < adapter->num_q_vectors; i++) {
3679                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3680                         eics |= q_vector->eims_value;
3681                 }
3682                 wr32(E1000_EICS, eics);
3683         } else {
3684                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3685         }
3686
3687         igb_spoof_check(adapter);
3688
3689         /* Reset the timer */
3690         if (!test_bit(__IGB_DOWN, &adapter->state))
3691                 mod_timer(&adapter->watchdog_timer,
3692                           round_jiffies(jiffies + 2 * HZ));
3693 }
3694
3695 enum latency_range {
3696         lowest_latency = 0,
3697         low_latency = 1,
3698         bulk_latency = 2,
3699         latency_invalid = 255
3700 };
3701
3702 /**
3703  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3704  *
3705  *      Stores a new ITR value based strictly on packet size.  This
3706  *      algorithm is less sophisticated than that used in igb_update_itr,
3707  *      due to the difficulty of synchronizing statistics across multiple
3708  *      receive rings.  The divisors and thresholds used by this function
3709  *      were determined based on theoretical maximum wire speed and testing
3710  *      data, in order to minimize response time while increasing bulk
3711  *      throughput.
3712  *      This functionality is controlled by the InterruptThrottleRate module
3713  *      parameter (see igb_param.c)
3714  *      NOTE:  This function is called only when operating in a multiqueue
3715  *             receive environment.
3716  * @q_vector: pointer to q_vector
3717  **/
3718 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3719 {
3720         int new_val = q_vector->itr_val;
3721         int avg_wire_size = 0;
3722         struct igb_adapter *adapter = q_vector->adapter;
3723         struct igb_ring *ring;
3724         unsigned int packets;
3725
3726         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3727          * ints/sec - an ITR value of 976.
3728          */
3729         if (adapter->link_speed != SPEED_1000) {
3730                 new_val = 976;
3731                 goto set_itr_val;
3732         }
3733
3734         ring = q_vector->rx_ring;
3735         if (ring) {
3736                 packets = ACCESS_ONCE(ring->total_packets);
3737
3738                 if (packets)
3739                         avg_wire_size = ring->total_bytes / packets;
3740         }
3741
3742         ring = q_vector->tx_ring;
3743         if (ring) {
3744                 packets = ACCESS_ONCE(ring->total_packets);
3745
3746                 if (packets)
3747                         avg_wire_size = max_t(u32, avg_wire_size,
3748                                               ring->total_bytes / packets);
3749         }
3750
3751         /* if avg_wire_size isn't set no work was done */
3752         if (!avg_wire_size)
3753                 goto clear_counts;
3754
3755         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3756         avg_wire_size += 24;
3757
3758         /* Don't starve jumbo frames */
3759         avg_wire_size = min(avg_wire_size, 3000);
3760
3761         /* Give a little boost to mid-size frames */
3762         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3763                 new_val = avg_wire_size / 3;
3764         else
3765                 new_val = avg_wire_size / 2;
3766
3767         /* when in itr mode 3 do not exceed 20K ints/sec */
3768         if (adapter->rx_itr_setting == 3 && new_val < 196)
3769                 new_val = 196;
3770
3771 set_itr_val:
3772         if (new_val != q_vector->itr_val) {
3773                 q_vector->itr_val = new_val;
3774                 q_vector->set_itr = 1;
3775         }
3776 clear_counts:
3777         if (q_vector->rx_ring) {
3778                 q_vector->rx_ring->total_bytes = 0;
3779                 q_vector->rx_ring->total_packets = 0;
3780         }
3781         if (q_vector->tx_ring) {
3782                 q_vector->tx_ring->total_bytes = 0;
3783                 q_vector->tx_ring->total_packets = 0;
3784         }
3785 }
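
/*
 * Worked example (illustrative): a queue that moved 64 frames totalling
 * 64 KiB in the last interval has avg_wire_size = 65536 / 64 = 1024, plus
 * 24 bytes of CRC/preamble/IFG = 1048.  That lands in the mid-size boost
 * range (300..1200), so new_val = 1048 / 3 = 349.  Since itr_val is an
 * interval rather than a rate, the smaller values produced for small frames
 * mean more interrupts per second (lower latency), while bulky traffic ends
 * up with fewer, larger interrupts.
 */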
3786
3787 /**
3788  * igb_update_itr - update the dynamic ITR value based on statistics
3789  *      Stores a new ITR value based on packets and byte
3790  *      counts during the last interrupt.  The advantage of per interrupt
3791  *      computation is faster updates and more accurate ITR for the current
3792  *      traffic pattern.  Constants in this function were computed
3793  *      based on theoretical maximum wire speed and thresholds were set based
3794  *      on testing data as well as attempting to minimize response time
3795  *      while increasing bulk throughput.
3796  *      this functionality is controlled by the InterruptThrottleRate module
3797  *      parameter (see igb_param.c)
3798  *      NOTE:  These calculations are only valid when operating in a single-
3799  *             queue environment.
3800  * @adapter: pointer to adapter
3801  * @itr_setting: current q_vector->itr_val
3802  * @packets: the number of packets during this measurement interval
3803  * @bytes: the number of bytes during this measurement interval
3804  **/
3805 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3806                                    int packets, int bytes)
3807 {
3808         unsigned int retval = itr_setting;
3809
3810         if (packets == 0)
3811                 goto update_itr_done;
3812
3813         switch (itr_setting) {
3814         case lowest_latency:
3815                 /* handle TSO and jumbo frames */
3816                 if (bytes/packets > 8000)
3817                         retval = bulk_latency;
3818                 else if ((packets < 5) && (bytes > 512))
3819                         retval = low_latency;
3820                 break;
3821         case low_latency:  /* 50 usec aka 20000 ints/s */
3822                 if (bytes > 10000) {
3823                         /* this if handles the TSO accounting */
3824                         if (bytes/packets > 8000) {
3825                                 retval = bulk_latency;
3826                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3827                                 retval = bulk_latency;
3828                         } else if (packets > 35) {
3829                                 retval = lowest_latency;
3830                         }
3831                 } else if (bytes/packets > 2000) {
3832                         retval = bulk_latency;
3833                 } else if (packets <= 2 && bytes < 512) {
3834                         retval = lowest_latency;
3835                 }
3836                 break;
3837         case bulk_latency: /* 250 usec aka 4000 ints/s */
3838                 if (bytes > 25000) {
3839                         if (packets > 35)
3840                                 retval = low_latency;
3841                 } else if (bytes < 1500) {
3842                         retval = low_latency;
3843                 }
3844                 break;
3845         }
3846
3847 update_itr_done:
3848         return retval;
3849 }
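
/*
 * Transition example (illustrative): suppose the last interval in low_latency
 * saw 3 packets and 27000 bytes.  bytes > 10000 and bytes/packets = 9000 is
 * above the 8000 TSO/jumbo threshold, so the range drops to bulk_latency;
 * conversely 40 small packets totalling 12000 bytes (300 bytes each) would
 * promote the range to lowest_latency.
 */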
3850
3851 static void igb_set_itr(struct igb_adapter *adapter)
3852 {
3853         struct igb_q_vector *q_vector = adapter->q_vector[0];
3854         u16 current_itr;
3855         u32 new_itr = q_vector->itr_val;
3856
3857         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3858         if (adapter->link_speed != SPEED_1000) {
3859                 current_itr = 0;
3860                 new_itr = 4000;
3861                 goto set_itr_now;
3862         }
3863
3864         adapter->rx_itr = igb_update_itr(adapter,
3865                                     adapter->rx_itr,
3866                                     q_vector->rx_ring->total_packets,
3867                                     q_vector->rx_ring->total_bytes);
3868
3869         adapter->tx_itr = igb_update_itr(adapter,
3870                                     adapter->tx_itr,
3871                                     q_vector->tx_ring->total_packets,
3872                                     q_vector->tx_ring->total_bytes);
3873         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3874
3875         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3876         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3877                 current_itr = low_latency;
3878
3879         switch (current_itr) {
3880         /* counts and packets in update_itr are dependent on these numbers */
3881         case lowest_latency:
3882                 new_itr = 56;  /* aka 70,000 ints/sec */
3883                 break;
3884         case low_latency:
3885                 new_itr = 196; /* aka 20,000 ints/sec */
3886                 break;
3887         case bulk_latency:
3888                 new_itr = 980; /* aka 4,000 ints/sec */
3889                 break;
3890         default:
3891                 break;
3892         }
3893
3894 set_itr_now:
3895         q_vector->rx_ring->total_bytes = 0;
3896         q_vector->rx_ring->total_packets = 0;
3897         q_vector->tx_ring->total_bytes = 0;
3898         q_vector->tx_ring->total_packets = 0;
3899
3900         if (new_itr != q_vector->itr_val) {
3901                 /* this attempts to bias the interrupt rate towards Bulk
3902                  * by adding intermediate steps when interrupt rate is
3903                  * increasing */
3904                 new_itr = new_itr > q_vector->itr_val ?
3905                              max((new_itr * q_vector->itr_val) /
3906                                  (new_itr + (q_vector->itr_val >> 2)),
3907                                  new_itr) :
3908                              new_itr;
3909                 /* Don't write the value here; it resets the adapter's
3910                  * internal timer, and causes us to delay far longer than
3911                  * we should between interrupts.  Instead, we write the ITR
3912                  * value at the beginning of the next interrupt so the timing
3913                  * ends up being correct.
3914                  */
3915                 q_vector->itr_val = new_itr;
3916                 q_vector->set_itr = 1;
3917         }
3918 }
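
/*
 * Rate sketch (illustrative): itr_val is an interval, not a rate, in roughly
 * quarter-microsecond units, so interrupts/sec is approximately
 * 4,000,000 / itr_val, which is where the "aka" figures above come from:
 *
 *	56  -> ~71,000 ints/s   (lowest_latency)
 *	196 -> ~20,000 ints/s   (low_latency)
 *	980 -> ~4,000  ints/s   (bulk_latency)
 *
 * A larger value therefore means fewer interrupts; the new value is only
 * latched into q_vector->itr_val here and written to the hardware at the
 * start of the next interrupt, as noted in the comment above.
 */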
3919
3920 #define IGB_TX_FLAGS_CSUM               0x00000001
3921 #define IGB_TX_FLAGS_VLAN               0x00000002
3922 #define IGB_TX_FLAGS_TSO                0x00000004
3923 #define IGB_TX_FLAGS_IPV4               0x00000008
3924 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3925 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3926 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3927
3928 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3929                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3930 {
3931         struct e1000_adv_tx_context_desc *context_desc;
3932         unsigned int i;
3933         int err;
3934         struct igb_buffer *buffer_info;
3935         u32 info = 0, tu_cmd = 0;
3936         u32 mss_l4len_idx;
3937         u8 l4len;
3938
3939         if (skb_header_cloned(skb)) {
3940                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3941                 if (err)
3942                         return err;
3943         }
3944
3945         l4len = tcp_hdrlen(skb);
3946         *hdr_len += l4len;
3947
3948         if (skb->protocol == htons(ETH_P_IP)) {
3949                 struct iphdr *iph = ip_hdr(skb);
3950                 iph->tot_len = 0;
3951                 iph->check = 0;
3952                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3953                                                          iph->daddr, 0,
3954                                                          IPPROTO_TCP,
3955                                                          0);
3956         } else if (skb_is_gso_v6(skb)) {
3957                 ipv6_hdr(skb)->payload_len = 0;
3958                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3959                                                        &ipv6_hdr(skb)->daddr,
3960                                                        0, IPPROTO_TCP, 0);
3961         }
3962
3963         i = tx_ring->next_to_use;
3964
3965         buffer_info = &tx_ring->buffer_info[i];
3966         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3967         /* VLAN MACLEN IPLEN */
3968         if (tx_flags & IGB_TX_FLAGS_VLAN)
3969                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3970         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3971         *hdr_len += skb_network_offset(skb);
3972         info |= skb_network_header_len(skb);
3973         *hdr_len += skb_network_header_len(skb);
3974         context_desc->vlan_macip_lens = cpu_to_le32(info);
3975
3976         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3977         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3978
3979         if (skb->protocol == htons(ETH_P_IP))
3980                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3981         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3982
3983         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3984
3985         /* MSS L4LEN IDX */
3986         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3987         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3988
3989         /* For 82575, context index must be unique per ring. */
3990         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3991                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3992
3993         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3994         context_desc->seqnum_seed = 0;
3995
3996         buffer_info->time_stamp = jiffies;
3997         buffer_info->next_to_watch = i;
3998         buffer_info->dma = 0;
3999         i++;
4000         if (i == tx_ring->count)
4001                 i = 0;
4002
4003         tx_ring->next_to_use = i;
4004
4005         return true;
4006 }
4007
4008 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
4009                                    struct sk_buff *skb, u32 tx_flags)
4010 {
4011         struct e1000_adv_tx_context_desc *context_desc;
4012         struct device *dev = tx_ring->dev;
4013         struct igb_buffer *buffer_info;
4014         u32 info = 0, tu_cmd = 0;
4015         unsigned int i;
4016
4017         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4018             (tx_flags & IGB_TX_FLAGS_VLAN)) {
4019                 i = tx_ring->next_to_use;
4020                 buffer_info = &tx_ring->buffer_info[i];
4021                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4022
4023                 if (tx_flags & IGB_TX_FLAGS_VLAN)
4024                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4025
4026                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4027                 if (skb->ip_summed == CHECKSUM_PARTIAL)
4028                         info |= skb_network_header_len(skb);
4029
4030                 context_desc->vlan_macip_lens = cpu_to_le32(info);
4031
4032                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4033
4034                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
4035                         __be16 protocol;
4036
4037                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4038                                 const struct vlan_ethhdr *vhdr =
4039                                           (const struct vlan_ethhdr*)skb->data;
4040
4041                                 protocol = vhdr->h_vlan_encapsulated_proto;
4042                         } else {
4043                                 protocol = skb->protocol;
4044                         }
4045
4046                         switch (protocol) {
4047                         case cpu_to_be16(ETH_P_IP):
4048                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4049                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4050                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4051                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4052                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4053                                 break;
4054                         case cpu_to_be16(ETH_P_IPV6):
4055                                 /* XXX what about other V6 headers?? */
4056                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4057                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4058                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4059                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4060                                 break;
4061                         default:
4062                                 if (unlikely(net_ratelimit()))
4063                                         dev_warn(dev,
4064                                             "partial checksum but proto=%x!\n",
4065                                             skb->protocol);
4066                                 break;
4067                         }
4068                 }
4069
4070                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4071                 context_desc->seqnum_seed = 0;
4072                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4073                         context_desc->mss_l4len_idx =
4074                                 cpu_to_le32(tx_ring->reg_idx << 4);
4075
4076                 buffer_info->time_stamp = jiffies;
4077                 buffer_info->next_to_watch = i;
4078                 buffer_info->dma = 0;
4079
4080                 i++;
4081                 if (i == tx_ring->count)
4082                         i = 0;
4083                 tx_ring->next_to_use = i;
4084
4085                 return true;
4086         }
4087         return false;
4088 }
4089
4090 #define IGB_MAX_TXD_PWR 16
4091 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
4092
4093 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
4094                                  unsigned int first)
4095 {
4096         struct igb_buffer *buffer_info;
4097         struct device *dev = tx_ring->dev;
4098         unsigned int hlen = skb_headlen(skb);
4099         unsigned int count = 0, i;
4100         unsigned int f;
4101         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4102
4103         i = tx_ring->next_to_use;
4104
4105         buffer_info = &tx_ring->buffer_info[i];
4106         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4107         buffer_info->length = hlen;
4108         /* set time_stamp *before* dma to help avoid a possible race */
4109         buffer_info->time_stamp = jiffies;
4110         buffer_info->next_to_watch = i;
4111         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4112                                           DMA_TO_DEVICE);
4113         if (dma_mapping_error(dev, buffer_info->dma))
4114                 goto dma_error;
4115
4116         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4117                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4118                 unsigned int len = frag->size;
4119
4120                 count++;
4121                 i++;
4122                 if (i == tx_ring->count)
4123                         i = 0;
4124
4125                 buffer_info = &tx_ring->buffer_info[i];
4126                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4127                 buffer_info->length = len;
4128                 buffer_info->time_stamp = jiffies;
4129                 buffer_info->next_to_watch = i;
4130                 buffer_info->mapped_as_page = true;
4131                 buffer_info->dma = dma_map_page(dev,
4132                                                 frag->page,
4133                                                 frag->page_offset,
4134                                                 len,
4135                                                 DMA_TO_DEVICE);
4136                 if (dma_mapping_error(dev, buffer_info->dma))
4137                         goto dma_error;
4138
4139         }
4140
4141         tx_ring->buffer_info[i].skb = skb;
4142         tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4143         /* multiply data chunks by size of headers */
4144         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4145         tx_ring->buffer_info[i].gso_segs = gso_segs;
4146         tx_ring->buffer_info[first].next_to_watch = i;
4147
4148         return ++count;
4149
4150 dma_error:
4151         dev_err(dev, "TX DMA map failed\n");
4152
4153         /* clear timestamp and dma mappings for failed buffer_info mapping */
4154         buffer_info->dma = 0;
4155         buffer_info->time_stamp = 0;
4156         buffer_info->length = 0;
4157         buffer_info->next_to_watch = 0;
4158         buffer_info->mapped_as_page = false;
4159
4160         /* clear timestamp and dma mappings for remaining portion of packet */
4161         while (count--) {
4162                 if (i == 0)
4163                         i = tx_ring->count;
4164                 i--;
4165                 buffer_info = &tx_ring->buffer_info[i];
4166                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4167         }
4168
4169         return 0;
4170 }
4171
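/**
 * igb_tx_queue_adv - post advanced Tx descriptors for the mapped buffers
 * @tx_ring: ring the descriptors are placed on
 * @tx_flags: IGB_TX_FLAGS_* bits selecting VLAN/TSO/checksum/timestamp options
 * @count: number of buffers mapped by igb_tx_map_adv
 * @paylen: total length of the skb
 * @hdr_len: header length reported by the TSO setup (0 otherwise)
 *
 * Builds one data descriptor per mapped buffer, ORs the final command bits
 * (IGB_ADVTXD_DCMD) into the last descriptor, then advances next_to_use and
 * writes the tail register to hand the new descriptors to hardware.
 **/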
4172 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4173                                     u32 tx_flags, int count, u32 paylen,
4174                                     u8 hdr_len)
4175 {
4176         union e1000_adv_tx_desc *tx_desc;
4177         struct igb_buffer *buffer_info;
4178         u32 olinfo_status = 0, cmd_type_len;
4179         unsigned int i = tx_ring->next_to_use;
4180
4181         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4182                         E1000_ADVTXD_DCMD_DEXT);
4183
4184         if (tx_flags & IGB_TX_FLAGS_VLAN)
4185                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4186
4187         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4188                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4189
4190         if (tx_flags & IGB_TX_FLAGS_TSO) {
4191                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4192
4193                 /* insert tcp checksum */
4194                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4195
4196                 /* insert ip checksum */
4197                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4198                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4199
4200         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4201                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4202         }
4203
4204         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4205             (tx_flags & (IGB_TX_FLAGS_CSUM |
4206                          IGB_TX_FLAGS_TSO |
4207                          IGB_TX_FLAGS_VLAN)))
4208                 olinfo_status |= tx_ring->reg_idx << 4;
4209
4210         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4211
4212         do {
4213                 buffer_info = &tx_ring->buffer_info[i];
4214                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4215                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4216                 tx_desc->read.cmd_type_len =
4217                         cpu_to_le32(cmd_type_len | buffer_info->length);
4218                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4219                 count--;
4220                 i++;
4221                 if (i == tx_ring->count)
4222                         i = 0;
4223         } while (count > 0);
4224
4225         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4226         /* Force memory writes to complete before letting h/w
4227          * know there are new descriptors to fetch.  (Only
4228          * applicable for weak-ordered memory model archs,
4229          * such as IA-64). */
4230         wmb();
4231
4232         tx_ring->next_to_use = i;
4233         writel(i, tx_ring->tail);
4234         /* we need this if more than one processor can write to our tail
4235          * at a time, it synchronizes IO on IA64/Altix systems */
4236         mmiowb();
4237 }
4238
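/**
 * __igb_maybe_stop_tx - stop the Tx subqueue when descriptors run low
 * @tx_ring: ring that is about to run out of descriptors
 * @size: number of descriptors the next transmit needs
 *
 * Stops the queue and re-checks after a memory barrier in case another CPU
 * freed descriptors in the meantime; if room reappeared the queue is woken
 * again and the restart counter is bumped.  Returns -EBUSY if the queue
 * stays stopped, 0 otherwise.
 **/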
4239 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4240 {
4241         struct net_device *netdev = tx_ring->netdev;
4242
4243         netif_stop_subqueue(netdev, tx_ring->queue_index);
4244
4245         /* Herbert's original patch had:
4246          *  smp_mb__after_netif_stop_queue();
4247          * but since that doesn't exist yet, just open code it. */
4248         smp_mb();
4249
4250         /* We need to check again in case another CPU has just
4251          * made room available. */
4252         if (igb_desc_unused(tx_ring) < size)
4253                 return -EBUSY;
4254
4255         /* A reprieve! */
4256         netif_wake_subqueue(netdev, tx_ring->queue_index);
4257
4258         u64_stats_update_begin(&tx_ring->tx_syncp2);
4259         tx_ring->tx_stats.restart_queue2++;
4260         u64_stats_update_end(&tx_ring->tx_syncp2);
4261
4262         return 0;
4263 }
4264
4265 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4266 {
4267         if (igb_desc_unused(tx_ring) >= size)
4268                 return 0;
4269         return __igb_maybe_stop_tx(tx_ring, size);
4270 }
4271
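/**
 * igb_xmit_frame_ring_adv - transmit one skb on a specific ring
 * @skb: buffer to transmit
 * @tx_ring: ring to use
 *
 * Reserves enough descriptors (or returns NETDEV_TX_BUSY), latches hardware
 * timestamp and VLAN flags, sets up a TSO or checksum offload context as
 * needed, then maps the buffers and posts the descriptors.  If mapping
 * fails the skb is dropped and the ring is rewound to its previous state.
 **/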
4272 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4273                                     struct igb_ring *tx_ring)
4274 {
4275         int tso = 0, count;
4276         u32 tx_flags = 0;
4277         u16 first;
4278         u8 hdr_len = 0;
4279
4280         /* need: 1 descriptor per page,
4281          *       + 2 desc gap to keep tail from touching head,
4282          *       + 1 desc for skb->data,
4283          *       + 1 desc for context descriptor,
4284          * otherwise try next time */
4285         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4286                 /* this is a hard error */
4287                 return NETDEV_TX_BUSY;
4288         }
4289
4290         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4291                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4292                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4293         }
4294
4295         if (vlan_tx_tag_present(skb)) {
4296                 tx_flags |= IGB_TX_FLAGS_VLAN;
4297                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4298         }
4299
4300         if (skb->protocol == htons(ETH_P_IP))
4301                 tx_flags |= IGB_TX_FLAGS_IPV4;
4302
4303         first = tx_ring->next_to_use;
4304         if (skb_is_gso(skb)) {
4305                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4306
4307                 if (tso < 0) {
4308                         dev_kfree_skb_any(skb);
4309                         return NETDEV_TX_OK;
4310                 }
4311         }
4312
4313         if (tso)
4314                 tx_flags |= IGB_TX_FLAGS_TSO;
4315         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4316                  (skb->ip_summed == CHECKSUM_PARTIAL))
4317                 tx_flags |= IGB_TX_FLAGS_CSUM;
4318
4319         /*
4320          * count reflects descriptors mapped, if 0 or less then mapping error
4321          * has occurred and we need to rewind the descriptor queue
4322          */
4323         count = igb_tx_map_adv(tx_ring, skb, first);
4324         if (!count) {
4325                 dev_kfree_skb_any(skb);
4326                 tx_ring->buffer_info[first].time_stamp = 0;
4327                 tx_ring->next_to_use = first;
4328                 return NETDEV_TX_OK;
4329         }
4330
4331         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4332
4333         /* Make sure there is space in the ring for the next send. */
4334         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4335
4336         return NETDEV_TX_OK;
4337 }
4338
4339 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4340                                       struct net_device *netdev)
4341 {
4342         struct igb_adapter *adapter = netdev_priv(netdev);
4343         struct igb_ring *tx_ring;
4344         int r_idx = 0;
4345
4346         if (test_bit(__IGB_DOWN, &adapter->state)) {
4347                 dev_kfree_skb_any(skb);
4348                 return NETDEV_TX_OK;
4349         }
4350
4351         if (skb->len <= 0) {
4352                 dev_kfree_skb_any(skb);
4353                 return NETDEV_TX_OK;
4354         }
4355
4356         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4357         tx_ring = adapter->multi_tx_table[r_idx];
4358
4359         /* This goes back to the question of how to logically map a tx queue
4360          * to a flow.  Right now, performance is impacted slightly negatively
4361          * if using multiple tx queues.  If the stack breaks away from a
4362          * single qdisc implementation, we can look at this again. */
4363         return igb_xmit_frame_ring_adv(skb, tx_ring);
4364 }
4365
4366 /**
4367  * igb_tx_timeout - Respond to a Tx Hang
4368  * @netdev: network interface device structure
4369  **/
4370 static void igb_tx_timeout(struct net_device *netdev)
4371 {
4372         struct igb_adapter *adapter = netdev_priv(netdev);
4373         struct e1000_hw *hw = &adapter->hw;
4374
4375         /* Do the reset outside of interrupt context */
4376         adapter->tx_timeout_count++;
4377
4378         if (hw->mac.type == e1000_82580)
4379                 hw->dev_spec._82575.global_device_reset = true;
4380
4381         schedule_work(&adapter->reset_task);
4382         wr32(E1000_EICS,
4383              (adapter->eims_enable_mask & ~adapter->eims_other));
4384 }
4385
4386 static void igb_reset_task(struct work_struct *work)
4387 {
4388         struct igb_adapter *adapter;
4389         adapter = container_of(work, struct igb_adapter, reset_task);
4390
4391         igb_dump(adapter);
4392         netdev_err(adapter->netdev, "Reset adapter\n");
4393         igb_reinit_locked(adapter);
4394 }
4395
4396 /**
4397  * igb_get_stats64 - Get System Network Statistics
4398  * @netdev: network interface device structure
4399  * @stats: rtnl_link_stats64 pointer
4400  *
4401  **/
4402 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4403                                                  struct rtnl_link_stats64 *stats)
4404 {
4405         struct igb_adapter *adapter = netdev_priv(netdev);
4406
4407         spin_lock(&adapter->stats64_lock);
4408         igb_update_stats(adapter, &adapter->stats64);
4409         memcpy(stats, &adapter->stats64, sizeof(*stats));
4410         spin_unlock(&adapter->stats64_lock);
4411
4412         return stats;
4413 }
4414
4415 /**
4416  * igb_change_mtu - Change the Maximum Transmission Unit
4417  * @netdev: network interface device structure
4418  * @new_mtu: new value for maximum frame size
4419  *
4420  * Returns 0 on success, negative on failure
4421  **/
4422 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4423 {
4424         struct igb_adapter *adapter = netdev_priv(netdev);
4425         struct pci_dev *pdev = adapter->pdev;
4426         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4427         u32 rx_buffer_len, i;
4428
4429         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4430                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4431                 return -EINVAL;
4432         }
4433
4434         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4435                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4436                 return -EINVAL;
4437         }
4438
4439         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4440                 msleep(1);
4441
4442         /* igb_down has a dependency on max_frame_size */
4443         adapter->max_frame_size = max_frame;
4444
4445         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4446          * means we reserve 2 more, this pushes us to allocate from the next
4447          * larger slab size.
4448          * i.e. RXBUFFER_2048 --> size-4096 slab
4449          */
4450
4451         if (adapter->hw.mac.type == e1000_82580)
4452                 max_frame += IGB_TS_HDR_LEN;
4453
4454         if (max_frame <= IGB_RXBUFFER_1024)
4455                 rx_buffer_len = IGB_RXBUFFER_1024;
4456         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4457                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4458         else
4459                 rx_buffer_len = IGB_RXBUFFER_128;
4460
4461         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4462              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4463                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4464
4465         if ((adapter->hw.mac.type == e1000_82580) &&
4466             (rx_buffer_len == IGB_RXBUFFER_128))
4467                 rx_buffer_len += IGB_RXBUFFER_64;
4468
4469         if (netif_running(netdev))
4470                 igb_down(adapter);
4471
4472         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4473                  netdev->mtu, new_mtu);
4474         netdev->mtu = new_mtu;
4475
4476         for (i = 0; i < adapter->num_rx_queues; i++)
4477                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4478
4479         if (netif_running(netdev))
4480                 igb_up(adapter);
4481         else
4482                 igb_reset(adapter);
4483
4484         clear_bit(__IGB_RESETTING, &adapter->state);
4485
4486         return 0;
4487 }
4488
4489 /**
4490  * igb_update_stats - Update the board statistics counters
4491  * @adapter: board private structure
4492  * @net_stats: rtnl_link_stats64 pointer to populate
4493  **/
4494 void igb_update_stats(struct igb_adapter *adapter,
4495                       struct rtnl_link_stats64 *net_stats)
4496 {
4497         struct e1000_hw *hw = &adapter->hw;
4498         struct pci_dev *pdev = adapter->pdev;
4499         u32 reg, mpc;
4500         u16 phy_tmp;
4501         int i;
4502         u64 bytes, packets;
4503         unsigned int start;
4504         u64 _bytes, _packets;
4505
4506 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4507
4508         /*
4509          * Prevent stats update while adapter is being reset, or if the pci
4510          * connection is down.
4511          */
4512         if (adapter->link_speed == 0)
4513                 return;
4514         if (pci_channel_offline(pdev))
4515                 return;
4516
4517         bytes = 0;
4518         packets = 0;
4519         for (i = 0; i < adapter->num_rx_queues; i++) {
4520                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4521                 struct igb_ring *ring = adapter->rx_ring[i];
4522
4523                 ring->rx_stats.drops += rqdpc_tmp;
4524                 net_stats->rx_fifo_errors += rqdpc_tmp;
4525
4526                 do {
4527                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4528                         _bytes = ring->rx_stats.bytes;
4529                         _packets = ring->rx_stats.packets;
4530                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4531                 bytes += _bytes;
4532                 packets += _packets;
4533         }
4534
4535         net_stats->rx_bytes = bytes;
4536         net_stats->rx_packets = packets;
4537
4538         bytes = 0;
4539         packets = 0;
4540         for (i = 0; i < adapter->num_tx_queues; i++) {
4541                 struct igb_ring *ring = adapter->tx_ring[i];
4542                 do {
4543                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4544                         _bytes = ring->tx_stats.bytes;
4545                         _packets = ring->tx_stats.packets;
4546                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4547                 bytes += _bytes;
4548                 packets += _packets;
4549         }
4550         net_stats->tx_bytes = bytes;
4551         net_stats->tx_packets = packets;
4552
4553         /* read stats registers */
4554         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4555         adapter->stats.gprc += rd32(E1000_GPRC);
4556         adapter->stats.gorc += rd32(E1000_GORCL);
4557         rd32(E1000_GORCH); /* clear GORCL */
4558         adapter->stats.bprc += rd32(E1000_BPRC);
4559         adapter->stats.mprc += rd32(E1000_MPRC);
4560         adapter->stats.roc += rd32(E1000_ROC);
4561
4562         adapter->stats.prc64 += rd32(E1000_PRC64);
4563         adapter->stats.prc127 += rd32(E1000_PRC127);
4564         adapter->stats.prc255 += rd32(E1000_PRC255);
4565         adapter->stats.prc511 += rd32(E1000_PRC511);
4566         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4567         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4568         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4569         adapter->stats.sec += rd32(E1000_SEC);
4570
4571         mpc = rd32(E1000_MPC);
4572         adapter->stats.mpc += mpc;
4573         net_stats->rx_fifo_errors += mpc;
4574         adapter->stats.scc += rd32(E1000_SCC);
4575         adapter->stats.ecol += rd32(E1000_ECOL);
4576         adapter->stats.mcc += rd32(E1000_MCC);
4577         adapter->stats.latecol += rd32(E1000_LATECOL);
4578         adapter->stats.dc += rd32(E1000_DC);
4579         adapter->stats.rlec += rd32(E1000_RLEC);
4580         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4581         adapter->stats.xontxc += rd32(E1000_XONTXC);
4582         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4583         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4584         adapter->stats.fcruc += rd32(E1000_FCRUC);
4585         adapter->stats.gptc += rd32(E1000_GPTC);
4586         adapter->stats.gotc += rd32(E1000_GOTCL);
4587         rd32(E1000_GOTCH); /* clear GOTCL */
4588         adapter->stats.rnbc += rd32(E1000_RNBC);
4589         adapter->stats.ruc += rd32(E1000_RUC);
4590         adapter->stats.rfc += rd32(E1000_RFC);
4591         adapter->stats.rjc += rd32(E1000_RJC);
4592         adapter->stats.tor += rd32(E1000_TORH);
4593         adapter->stats.tot += rd32(E1000_TOTH);
4594         adapter->stats.tpr += rd32(E1000_TPR);
4595
4596         adapter->stats.ptc64 += rd32(E1000_PTC64);
4597         adapter->stats.ptc127 += rd32(E1000_PTC127);
4598         adapter->stats.ptc255 += rd32(E1000_PTC255);
4599         adapter->stats.ptc511 += rd32(E1000_PTC511);
4600         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4601         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4602
4603         adapter->stats.mptc += rd32(E1000_MPTC);
4604         adapter->stats.bptc += rd32(E1000_BPTC);
4605
4606         adapter->stats.tpt += rd32(E1000_TPT);
4607         adapter->stats.colc += rd32(E1000_COLC);
4608
4609         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4610         /* read internal phy specific stats */
4611         reg = rd32(E1000_CTRL_EXT);
4612         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4613                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4614                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4615         }
4616
4617         adapter->stats.tsctc += rd32(E1000_TSCTC);
4618         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4619
4620         adapter->stats.iac += rd32(E1000_IAC);
4621         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4622         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4623         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4624         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4625         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4626         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4627         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4628         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4629
4630         /* Fill out the OS statistics structure */
4631         net_stats->multicast = adapter->stats.mprc;
4632         net_stats->collisions = adapter->stats.colc;
4633
4634         /* Rx Errors */
4635
4636         /* RLEC on some newer hardware can be incorrect so build
4637          * our own version based on RUC and ROC */
4638         net_stats->rx_errors = adapter->stats.rxerrc +
4639                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4640                 adapter->stats.ruc + adapter->stats.roc +
4641                 adapter->stats.cexterr;
4642         net_stats->rx_length_errors = adapter->stats.ruc +
4643                                       adapter->stats.roc;
4644         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4645         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4646         net_stats->rx_missed_errors = adapter->stats.mpc;
4647
4648         /* Tx Errors */
4649         net_stats->tx_errors = adapter->stats.ecol +
4650                                adapter->stats.latecol;
4651         net_stats->tx_aborted_errors = adapter->stats.ecol;
4652         net_stats->tx_window_errors = adapter->stats.latecol;
4653         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4654
4655         /* Tx Dropped needs to be maintained elsewhere */
4656
4657         /* Phy Stats */
4658         if (hw->phy.media_type == e1000_media_type_copper) {
4659                 if ((adapter->link_speed == SPEED_1000) &&
4660                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4661                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4662                         adapter->phy_stats.idle_errors += phy_tmp;
4663                 }
4664         }
4665
4666         /* Management Stats */
4667         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4668         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4669         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4670
4671         /* OS2BMC Stats */
4672         reg = rd32(E1000_MANC);
4673         if (reg & E1000_MANC_EN_BMC2OS) {
4674                 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4675                 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4676                 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4677                 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4678         }
4679 }
4680
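/**
 * igb_msix_other - handle the "other causes" MSI-X vector
 * @irq: interrupt number
 * @data: pointer to our adapter
 *
 * Services the non-queue interrupt causes: device reset requests, DMA
 * out-of-sync events (which can also indicate VF spoofing in IOV mode),
 * VF mailbox messages and link status changes, then re-arms those causes.
 **/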
4681 static irqreturn_t igb_msix_other(int irq, void *data)
4682 {
4683         struct igb_adapter *adapter = data;
4684         struct e1000_hw *hw = &adapter->hw;
4685         u32 icr = rd32(E1000_ICR);
4686         /* reading ICR causes bit 31 of EICR to be cleared */
4687
4688         if (icr & E1000_ICR_DRSTA)
4689                 schedule_work(&adapter->reset_task);
4690
4691         if (icr & E1000_ICR_DOUTSYNC) {
4692                 /* HW is reporting DMA is out of sync */
4693                 adapter->stats.doosync++;
4694                 /* The DMA Out of Sync is also an indication of a spoof event
4695                  * in IOV mode. Check the Wrong VM Behavior register to
4696                  * see if it is really a spoof event. */
4697                 igb_check_wvbr(adapter);
4698         }
4699
4700         /* Check for a mailbox event */
4701         if (icr & E1000_ICR_VMMB)
4702                 igb_msg_task(adapter);
4703
4704         if (icr & E1000_ICR_LSC) {
4705                 hw->mac.get_link_status = 1;
4706                 /* guard against interrupt when we're going down */
4707                 if (!test_bit(__IGB_DOWN, &adapter->state))
4708                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4709         }
4710
4711         if (adapter->vfs_allocated_count)
4712                 wr32(E1000_IMS, E1000_IMS_LSC |
4713                                 E1000_IMS_VMMB |
4714                                 E1000_IMS_DOUTSYNC);
4715         else
4716                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4717         wr32(E1000_EIMS, adapter->eims_other);
4718
4719         return IRQ_HANDLED;
4720 }
4721
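/**
 * igb_write_itr - program a vector's interrupt throttle register
 * @q_vector: vector whose pending ITR value should be written
 *
 * Writes the ITR value calculated from the previous interrupt, substituting
 * a small non-zero value if throttling would otherwise be disabled.  The
 * 82575 expects the interval replicated in the upper 16 bits; later MACs
 * take an additional control bit instead.  No-op if no update is pending.
 **/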
4722 static void igb_write_itr(struct igb_q_vector *q_vector)
4723 {
4724         struct igb_adapter *adapter = q_vector->adapter;
4725         u32 itr_val = q_vector->itr_val & 0x7FFC;
4726
4727         if (!q_vector->set_itr)
4728                 return;
4729
4730         if (!itr_val)
4731                 itr_val = 0x4;
4732
4733         if (adapter->hw.mac.type == e1000_82575)
4734                 itr_val |= itr_val << 16;
4735         else
4736                 itr_val |= 0x8000000;
4737
4738         writel(itr_val, q_vector->itr_register);
4739         q_vector->set_itr = 0;
4740 }
4741
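/**
 * igb_msix_ring - per-queue MSI-X interrupt handler
 * @irq: interrupt number
 * @data: pointer to the q_vector that raised the interrupt
 *
 * Writes the ITR value calculated for the previous interrupt and schedules
 * NAPI, which performs the actual Tx/Rx cleanup.
 **/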
4742 static irqreturn_t igb_msix_ring(int irq, void *data)
4743 {
4744         struct igb_q_vector *q_vector = data;
4745
4746         /* Write the ITR value calculated from the previous interrupt. */
4747         igb_write_itr(q_vector);
4748
4749         napi_schedule(&q_vector->napi);
4750
4751         return IRQ_HANDLED;
4752 }
4753
4754 #ifdef CONFIG_IGB_DCA
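/**
 * igb_update_dca - steer DCA writes at the CPU running this vector
 * @q_vector: vector whose rings should be re-targeted
 *
 * Rewrites the DCA Tx/Rx control registers so descriptor (and, for Rx,
 * header and payload) writes are directed toward the cache of the CPU this
 * vector is currently running on.  Skipped when the CPU has not changed.
 **/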
4755 static void igb_update_dca(struct igb_q_vector *q_vector)
4756 {
4757         struct igb_adapter *adapter = q_vector->adapter;
4758         struct e1000_hw *hw = &adapter->hw;
4759         int cpu = get_cpu();
4760
4761         if (q_vector->cpu == cpu)
4762                 goto out_no_update;
4763
4764         if (q_vector->tx_ring) {
4765                 int q = q_vector->tx_ring->reg_idx;
4766                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4767                 if (hw->mac.type == e1000_82575) {
4768                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4769                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4770                 } else {
4771                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4772                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4773                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4774                 }
4775                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4776                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4777         }
4778         if (q_vector->rx_ring) {
4779                 int q = q_vector->rx_ring->reg_idx;
4780                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4781                 if (hw->mac.type == e1000_82575) {
4782                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4783                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4784                 } else {
4785                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4786                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4787                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4788                 }
4789                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4790                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4791                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4792                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4793         }
4794         q_vector->cpu = cpu;
4795 out_no_update:
4796         put_cpu();
4797 }
4798
4799 static void igb_setup_dca(struct igb_adapter *adapter)
4800 {
4801         struct e1000_hw *hw = &adapter->hw;
4802         int i;
4803
4804         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4805                 return;
4806
4807         /* Always use CB2 mode, difference is masked in the CB driver. */
4808         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4809
4810         for (i = 0; i < adapter->num_q_vectors; i++) {
4811                 adapter->q_vector[i]->cpu = -1;
4812                 igb_update_dca(adapter->q_vector[i]);
4813         }
4814 }
4815
4816 static int __igb_notify_dca(struct device *dev, void *data)
4817 {
4818         struct net_device *netdev = dev_get_drvdata(dev);
4819         struct igb_adapter *adapter = netdev_priv(netdev);
4820         struct pci_dev *pdev = adapter->pdev;
4821         struct e1000_hw *hw = &adapter->hw;
4822         unsigned long event = *(unsigned long *)data;
4823
4824         switch (event) {
4825         case DCA_PROVIDER_ADD:
4826                 /* if already enabled, don't do it again */
4827                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4828                         break;
4829                 if (dca_add_requester(dev) == 0) {
4830                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4831                         dev_info(&pdev->dev, "DCA enabled\n");
4832                         igb_setup_dca(adapter);
4833                         break;
4834                 }
4835                 /* Fall Through since DCA is disabled. */
4836         case DCA_PROVIDER_REMOVE:
4837                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4838                         /* without this a class_device is left
4839                          * hanging around in the sysfs model */
4840                         dca_remove_requester(dev);
4841                         dev_info(&pdev->dev, "DCA disabled\n");
4842                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4843                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4844                 }
4845                 break;
4846         }
4847
4848         return 0;
4849 }
4850
4851 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4852                           void *p)
4853 {
4854         int ret_val;
4855
4856         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4857                                          __igb_notify_dca);
4858
4859         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4860 }
4861 #endif /* CONFIG_IGB_DCA */
4862
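/**
 * igb_ping_all_vfs - post a control message to every VF mailbox
 * @adapter: board private structure
 *
 * Sends a PF control message to each allocated VF, adding the clear-to-send
 * bit for VFs that have already completed their reset handshake.
 **/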
4863 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4864 {
4865         struct e1000_hw *hw = &adapter->hw;
4866         u32 ping;
4867         int i;
4868
4869         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4870                 ping = E1000_PF_CONTROL_MSG;
4871                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4872                         ping |= E1000_VT_MSGTYPE_CTS;
4873                 igb_write_mbx(hw, &ping, 1, i);
4874         }
4875 }
4876
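/**
 * igb_set_vf_promisc - handle a VF request to change its promiscuous modes
 * @adapter: board private structure
 * @msgbuf: mailbox message received from the VF
 * @vf: VF that sent the request
 *
 * Enables multicast promiscuous mode when requested; otherwise falls back
 * to the stored multicast hash list (or promiscuous mode if the list is too
 * long to fit).  Any unsupported flag bits left in the message yield -EINVAL.
 **/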
4877 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4878 {
4879         struct e1000_hw *hw = &adapter->hw;
4880         u32 vmolr = rd32(E1000_VMOLR(vf));
4881         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4882
4883         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4884                             IGB_VF_FLAG_MULTI_PROMISC);
4885         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4886
4887         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4888                 vmolr |= E1000_VMOLR_MPME;
4889                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4890                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4891         } else {
4892                 /*
4893                  * if we have hashes and we are clearing a multicast promisc
4894                  * flag we need to write the hashes to the MTA as this step
4895                  * was previously skipped
4896                  */
4897                 if (vf_data->num_vf_mc_hashes > 30) {
4898                         vmolr |= E1000_VMOLR_MPME;
4899                 } else if (vf_data->num_vf_mc_hashes) {
4900                         int j;
4901                         vmolr |= E1000_VMOLR_ROMPE;
4902                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4903                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4904                 }
4905         }
4906
4907         wr32(E1000_VMOLR(vf), vmolr);
4908
4909         /* there are flags left unprocessed, likely not supported */
4910         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4911                 return -EINVAL;
4912
4913         return 0;
4914
4915 }
4916
4917 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4918                                   u32 *msgbuf, u32 vf)
4919 {
4920         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4921         u16 *hash_list = (u16 *)&msgbuf[1];
4922         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4923         int i;
4924
4925         /* salt away the number of multicast addresses assigned
4926          * to this VF for later use to restore when the PF multicast
4927          * list changes
4928          */
4929         vf_data->num_vf_mc_hashes = n;
4930
4931         /* only up to 30 hash values supported */
4932         if (n > 30)
4933                 n = 30;
4934
4935         /* store the hashes for later use */
4936         for (i = 0; i < n; i++)
4937                 vf_data->vf_mc_hashes[i] = hash_list[i];
4938
4939         /* Flush and reset the mta with the new values */
4940         igb_set_rx_mode(adapter->netdev);
4941
4942         return 0;
4943 }
4944
4945 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4946 {
4947         struct e1000_hw *hw = &adapter->hw;
4948         struct vf_data_storage *vf_data;
4949         int i, j;
4950
4951         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4952                 u32 vmolr = rd32(E1000_VMOLR(i));
4953                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4954
4955                 vf_data = &adapter->vf_data[i];
4956
4957                 if ((vf_data->num_vf_mc_hashes > 30) ||
4958                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4959                         vmolr |= E1000_VMOLR_MPME;
4960                 } else if (vf_data->num_vf_mc_hashes) {
4961                         vmolr |= E1000_VMOLR_ROMPE;
4962                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4963                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4964                 }
4965                 wr32(E1000_VMOLR(i), vmolr);
4966         }
4967 }
4968
4969 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4970 {
4971         struct e1000_hw *hw = &adapter->hw;
4972         u32 pool_mask, reg, vid;
4973         int i;
4974
4975         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4976
4977         /* Find the vlan filter for this id */
4978         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4979                 reg = rd32(E1000_VLVF(i));
4980
4981                 /* remove the vf from the pool */
4982                 reg &= ~pool_mask;
4983
4984                 /* if pool is empty then remove entry from vfta */
4985                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4986                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4987                         vid = reg & E1000_VLVF_VLANID_MASK;
4988                         reg = 0;
4989                         igb_vfta_set(hw, vid, false);
4990                 }
4991
4992                 wr32(E1000_VLVF(i), reg);
4993         }
4994
4995         adapter->vf_data[vf].vlans_enabled = 0;
4996 }
4997
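/**
 * igb_vlvf_set - add or remove a pool from a VLAN pool filter entry
 * @adapter: board private structure
 * @vid: VLAN id being added or removed
 * @add: true to add the pool to the filter, false to remove it
 * @vf: VF (or PF pool) index
 *
 * Finds, or on add allocates, the VLVF entry for @vid, updates its pool
 * membership, keeps the shared VFTA bit in sync, and grows or shrinks the
 * VF's maximum packet length to account for the VLAN tag.  Only meaningful
 * on 82576 and newer hardware with VFs allocated.
 **/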
4998 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4999 {
5000         struct e1000_hw *hw = &adapter->hw;
5001         u32 reg, i;
5002
5003         /* The vlvf table only exists on 82576 hardware and newer */
5004         if (hw->mac.type < e1000_82576)
5005                 return -1;
5006
5007         /* we only need to do this if VMDq is enabled */
5008         if (!adapter->vfs_allocated_count)
5009                 return -1;
5010
5011         /* Find the vlan filter for this id */
5012         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5013                 reg = rd32(E1000_VLVF(i));
5014                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5015                     vid == (reg & E1000_VLVF_VLANID_MASK))
5016                         break;
5017         }
5018
5019         if (add) {
5020                 if (i == E1000_VLVF_ARRAY_SIZE) {
5021                         /* Did not find a matching VLAN ID entry that was
5022                          * enabled.  Search for a free filter entry, i.e.
5023                          * one without the enable bit set
5024                          */
5025                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5026                                 reg = rd32(E1000_VLVF(i));
5027                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5028                                         break;
5029                         }
5030                 }
5031                 if (i < E1000_VLVF_ARRAY_SIZE) {
5032                         /* Found an enabled/available entry */
5033                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5034
5035                         /* if !enabled we need to set this up in vfta */
5036                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5037                                 /* add VID to filter table */
5038                                 igb_vfta_set(hw, vid, true);
5039                                 reg |= E1000_VLVF_VLANID_ENABLE;
5040                         }
5041                         reg &= ~E1000_VLVF_VLANID_MASK;
5042                         reg |= vid;
5043                         wr32(E1000_VLVF(i), reg);
5044
5045                         /* do not modify RLPML for PF devices */
5046                         if (vf >= adapter->vfs_allocated_count)
5047                                 return 0;
5048
5049                         if (!adapter->vf_data[vf].vlans_enabled) {
5050                                 u32 size;
5051                                 reg = rd32(E1000_VMOLR(vf));
5052                                 size = reg & E1000_VMOLR_RLPML_MASK;
5053                                 size += 4;
5054                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5055                                 reg |= size;
5056                                 wr32(E1000_VMOLR(vf), reg);
5057                         }
5058
5059                         adapter->vf_data[vf].vlans_enabled++;
5060                         return 0;
5061                 }
5062         } else {
5063                 if (i < E1000_VLVF_ARRAY_SIZE) {
5064                         /* remove vf from the pool */
5065                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5066                         /* if pool is empty then remove entry from vfta */
5067                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5068                                 reg = 0;
5069                                 igb_vfta_set(hw, vid, false);
5070                         }
5071                         wr32(E1000_VLVF(i), reg);
5072
5073                         /* do not modify RLPML for PF devices */
5074                         if (vf >= adapter->vfs_allocated_count)
5075                                 return 0;
5076
5077                         adapter->vf_data[vf].vlans_enabled--;
5078                         if (!adapter->vf_data[vf].vlans_enabled) {
5079                                 u32 size;
5080                                 reg = rd32(E1000_VMOLR(vf));
5081                                 size = reg & E1000_VMOLR_RLPML_MASK;
5082                                 size -= 4;
5083                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5084                                 reg |= size;
5085                                 wr32(E1000_VMOLR(vf), reg);
5086                         }
5087                 }
5088         }
5089         return 0;
5090 }
5091
5092 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5093 {
5094         struct e1000_hw *hw = &adapter->hw;
5095
5096         if (vid)
5097                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5098         else
5099                 wr32(E1000_VMVIR(vf), 0);
5100 }
5101
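/**
 * igb_ndo_set_vf_vlan - set an administrative VLAN and QoS for a VF
 * @netdev: network interface device structure
 * @vf: VF to configure
 * @vlan: VLAN id to enforce (0, together with qos 0, clears the setting)
 * @qos: 802.1p priority to insert
 *
 * Reached via "ip link set ... vf N vlan V qos Q".  Programs the VLAN pool
 * filter and default VLAN insertion for the VF, or restores the VF to
 * untagged operation when both vlan and qos are zero.
 **/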
5102 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5103                                int vf, u16 vlan, u8 qos)
5104 {
5105         int err = 0;
5106         struct igb_adapter *adapter = netdev_priv(netdev);
5107
5108         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5109                 return -EINVAL;
5110         if (vlan || qos) {
5111                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5112                 if (err)
5113                         goto out;
5114                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5115                 igb_set_vmolr(adapter, vf, !vlan);
5116                 adapter->vf_data[vf].pf_vlan = vlan;
5117                 adapter->vf_data[vf].pf_qos = qos;
5118                 dev_info(&adapter->pdev->dev,
5119                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5120                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5121                         dev_warn(&adapter->pdev->dev,
5122                                  "The VF VLAN has been set,"
5123                                  " but the PF device is not up.\n");
5124                         dev_warn(&adapter->pdev->dev,
5125                                  "Bring the PF device up before"
5126                                  " attempting to use the VF device.\n");
5127                 }
5128         } else {
5129                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5130                                    false, vf);
5131                 igb_set_vmvir(adapter, vlan, vf);
5132                 igb_set_vmolr(adapter, vf, true);
5133                 adapter->vf_data[vf].pf_vlan = 0;
5134                 adapter->vf_data[vf].pf_qos = 0;
5135         }
5136 out:
5137         return err;
5138 }
5139
5140 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5141 {
5142         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5143         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5144
5145         return igb_vlvf_set(adapter, vid, add, vf);
5146 }
5147
5148 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5149 {
5150         /* clear flags - except flag that indicates PF has set the MAC */
5151         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5152         adapter->vf_data[vf].last_nack = jiffies;
5153
5154         /* reset offloads to defaults */
5155         igb_set_vmolr(adapter, vf, true);
5156
5157         /* reset vlans for device */
5158         igb_clear_vf_vfta(adapter, vf);
5159         if (adapter->vf_data[vf].pf_vlan)
5160                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5161                                     adapter->vf_data[vf].pf_vlan,
5162                                     adapter->vf_data[vf].pf_qos);
5163         else
5164                 igb_clear_vf_vfta(adapter, vf);
5165
5166         /* reset multicast table array for vf */
5167         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5168
5169         /* Flush and reset the mta with the new values */
5170         igb_set_rx_mode(adapter->netdev);
5171 }
5172
5173 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5174 {
5175         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5176
5177         /* generate a new mac address as we were hotplug removed/added */
5178         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5179                 random_ether_addr(vf_mac);
5180
5181         /* process remaining reset events */
5182         igb_vf_reset(adapter, vf);
5183 }
5184
5185 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5186 {
5187         struct e1000_hw *hw = &adapter->hw;
5188         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5189         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5190         u32 reg, msgbuf[3];
5191         u8 *addr = (u8 *)(&msgbuf[1]);
5192
5193         /* process all the same items cleared in a function level reset */
5194         igb_vf_reset(adapter, vf);
5195
5196         /* set vf mac address */
5197         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5198
5199         /* enable transmit and receive for vf */
5200         reg = rd32(E1000_VFTE);
5201         wr32(E1000_VFTE, reg | (1 << vf));
5202         reg = rd32(E1000_VFRE);
5203         wr32(E1000_VFRE, reg | (1 << vf));
5204
5205         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5206
5207         /* reply to reset with ack and vf mac address */
5208         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5209         memcpy(addr, vf_mac, 6);
5210         igb_write_mbx(hw, msgbuf, 3, vf);
5211 }
5212
5213 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5214 {
5215         /*
5216          * The VF MAC Address is stored in a packed array of bytes
5217          * starting at the second 32 bit word of the msg array
5218          */
5219         unsigned char *addr = (unsigned char *)&msg[1];
5220         int err = -1;
5221
5222         if (is_valid_ether_addr(addr))
5223                 err = igb_set_vf_mac(adapter, vf, addr);
5224
5225         return err;
5226 }
5227
5228 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5229 {
5230         struct e1000_hw *hw = &adapter->hw;
5231         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5232         u32 msg = E1000_VT_MSGTYPE_NACK;
5233
5234         /* if device isn't clear to send it shouldn't be reading either */
5235         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5236             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5237                 igb_write_mbx(hw, &msg, 1, vf);
5238                 vf_data->last_nack = jiffies;
5239         }
5240 }
5241
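/**
 * igb_rcv_msg_from_vf - read and service one mailbox message from a VF
 * @adapter: board private structure
 * @vf: VF whose mailbox holds a pending message
 *
 * Dispatches reset, MAC address, promiscuous, multicast, packet-length and
 * VLAN requests.  VFs that have not completed a reset, or that try to
 * override administratively set values, are refused.  The result is
 * signalled back to the VF with an ACK or NACK.
 **/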
5242 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5243 {
5244         struct pci_dev *pdev = adapter->pdev;
5245         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5246         struct e1000_hw *hw = &adapter->hw;
5247         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5248         s32 retval;
5249
5250         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5251
5252         if (retval) {
5253                 /* if receive failed, revoke VF CTS status and restart init */
5254                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5255                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5256                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5257                         return;
5258                 goto out;
5259         }
5260
5261         /* this is a message we already processed, do nothing */
5262         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5263                 return;
5264
5265         /*
5266          * until the vf completes a reset it should not be
5267          * allowed to start any configuration.
5268          */
5269
5270         if (msgbuf[0] == E1000_VF_RESET) {
5271                 igb_vf_reset_msg(adapter, vf);
5272                 return;
5273         }
5274
5275         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5276                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5277                         return;
5278                 retval = -1;
5279                 goto out;
5280         }
5281
5282         switch ((msgbuf[0] & 0xFFFF)) {
5283         case E1000_VF_SET_MAC_ADDR:
5284                 retval = -EINVAL;
5285                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5286                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5287                 else
5288                         dev_warn(&pdev->dev,
5289                                  "VF %d attempted to override administratively "
5290                                  "set MAC address\nReload the VF driver to "
5291                                  "resume operations\n", vf);
5292                 break;
5293         case E1000_VF_SET_PROMISC:
5294                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5295                 break;
5296         case E1000_VF_SET_MULTICAST:
5297                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5298                 break;
5299         case E1000_VF_SET_LPE:
5300                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5301                 break;
5302         case E1000_VF_SET_VLAN:
5303                 retval = -1;
5304                 if (vf_data->pf_vlan)
5305                         dev_warn(&pdev->dev,
5306                                  "VF %d attempted to override administratively "
5307                                  "set VLAN tag\nReload the VF driver to "
5308                                  "resume operations\n", vf);
5309                 else
5310                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5311                 break;
5312         default:
5313                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5314                 retval = -1;
5315                 break;
5316         }
5317
5318         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5319 out:
5320         /* notify the VF of the results of what it sent us */
5321         if (retval)
5322                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5323         else
5324                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5325
5326         igb_write_mbx(hw, msgbuf, 1, vf);
5327 }
5328
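/**
 * igb_msg_task - service all VF mailboxes
 * @adapter: board private structure
 *
 * Walks every allocated VF and handles any pending reset request, message
 * or ack.  Invoked when a VF mailbox interrupt cause is signalled.
 **/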
5329 static void igb_msg_task(struct igb_adapter *adapter)
5330 {
5331         struct e1000_hw *hw = &adapter->hw;
5332         u32 vf;
5333
5334         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5335                 /* process any reset requests */
5336                 if (!igb_check_for_rst(hw, vf))
5337                         igb_vf_reset_event(adapter, vf);
5338
5339                 /* process any messages pending */
5340                 if (!igb_check_for_msg(hw, vf))
5341                         igb_rcv_msg_from_vf(adapter, vf);
5342
5343                 /* process any acks */
5344                 if (!igb_check_for_ack(hw, vf))
5345                         igb_rcv_ack_from_vf(adapter, vf);
5346         }
5347 }
5348
5349 /**
5350  *  igb_set_uta - Set unicast filter table address
5351  *  @adapter: board private structure
5352  *
5353  *  The unicast table address is a register array of 32-bit registers.
5354  *  The table is meant to be used in a way similar to how the MTA is used;
5355  *  however, due to certain limitations in the hardware it is necessary to
5356  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5357  *  enable bit to allow VLAN tag stripping when promiscuous mode is enabled.
5358  **/
5359 static void igb_set_uta(struct igb_adapter *adapter)
5360 {
5361         struct e1000_hw *hw = &adapter->hw;
5362         int i;
5363
5364         /* The UTA table only exists on 82576 hardware and newer */
5365         if (hw->mac.type < e1000_82576)
5366                 return;
5367
5368         /* we only need to do this if VMDq is enabled */
5369         if (!adapter->vfs_allocated_count)
5370                 return;
5371
5372         for (i = 0; i < hw->mac.uta_reg_count; i++)
5373                 array_wr32(E1000_UTA, i, ~0);
5374 }
5375
5376 /**
5377  * igb_intr_msi - Interrupt Handler
5378  * @irq: interrupt number
5379  * @data: pointer to a network interface device structure
5380  **/
5381 static irqreturn_t igb_intr_msi(int irq, void *data)
5382 {
5383         struct igb_adapter *adapter = data;
5384         struct igb_q_vector *q_vector = adapter->q_vector[0];
5385         struct e1000_hw *hw = &adapter->hw;
5386         /* read ICR disables interrupts using IAM */
5387         u32 icr = rd32(E1000_ICR);
5388
5389         igb_write_itr(q_vector);
5390
5391         if (icr & E1000_ICR_DRSTA)
5392                 schedule_work(&adapter->reset_task);
5393
5394         if (icr & E1000_ICR_DOUTSYNC) {
5395                 /* HW is reporting DMA is out of sync */
5396                 adapter->stats.doosync++;
5397         }
5398
5399         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5400                 hw->mac.get_link_status = 1;
5401                 if (!test_bit(__IGB_DOWN, &adapter->state))
5402                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5403         }
5404
5405         napi_schedule(&q_vector->napi);
5406
5407         return IRQ_HANDLED;
5408 }
5409
5410 /**
5411  * igb_intr - Legacy Interrupt Handler
5412  * @irq: interrupt number
5413  * @data: pointer to a network interface device structure
5414  **/
5415 static irqreturn_t igb_intr(int irq, void *data)
5416 {
5417         struct igb_adapter *adapter = data;
5418         struct igb_q_vector *q_vector = adapter->q_vector[0];
5419         struct e1000_hw *hw = &adapter->hw;
5420         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5421          * need for the IMC write */
5422         u32 icr = rd32(E1000_ICR);
5423         if (!icr)
5424                 return IRQ_NONE;  /* Not our interrupt */
5425
5426         igb_write_itr(q_vector);
5427
5428         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5429          * not set, then the adapter didn't send an interrupt */
5430         if (!(icr & E1000_ICR_INT_ASSERTED))
5431                 return IRQ_NONE;
5432
5433         if (icr & E1000_ICR_DRSTA)
5434                 schedule_work(&adapter->reset_task);
5435
5436         if (icr & E1000_ICR_DOUTSYNC) {
5437                 /* HW is reporting DMA is out of sync */
5438                 adapter->stats.doosync++;
5439         }
5440
5441         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5442                 hw->mac.get_link_status = 1;
5443                 /* guard against interrupt when we're going down */
5444                 if (!test_bit(__IGB_DOWN, &adapter->state))
5445                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5446         }
5447
5448         napi_schedule(&q_vector->napi);
5449
5450         return IRQ_HANDLED;
5451 }
5452
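/**
 * igb_ring_irq_enable - re-enable a vector's interrupt after polling
 * @q_vector: vector that just completed its NAPI poll
 *
 * Updates the adaptive ITR setting for the vector and re-enables its
 * interrupt (the vector's EIMS bit under MSI-X, the shared mask otherwise),
 * unless the adapter is being taken down.
 **/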
5453 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5454 {
5455         struct igb_adapter *adapter = q_vector->adapter;
5456         struct e1000_hw *hw = &adapter->hw;
5457
5458         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5459             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5460                 if (!adapter->msix_entries)
5461                         igb_set_itr(adapter);
5462                 else
5463                         igb_update_ring_itr(q_vector);
5464         }
5465
5466         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5467                 if (adapter->msix_entries)
5468                         wr32(E1000_EIMS, q_vector->eims_value);
5469                 else
5470                         igb_irq_enable(adapter);
5471         }
5472 }
5473
5474 /**
5475  * igb_poll - NAPI Rx polling callback
5476  * @napi: napi polling structure
5477  * @budget: count of how many packets we should handle
5478  **/
5479 static int igb_poll(struct napi_struct *napi, int budget)
5480 {
5481         struct igb_q_vector *q_vector = container_of(napi,
5482                                                      struct igb_q_vector,
5483                                                      napi);
5484         int tx_clean_complete = 1, work_done = 0;
5485
5486 #ifdef CONFIG_IGB_DCA
5487         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5488                 igb_update_dca(q_vector);
5489 #endif
5490         if (q_vector->tx_ring)
5491                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5492
5493         if (q_vector->rx_ring)
5494                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5495
5496         if (!tx_clean_complete)
5497                 work_done = budget;
5498
5499         /* If not enough Rx work done, exit the polling mode */
5500         if (work_done < budget) {
5501                 napi_complete(napi);
5502                 igb_ring_irq_enable(q_vector);
5503         }
5504
5505         return work_done;
5506 }
5507
5508 /**
5509  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5510  * @adapter: board private structure
5511  * @shhwtstamps: timestamp structure to update
5512  * @regval: unsigned 64bit system time value.
5513  *
5514  * We need to convert the system time value stored in the RX/TXSTMP registers
5515  * into a hwtstamp which can be used by the upper level timestamping functions
5516  */
5517 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5518                                    struct skb_shared_hwtstamps *shhwtstamps,
5519                                    u64 regval)
5520 {
5521         u64 ns;
5522
5523         /*
5524          * The 82580 starts with 1 ns at bit 0 in RX/TXSTMPL; shift this up by
5525          * 24 bits to match the clock shift we set up earlier.
5526          */
5527         if (adapter->hw.mac.type == e1000_82580)
5528                 regval <<= IGB_82580_TSYNC_SHIFT;
5529
5530         ns = timecounter_cyc2time(&adapter->clock, regval);
5531         timecompare_update(&adapter->compare, ns);
5532         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5533         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5534         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5535 }
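
/*
 * Conversion sketch (generic timecounter math rather than driver-specific
 * code): a cyclecounter turns a raw counter delta into nanoseconds roughly
 * as
 *
 *	ns = (delta * cc->mult) >> cc->shift;
 *
 * and timecounter_cyc2time() adds that delta to the running nanosecond count
 * kept in adapter->clock.  Because the 82580 SYSTIM already ticks in whole
 * nanoseconds at bit 0, the raw stamp is shifted left by
 * IGB_82580_TSYNC_SHIFT above so it lines up with the shift chosen when the
 * clock was initialized elsewhere in this driver.
 */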
5536
5537 /**
5538  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5539  * @q_vector: pointer to q_vector containing needed info
5540  * @buffer_info: pointer to the igb_buffer for the packet being stamped
5541  *
5542  * If we were asked to do hardware stamping and such a time stamp is
5543  * available, then it must have been for this skb here because we allow
5544  * only one such packet into the queue.
5545  */
5546 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5547 {
5548         struct igb_adapter *adapter = q_vector->adapter;
5549         struct e1000_hw *hw = &adapter->hw;
5550         struct skb_shared_hwtstamps shhwtstamps;
5551         u64 regval;
5552
5553         /* exit if the skb was not flagged for hw timestamping or the TX stamp is not valid */
5554         if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5555             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5556                 return;
5557
5558         regval = rd32(E1000_TXSTMPL);
5559         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5560
5561         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5562         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5563 }
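
/*
 * Minimal user-space sketch (not part of the driver; names are hypothetical,
 * socket setup and error handling are trimmed, and SIOCSHWTSTAMP is assumed
 * to have already enabled hardware stamping on the interface) of how the
 * stamp delivered by skb_tstamp_tx() above is consumed: request hardware TX
 * timestamps with SO_TIMESTAMPING, send, then read the stamp back from the
 * socket error queue.
 */
#if 0
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static int enable_tx_hwtstamp(int sock)
{
	int flags = SOF_TIMESTAMPING_TX_HARDWARE |
		    SOF_TIMESTAMPING_RAW_HARDWARE;

	return setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING,
			  &flags, sizeof(flags));
}

static void read_tx_hwtstamp(int sock)
{
	char data[256], control[512];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = control, .msg_controllen = sizeof(control),
	};
	struct cmsghdr *cm;

	/* the looped-back copy of the sent packet carries the stamp */
	if (recvmsg(sock, &msg, MSG_ERRQUEUE) < 0)
		return;

	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
		if (cm->cmsg_level == SOL_SOCKET &&
		    cm->cmsg_type == SO_TIMESTAMPING) {
			struct scm_timestamping ts;

			memcpy(&ts, CMSG_DATA(cm), sizeof(ts));
			/* ts.ts[2] holds the raw hardware timestamp */
		}
	}
}
#endif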
5564
5565 /**
5566  * igb_clean_tx_irq - Reclaim resources after transmit completes
5567  * @q_vector: pointer to q_vector containing needed info
5568  * returns true if ring is completely cleaned
5569  **/
5570 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5571 {
5572         struct igb_adapter *adapter = q_vector->adapter;
5573         struct igb_ring *tx_ring = q_vector->tx_ring;
5574         struct net_device *netdev = tx_ring->netdev;
5575         struct e1000_hw *hw = &adapter->hw;
5576         struct igb_buffer *buffer_info;
5577         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5578         unsigned int total_bytes = 0, total_packets = 0;
5579         unsigned int i, eop, count = 0;
5580         bool cleaned = false;
5581
5582         i = tx_ring->next_to_clean;
5583         eop = tx_ring->buffer_info[i].next_to_watch;
5584         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5585
5586         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5587                (count < tx_ring->count)) {
5588                 rmb();  /* read buffer_info after eop_desc status */
5589                 for (cleaned = false; !cleaned; count++) {
5590                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5591                         buffer_info = &tx_ring->buffer_info[i];
5592                         cleaned = (i == eop);
5593
5594                         if (buffer_info->skb) {
5595                                 total_bytes += buffer_info->bytecount;
5596                                 /* gso_segs is currently only valid for tcp */
5597                                 total_packets += buffer_info->gso_segs;
5598                                 igb_tx_hwtstamp(q_vector, buffer_info);
5599                         }
5600
5601                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5602                         tx_desc->wb.status = 0;
5603
5604                         i++;
5605                         if (i == tx_ring->count)
5606                                 i = 0;
5607                 }
5608                 eop = tx_ring->buffer_info[i].next_to_watch;
5609                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5610         }
5611
5612         tx_ring->next_to_clean = i;
5613
5614         if (unlikely(count &&
5615                      netif_carrier_ok(netdev) &&
5616                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5617                 /* Make sure that anybody stopping the queue after this
5618                  * sees the new next_to_clean.
5619                  */
5620                 smp_mb();
5621                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5622                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5623                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5624
5625                         u64_stats_update_begin(&tx_ring->tx_syncp);
5626                         tx_ring->tx_stats.restart_queue++;
5627                         u64_stats_update_end(&tx_ring->tx_syncp);
5628                 }
5629         }
5630
5631         if (tx_ring->detect_tx_hung) {
5632                 /* Detect a transmit hang in hardware, this serializes the
5633                  * check with the clearing of time_stamp and movement of i */
5634                 tx_ring->detect_tx_hung = false;
5635                 if (tx_ring->buffer_info[i].time_stamp &&
5636                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5637                                (adapter->tx_timeout_factor * HZ)) &&
5638                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5639
5640                         /* detected Tx unit hang */
5641                         dev_err(tx_ring->dev,
5642                                 "Detected Tx Unit Hang\n"
5643                                 "  Tx Queue             <%d>\n"
5644                                 "  TDH                  <%x>\n"
5645                                 "  TDT                  <%x>\n"
5646                                 "  next_to_use          <%x>\n"
5647                                 "  next_to_clean        <%x>\n"
5648                                 "buffer_info[next_to_clean]\n"
5649                                 "  time_stamp           <%lx>\n"
5650                                 "  next_to_watch        <%x>\n"
5651                                 "  jiffies              <%lx>\n"
5652                                 "  desc.status          <%x>\n",
5653                                 tx_ring->queue_index,
5654                                 readl(tx_ring->head),
5655                                 readl(tx_ring->tail),
5656                                 tx_ring->next_to_use,
5657                                 tx_ring->next_to_clean,
5658                                 tx_ring->buffer_info[eop].time_stamp,
5659                                 eop,
5660                                 jiffies,
5661                                 eop_desc->wb.status);
5662                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5663                 }
5664         }
5665         tx_ring->total_bytes += total_bytes;
5666         tx_ring->total_packets += total_packets;
5667         u64_stats_update_begin(&tx_ring->tx_syncp);
5668         tx_ring->tx_stats.bytes += total_bytes;
5669         tx_ring->tx_stats.packets += total_packets;
5670         u64_stats_update_end(&tx_ring->tx_syncp);
5671         return count < tx_ring->count;
5672 }
5673
5674 /**
5675  * igb_receive_skb - helper function to handle rx indications
5676  * @q_vector: structure containing interrupt and ring information
5677  * @skb: packet to send up
5678  * @vlan_tag: vlan tag for packet
5679  **/
5680 static void igb_receive_skb(struct igb_q_vector *q_vector,
5681                             struct sk_buff *skb,
5682                             u16 vlan_tag)
5683 {
5684         struct igb_adapter *adapter = q_vector->adapter;
5685
5686         if (vlan_tag && adapter->vlgrp)
5687                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5688                                  vlan_tag, skb);
5689         else
5690                 napi_gro_receive(&q_vector->napi, skb);
5691 }
5692
5693 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5694                                        u32 status_err, struct sk_buff *skb)
5695 {
5696         skb_checksum_none_assert(skb);
5697
5698         /* bail if the Ignore Checksum bit is set or checksum is disabled through ethtool */
5699         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5700              (status_err & E1000_RXD_STAT_IXSM))
5701                 return;
5702
5703         /* TCP/UDP checksum error bit is set */
5704         if (status_err &
5705             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5706                 /*
5707                  * work around an erratum with SCTP packets where the TCPE (aka
5708                  * L4E) bit is set incorrectly on 64 byte (60 byte w/o CRC)
5709                  * packets; let the stack check the crc32c instead
5710                  */
5711                 if ((skb->len == 60) &&
5712                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5713                         u64_stats_update_begin(&ring->rx_syncp);
5714                         ring->rx_stats.csum_err++;
5715                         u64_stats_update_end(&ring->rx_syncp);
5716                 }
5717                 /* let the stack verify checksum errors */
5718                 return;
5719         }
5720         /* It must be a TCP or UDP packet with a valid checksum */
5721         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5722                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5723
5724         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5725 }
5726
5727 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5728                                    struct sk_buff *skb)
5729 {
5730         struct igb_adapter *adapter = q_vector->adapter;
5731         struct e1000_hw *hw = &adapter->hw;
5732         u64 regval;
5733
5734         /*
5735          * If this bit is set, then the RX registers contain the time stamp. No
5736          * other packet will be time stamped until we read these registers, so
5737          * read the registers to make them available again. Because only one
5738          * packet can be time stamped at a time, we know that the register
5739          * values must belong to this one here and therefore we don't need to
5740          * compare any of the additional attributes stored for it.
5741          *
5742          * If nothing went wrong, then it should have a shared tx_flags that we
5743          * can turn into a skb_shared_hwtstamps.
5744          */
5745         if (staterr & E1000_RXDADV_STAT_TSIP) {
5746                 u32 *stamp = (u32 *)skb->data;
5747                 regval = le32_to_cpu(*(stamp + 2));
5748                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5749                 skb_pull(skb, IGB_TS_HDR_LEN);
5750         } else {
5751                 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5752                         return;
5753
5754                 regval = rd32(E1000_RXSTMPL);
5755                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5756         }
5757
5758         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5759 }
5760 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5761                                union e1000_adv_rx_desc *rx_desc)
5762 {
5763         /* HW will not DMA in data larger than the given buffer, even if it
5764          * parses the (NFS, of course) header to be larger.  In that case, it
5765          * fills the header buffer and spills the rest into the page.
5766          */
5767         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5768                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5769         if (hlen > rx_ring->rx_buffer_len)
5770                 hlen = rx_ring->rx_buffer_len;
5771         return hlen;
5772 }
5773
5774 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5775                                  int *work_done, int budget)
5776 {
5777         struct igb_ring *rx_ring = q_vector->rx_ring;
5778         struct net_device *netdev = rx_ring->netdev;
5779         struct device *dev = rx_ring->dev;
5780         union e1000_adv_rx_desc *rx_desc , *next_rxd;
5781         struct igb_buffer *buffer_info , *next_buffer;
5782         struct sk_buff *skb;
5783         bool cleaned = false;
5784         int cleaned_count = 0;
5785         int current_node = numa_node_id();
5786         unsigned int total_bytes = 0, total_packets = 0;
5787         unsigned int i;
5788         u32 staterr;
5789         u16 length;
5790         u16 vlan_tag;
5791
5792         i = rx_ring->next_to_clean;
5793         buffer_info = &rx_ring->buffer_info[i];
5794         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5795         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5796
5797         while (staterr & E1000_RXD_STAT_DD) {
5798                 if (*work_done >= budget)
5799                         break;
5800                 (*work_done)++;
5801                 rmb(); /* read descriptor and rx_buffer_info after status DD */
5802
5803                 skb = buffer_info->skb;
5804                 prefetch(skb->data - NET_IP_ALIGN);
5805                 buffer_info->skb = NULL;
5806
5807                 i++;
5808                 if (i == rx_ring->count)
5809                         i = 0;
5810
5811                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5812                 prefetch(next_rxd);
5813                 next_buffer = &rx_ring->buffer_info[i];
5814
5815                 length = le16_to_cpu(rx_desc->wb.upper.length);
5816                 cleaned = true;
5817                 cleaned_count++;
5818
5819                 if (buffer_info->dma) {
5820                         dma_unmap_single(dev, buffer_info->dma,
5821                                          rx_ring->rx_buffer_len,
5822                                          DMA_FROM_DEVICE);
5823                         buffer_info->dma = 0;
5824                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5825                                 skb_put(skb, length);
5826                                 goto send_up;
5827                         }
5828                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5829                 }
5830
5831                 if (length) {
5832                         dma_unmap_page(dev, buffer_info->page_dma,
5833                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5834                         buffer_info->page_dma = 0;
5835
5836                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5837                                                 buffer_info->page,
5838                                                 buffer_info->page_offset,
5839                                                 length);
5840
5841                         if ((page_count(buffer_info->page) != 1) ||
5842                             (page_to_nid(buffer_info->page) != current_node))
5843                                 buffer_info->page = NULL;
5844                         else
5845                                 get_page(buffer_info->page);
5846
5847                         skb->len += length;
5848                         skb->data_len += length;
5849                         skb->truesize += length;
5850                 }
5851
5852                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5853                         buffer_info->skb = next_buffer->skb;
5854                         buffer_info->dma = next_buffer->dma;
5855                         next_buffer->skb = skb;
5856                         next_buffer->dma = 0;
5857                         goto next_desc;
5858                 }
5859 send_up:
5860                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5861                         dev_kfree_skb_irq(skb);
5862                         goto next_desc;
5863                 }
5864
5865                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5866                         igb_rx_hwtstamp(q_vector, staterr, skb);
5867                 total_bytes += skb->len;
5868                 total_packets++;
5869
5870                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5871
5872                 skb->protocol = eth_type_trans(skb, netdev);
5873                 skb_record_rx_queue(skb, rx_ring->queue_index);
5874
5875                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5876                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5877
5878                 igb_receive_skb(q_vector, skb, vlan_tag);
5879
5880 next_desc:
5881                 rx_desc->wb.upper.status_error = 0;
5882
5883                 /* return some buffers to hardware, one at a time is too slow */
5884                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5885                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5886                         cleaned_count = 0;
5887                 }
5888
5889                 /* use prefetched values */
5890                 rx_desc = next_rxd;
5891                 buffer_info = next_buffer;
5892                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5893         }
5894
5895         rx_ring->next_to_clean = i;
5896         cleaned_count = igb_desc_unused(rx_ring);
5897
5898         if (cleaned_count)
5899                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5900
5901         rx_ring->total_packets += total_packets;
5902         rx_ring->total_bytes += total_bytes;
5903         u64_stats_update_begin(&rx_ring->rx_syncp);
5904         rx_ring->rx_stats.packets += total_packets;
5905         rx_ring->rx_stats.bytes += total_bytes;
5906         u64_stats_update_end(&rx_ring->rx_syncp);
5907         return cleaned;
5908 }
5909
5910 /**
5911  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5912  * @rx_ring: address of the receive ring to refill
5913  **/
5914 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5915 {
5916         struct net_device *netdev = rx_ring->netdev;
5917         union e1000_adv_rx_desc *rx_desc;
5918         struct igb_buffer *buffer_info;
5919         struct sk_buff *skb;
5920         unsigned int i;
5921         int bufsz;
5922
5923         i = rx_ring->next_to_use;
5924         buffer_info = &rx_ring->buffer_info[i];
5925
5926         bufsz = rx_ring->rx_buffer_len;
5927
5928         while (cleaned_count--) {
5929                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5930
5931                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5932                         if (!buffer_info->page) {
5933                                 buffer_info->page = netdev_alloc_page(netdev);
5934                                 if (unlikely(!buffer_info->page)) {
5935                                         u64_stats_update_begin(&rx_ring->rx_syncp);
5936                                         rx_ring->rx_stats.alloc_failed++;
5937                                         u64_stats_update_end(&rx_ring->rx_syncp);
5938                                         goto no_buffers;
5939                                 }
5940                                 buffer_info->page_offset = 0;
5941                         } else {
5942                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5943                         }
5944                         buffer_info->page_dma =
5945                                 dma_map_page(rx_ring->dev, buffer_info->page,
5946                                              buffer_info->page_offset,
5947                                              PAGE_SIZE / 2,
5948                                              DMA_FROM_DEVICE);
5949                         if (dma_mapping_error(rx_ring->dev,
5950                                               buffer_info->page_dma)) {
5951                                 buffer_info->page_dma = 0;
5952                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5953                                 rx_ring->rx_stats.alloc_failed++;
5954                                 u64_stats_update_end(&rx_ring->rx_syncp);
5955                                 goto no_buffers;
5956                         }
5957                 }
5958
5959                 skb = buffer_info->skb;
5960                 if (!skb) {
5961                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5962                         if (unlikely(!skb)) {
5963                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5964                                 rx_ring->rx_stats.alloc_failed++;
5965                                 u64_stats_update_end(&rx_ring->rx_syncp);
5966                                 goto no_buffers;
5967                         }
5968
5969                         buffer_info->skb = skb;
5970                 }
5971                 if (!buffer_info->dma) {
5972                         buffer_info->dma = dma_map_single(rx_ring->dev,
5973                                                           skb->data,
5974                                                           bufsz,
5975                                                           DMA_FROM_DEVICE);
5976                         if (dma_mapping_error(rx_ring->dev,
5977                                               buffer_info->dma)) {
5978                                 buffer_info->dma = 0;
5979                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5980                                 rx_ring->rx_stats.alloc_failed++;
5981                                 u64_stats_update_end(&rx_ring->rx_syncp);
5982                                 goto no_buffers;
5983                         }
5984                 }
5985                 /* Refresh the desc even if buffer_addrs didn't change because
5986                  * each write-back erases this info. */
5987                 if (bufsz < IGB_RXBUFFER_1024) {
5988                         rx_desc->read.pkt_addr =
5989                              cpu_to_le64(buffer_info->page_dma);
5990                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5991                 } else {
5992                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5993                         rx_desc->read.hdr_addr = 0;
5994                 }
5995
5996                 i++;
5997                 if (i == rx_ring->count)
5998                         i = 0;
5999                 buffer_info = &rx_ring->buffer_info[i];
6000         }
6001
6002 no_buffers:
6003         if (rx_ring->next_to_use != i) {
6004                 rx_ring->next_to_use = i;
6005                 if (i == 0)
6006                         i = (rx_ring->count - 1);
6007                 else
6008                         i--;
6009
6010                 /* Force memory writes to complete before letting h/w
6011                  * know there are new descriptors to fetch.  (Only
6012                  * applicable for weak-ordered memory model archs,
6013                  * such as IA-64). */
6014                 wmb();
6015                 writel(i, rx_ring->tail);
6016         }
6017 }
6018
6019 /**
6020  * igb_mii_ioctl - handle MII register ioctls
6021  * @netdev: network interface device structure
6022  * @ifr: interface request structure carrying the MII data
6023  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6024  **/
6025 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6026 {
6027         struct igb_adapter *adapter = netdev_priv(netdev);
6028         struct mii_ioctl_data *data = if_mii(ifr);
6029
6030         if (adapter->hw.phy.media_type != e1000_media_type_copper)
6031                 return -EOPNOTSUPP;
6032
6033         switch (cmd) {
6034         case SIOCGMIIPHY:
6035                 data->phy_id = adapter->hw.phy.addr;
6036                 break;
6037         case SIOCGMIIREG:
6038                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6039                                      &data->val_out))
6040                         return -EIO;
6041                 break;
6042         case SIOCSMIIREG:
6043         default:
6044                 return -EOPNOTSUPP;
6045         }
6046         return 0;
6047 }
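
/*
 * Minimal user-space sketch (hypothetical interface name, error handling
 * trimmed) of the MII read path served above: SIOCGMIIPHY fills in the PHY
 * address, SIOCGMIIREG then reads one of the low 32 PHY registers.
 */
#if 0
#include <linux/mii.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <string.h>
#include <sys/ioctl.h>

static int read_phy_reg(int sock, int reg, unsigned int *val)
{
	struct ifreq ifr;
	/* the kernel's if_mii() aliases the ifr_ifru union as MII data */
	struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_ifru;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", sizeof(ifr.ifr_name) - 1);

	if (ioctl(sock, SIOCGMIIPHY, &ifr) < 0)	/* fills mii->phy_id */
		return -1;

	mii->reg_num = reg;
	if (ioctl(sock, SIOCGMIIREG, &ifr) < 0)
		return -1;

	*val = mii->val_out;
	return 0;
}
#endif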
6048
6049 /**
6050  * igb_hwtstamp_ioctl - control hardware time stamping
6051  * @netdev: network interface device structure
6052  * @ifr: interface request structure holding a struct hwtstamp_config
6053  * @cmd: ioctl command (SIOCSHWTSTAMP)
6054  *
6055  * Outgoing time stamping can be enabled and disabled. Play nice and
6056  * disable it when requested, although it shouldn't cause any overhead
6057  * when no packet needs it. At most one packet in the queue may be
6058  * marked for time stamping, otherwise it would be impossible to tell
6059  * for sure to which packet the hardware time stamp belongs.
6060  *
6061  * Incoming time stamping has to be configured via the hardware
6062  * filters. Not all combinations are supported, in particular event
6063  * type has to be specified. Matching the kind of event packet is
6064  * not supported, with the exception of "all V2 events regardless of
6065  * layer 2 or 4".
6066  *
6067  **/
6068 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6069                               struct ifreq *ifr, int cmd)
6070 {
6071         struct igb_adapter *adapter = netdev_priv(netdev);
6072         struct e1000_hw *hw = &adapter->hw;
6073         struct hwtstamp_config config;
6074         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6075         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6076         u32 tsync_rx_cfg = 0;
6077         bool is_l4 = false;
6078         bool is_l2 = false;
6079         u32 regval;
6080
6081         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6082                 return -EFAULT;
6083
6084         /* reserved for future extensions */
6085         if (config.flags)
6086                 return -EINVAL;
6087
6088         switch (config.tx_type) {
6089         case HWTSTAMP_TX_OFF:
6090                 tsync_tx_ctl = 0; /* fall through */
6091         case HWTSTAMP_TX_ON:
6092                 break;
6093         default:
6094                 return -ERANGE;
6095         }
6096
6097         switch (config.rx_filter) {
6098         case HWTSTAMP_FILTER_NONE:
6099                 tsync_rx_ctl = 0;
6100                 break;
6101         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6102         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6103         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6104         case HWTSTAMP_FILTER_ALL:
6105                 /*
6106                  * TSYNCRXCFG can select only one message type, so it is not
6107                  * possible to time stamp both Sync and Delay_Req messages
6108                  * => fall back to time stamping all packets
6109                  */
6110                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6111                 config.rx_filter = HWTSTAMP_FILTER_ALL;
6112                 break;
6113         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6114                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6115                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6116                 is_l4 = true;
6117                 break;
6118         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6119                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6120                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6121                 is_l4 = true;
6122                 break;
6123         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6124         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6125                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6126                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6127                 is_l2 = true;
6128                 is_l4 = true;
6129                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6130                 break;
6131         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6132         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6133                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6134                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6135                 is_l2 = true;
6136                 is_l4 = true;
6137                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6138                 break;
6139         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6140         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6141         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6142                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6143                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6144                 is_l2 = true;
6145                 break;
6146         default:
6147                 return -ERANGE;
6148         }
6149
6150         if (hw->mac.type == e1000_82575) {
6151                 if (tsync_rx_ctl | tsync_tx_ctl)
6152                         return -EINVAL;
6153                 return 0;
6154         }
6155
6156         /*
6157          * Per-packet timestamping only works if all packets are
6158          * timestamped, so enable timestamping of all packets as
6159          * long as one rx filter was configured.
6160          */
6161         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6162                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6163                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6164         }
6165
6166         /* enable/disable TX */
6167         regval = rd32(E1000_TSYNCTXCTL);
6168         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6169         regval |= tsync_tx_ctl;
6170         wr32(E1000_TSYNCTXCTL, regval);
6171
6172         /* enable/disable RX */
6173         regval = rd32(E1000_TSYNCRXCTL);
6174         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6175         regval |= tsync_rx_ctl;
6176         wr32(E1000_TSYNCRXCTL, regval);
6177
6178         /* define which PTP packets are time stamped */
6179         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6180
6181         /* define ethertype filter for timestamped packets */
6182         if (is_l2)
6183                 wr32(E1000_ETQF(3),
6184                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6185                                  E1000_ETQF_1588 | /* enable timestamping */
6186                                  ETH_P_1588));     /* 1588 eth protocol type */
6187         else
6188                 wr32(E1000_ETQF(3), 0);
6189
6190 #define PTP_PORT 319
6191         /* L4 Queue Filter[3]: filter by destination port and protocol */
6192         if (is_l4) {
6193                 u32 ftqf = (IPPROTO_UDP /* UDP */
6194                         | E1000_FTQF_VF_BP /* VF not compared */
6195                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6196                         | E1000_FTQF_MASK); /* mask all inputs */
6197                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6198
6199                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6200                 wr32(E1000_IMIREXT(3),
6201                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6202                 if (hw->mac.type == e1000_82576) {
6203                         /* enable source port check */
6204                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6205                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6206                 }
6207                 wr32(E1000_FTQF(3), ftqf);
6208         } else {
6209                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6210         }
6211         wrfl();
6212
6213         adapter->hwtstamp_config = config;
6214
6215         /* clear TX/RX time stamp registers, just to be sure */
6216         regval = rd32(E1000_TXSTMPH);
6217         regval = rd32(E1000_RXSTMPH);
6218
6219         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6220                 -EFAULT : 0;
6221 }
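
/*
 * Minimal user-space sketch (interface name "eth0" is hypothetical, error
 * handling trimmed) of the SIOCSHWTSTAMP request that lands in
 * igb_hwtstamp_ioctl() above.  On return config.rx_filter reports what the
 * driver actually enabled, which may differ from what was asked for.
 */
#if 0
#include <linux/net_tstamp.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <string.h>
#include <sys/ioctl.h>

static int request_hwtstamp(int sock)
{
	struct hwtstamp_config config;
	struct ifreq ifr;

	memset(&config, 0, sizeof(config));
	config.tx_type = HWTSTAMP_TX_ON;
	config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", sizeof(ifr.ifr_name) - 1);
	ifr.ifr_data = (void *)&config;

	return ioctl(sock, SIOCSHWTSTAMP, &ifr);
}
#endif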
6222
6223 /**
6224  * igb_ioctl - dispatch device-specific ioctls
6225  * @netdev: network interface device structure
6226  * @ifr: interface request structure
6227  * @cmd: ioctl command
6228  **/
6229 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6230 {
6231         switch (cmd) {
6232         case SIOCGMIIPHY:
6233         case SIOCGMIIREG:
6234         case SIOCSMIIREG:
6235                 return igb_mii_ioctl(netdev, ifr, cmd);
6236         case SIOCSHWTSTAMP:
6237                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6238         default:
6239                 return -EOPNOTSUPP;
6240         }
6241 }
6242
6243 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6244 {
6245         struct igb_adapter *adapter = hw->back;
6246         u16 cap_offset;
6247
6248         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6249         if (!cap_offset)
6250                 return -E1000_ERR_CONFIG;
6251
6252         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6253
6254         return 0;
6255 }
6256
6257 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6258 {
6259         struct igb_adapter *adapter = hw->back;
6260         u16 cap_offset;
6261
6262         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6263         if (!cap_offset)
6264                 return -E1000_ERR_CONFIG;
6265
6266         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6267
6268         return 0;
6269 }
6270
6271 static void igb_vlan_rx_register(struct net_device *netdev,
6272                                  struct vlan_group *grp)
6273 {
6274         struct igb_adapter *adapter = netdev_priv(netdev);
6275         struct e1000_hw *hw = &adapter->hw;
6276         u32 ctrl, rctl;
6277
6278         igb_irq_disable(adapter);
6279         adapter->vlgrp = grp;
6280
6281         if (grp) {
6282                 /* enable VLAN tag insert/strip */
6283                 ctrl = rd32(E1000_CTRL);
6284                 ctrl |= E1000_CTRL_VME;
6285                 wr32(E1000_CTRL, ctrl);
6286
6287                 /* Disable CFI check */
6288                 rctl = rd32(E1000_RCTL);
6289                 rctl &= ~E1000_RCTL_CFIEN;
6290                 wr32(E1000_RCTL, rctl);
6291         } else {
6292                 /* disable VLAN tag insert/strip */
6293                 ctrl = rd32(E1000_CTRL);
6294                 ctrl &= ~E1000_CTRL_VME;
6295                 wr32(E1000_CTRL, ctrl);
6296         }
6297
6298         igb_rlpml_set(adapter);
6299
6300         if (!test_bit(__IGB_DOWN, &adapter->state))
6301                 igb_irq_enable(adapter);
6302 }
6303
6304 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6305 {
6306         struct igb_adapter *adapter = netdev_priv(netdev);
6307         struct e1000_hw *hw = &adapter->hw;
6308         int pf_id = adapter->vfs_allocated_count;
6309
6310         /* attempt to add filter to vlvf array */
6311         igb_vlvf_set(adapter, vid, true, pf_id);
6312
6313         /* add the filter since PF can receive vlans w/o entry in vlvf */
6314         igb_vfta_set(hw, vid, true);
6315 }
6316
6317 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6318 {
6319         struct igb_adapter *adapter = netdev_priv(netdev);
6320         struct e1000_hw *hw = &adapter->hw;
6321         int pf_id = adapter->vfs_allocated_count;
6322         s32 err;
6323
6324         igb_irq_disable(adapter);
6325         vlan_group_set_device(adapter->vlgrp, vid, NULL);
6326
6327         if (!test_bit(__IGB_DOWN, &adapter->state))
6328                 igb_irq_enable(adapter);
6329
6330         /* remove vlan from VLVF table array */
6331         err = igb_vlvf_set(adapter, vid, false, pf_id);
6332
6333         /* if vid was not present in VLVF just remove it from table */
6334         if (err)
6335                 igb_vfta_set(hw, vid, false);
6336 }
6337
6338 static void igb_restore_vlan(struct igb_adapter *adapter)
6339 {
6340         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6341
6342         if (adapter->vlgrp) {
6343                 u16 vid;
6344                 for (vid = 0; vid < VLAN_N_VID; vid++) {
6345                         if (!vlan_group_get_device(adapter->vlgrp, vid))
6346                                 continue;
6347                         igb_vlan_rx_add_vid(adapter->netdev, vid);
6348                 }
6349         }
6350 }
6351
6352 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6353 {
6354         struct pci_dev *pdev = adapter->pdev;
6355         struct e1000_mac_info *mac = &adapter->hw.mac;
6356
6357         mac->autoneg = 0;
6358
6359         /* Make sure dplx is at most 1 bit and lsb of speed is not set
6360          * for the switch() below to work */
6361         if ((spd & 1) || (dplx & ~1))
6362                 goto err_inval;
6363
6364         /* Fiber NICs only allow 1000 Mbps full duplex */
6365         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6366             (spd != SPEED_1000 ||
6367              dplx != DUPLEX_FULL))
6368                 goto err_inval;
6369
6370         switch (spd + dplx) {
6371         case SPEED_10 + DUPLEX_HALF:
6372                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6373                 break;
6374         case SPEED_10 + DUPLEX_FULL:
6375                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6376                 break;
6377         case SPEED_100 + DUPLEX_HALF:
6378                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6379                 break;
6380         case SPEED_100 + DUPLEX_FULL:
6381                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6382                 break;
6383         case SPEED_1000 + DUPLEX_FULL:
6384                 mac->autoneg = 1;
6385                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6386                 break;
6387         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6388         default:
6389                 goto err_inval;
6390         }
6391         return 0;
6392
6393 err_inval:
6394         dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6395         return -EINVAL;
6396 }
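
/*
 * Worked example of the spd + dplx dispatch above, using the standard
 * ethtool.h values (SPEED_10 = 10, SPEED_100 = 100, SPEED_1000 = 1000,
 * DUPLEX_HALF = 0, DUPLEX_FULL = 1): a request such as
 * "ethtool -s ethX speed 100 duplex full autoneg off" typically reaches this
 * function through the ethtool set_settings path as spd = 100, dplx = 1, so
 * the switch sees 101 and forces ADVERTISE_100_FULL.  The sanity check at
 * the top ((spd & 1) || (dplx & ~1)) keeps the two values from ever aliasing
 * the same case label.
 */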
6397
6398 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6399 {
6400         struct net_device *netdev = pci_get_drvdata(pdev);
6401         struct igb_adapter *adapter = netdev_priv(netdev);
6402         struct e1000_hw *hw = &adapter->hw;
6403         u32 ctrl, rctl, status;
6404         u32 wufc = adapter->wol;
6405 #ifdef CONFIG_PM
6406         int retval = 0;
6407 #endif
6408
6409         netif_device_detach(netdev);
6410
6411         if (netif_running(netdev))
6412                 igb_close(netdev);
6413
6414         igb_clear_interrupt_scheme(adapter);
6415
6416 #ifdef CONFIG_PM
6417         retval = pci_save_state(pdev);
6418         if (retval)
6419                 return retval;
6420 #endif
6421
6422         status = rd32(E1000_STATUS);
6423         if (status & E1000_STATUS_LU)
6424                 wufc &= ~E1000_WUFC_LNKC;
6425
6426         if (wufc) {
6427                 igb_setup_rctl(adapter);
6428                 igb_set_rx_mode(netdev);
6429
6430                 /* turn on all-multi mode if wake on multicast is enabled */
6431                 if (wufc & E1000_WUFC_MC) {
6432                         rctl = rd32(E1000_RCTL);
6433                         rctl |= E1000_RCTL_MPE;
6434                         wr32(E1000_RCTL, rctl);
6435                 }
6436
6437                 ctrl = rd32(E1000_CTRL);
6438                 /* advertise wake from D3Cold */
6439                 #define E1000_CTRL_ADVD3WUC 0x00100000
6440                 /* phy power management enable */
6441                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6442                 ctrl |= E1000_CTRL_ADVD3WUC;
6443                 wr32(E1000_CTRL, ctrl);
6444
6445                 /* Allow time for pending master requests to run */
6446                 igb_disable_pcie_master(hw);
6447
6448                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6449                 wr32(E1000_WUFC, wufc);
6450         } else {
6451                 wr32(E1000_WUC, 0);
6452                 wr32(E1000_WUFC, 0);
6453         }
6454
6455         *enable_wake = wufc || adapter->en_mng_pt;
6456         if (!*enable_wake)
6457                 igb_power_down_link(adapter);
6458         else
6459                 igb_power_up_link(adapter);
6460
6461         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6462          * would have already happened in close and is redundant. */
6463         igb_release_hw_control(adapter);
6464
6465         pci_disable_device(pdev);
6466
6467         return 0;
6468 }
6469
6470 #ifdef CONFIG_PM
6471 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6472 {
6473         int retval;
6474         bool wake;
6475
6476         retval = __igb_shutdown(pdev, &wake);
6477         if (retval)
6478                 return retval;
6479
6480         if (wake) {
6481                 pci_prepare_to_sleep(pdev);
6482         } else {
6483                 pci_wake_from_d3(pdev, false);
6484                 pci_set_power_state(pdev, PCI_D3hot);
6485         }
6486
6487         return 0;
6488 }
6489
6490 static int igb_resume(struct pci_dev *pdev)
6491 {
6492         struct net_device *netdev = pci_get_drvdata(pdev);
6493         struct igb_adapter *adapter = netdev_priv(netdev);
6494         struct e1000_hw *hw = &adapter->hw;
6495         u32 err;
6496
6497         pci_set_power_state(pdev, PCI_D0);
6498         pci_restore_state(pdev);
6499         pci_save_state(pdev);
6500
6501         err = pci_enable_device_mem(pdev);
6502         if (err) {
6503                 dev_err(&pdev->dev,
6504                         "igb: Cannot enable PCI device from suspend\n");
6505                 return err;
6506         }
6507         pci_set_master(pdev);
6508
6509         pci_enable_wake(pdev, PCI_D3hot, 0);
6510         pci_enable_wake(pdev, PCI_D3cold, 0);
6511
6512         if (igb_init_interrupt_scheme(adapter)) {
6513                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6514                 return -ENOMEM;
6515         }
6516
6517         igb_reset(adapter);
6518
6519         /* let the f/w know that the h/w is now under the control of the
6520          * driver. */
6521         igb_get_hw_control(adapter);
6522
6523         wr32(E1000_WUS, ~0);
6524
6525         if (netif_running(netdev)) {
6526                 err = igb_open(netdev);
6527                 if (err)
6528                         return err;
6529         }
6530
6531         netif_device_attach(netdev);
6532
6533         return 0;
6534 }
6535 #endif
6536
6537 static void igb_shutdown(struct pci_dev *pdev)
6538 {
6539         bool wake;
6540
6541         __igb_shutdown(pdev, &wake);
6542
6543         if (system_state == SYSTEM_POWER_OFF) {
6544                 pci_wake_from_d3(pdev, wake);
6545                 pci_set_power_state(pdev, PCI_D3hot);
6546         }
6547 }
6548
6549 #ifdef CONFIG_NET_POLL_CONTROLLER
6550 /*
6551  * Polling 'interrupt' - used by things like netconsole to send skbs
6552  * without having to re-enable interrupts. It's not called while
6553  * the interrupt routine is executing.
6554  */
6555 static void igb_netpoll(struct net_device *netdev)
6556 {
6557         struct igb_adapter *adapter = netdev_priv(netdev);
6558         struct e1000_hw *hw = &adapter->hw;
6559         int i;
6560
6561         if (!adapter->msix_entries) {
6562                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6563                 igb_irq_disable(adapter);
6564                 napi_schedule(&q_vector->napi);
6565                 return;
6566         }
6567
6568         for (i = 0; i < adapter->num_q_vectors; i++) {
6569                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6570                 wr32(E1000_EIMC, q_vector->eims_value);
6571                 napi_schedule(&q_vector->napi);
6572         }
6573 }
6574 #endif /* CONFIG_NET_POLL_CONTROLLER */
6575
6576 /**
6577  * igb_io_error_detected - called when PCI error is detected
6578  * @pdev: Pointer to PCI device
6579  * @state: The current pci connection state
6580  *
6581  * This function is called after a PCI bus error affecting
6582  * this device has been detected.
6583  */
6584 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6585                                               pci_channel_state_t state)
6586 {
6587         struct net_device *netdev = pci_get_drvdata(pdev);
6588         struct igb_adapter *adapter = netdev_priv(netdev);
6589
6590         netif_device_detach(netdev);
6591
6592         if (state == pci_channel_io_perm_failure)
6593                 return PCI_ERS_RESULT_DISCONNECT;
6594
6595         if (netif_running(netdev))
6596                 igb_down(adapter);
6597         pci_disable_device(pdev);
6598
6599         /* Request a slot reset. */
6600         return PCI_ERS_RESULT_NEED_RESET;
6601 }
6602
6603 /**
6604  * igb_io_slot_reset - called after the pci bus has been reset.
6605  * @pdev: Pointer to PCI device
6606  *
6607  * Restart the card from scratch, as if from a cold-boot. Implementation
6608  * resembles the first-half of the igb_resume routine.
6609  */
6610 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6611 {
6612         struct net_device *netdev = pci_get_drvdata(pdev);
6613         struct igb_adapter *adapter = netdev_priv(netdev);
6614         struct e1000_hw *hw = &adapter->hw;
6615         pci_ers_result_t result;
6616         int err;
6617
6618         if (pci_enable_device_mem(pdev)) {
6619                 dev_err(&pdev->dev,
6620                         "Cannot re-enable PCI device after reset.\n");
6621                 result = PCI_ERS_RESULT_DISCONNECT;
6622         } else {
6623                 pci_set_master(pdev);
6624                 pci_restore_state(pdev);
6625                 pci_save_state(pdev);
6626
6627                 pci_enable_wake(pdev, PCI_D3hot, 0);
6628                 pci_enable_wake(pdev, PCI_D3cold, 0);
6629
6630                 igb_reset(adapter);
6631                 wr32(E1000_WUS, ~0);
6632                 result = PCI_ERS_RESULT_RECOVERED;
6633         }
6634
6635         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6636         if (err) {
6637                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6638                         "failed 0x%0x\n", err);
6639                 /* non-fatal, continue */
6640         }
6641
6642         return result;
6643 }
6644
6645 /**
6646  * igb_io_resume - called when traffic can start flowing again.
6647  * @pdev: Pointer to PCI device
6648  *
6649  * This callback is called when the error recovery driver tells us that
6650  * its OK to resume normal operation. Implementation resembles the
6651  * second-half of the igb_resume routine.
6652  */
6653 static void igb_io_resume(struct pci_dev *pdev)
6654 {
6655         struct net_device *netdev = pci_get_drvdata(pdev);
6656         struct igb_adapter *adapter = netdev_priv(netdev);
6657
6658         if (netif_running(netdev)) {
6659                 if (igb_up(adapter)) {
6660                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6661                         return;
6662                 }
6663         }
6664
6665         netif_device_attach(netdev);
6666
6667         /* let the f/w know that the h/w is now under the control of the
6668          * driver. */
6669         igb_get_hw_control(adapter);
6670 }
6671
6672 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6673                              u8 qsel)
6674 {
6675         u32 rar_low, rar_high;
6676         struct e1000_hw *hw = &adapter->hw;
6677
6678         /* HW expects these in little endian so we reverse the byte order
6679          * from network order (big endian) to little endian
6680          */
6681         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6682                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6683         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6684
6685         /* Indicate to hardware the Address is Valid. */
6686         rar_high |= E1000_RAH_AV;
6687
6688         if (hw->mac.type == e1000_82575)
6689                 rar_high |= E1000_RAH_POOL_1 * qsel;
6690         else
6691                 rar_high |= E1000_RAH_POOL_1 << qsel;
6692
6693         wr32(E1000_RAL(index), rar_low);
6694         wrfl();
6695         wr32(E1000_RAH(index), rar_high);
6696         wrfl();
6697 }
6698
6699 static int igb_set_vf_mac(struct igb_adapter *adapter,
6700                           int vf, unsigned char *mac_addr)
6701 {
6702         struct e1000_hw *hw = &adapter->hw;
6703         /* VF MAC addresses start at the end of the receive addresses and move
6704          * towards the first; as a result a collision should not be possible */
6705         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6706
6707         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6708
6709         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6710
6711         return 0;
6712 }
6713
6714 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6715 {
6716         struct igb_adapter *adapter = netdev_priv(netdev);
6717         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6718                 return -EINVAL;
6719         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6720         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6721         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6722                                       " change effective.\n");
6723         if (test_bit(__IGB_DOWN, &adapter->state)) {
6724                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6725                          " but the PF device is not up.\n");
6726                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6727                          " attempting to use the VF device.\n");
6728         }
6729         return igb_set_vf_mac(adapter, vf, mac);
6730 }
6731
6732 static int igb_link_mbps(int internal_link_speed)
6733 {
6734         switch (internal_link_speed) {
6735         case SPEED_100:
6736                 return 100;
6737         case SPEED_1000:
6738                 return 1000;
6739         default:
6740                 return 0;
6741         }
6742 }
6743
6744 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6745                                   int link_speed)
6746 {
6747         int rf_dec, rf_int;
6748         u32 bcnrc_val;
6749
6750         if (tx_rate != 0) {
6751                 /* Calculate the rate factor values to set */
6752                 rf_int = link_speed / tx_rate;
6753                 rf_dec = (link_speed - (rf_int * tx_rate));
6754                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6755
6756                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6757                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6758                                E1000_RTTBCNRC_RF_INT_MASK);
6759                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6760         } else {
6761                 bcnrc_val = 0;
6762         }
6763
6764         wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6765         wr32(E1000_RTTBCNRC, bcnrc_val);
6766 }
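
/*
 * Worked example of the rate-factor math above (values illustrative): with
 * link_speed = 1000 Mbps and tx_rate = 300 Mbps, rf_int = 1000 / 300 = 3 and
 * rf_dec = ((1000 - 900) << E1000_RTTBCNRC_RF_INT_SHIFT) / 300, i.e. the
 * fractional part of 1000/300 expressed in RF_INT_SHIFT binary fraction
 * bits.  Together they program RTTBCNRC with the factor 3.33 by which the
 * VF transmit queue is slowed relative to line rate, giving roughly the
 * requested 300 Mbps.
 */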
6767
6768 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6769 {
6770         int actual_link_speed, i;
6771         bool reset_rate = false;
6772
6773         /* VF TX rate limit was not set or not supported */
6774         if ((adapter->vf_rate_link_speed == 0) ||
6775             (adapter->hw.mac.type != e1000_82576))
6776                 return;
6777
6778         actual_link_speed = igb_link_mbps(adapter->link_speed);
6779         if (actual_link_speed != adapter->vf_rate_link_speed) {
6780                 reset_rate = true;
6781                 adapter->vf_rate_link_speed = 0;
6782                 dev_info(&adapter->pdev->dev,
6783                          "Link speed has been changed. VF Transmit "
6784                          "rate is disabled\n");
6785         }
6786
6787         for (i = 0; i < adapter->vfs_allocated_count; i++) {
6788                 if (reset_rate)
6789                         adapter->vf_data[i].tx_rate = 0;
6790
6791                 igb_set_vf_rate_limit(&adapter->hw, i,
6792                                       adapter->vf_data[i].tx_rate,
6793                                       actual_link_speed);
6794         }
6795 }
6796
6797 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6798 {
6799         struct igb_adapter *adapter = netdev_priv(netdev);
6800         struct e1000_hw *hw = &adapter->hw;
6801         int actual_link_speed;
6802
6803         if (hw->mac.type != e1000_82576)
6804                 return -EOPNOTSUPP;
6805
6806         actual_link_speed = igb_link_mbps(adapter->link_speed);
6807         if ((vf >= adapter->vfs_allocated_count) ||
6808             (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6809             (tx_rate < 0) || (tx_rate > actual_link_speed))
6810                 return -EINVAL;
6811
6812         adapter->vf_rate_link_speed = actual_link_speed;
6813         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6814         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6815
6816         return 0;
6817 }
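
/*
 * Usage note (device and VF numbers hypothetical): the ndo_set_vf_mac and
 * ndo_set_vf_tx_rate hooks above are normally exercised from iproute2, e.g.
 * "ip link set dev eth0 vf 0 mac 02:00:00:00:00:01" and
 * "ip link set dev eth0 vf 0 rate 300" (rate in Mbps, 0 removes the limit).
 */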
6818
6819 static int igb_ndo_get_vf_config(struct net_device *netdev,
6820                                  int vf, struct ifla_vf_info *ivi)
6821 {
6822         struct igb_adapter *adapter = netdev_priv(netdev);
6823         if (vf >= adapter->vfs_allocated_count)
6824                 return -EINVAL;
6825         ivi->vf = vf;
6826         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6827         ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6828         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6829         ivi->qos = adapter->vf_data[vf].pf_qos;
6830         return 0;
6831 }
6832
6833 static void igb_vmm_control(struct igb_adapter *adapter)
6834 {
6835         struct e1000_hw *hw = &adapter->hw;
6836         u32 reg;
6837
6838         switch (hw->mac.type) {
6839         case e1000_82575:
6840         default:
6841                 /* replication is not supported for 82575 */
6842                 return;
6843         case e1000_82576:
6844                 /* notify HW that the MAC is adding vlan tags */
6845                 reg = rd32(E1000_DTXCTL);
6846                 reg |= E1000_DTXCTL_VLAN_ADDED;
6847                 wr32(E1000_DTXCTL, reg); /* fall through */
6848         case e1000_82580:
6849                 /* enable replication vlan tag stripping */
6850                 reg = rd32(E1000_RPLOLR);
6851                 reg |= E1000_RPLOLR_STRVLAN;
6852                 wr32(E1000_RPLOLR, reg); /* fall through */
6853         case e1000_i350:
6854                 /* none of the above registers are supported by i350 */
6855                 break;
6856         }
6857
6858         if (adapter->vfs_allocated_count) {
6859                 igb_vmdq_set_loopback_pf(hw, true);
6860                 igb_vmdq_set_replication_pf(hw, true);
6861                 igb_vmdq_set_anti_spoofing_pf(hw, true,
6862                                                 adapter->vfs_allocated_count);
6863         } else {
6864                 igb_vmdq_set_loopback_pf(hw, false);
6865                 igb_vmdq_set_replication_pf(hw, false);
6866         }
6867 }
6868
6869 /* igb_main.c */