1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #include <linux/prefetch.h>
49 #ifdef CONFIG_IGB_DCA
50 #include <linux/dca.h>
51 #endif
52 #include "igb.h"
53
54 #define MAJ 3
55 #define MIN 0
56 #define BUILD 6
57 #define KFIX 2
58 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
59 __stringify(BUILD) "-k" __stringify(KFIX)
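/* With the values above, DRV_VERSION expands to the version string "3.0.6-k2". */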
60 char igb_driver_name[] = "igb";
61 char igb_driver_version[] = DRV_VERSION;
62 static const char igb_driver_string[] =
63                                 "Intel(R) Gigabit Ethernet Network Driver";
64 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
65
66 static const struct e1000_info *igb_info_tbl[] = {
67         [board_82575] = &e1000_82575_info,
68 };
69
70 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
87         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
88         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
89         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
90         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
91         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
92         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
93         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
94         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
95         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
96         /* required last entry */
97         {0, }
98 };
99
100 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
101
102 void igb_reset(struct igb_adapter *);
103 static int igb_setup_all_tx_resources(struct igb_adapter *);
104 static int igb_setup_all_rx_resources(struct igb_adapter *);
105 static void igb_free_all_tx_resources(struct igb_adapter *);
106 static void igb_free_all_rx_resources(struct igb_adapter *);
107 static void igb_setup_mrqc(struct igb_adapter *);
108 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
109 static void __devexit igb_remove(struct pci_dev *pdev);
110 static void igb_init_hw_timer(struct igb_adapter *adapter);
111 static int igb_sw_init(struct igb_adapter *);
112 static int igb_open(struct net_device *);
113 static int igb_close(struct net_device *);
114 static void igb_configure_tx(struct igb_adapter *);
115 static void igb_configure_rx(struct igb_adapter *);
116 static void igb_clean_all_tx_rings(struct igb_adapter *);
117 static void igb_clean_all_rx_rings(struct igb_adapter *);
118 static void igb_clean_tx_ring(struct igb_ring *);
119 static void igb_clean_rx_ring(struct igb_ring *);
120 static void igb_set_rx_mode(struct net_device *);
121 static void igb_update_phy_info(unsigned long);
122 static void igb_watchdog(unsigned long);
123 static void igb_watchdog_task(struct work_struct *);
124 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
125 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
126                                                  struct rtnl_link_stats64 *stats);
127 static int igb_change_mtu(struct net_device *, int);
128 static int igb_set_mac(struct net_device *, void *);
129 static void igb_set_uta(struct igb_adapter *adapter);
130 static irqreturn_t igb_intr(int irq, void *);
131 static irqreturn_t igb_intr_msi(int irq, void *);
132 static irqreturn_t igb_msix_other(int irq, void *);
133 static irqreturn_t igb_msix_ring(int irq, void *);
134 #ifdef CONFIG_IGB_DCA
135 static void igb_update_dca(struct igb_q_vector *);
136 static void igb_setup_dca(struct igb_adapter *);
137 #endif /* CONFIG_IGB_DCA */
138 static bool igb_clean_tx_irq(struct igb_q_vector *);
139 static int igb_poll(struct napi_struct *, int);
140 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
141 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
142 static void igb_tx_timeout(struct net_device *);
143 static void igb_reset_task(struct work_struct *);
144 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
145 static void igb_vlan_rx_add_vid(struct net_device *, u16);
146 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
147 static void igb_restore_vlan(struct igb_adapter *);
148 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
149 static void igb_ping_all_vfs(struct igb_adapter *);
150 static void igb_msg_task(struct igb_adapter *);
151 static void igb_vmm_control(struct igb_adapter *);
152 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
153 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
154 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
155 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
156                                int vf, u16 vlan, u8 qos);
157 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
158 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
159                                  struct ifla_vf_info *ivi);
160 static void igb_check_vf_rate_limit(struct igb_adapter *);
161
162 #ifdef CONFIG_PM
163 static int igb_suspend(struct pci_dev *, pm_message_t);
164 static int igb_resume(struct pci_dev *);
165 #endif
166 static void igb_shutdown(struct pci_dev *);
167 #ifdef CONFIG_IGB_DCA
168 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
169 static struct notifier_block dca_notifier = {
170         .notifier_call  = igb_notify_dca,
171         .next           = NULL,
172         .priority       = 0
173 };
174 #endif
175 #ifdef CONFIG_NET_POLL_CONTROLLER
176 /* for netdump / net console */
177 static void igb_netpoll(struct net_device *);
178 #endif
179 #ifdef CONFIG_PCI_IOV
180 static unsigned int max_vfs = 0;
181 module_param(max_vfs, uint, 0);
182 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
183                  "per physical function");
184 #endif /* CONFIG_PCI_IOV */
185
186 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
187                      pci_channel_state_t);
188 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
189 static void igb_io_resume(struct pci_dev *);
190
191 static struct pci_error_handlers igb_err_handler = {
192         .error_detected = igb_io_error_detected,
193         .slot_reset = igb_io_slot_reset,
194         .resume = igb_io_resume,
195 };
196
197
198 static struct pci_driver igb_driver = {
199         .name     = igb_driver_name,
200         .id_table = igb_pci_tbl,
201         .probe    = igb_probe,
202         .remove   = __devexit_p(igb_remove),
203 #ifdef CONFIG_PM
204         /* Power Management Hooks */
205         .suspend  = igb_suspend,
206         .resume   = igb_resume,
207 #endif
208         .shutdown = igb_shutdown,
209         .err_handler = &igb_err_handler
210 };
211
212 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
213 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
214 MODULE_LICENSE("GPL");
215 MODULE_VERSION(DRV_VERSION);
216
217 struct igb_reg_info {
218         u32 ofs;
219         char *name;
220 };
221
222 static const struct igb_reg_info igb_reg_info_tbl[] = {
223
224         /* General Registers */
225         {E1000_CTRL, "CTRL"},
226         {E1000_STATUS, "STATUS"},
227         {E1000_CTRL_EXT, "CTRL_EXT"},
228
229         /* Interrupt Registers */
230         {E1000_ICR, "ICR"},
231
232         /* RX Registers */
233         {E1000_RCTL, "RCTL"},
234         {E1000_RDLEN(0), "RDLEN"},
235         {E1000_RDH(0), "RDH"},
236         {E1000_RDT(0), "RDT"},
237         {E1000_RXDCTL(0), "RXDCTL"},
238         {E1000_RDBAL(0), "RDBAL"},
239         {E1000_RDBAH(0), "RDBAH"},
240
241         /* TX Registers */
242         {E1000_TCTL, "TCTL"},
243         {E1000_TDBAL(0), "TDBAL"},
244         {E1000_TDBAH(0), "TDBAH"},
245         {E1000_TDLEN(0), "TDLEN"},
246         {E1000_TDH(0), "TDH"},
247         {E1000_TDT(0), "TDT"},
248         {E1000_TXDCTL(0), "TXDCTL"},
249         {E1000_TDFH, "TDFH"},
250         {E1000_TDFT, "TDFT"},
251         {E1000_TDFHS, "TDFHS"},
252         {E1000_TDFPC, "TDFPC"},
253
254         /* List Terminator */
255         {}
256 };
257
258 /*
259  * igb_regdump - register printout routine
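 *
 * For the per-queue registers (RDLEN, RDH, RDT, RXDCTL, TDBAL, ...) the
 * values of the first four queue instances are read and printed on one line.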
260  */
261 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
262 {
263         int n = 0;
264         char rname[16];
265         u32 regs[8];
266
267         switch (reginfo->ofs) {
268         case E1000_RDLEN(0):
269                 for (n = 0; n < 4; n++)
270                         regs[n] = rd32(E1000_RDLEN(n));
271                 break;
272         case E1000_RDH(0):
273                 for (n = 0; n < 4; n++)
274                         regs[n] = rd32(E1000_RDH(n));
275                 break;
276         case E1000_RDT(0):
277                 for (n = 0; n < 4; n++)
278                         regs[n] = rd32(E1000_RDT(n));
279                 break;
280         case E1000_RXDCTL(0):
281                 for (n = 0; n < 4; n++)
282                         regs[n] = rd32(E1000_RXDCTL(n));
283                 break;
284         case E1000_RDBAL(0):
285                 for (n = 0; n < 4; n++)
286                         regs[n] = rd32(E1000_RDBAL(n));
287                 break;
288         case E1000_RDBAH(0):
289                 for (n = 0; n < 4; n++)
290                         regs[n] = rd32(E1000_RDBAH(n));
291                 break;
292         case E1000_TDBAL(0):
293                 for (n = 0; n < 4; n++)
294                         regs[n] = rd32(E1000_TDBAL(n));
295                 break;
296         case E1000_TDBAH(0):
297                 for (n = 0; n < 4; n++)
298                         regs[n] = rd32(E1000_TDBAH(n));
299                 break;
300         case E1000_TDLEN(0):
301                 for (n = 0; n < 4; n++)
302                         regs[n] = rd32(E1000_TDLEN(n));
303                 break;
304         case E1000_TDH(0):
305                 for (n = 0; n < 4; n++)
306                         regs[n] = rd32(E1000_TDH(n));
307                 break;
308         case E1000_TDT(0):
309                 for (n = 0; n < 4; n++)
310                         regs[n] = rd32(E1000_TDT(n));
311                 break;
312         case E1000_TXDCTL(0):
313                 for (n = 0; n < 4; n++)
314                         regs[n] = rd32(E1000_TXDCTL(n));
315                 break;
316         default:
317                 printk(KERN_INFO "%-15s %08x\n",
318                         reginfo->name, rd32(reginfo->ofs));
319                 return;
320         }
321
322         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
323         printk(KERN_INFO "%-15s ", rname);
324         for (n = 0; n < 4; n++)
325                 printk(KERN_CONT "%08x ", regs[n]);
326         printk(KERN_CONT "\n");
327 }
328
329 /*
330  * igb_dump - Print registers, tx-rings and rx-rings
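 *
 * Output is gated by the adapter message level: netif_msg_hw() enables the
 * dump as a whole, netif_msg_tx_done() and netif_msg_rx_status() enable the
 * per-ring descriptor dumps, and netif_msg_pktdata() adds hex dumps of the
 * buffer contents.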
331  */
332 static void igb_dump(struct igb_adapter *adapter)
333 {
334         struct net_device *netdev = adapter->netdev;
335         struct e1000_hw *hw = &adapter->hw;
336         struct igb_reg_info *reginfo;
337         int n = 0;
338         struct igb_ring *tx_ring;
339         union e1000_adv_tx_desc *tx_desc;
340         struct my_u0 { u64 a; u64 b; } *u0;
341         struct igb_buffer *buffer_info;
342         struct igb_ring *rx_ring;
343         union e1000_adv_rx_desc *rx_desc;
344         u32 staterr;
345         int i = 0;
346
347         if (!netif_msg_hw(adapter))
348                 return;
349
350         /* Print netdevice Info */
351         if (netdev) {
352                 dev_info(&adapter->pdev->dev, "Net device Info\n");
353                 printk(KERN_INFO "Device Name     state            "
354                         "trans_start      last_rx\n");
355                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
356                 netdev->name,
357                 netdev->state,
358                 netdev->trans_start,
359                 netdev->last_rx);
360         }
361
362         /* Print Registers */
363         dev_info(&adapter->pdev->dev, "Register Dump\n");
364         printk(KERN_INFO " Register Name   Value\n");
365         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
366              reginfo->name; reginfo++) {
367                 igb_regdump(hw, reginfo);
368         }
369
370         /* Print TX Ring Summary */
371         if (!netdev || !netif_running(netdev))
372                 goto exit;
373
374         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
375         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
376                 " leng ntw timestamp\n");
377         for (n = 0; n < adapter->num_tx_queues; n++) {
378                 tx_ring = adapter->tx_ring[n];
379                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
380                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
381                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
382                            (u64)buffer_info->dma,
383                            buffer_info->length,
384                            buffer_info->next_to_watch,
385                            (u64)buffer_info->time_stamp);
386         }
387
388         /* Print TX Rings */
389         if (!netif_msg_tx_done(adapter))
390                 goto rx_ring_summary;
391
392         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
393
394         /* Transmit Descriptor Formats
395          *
396          * Advanced Transmit Descriptor
397          *   +--------------------------------------------------------------+
398          * 0 |         Buffer Address [63:0]                                |
399          *   +--------------------------------------------------------------+
400          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
401          *   +--------------------------------------------------------------+
402          *   63      46 45    40 39 38 36 35 32 31   24             15       0
403          */
404
405         for (n = 0; n < adapter->num_tx_queues; n++) {
406                 tx_ring = adapter->tx_ring[n];
407                 printk(KERN_INFO "------------------------------------\n");
408                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
409                 printk(KERN_INFO "------------------------------------\n");
410                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
411                         "[PlPOCIStDDM Ln] [bi->dma       ] "
412                         "leng  ntw timestamp        bi->skb\n");
413
414                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
415                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
416                         buffer_info = &tx_ring->buffer_info[i];
417                         u0 = (struct my_u0 *)tx_desc;
418                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
419                                 " %04X  %3X %016llX %p", i,
420                                 le64_to_cpu(u0->a),
421                                 le64_to_cpu(u0->b),
422                                 (u64)buffer_info->dma,
423                                 buffer_info->length,
424                                 buffer_info->next_to_watch,
425                                 (u64)buffer_info->time_stamp,
426                                 buffer_info->skb);
427                         if (i == tx_ring->next_to_use &&
428                                 i == tx_ring->next_to_clean)
429                                 printk(KERN_CONT " NTC/U\n");
430                         else if (i == tx_ring->next_to_use)
431                                 printk(KERN_CONT " NTU\n");
432                         else if (i == tx_ring->next_to_clean)
433                                 printk(KERN_CONT " NTC\n");
434                         else
435                                 printk(KERN_CONT "\n");
436
437                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
438                                 print_hex_dump(KERN_INFO, "",
439                                         DUMP_PREFIX_ADDRESS,
440                                         16, 1, phys_to_virt(buffer_info->dma),
441                                         buffer_info->length, true);
442                 }
443         }
444
445         /* Print RX Rings Summary */
446 rx_ring_summary:
447         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
448         printk(KERN_INFO "Queue [NTU] [NTC]\n");
449         for (n = 0; n < adapter->num_rx_queues; n++) {
450                 rx_ring = adapter->rx_ring[n];
451                 printk(KERN_INFO " %5d %5X %5X\n", n,
452                            rx_ring->next_to_use, rx_ring->next_to_clean);
453         }
454
455         /* Print RX Rings */
456         if (!netif_msg_rx_status(adapter))
457                 goto exit;
458
459         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
460
461         /* Advanced Receive Descriptor (Read) Format
462          *    63                                           1        0
463          *    +-----------------------------------------------------+
464          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
465          *    +----------------------------------------------+------+
466          *  8 |       Header Buffer Address [63:1]           |  DD  |
467          *    +-----------------------------------------------------+
468          *
469          *
470          * Advanced Receive Descriptor (Write-Back) Format
471          *
472          *   63       48 47    32 31  30      21 20 17 16   4 3     0
473          *   +------------------------------------------------------+
474          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
475          *   | Checksum   Ident  |   |           |    | Type | Type |
476          *   +------------------------------------------------------+
477          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
478          *   +------------------------------------------------------+
479          *   63       48 47    32 31            20 19               0
480          */
481
482         for (n = 0; n < adapter->num_rx_queues; n++) {
483                 rx_ring = adapter->rx_ring[n];
484                 printk(KERN_INFO "------------------------------------\n");
485                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
486                 printk(KERN_INFO "------------------------------------\n");
487                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
488                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
489                         "<-- Adv Rx Read format\n");
490                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
491                         "[vl er S cks ln] ---------------- [bi->skb] "
492                         "<-- Adv Rx Write-Back format\n");
493
494                 for (i = 0; i < rx_ring->count; i++) {
495                         buffer_info = &rx_ring->buffer_info[i];
496                         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
497                         u0 = (struct my_u0 *)rx_desc;
498                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
499                         if (staterr & E1000_RXD_STAT_DD) {
500                                 /* Descriptor Done */
501                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
502                                         "%016llX ---------------- %p", i,
503                                         le64_to_cpu(u0->a),
504                                         le64_to_cpu(u0->b),
505                                         buffer_info->skb);
506                         } else {
507                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
508                                         "%016llX %016llX %p", i,
509                                         le64_to_cpu(u0->a),
510                                         le64_to_cpu(u0->b),
511                                         (u64)buffer_info->dma,
512                                         buffer_info->skb);
513
514                                 if (netif_msg_pktdata(adapter)) {
515                                         print_hex_dump(KERN_INFO, "",
516                                                 DUMP_PREFIX_ADDRESS,
517                                                 16, 1,
518                                                 phys_to_virt(buffer_info->dma),
519                                                 rx_ring->rx_buffer_len, true);
520                                         if (rx_ring->rx_buffer_len
521                                                 < IGB_RXBUFFER_1024)
522                                                 print_hex_dump(KERN_INFO, "",
523                                                   DUMP_PREFIX_ADDRESS,
524                                                   16, 1,
525                                                   phys_to_virt(
526                                                     buffer_info->page_dma +
527                                                     buffer_info->page_offset),
528                                                   PAGE_SIZE/2, true);
529                                 }
530                         }
531
532                         if (i == rx_ring->next_to_use)
533                                 printk(KERN_CONT " NTU\n");
534                         else if (i == rx_ring->next_to_clean)
535                                 printk(KERN_CONT " NTC\n");
536                         else
537                                 printk(KERN_CONT "\n");
538
539                 }
540         }
541
542 exit:
543         return;
544 }
545
546
547 /**
548  * igb_read_clock - read raw cycle counter (to be used by time counter)
549  */
550 static cycle_t igb_read_clock(const struct cyclecounter *tc)
551 {
552         struct igb_adapter *adapter =
553                 container_of(tc, struct igb_adapter, cycles);
554         struct e1000_hw *hw = &adapter->hw;
555         u64 stamp = 0;
556         int shift = 0;
557
558         /*
559          * The timestamp latches on lowest register read. For the 82580
560          * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
561          * adjusted TIMINCA, so SYSTIMR will just read as all 0s and can be ignored.
562          */
563         if (hw->mac.type == e1000_82580) {
564                 stamp = rd32(E1000_SYSTIMR) >> 8;
565                 shift = IGB_82580_TSYNC_SHIFT;
566         }
567
568         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
569         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
570         return stamp;
571 }
572
573 /**
574  * igb_get_hw_dev - return device
575  * used by hardware layer to print debugging information
576  **/
577 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
578 {
579         struct igb_adapter *adapter = hw->back;
580         return adapter->netdev;
581 }
582
583 /**
584  * igb_init_module - Driver Registration Routine
585  *
586  * igb_init_module is the first routine called when the driver is
587  * loaded. All it does is register with the PCI subsystem.
588  **/
589 static int __init igb_init_module(void)
590 {
591         int ret;
592         printk(KERN_INFO "%s - version %s\n",
593                igb_driver_string, igb_driver_version);
594
595         printk(KERN_INFO "%s\n", igb_copyright);
596
597 #ifdef CONFIG_IGB_DCA
598         dca_register_notify(&dca_notifier);
599 #endif
600         ret = pci_register_driver(&igb_driver);
601         return ret;
602 }
603
604 module_init(igb_init_module);
605
606 /**
607  * igb_exit_module - Driver Exit Cleanup Routine
608  *
609  * igb_exit_module is called just before the driver is removed
610  * from memory.
611  **/
612 static void __exit igb_exit_module(void)
613 {
614 #ifdef CONFIG_IGB_DCA
615         dca_unregister_notify(&dca_notifier);
616 #endif
617         pci_unregister_driver(&igb_driver);
618 }
619
620 module_exit(igb_exit_module);
621
622 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
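/*
 * Q_IDX_82576() interleaves queue indices so that consecutive values of i map
 * to 0, 8, 1, 9, 2, 10, ... which matches the 82576 queue layout used when
 * VFs are enabled (VF 0 owns queues 0 and 8, VF 1 owns 1 and 9, and so on).
 */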
623 /**
624  * igb_cache_ring_register - Descriptor ring to register mapping
625  * @adapter: board private structure to initialize
626  *
627  * Once we know the feature-set enabled for the device, we'll cache
628  * the register offset the descriptor ring is assigned to.
629  **/
630 static void igb_cache_ring_register(struct igb_adapter *adapter)
631 {
632         int i = 0, j = 0;
633         u32 rbase_offset = adapter->vfs_allocated_count;
634
635         switch (adapter->hw.mac.type) {
636         case e1000_82576:
637                 /* The queues are allocated for virtualization such that VF 0
638                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
639                  * In order to avoid collision we start at the first free queue
640                  * and continue consuming queues in the same sequence
641                  */
642                 if (adapter->vfs_allocated_count) {
643                         for (; i < adapter->rss_queues; i++)
644                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
645                                                                Q_IDX_82576(i);
646                 }
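                /* Fall through - remaining queues use the sequential mapping below */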
647         case e1000_82575:
648         case e1000_82580:
649         case e1000_i350:
650         default:
651                 for (; i < adapter->num_rx_queues; i++)
652                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
653                 for (; j < adapter->num_tx_queues; j++)
654                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
655                 break;
656         }
657 }
658
659 static void igb_free_queues(struct igb_adapter *adapter)
660 {
661         int i;
662
663         for (i = 0; i < adapter->num_tx_queues; i++) {
664                 kfree(adapter->tx_ring[i]);
665                 adapter->tx_ring[i] = NULL;
666         }
667         for (i = 0; i < adapter->num_rx_queues; i++) {
668                 kfree(adapter->rx_ring[i]);
669                 adapter->rx_ring[i] = NULL;
670         }
671         adapter->num_rx_queues = 0;
672         adapter->num_tx_queues = 0;
673 }
674
675 /**
676  * igb_alloc_queues - Allocate memory for all rings
677  * @adapter: board private structure to initialize
678  *
679  * We allocate one ring per queue at run-time since we don't know the
680  * number of queues at compile-time.
681  **/
682 static int igb_alloc_queues(struct igb_adapter *adapter)
683 {
684         struct igb_ring *ring;
685         int i;
686
687         for (i = 0; i < adapter->num_tx_queues; i++) {
688                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
689                 if (!ring)
690                         goto err;
691                 ring->count = adapter->tx_ring_count;
692                 ring->queue_index = i;
693                 ring->dev = &adapter->pdev->dev;
694                 ring->netdev = adapter->netdev;
695                 /* For 82575, context index must be unique per ring. */
696                 if (adapter->hw.mac.type == e1000_82575)
697                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
698                 adapter->tx_ring[i] = ring;
699         }
700
701         for (i = 0; i < adapter->num_rx_queues; i++) {
702                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
703                 if (!ring)
704                         goto err;
705                 ring->count = adapter->rx_ring_count;
706                 ring->queue_index = i;
707                 ring->dev = &adapter->pdev->dev;
708                 ring->netdev = adapter->netdev;
709                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
710                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
711                 /* set flag indicating ring supports SCTP checksum offload */
712                 if (adapter->hw.mac.type >= e1000_82576)
713                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
714                 adapter->rx_ring[i] = ring;
715         }
716
717         igb_cache_ring_register(adapter);
718
719         return 0;
720
721 err:
722         igb_free_queues(adapter);
723
724         return -ENOMEM;
725 }
726
727 #define IGB_N0_QUEUE -1
728 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
729 {
730         u32 msixbm = 0;
731         struct igb_adapter *adapter = q_vector->adapter;
732         struct e1000_hw *hw = &adapter->hw;
733         u32 ivar, index;
734         int rx_queue = IGB_N0_QUEUE;
735         int tx_queue = IGB_N0_QUEUE;
736
737         if (q_vector->rx_ring)
738                 rx_queue = q_vector->rx_ring->reg_idx;
739         if (q_vector->tx_ring)
740                 tx_queue = q_vector->tx_ring->reg_idx;
741
742         switch (hw->mac.type) {
743         case e1000_82575:
744                 /* The 82575 assigns vectors using a bitmask, which matches the
745                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
746                    or more queues to a vector, we write the appropriate bits
747                    into the MSIXBM register for that vector. */
748                 if (rx_queue > IGB_N0_QUEUE)
749                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
750                 if (tx_queue > IGB_N0_QUEUE)
751                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
752                 if (!adapter->msix_entries && msix_vector == 0)
753                         msixbm |= E1000_EIMS_OTHER;
754                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
755                 q_vector->eims_value = msixbm;
756                 break;
757         case e1000_82576:
758                 /* 82576 uses a table-based method for assigning vectors.
759                    Each queue has a single entry in the table to which we write
760                    a vector number along with a "valid" bit.  Sadly, the layout
761                    of the table is somewhat counterintuitive. */
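                /*
                 * IVAR0 layout used below (one 32-bit entry per index, where
                 * index = queue & 0x7): byte 0 = RX queues 0-7, byte 1 = TX
                 * queues 0-7, byte 2 = RX queues 8-15, byte 3 = TX queues 8-15.
                 */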
762                 if (rx_queue > IGB_N0_QUEUE) {
763                         index = (rx_queue & 0x7);
764                         ivar = array_rd32(E1000_IVAR0, index);
765                         if (rx_queue < 8) {
766                                 /* vector goes into low byte of register */
767                                 ivar = ivar & 0xFFFFFF00;
768                                 ivar |= msix_vector | E1000_IVAR_VALID;
769                         } else {
770                                 /* vector goes into third byte of register */
771                                 ivar = ivar & 0xFF00FFFF;
772                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
773                         }
774                         array_wr32(E1000_IVAR0, index, ivar);
775                 }
776                 if (tx_queue > IGB_N0_QUEUE) {
777                         index = (tx_queue & 0x7);
778                         ivar = array_rd32(E1000_IVAR0, index);
779                         if (tx_queue < 8) {
780                                 /* vector goes into second byte of register */
781                                 ivar = ivar & 0xFFFF00FF;
782                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
783                         } else {
784                                 /* vector goes into high byte of register */
785                                 ivar = ivar & 0x00FFFFFF;
786                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
787                         }
788                         array_wr32(E1000_IVAR0, index, ivar);
789                 }
790                 q_vector->eims_value = 1 << msix_vector;
791                 break;
792         case e1000_82580:
793         case e1000_i350:
794                 /* 82580 uses the same table-based approach as the 82576 but has fewer
795                    entries; as a result each IVAR entry covers a pair of queues. */
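                /*
                 * IVAR0 layout used below (index = queue >> 1): even-numbered
                 * queues use byte 0 (RX) and byte 1 (TX); odd-numbered queues
                 * use byte 2 (RX) and byte 3 (TX).
                 */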
796                 if (rx_queue > IGB_N0_QUEUE) {
797                         index = (rx_queue >> 1);
798                         ivar = array_rd32(E1000_IVAR0, index);
799                         if (rx_queue & 0x1) {
800                                 /* vector goes into third byte of register */
801                                 ivar = ivar & 0xFF00FFFF;
802                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
803                         } else {
804                                 /* vector goes into low byte of register */
805                                 ivar = ivar & 0xFFFFFF00;
806                                 ivar |= msix_vector | E1000_IVAR_VALID;
807                         }
808                         array_wr32(E1000_IVAR0, index, ivar);
809                 }
810                 if (tx_queue > IGB_N0_QUEUE) {
811                         index = (tx_queue >> 1);
812                         ivar = array_rd32(E1000_IVAR0, index);
813                         if (tx_queue & 0x1) {
814                                 /* vector goes into high byte of register */
815                                 ivar = ivar & 0x00FFFFFF;
816                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
817                         } else {
818                                 /* vector goes into second byte of register */
819                                 ivar = ivar & 0xFFFF00FF;
820                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
821                         }
822                         array_wr32(E1000_IVAR0, index, ivar);
823                 }
824                 q_vector->eims_value = 1 << msix_vector;
825                 break;
826         default:
827                 BUG();
828                 break;
829         }
830
831         /* add q_vector eims value to global eims_enable_mask */
832         adapter->eims_enable_mask |= q_vector->eims_value;
833
834         /* configure q_vector to set itr on first interrupt */
835         q_vector->set_itr = 1;
836 }
837
838 /**
839  * igb_configure_msix - Configure MSI-X hardware
840  *
841  * igb_configure_msix sets up the hardware to properly
842  * generate MSI-X interrupts.
843  **/
844 static void igb_configure_msix(struct igb_adapter *adapter)
845 {
846         u32 tmp;
847         int i, vector = 0;
848         struct e1000_hw *hw = &adapter->hw;
849
850         adapter->eims_enable_mask = 0;
851
852         /* set vector for other causes, i.e. link changes */
853         switch (hw->mac.type) {
854         case e1000_82575:
855                 tmp = rd32(E1000_CTRL_EXT);
856                 /* enable MSI-X PBA support*/
857                 tmp |= E1000_CTRL_EXT_PBA_CLR;
858
859                 /* Auto-Mask interrupts upon ICR read. */
860                 tmp |= E1000_CTRL_EXT_EIAME;
861                 tmp |= E1000_CTRL_EXT_IRCA;
862
863                 wr32(E1000_CTRL_EXT, tmp);
864
865                 /* enable msix_other interrupt */
866                 array_wr32(E1000_MSIXBM(0), vector++,
867                                       E1000_EIMS_OTHER);
868                 adapter->eims_other = E1000_EIMS_OTHER;
869
870                 break;
871
872         case e1000_82576:
873         case e1000_82580:
874         case e1000_i350:
875                 /* Turn on MSI-X capability first, or our settings
876                  * won't stick.  And it will take days to debug. */
877                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
878                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
879                                 E1000_GPIE_NSICR);
880
881                 /* enable msix_other interrupt */
882                 adapter->eims_other = 1 << vector;
883                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
884
885                 wr32(E1000_IVAR_MISC, tmp);
886                 break;
887         default:
888                 /* do nothing, since nothing else supports MSI-X */
889                 break;
890         } /* switch (hw->mac.type) */
891
892         adapter->eims_enable_mask |= adapter->eims_other;
893
894         for (i = 0; i < adapter->num_q_vectors; i++)
895                 igb_assign_vector(adapter->q_vector[i], vector++);
896
897         wrfl();
898 }
899
900 /**
901  * igb_request_msix - Initialize MSI-X interrupts
902  *
903  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
904  * kernel.
905  **/
906 static int igb_request_msix(struct igb_adapter *adapter)
907 {
908         struct net_device *netdev = adapter->netdev;
909         struct e1000_hw *hw = &adapter->hw;
910         int i, err = 0, vector = 0;
911
912         err = request_irq(adapter->msix_entries[vector].vector,
913                           igb_msix_other, 0, netdev->name, adapter);
914         if (err)
915                 goto out;
916         vector++;
917
918         for (i = 0; i < adapter->num_q_vectors; i++) {
919                 struct igb_q_vector *q_vector = adapter->q_vector[i];
920
921                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
922
923                 if (q_vector->rx_ring && q_vector->tx_ring)
924                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
925                                 q_vector->rx_ring->queue_index);
926                 else if (q_vector->tx_ring)
927                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
928                                 q_vector->tx_ring->queue_index);
929                 else if (q_vector->rx_ring)
930                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
931                                 q_vector->rx_ring->queue_index);
932                 else
933                         sprintf(q_vector->name, "%s-unused", netdev->name);
934
935                 err = request_irq(adapter->msix_entries[vector].vector,
936                                   igb_msix_ring, 0, q_vector->name,
937                                   q_vector);
938                 if (err)
939                         goto out;
940                 vector++;
941         }
942
943         igb_configure_msix(adapter);
944         return 0;
945 out:
946         return err;
947 }
948
949 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
950 {
951         if (adapter->msix_entries) {
952                 pci_disable_msix(adapter->pdev);
953                 kfree(adapter->msix_entries);
954                 adapter->msix_entries = NULL;
955         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
956                 pci_disable_msi(adapter->pdev);
957         }
958 }
959
960 /**
961  * igb_free_q_vectors - Free memory allocated for interrupt vectors
962  * @adapter: board private structure to initialize
963  *
964  * This function frees the memory allocated to the q_vectors.  In addition if
965  * NAPI is enabled it will delete any references to the NAPI struct prior
966  * to freeing the q_vector.
967  **/
968 static void igb_free_q_vectors(struct igb_adapter *adapter)
969 {
970         int v_idx;
971
972         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
973                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
974                 adapter->q_vector[v_idx] = NULL;
975                 if (!q_vector)
976                         continue;
977                 netif_napi_del(&q_vector->napi);
978                 kfree(q_vector);
979         }
980         adapter->num_q_vectors = 0;
981 }
982
983 /**
984  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
985  *
986  * This function resets the device so that it has 0 rx queues, tx queues, and
987  * MSI-X interrupts allocated.
988  */
989 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
990 {
991         igb_free_queues(adapter);
992         igb_free_q_vectors(adapter);
993         igb_reset_interrupt_capability(adapter);
994 }
995
996 /**
997  * igb_set_interrupt_capability - set MSI or MSI-X if supported
998  *
999  * Attempt to configure interrupts using the best available
1000  * capabilities of the hardware and kernel.
1001  **/
1002 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1003 {
1004         int err;
1005         int numvecs, i;
1006
1007         /* Number of supported queues. */
1008         adapter->num_rx_queues = adapter->rss_queues;
1009         if (adapter->vfs_allocated_count)
1010                 adapter->num_tx_queues = 1;
1011         else
1012                 adapter->num_tx_queues = adapter->rss_queues;
1013
1014         /* start with one vector for every rx queue */
1015         numvecs = adapter->num_rx_queues;
1016
1017         /* if tx handler is separate add 1 for every tx queue */
1018         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1019                 numvecs += adapter->num_tx_queues;
1020
1021         /* store the number of vectors reserved for queues */
1022         adapter->num_q_vectors = numvecs;
1023
1024         /* add 1 vector for link status interrupts */
1025         numvecs++;
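        /*
         * Example: with 4 RSS queues and queue pairing enabled this yields
         * 4 queue vectors + 1 link vector = 5 MSI-X entries; without pairing
         * it would be 4 + 4 + 1 = 9.
         */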
1026         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1027                                         GFP_KERNEL);
1028         if (!adapter->msix_entries)
1029                 goto msi_only;
1030
1031         for (i = 0; i < numvecs; i++)
1032                 adapter->msix_entries[i].entry = i;
1033
1034         err = pci_enable_msix(adapter->pdev,
1035                               adapter->msix_entries,
1036                               numvecs);
1037         if (err == 0)
1038                 goto out;
1039
1040         igb_reset_interrupt_capability(adapter);
1041
1042         /* If we can't do MSI-X, try MSI */
1043 msi_only:
1044 #ifdef CONFIG_PCI_IOV
1045         /* disable SR-IOV for non MSI-X configurations */
1046         if (adapter->vf_data) {
1047                 struct e1000_hw *hw = &adapter->hw;
1048                 /* disable iov and allow time for transactions to clear */
1049                 pci_disable_sriov(adapter->pdev);
1050                 msleep(500);
1051
1052                 kfree(adapter->vf_data);
1053                 adapter->vf_data = NULL;
1054                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1055                 msleep(100);
1056                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1057         }
1058 #endif
1059         adapter->vfs_allocated_count = 0;
1060         adapter->rss_queues = 1;
1061         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1062         adapter->num_rx_queues = 1;
1063         adapter->num_tx_queues = 1;
1064         adapter->num_q_vectors = 1;
1065         if (!pci_enable_msi(adapter->pdev))
1066                 adapter->flags |= IGB_FLAG_HAS_MSI;
1067 out:
1068         /* Notify the stack of the (possibly) reduced queue counts. */
1069         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1070         return netif_set_real_num_rx_queues(adapter->netdev,
1071                                             adapter->num_rx_queues);
1072 }
1073
1074 /**
1075  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1076  * @adapter: board private structure to initialize
1077  *
1078  * We allocate one q_vector per queue interrupt.  If allocation fails we
1079  * return -ENOMEM.
1080  **/
1081 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1082 {
1083         struct igb_q_vector *q_vector;
1084         struct e1000_hw *hw = &adapter->hw;
1085         int v_idx;
1086
1087         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1088                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1089                 if (!q_vector)
1090                         goto err_out;
1091                 q_vector->adapter = adapter;
1092                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1093                 q_vector->itr_val = IGB_START_ITR;
1094                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1095                 adapter->q_vector[v_idx] = q_vector;
1096         }
1097         return 0;
1098
1099 err_out:
1100         igb_free_q_vectors(adapter);
1101         return -ENOMEM;
1102 }
1103
1104 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1105                                       int ring_idx, int v_idx)
1106 {
1107         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1108
1109         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1110         q_vector->rx_ring->q_vector = q_vector;
1111         q_vector->itr_val = adapter->rx_itr_setting;
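        /* an ITR setting of 3 or less selects a dynamic/adaptive mode rather
         * than an actual interval, so start from the default ITR value */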
1112         if (q_vector->itr_val && q_vector->itr_val <= 3)
1113                 q_vector->itr_val = IGB_START_ITR;
1114 }
1115
1116 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1117                                       int ring_idx, int v_idx)
1118 {
1119         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1120
1121         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1122         q_vector->tx_ring->q_vector = q_vector;
1123         q_vector->itr_val = adapter->tx_itr_setting;
1124         if (q_vector->itr_val && q_vector->itr_val <= 3)
1125                 q_vector->itr_val = IGB_START_ITR;
1126 }
1127
1128 /**
1129  * igb_map_ring_to_vector - maps allocated queues to vectors
1130  *
1131  * This function maps the recently allocated queues to vectors.
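 * If there are at least as many vectors as queues, every ring gets its own
 * vector; otherwise the Tx and Rx rings with the same index share a vector.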
1132  **/
1133 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1134 {
1135         int i;
1136         int v_idx = 0;
1137
1138         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1139             (adapter->num_q_vectors < adapter->num_tx_queues))
1140                 return -ENOMEM;
1141
1142         if (adapter->num_q_vectors >=
1143             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1144                 for (i = 0; i < adapter->num_rx_queues; i++)
1145                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1146                 for (i = 0; i < adapter->num_tx_queues; i++)
1147                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1148         } else {
1149                 for (i = 0; i < adapter->num_rx_queues; i++) {
1150                         if (i < adapter->num_tx_queues)
1151                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1152                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1153                 }
1154                 for (; i < adapter->num_tx_queues; i++)
1155                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1156         }
1157         return 0;
1158 }
1159
1160 /**
1161  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1162  *
1163  * This function initializes the interrupts and allocates all of the queues.
1164  **/
1165 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1166 {
1167         struct pci_dev *pdev = adapter->pdev;
1168         int err;
1169
1170         err = igb_set_interrupt_capability(adapter);
1171         if (err)
1172                 return err;
1173
1174         err = igb_alloc_q_vectors(adapter);
1175         if (err) {
1176                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1177                 goto err_alloc_q_vectors;
1178         }
1179
1180         err = igb_alloc_queues(adapter);
1181         if (err) {
1182                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1183                 goto err_alloc_queues;
1184         }
1185
1186         err = igb_map_ring_to_vector(adapter);
1187         if (err) {
1188                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1189                 goto err_map_queues;
1190         }
1191
1192
1193         return 0;
1194 err_map_queues:
1195         igb_free_queues(adapter);
1196 err_alloc_queues:
1197         igb_free_q_vectors(adapter);
1198 err_alloc_q_vectors:
1199         igb_reset_interrupt_capability(adapter);
1200         return err;
1201 }
1202
1203 /**
1204  * igb_request_irq - initialize interrupts
1205  *
1206  * Attempts to configure interrupts using the best available
1207  * capabilities of the hardware and kernel.
1208  **/
1209 static int igb_request_irq(struct igb_adapter *adapter)
1210 {
1211         struct net_device *netdev = adapter->netdev;
1212         struct pci_dev *pdev = adapter->pdev;
1213         int err = 0;
1214
1215         if (adapter->msix_entries) {
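        /*
         * Interrupt setup falls back in order: MSI-X first, then MSI with a
         * single queue, and finally a shared legacy interrupt.
         */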
1216                 err = igb_request_msix(adapter);
1217                 if (!err)
1218                         goto request_done;
1219                 /* fall back to MSI */
1220                 igb_clear_interrupt_scheme(adapter);
1221                 if (!pci_enable_msi(adapter->pdev))
1222                         adapter->flags |= IGB_FLAG_HAS_MSI;
1223                 igb_free_all_tx_resources(adapter);
1224                 igb_free_all_rx_resources(adapter);
1225                 adapter->num_tx_queues = 1;
1226                 adapter->num_rx_queues = 1;
1227                 adapter->num_q_vectors = 1;
1228                 err = igb_alloc_q_vectors(adapter);
1229                 if (err) {
1230                         dev_err(&pdev->dev,
1231                                 "Unable to allocate memory for vectors\n");
1232                         goto request_done;
1233                 }
1234                 err = igb_alloc_queues(adapter);
1235                 if (err) {
1236                         dev_err(&pdev->dev,
1237                                 "Unable to allocate memory for queues\n");
1238                         igb_free_q_vectors(adapter);
1239                         goto request_done;
1240                 }
1241                 igb_setup_all_tx_resources(adapter);
1242                 igb_setup_all_rx_resources(adapter);
1243         } else {
1244                 igb_assign_vector(adapter->q_vector[0], 0);
1245         }
1246
1247         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1248                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1249                                   netdev->name, adapter);
1250                 if (!err)
1251                         goto request_done;
1252
1253                 /* fall back to legacy interrupts */
1254                 igb_reset_interrupt_capability(adapter);
1255                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1256         }
1257
1258         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1259                           netdev->name, adapter);
1260
1261         if (err)
1262                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1263                         err);
1264
1265 request_done:
1266         return err;
1267 }
1268
1269 static void igb_free_irq(struct igb_adapter *adapter)
1270 {
1271         if (adapter->msix_entries) {
1272                 int vector = 0, i;
1273
1274                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1275
1276                 for (i = 0; i < adapter->num_q_vectors; i++) {
1277                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1278                         free_irq(adapter->msix_entries[vector++].vector,
1279                                  q_vector);
1280                 }
1281         } else {
1282                 free_irq(adapter->pdev->irq, adapter);
1283         }
1284 }
1285
1286 /**
1287  * igb_irq_disable - Mask off interrupt generation on the NIC
1288  * @adapter: board private structure
1289  **/
1290 static void igb_irq_disable(struct igb_adapter *adapter)
1291 {
1292         struct e1000_hw *hw = &adapter->hw;
1293
1294         /*
1295          * we need to be careful when disabling interrupts.  The VFs are also
1296          * mapped into these registers, so clearing the bits can cause
1297          * issues for the VF drivers; we therefore only clear what we set
1298          */
1299         if (adapter->msix_entries) {
1300                 u32 regval = rd32(E1000_EIAM);
1301                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1302                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1303                 regval = rd32(E1000_EIAC);
1304                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1305         }
1306
1307         wr32(E1000_IAM, 0);
1308         wr32(E1000_IMC, ~0);
1309         wrfl();
1310         if (adapter->msix_entries) {
1311                 int i;
1312                 for (i = 0; i < adapter->num_q_vectors; i++)
1313                         synchronize_irq(adapter->msix_entries[i].vector);
1314         } else {
1315                 synchronize_irq(adapter->pdev->irq);
1316         }
1317 }
1318
1319 /**
1320  * igb_irq_enable - Enable default interrupt generation settings
1321  * @adapter: board private structure
1322  **/
1323 static void igb_irq_enable(struct igb_adapter *adapter)
1324 {
1325         struct e1000_hw *hw = &adapter->hw;
1326
1327         if (adapter->msix_entries) {
1328                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1329                 u32 regval = rd32(E1000_EIAC);
1330                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1331                 regval = rd32(E1000_EIAM);
1332                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1333                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1334                 if (adapter->vfs_allocated_count) {
1335                         wr32(E1000_MBVFIMR, 0xFF);
1336                         ims |= E1000_IMS_VMMB;
1337                 }
1338                 if (adapter->hw.mac.type == e1000_82580)
1339                         ims |= E1000_IMS_DRSTA;
1340
1341                 wr32(E1000_IMS, ims);
1342         } else {
1343                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1344                                 E1000_IMS_DRSTA);
1345                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1346                                 E1000_IMS_DRSTA);
1347         }
1348 }
1349
1350 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1351 {
1352         struct e1000_hw *hw = &adapter->hw;
1353         u16 vid = adapter->hw.mng_cookie.vlan_id;
1354         u16 old_vid = adapter->mng_vlan_id;
1355
1356         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1357                 /* add VID to filter table */
1358                 igb_vfta_set(hw, vid, true);
1359                 adapter->mng_vlan_id = vid;
1360         } else {
1361                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1362         }
1363
1364         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1365             (vid != old_vid) &&
1366             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1367                 /* remove VID from filter table */
1368                 igb_vfta_set(hw, old_vid, false);
1369         }
1370 }
1371
1372 /**
1373  * igb_release_hw_control - release control of the h/w to f/w
1374  * @adapter: address of board private structure
1375  *
1376  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1377  * For ASF and Pass Through versions of f/w this means that the
1378  * driver is no longer loaded.
1379  *
1380  **/
1381 static void igb_release_hw_control(struct igb_adapter *adapter)
1382 {
1383         struct e1000_hw *hw = &adapter->hw;
1384         u32 ctrl_ext;
1385
1386         /* Let firmware take over control of h/w */
1387         ctrl_ext = rd32(E1000_CTRL_EXT);
1388         wr32(E1000_CTRL_EXT,
1389                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1390 }
1391
1392 /**
1393  * igb_get_hw_control - get control of the h/w from f/w
1394  * @adapter: address of board private structure
1395  *
1396  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1397  * For ASF and Pass Through versions of f/w this means that
1398  * the driver is loaded.
1399  *
1400  **/
1401 static void igb_get_hw_control(struct igb_adapter *adapter)
1402 {
1403         struct e1000_hw *hw = &adapter->hw;
1404         u32 ctrl_ext;
1405
1406         /* Let firmware know the driver has taken over */
1407         ctrl_ext = rd32(E1000_CTRL_EXT);
1408         wr32(E1000_CTRL_EXT,
1409                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1410 }
1411
1412 /**
1413  * igb_configure - configure the hardware for RX and TX
1414  * @adapter: private board structure
1415  **/
1416 static void igb_configure(struct igb_adapter *adapter)
1417 {
1418         struct net_device *netdev = adapter->netdev;
1419         int i;
1420
1421         igb_get_hw_control(adapter);
1422         igb_set_rx_mode(netdev);
1423
1424         igb_restore_vlan(adapter);
1425
1426         igb_setup_tctl(adapter);
1427         igb_setup_mrqc(adapter);
1428         igb_setup_rctl(adapter);
1429
1430         igb_configure_tx(adapter);
1431         igb_configure_rx(adapter);
1432
1433         igb_rx_fifo_flush_82575(&adapter->hw);
1434
1435         /* call igb_desc_unused which always leaves
1436          * at least 1 descriptor unused to make sure
1437          * next_to_use != next_to_clean */
1438         for (i = 0; i < adapter->num_rx_queues; i++) {
1439                 struct igb_ring *ring = adapter->rx_ring[i];
1440                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1441         }
1442 }
1443
1444 /**
1445  * igb_power_up_link - Power up the phy/serdes link
1446  * @adapter: address of board private structure
1447  **/
1448 void igb_power_up_link(struct igb_adapter *adapter)
1449 {
1450         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1451                 igb_power_up_phy_copper(&adapter->hw);
1452         else
1453                 igb_power_up_serdes_link_82575(&adapter->hw);
1454 }
1455
1456 /**
1457  * igb_power_down_link - Power down the phy/serdes link
1458  * @adapter: address of board private structure
1459  */
1460 static void igb_power_down_link(struct igb_adapter *adapter)
1461 {
1462         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1463                 igb_power_down_phy_copper_82575(&adapter->hw);
1464         else
1465                 igb_shutdown_serdes_link_82575(&adapter->hw);
1466 }
1467
1468 /**
1469  * igb_up - Open the interface and prepare it to handle traffic
1470  * @adapter: board private structure
1471  **/
1472 int igb_up(struct igb_adapter *adapter)
1473 {
1474         struct e1000_hw *hw = &adapter->hw;
1475         int i;
1476
1477         /* hardware has been reset, we need to reload some things */
1478         igb_configure(adapter);
1479
1480         clear_bit(__IGB_DOWN, &adapter->state);
1481
1482         for (i = 0; i < adapter->num_q_vectors; i++) {
1483                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1484                 napi_enable(&q_vector->napi);
1485         }
1486         if (adapter->msix_entries)
1487                 igb_configure_msix(adapter);
1488         else
1489                 igb_assign_vector(adapter->q_vector[0], 0);
1490
1491         /* Clear any pending interrupts. */
1492         rd32(E1000_ICR);
1493         igb_irq_enable(adapter);
1494
1495         /* notify VFs that reset has been completed */
1496         if (adapter->vfs_allocated_count) {
1497                 u32 reg_data = rd32(E1000_CTRL_EXT);
1498                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1499                 wr32(E1000_CTRL_EXT, reg_data);
1500         }
1501
1502         netif_tx_start_all_queues(adapter->netdev);
1503
1504         /* start the watchdog. */
1505         hw->mac.get_link_status = 1;
1506         schedule_work(&adapter->watchdog_task);
1507
1508         return 0;
1509 }
1510
1511 void igb_down(struct igb_adapter *adapter)
1512 {
1513         struct net_device *netdev = adapter->netdev;
1514         struct e1000_hw *hw = &adapter->hw;
1515         u32 tctl, rctl;
1516         int i;
1517
1518         /* signal that we're down so the interrupt handler does not
1519          * reschedule our watchdog timer */
1520         set_bit(__IGB_DOWN, &adapter->state);
1521
1522         /* disable receives in the hardware */
1523         rctl = rd32(E1000_RCTL);
1524         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1525         /* flush and sleep below */
1526
1527         netif_tx_stop_all_queues(netdev);
1528
1529         /* disable transmits in the hardware */
1530         tctl = rd32(E1000_TCTL);
1531         tctl &= ~E1000_TCTL_EN;
1532         wr32(E1000_TCTL, tctl);
1533         /* flush both disables and wait for them to finish */
1534         wrfl();
1535         msleep(10);
1536
1537         for (i = 0; i < adapter->num_q_vectors; i++) {
1538                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1539                 napi_disable(&q_vector->napi);
1540         }
1541
1542         igb_irq_disable(adapter);
1543
1544         del_timer_sync(&adapter->watchdog_timer);
1545         del_timer_sync(&adapter->phy_info_timer);
1546
1547         netif_carrier_off(netdev);
1548
1549         /* record the stats before reset*/
1550         spin_lock(&adapter->stats64_lock);
1551         igb_update_stats(adapter, &adapter->stats64);
1552         spin_unlock(&adapter->stats64_lock);
1553
1554         adapter->link_speed = 0;
1555         adapter->link_duplex = 0;
1556
1557         if (!pci_channel_offline(adapter->pdev))
1558                 igb_reset(adapter);
1559         igb_clean_all_tx_rings(adapter);
1560         igb_clean_all_rx_rings(adapter);
1561 #ifdef CONFIG_IGB_DCA
1562
1563         /* since we reset the hardware DCA settings were cleared */
1564         igb_setup_dca(adapter);
1565 #endif
1566 }
1567
1568 void igb_reinit_locked(struct igb_adapter *adapter)
1569 {
1570         WARN_ON(in_interrupt());
1571         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1572                 msleep(1);
1573         igb_down(adapter);
1574         igb_up(adapter);
1575         clear_bit(__IGB_RESETTING, &adapter->state);
1576 }
1577
1578 void igb_reset(struct igb_adapter *adapter)
1579 {
1580         struct pci_dev *pdev = adapter->pdev;
1581         struct e1000_hw *hw = &adapter->hw;
1582         struct e1000_mac_info *mac = &hw->mac;
1583         struct e1000_fc_info *fc = &hw->fc;
1584         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1585         u16 hwm;
1586
1587         /* Repartition the PBA for MTUs greater than 9k.
1588          * CTRL.RST is required for the change to take effect.
1589          */
1590         switch (mac->type) {
1591         case e1000_i350:
1592         case e1000_82580:
1593                 pba = rd32(E1000_RXPBS);
1594                 pba = igb_rxpbs_adjust_82580(pba);
1595                 break;
1596         case e1000_82576:
1597                 pba = rd32(E1000_RXPBS);
1598                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1599                 break;
1600         case e1000_82575:
1601         default:
1602                 pba = E1000_PBA_34K;
1603                 break;
1604         }
1605
1606         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1607             (mac->type < e1000_82576)) {
1608                 /* adjust PBA for jumbo frames */
1609                 wr32(E1000_PBA, pba);
1610
1611                 /* To maintain wire speed transmits, the Tx FIFO should be
1612                  * large enough to accommodate two full transmit packets,
1613                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1614                  * the Rx FIFO should be large enough to accommodate at least
1615                  * one full receive packet and is similarly rounded up and
1616                  * expressed in KB. */
1617                 pba = rd32(E1000_PBA);
1618                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1619                 tx_space = pba >> 16;
1620                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1621                 pba &= 0xffff;
1622                 /* the tx fifo also stores 16 bytes of information about the tx
1623                  * packet, but don't include the ethernet FCS because hardware appends it */
1624                 min_tx_space = (adapter->max_frame_size +
1625                                 sizeof(union e1000_adv_tx_desc) -
1626                                 ETH_FCS_LEN) * 2;
1627                 min_tx_space = ALIGN(min_tx_space, 1024);
1628                 min_tx_space >>= 10;
1629                 /* software strips receive CRC, so leave room for it */
1630                 min_rx_space = adapter->max_frame_size;
1631                 min_rx_space = ALIGN(min_rx_space, 1024);
1632                 min_rx_space >>= 10;
1633
1634                 /* If current Tx allocation is less than the min Tx FIFO size,
1635                  * and the min Tx FIFO size is less than the current Rx FIFO
1636                  * allocation, take space away from current Rx allocation */
1637                 if (tx_space < min_tx_space &&
1638                     ((min_tx_space - tx_space) < pba)) {
1639                         pba = pba - (min_tx_space - tx_space);
1640
1641                         /* if short on rx space, rx wins and must trump tx
1642                          * adjustment */
1643                         if (pba < min_rx_space)
1644                                 pba = min_rx_space;
1645                 }
1646                 wr32(E1000_PBA, pba);
1647         }
1648
1649         /* flow control settings */
1650         /* The high water mark must be low enough to fit one full frame
1651          * (or the size used for early receive) above it in the Rx FIFO.
1652          * Set it to the lower of:
1653          * - 90% of the Rx FIFO size, or
1654          * - the full Rx FIFO size minus two full frames */
1655         hwm = min(((pba << 10) * 9 / 10),
1656                         ((pba << 10) - 2 * adapter->max_frame_size));
1657
1658         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1659         fc->low_water = fc->high_water - 16;
1660         fc->pause_time = 0xFFFF;
1661         fc->send_xon = 1;
1662         fc->current_mode = fc->requested_mode;
1663
1664         /* disable receive for all VFs and wait one second */
1665         if (adapter->vfs_allocated_count) {
1666                 int i;
1667                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1668                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1669
1670                 /* ping all the active vfs to let them know we are going down */
1671                 igb_ping_all_vfs(adapter);
1672
1673                 /* disable transmits and receives */
1674                 wr32(E1000_VFRE, 0);
1675                 wr32(E1000_VFTE, 0);
1676         }
1677
1678         /* Allow time for pending master requests to run */
1679         hw->mac.ops.reset_hw(hw);
1680         wr32(E1000_WUC, 0);
1681
1682         if (hw->mac.ops.init_hw(hw))
1683                 dev_err(&pdev->dev, "Hardware Error\n");
1684         if (hw->mac.type > e1000_82580) {
1685                 if (adapter->flags & IGB_FLAG_DMAC) {
1686                         u32 reg;
1687
1688                         /*
1689                          * DMA Coalescing high water mark needs to be higher
1690                          * than the Rx threshold.  The Rx threshold is
1691                          * currently pba - 6, so we should use a high water
1692                          * mark of pba - 4. */
1693                         hwm = (pba - 4) << 10;
1694
1695                         reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1696                                & E1000_DMACR_DMACTHR_MASK);
1697
1698                         /* transition to L0s or L1 if available */
1699                         reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1700
1701                         /* watchdog timer = ~1000 usec, in 32 usec intervals */
1702                         reg |= (1000 >> 5);
1703                         wr32(E1000_DMACR, reg);
1704
1705                         /* no lower threshold to disable coalescing (smart fifo)
1706                          * - UTRESH = 0 */
1707                         wr32(E1000_DMCRTRH, 0);
1708
1709                         /* write the high water mark computed above ((pba - 4) KB) to FCRTC */
1710                         wr32(E1000_FCRTC, hwm);
1711
1712                         /*
1713                          * This sets the time to wait before requesting a
1714                          * transition to the low power state to the number of
1715                          * usecs needed to receive a 512 byte frame at gigabit line rate
1716                          */
1717                         reg = rd32(E1000_DMCTLX);
1718                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
1719
1720                         /* Delay 255 usec before entering Lx state. */
1721                         reg |= 0xFF;
1722                         wr32(E1000_DMCTLX, reg);
1723
1724                         /* free space in Tx packet buffer to wake from DMAC */
1725                         wr32(E1000_DMCTXTH,
1726                              (IGB_MIN_TXPBSIZE -
1727                              (IGB_TX_BUF_4096 + adapter->max_frame_size))
1728                              >> 6);
1729
1730                         /* make low power state decision controlled by DMAC */
1731                         reg = rd32(E1000_PCIEMISC);
1732                         reg |= E1000_PCIEMISC_LX_DECISION;
1733                         wr32(E1000_PCIEMISC, reg);
1734                 } /* end if IGB_FLAG_DMAC set */
1735         }
1736         if (hw->mac.type == e1000_82580) {
1737                 u32 reg = rd32(E1000_PCIEMISC);
1738                 wr32(E1000_PCIEMISC,
1739                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1740         }
1741         if (!netif_running(adapter->netdev))
1742                 igb_power_down_link(adapter);
1743
1744         igb_update_mng_vlan(adapter);
1745
1746         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1747         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1748
1749         igb_get_phy_info(hw);
1750 }
1751
1752 static const struct net_device_ops igb_netdev_ops = {
1753         .ndo_open               = igb_open,
1754         .ndo_stop               = igb_close,
1755         .ndo_start_xmit         = igb_xmit_frame_adv,
1756         .ndo_get_stats64        = igb_get_stats64,
1757         .ndo_set_rx_mode        = igb_set_rx_mode,
1758         .ndo_set_multicast_list = igb_set_rx_mode,
1759         .ndo_set_mac_address    = igb_set_mac,
1760         .ndo_change_mtu         = igb_change_mtu,
1761         .ndo_do_ioctl           = igb_ioctl,
1762         .ndo_tx_timeout         = igb_tx_timeout,
1763         .ndo_validate_addr      = eth_validate_addr,
1764         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1765         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1766         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1767         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1768         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1769         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1770         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1771 #ifdef CONFIG_NET_POLL_CONTROLLER
1772         .ndo_poll_controller    = igb_netpoll,
1773 #endif
1774 };
1775
1776 /**
1777  * igb_probe - Device Initialization Routine
1778  * @pdev: PCI device information struct
1779  * @ent: entry in igb_pci_tbl
1780  *
1781  * Returns 0 on success, negative on failure
1782  *
1783  * igb_probe initializes an adapter identified by a pci_dev structure.
1784  * The OS initialization, configuring of the adapter private structure,
1785  * and a hardware reset occur.
1786  **/
1787 static int __devinit igb_probe(struct pci_dev *pdev,
1788                                const struct pci_device_id *ent)
1789 {
1790         struct net_device *netdev;
1791         struct igb_adapter *adapter;
1792         struct e1000_hw *hw;
1793         u16 eeprom_data = 0;
1794         s32 ret_val;
1795         static int global_quad_port_a; /* global quad port a indication */
1796         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1797         unsigned long mmio_start, mmio_len;
1798         int err, pci_using_dac;
1799         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1800         u8 part_str[E1000_PBANUM_LENGTH];
1801
1802         /* Catch broken hardware that put the wrong VF device ID in
1803          * the PCIe SR-IOV capability.
1804          */
1805         if (pdev->is_virtfn) {
1806                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1807                      pci_name(pdev), pdev->vendor, pdev->device);
1808                 return -EINVAL;
1809         }
1810
1811         err = pci_enable_device_mem(pdev);
1812         if (err)
1813                 return err;
1814
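             /* prefer 64-bit DMA and fall back to a 32-bit mask if the
              * platform cannot provide it */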
1815         pci_using_dac = 0;
1816         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1817         if (!err) {
1818                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1819                 if (!err)
1820                         pci_using_dac = 1;
1821         } else {
1822                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1823                 if (err) {
1824                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1825                         if (err) {
1826                                 dev_err(&pdev->dev, "No usable DMA "
1827                                         "configuration, aborting\n");
1828                                 goto err_dma;
1829                         }
1830                 }
1831         }
1832
1833         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1834                                            IORESOURCE_MEM),
1835                                            igb_driver_name);
1836         if (err)
1837                 goto err_pci_reg;
1838
1839         pci_enable_pcie_error_reporting(pdev);
1840
1841         pci_set_master(pdev);
1842         pci_save_state(pdev);
1843
1844         err = -ENOMEM;
1845         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1846                                    IGB_ABS_MAX_TX_QUEUES);
1847         if (!netdev)
1848                 goto err_alloc_etherdev;
1849
1850         SET_NETDEV_DEV(netdev, &pdev->dev);
1851
1852         pci_set_drvdata(pdev, netdev);
1853         adapter = netdev_priv(netdev);
1854         adapter->netdev = netdev;
1855         adapter->pdev = pdev;
1856         hw = &adapter->hw;
1857         hw->back = adapter;
1858         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1859
1860         mmio_start = pci_resource_start(pdev, 0);
1861         mmio_len = pci_resource_len(pdev, 0);
1862
1863         err = -EIO;
1864         hw->hw_addr = ioremap(mmio_start, mmio_len);
1865         if (!hw->hw_addr)
1866                 goto err_ioremap;
1867
1868         netdev->netdev_ops = &igb_netdev_ops;
1869         igb_set_ethtool_ops(netdev);
1870         netdev->watchdog_timeo = 5 * HZ;
1871
1872         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1873
1874         netdev->mem_start = mmio_start;
1875         netdev->mem_end = mmio_start + mmio_len;
1876
1877         /* PCI config space info */
1878         hw->vendor_id = pdev->vendor;
1879         hw->device_id = pdev->device;
1880         hw->revision_id = pdev->revision;
1881         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1882         hw->subsystem_device_id = pdev->subsystem_device;
1883
1884         /* Copy the default MAC, PHY and NVM function pointers */
1885         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1886         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1887         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1888         /* Initialize skew-specific constants */
1889         err = ei->get_invariants(hw);
1890         if (err)
1891                 goto err_sw_init;
1892
1893         /* setup the private structure */
1894         err = igb_sw_init(adapter);
1895         if (err)
1896                 goto err_sw_init;
1897
1898         igb_get_bus_info_pcie(hw);
1899
1900         hw->phy.autoneg_wait_to_complete = false;
1901
1902         /* Copper options */
1903         if (hw->phy.media_type == e1000_media_type_copper) {
1904                 hw->phy.mdix = AUTO_ALL_MODES;
1905                 hw->phy.disable_polarity_correction = false;
1906                 hw->phy.ms_type = e1000_ms_hw_default;
1907         }
1908
1909         if (igb_check_reset_block(hw))
1910                 dev_info(&pdev->dev,
1911                         "PHY reset is blocked due to SOL/IDER session.\n");
1912
1913         netdev->features = NETIF_F_SG |
1914                            NETIF_F_IP_CSUM |
1915                            NETIF_F_HW_VLAN_TX |
1916                            NETIF_F_HW_VLAN_RX |
1917                            NETIF_F_HW_VLAN_FILTER;
1918
1919         netdev->features |= NETIF_F_IPV6_CSUM;
1920         netdev->features |= NETIF_F_TSO;
1921         netdev->features |= NETIF_F_TSO6;
1922         netdev->features |= NETIF_F_GRO;
1923
1924         netdev->vlan_features |= NETIF_F_TSO;
1925         netdev->vlan_features |= NETIF_F_TSO6;
1926         netdev->vlan_features |= NETIF_F_IP_CSUM;
1927         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1928         netdev->vlan_features |= NETIF_F_SG;
1929
1930         if (pci_using_dac) {
1931                 netdev->features |= NETIF_F_HIGHDMA;
1932                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1933         }
1934
1935         if (hw->mac.type >= e1000_82576)
1936                 netdev->features |= NETIF_F_SCTP_CSUM;
1937
1938         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1939
1940         /* before reading the NVM, reset the controller to put the device in a
1941          * known good starting state */
1942         hw->mac.ops.reset_hw(hw);
1943
1944         /* make sure the NVM is good */
1945         if (hw->nvm.ops.validate(hw) < 0) {
1946                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1947                 err = -EIO;
1948                 goto err_eeprom;
1949         }
1950
1951         /* copy the MAC address out of the NVM */
1952         if (hw->mac.ops.read_mac_addr(hw))
1953                 dev_err(&pdev->dev, "NVM Read Error\n");
1954
1955         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1956         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1957
1958         if (!is_valid_ether_addr(netdev->perm_addr)) {
1959                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1960                 err = -EIO;
1961                 goto err_eeprom;
1962         }
1963
1964         setup_timer(&adapter->watchdog_timer, igb_watchdog,
1965                     (unsigned long) adapter);
1966         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1967                     (unsigned long) adapter);
1968
1969         INIT_WORK(&adapter->reset_task, igb_reset_task);
1970         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1971
1972         /* Initialize link properties that are user-changeable */
1973         adapter->fc_autoneg = true;
1974         hw->mac.autoneg = true;
1975         hw->phy.autoneg_advertised = 0x2f;
1976
1977         hw->fc.requested_mode = e1000_fc_default;
1978         hw->fc.current_mode = e1000_fc_default;
1979
1980         igb_validate_mdi_setting(hw);
1981
1982         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1983          * enable the ACPI Magic Packet filter
1984          */
1985
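             /* the APM wake setting lives in a per-port NVM word; pick the
              * offset that matches this PCI function */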
1986         if (hw->bus.func == 0)
1987                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1988         else if (hw->mac.type == e1000_82580)
1989                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1990                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1991                                  &eeprom_data);
1992         else if (hw->bus.func == 1)
1993                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1994
1995         if (eeprom_data & eeprom_apme_mask)
1996                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1997
1998         /* now that we have the eeprom settings, apply the special cases where
1999          * the eeprom may be wrong or the board simply won't support wake on
2000          * lan on a particular port */
2001         switch (pdev->device) {
2002         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2003                 adapter->eeprom_wol = 0;
2004                 break;
2005         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2006         case E1000_DEV_ID_82576_FIBER:
2007         case E1000_DEV_ID_82576_SERDES:
2008                 /* Wake events only supported on port A for dual fiber
2009                  * regardless of eeprom setting */
2010                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2011                         adapter->eeprom_wol = 0;
2012                 break;
2013         case E1000_DEV_ID_82576_QUAD_COPPER:
2014         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2015                 /* if quad port adapter, disable WoL on all but port A */
2016                 if (global_quad_port_a != 0)
2017                         adapter->eeprom_wol = 0;
2018                 else
2019                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2020                 /* Reset for multiple quad port adapters */
2021                 if (++global_quad_port_a == 4)
2022                         global_quad_port_a = 0;
2023                 break;
2024         }
2025
2026         /* initialize the wol settings based on the eeprom settings */
2027         adapter->wol = adapter->eeprom_wol;
2028         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2029
2030         /* reset the hardware with the new settings */
2031         igb_reset(adapter);
2032
2033         /* let the f/w know that the h/w is now under the control of the
2034          * driver. */
2035         igb_get_hw_control(adapter);
2036
2037         strcpy(netdev->name, "eth%d");
2038         err = register_netdev(netdev);
2039         if (err)
2040                 goto err_register;
2041
2042         /* carrier off reporting is important to ethtool even BEFORE open */
2043         netif_carrier_off(netdev);
2044
2045 #ifdef CONFIG_IGB_DCA
2046         if (dca_add_requester(&pdev->dev) == 0) {
2047                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2048                 dev_info(&pdev->dev, "DCA enabled\n");
2049                 igb_setup_dca(adapter);
2050         }
2051
2052 #endif
2053         /* do hw tstamp init after resetting */
2054         igb_init_hw_timer(adapter);
2055
2056         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2057         /* print bus type/speed/width info */
2058         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2059                  netdev->name,
2060                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2061                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2062                                                             "unknown"),
2063                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2064                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2065                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2066                    "unknown"),
2067                  netdev->dev_addr);
2068
2069         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2070         if (ret_val)
2071                 strcpy(part_str, "Unknown");
2072         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2073         dev_info(&pdev->dev,
2074                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2075                 adapter->msix_entries ? "MSI-X" :
2076                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2077                 adapter->num_rx_queues, adapter->num_tx_queues);
2078         switch (hw->mac.type) {
2079         case e1000_i350:
2080                 igb_set_eee_i350(hw);
2081                 break;
2082         default:
2083                 break;
2084         }
2085         return 0;
2086
2087 err_register:
2088         igb_release_hw_control(adapter);
2089 err_eeprom:
2090         if (!igb_check_reset_block(hw))
2091                 igb_reset_phy(hw);
2092
2093         if (hw->flash_address)
2094                 iounmap(hw->flash_address);
2095 err_sw_init:
2096         igb_clear_interrupt_scheme(adapter);
2097         iounmap(hw->hw_addr);
2098 err_ioremap:
2099         free_netdev(netdev);
2100 err_alloc_etherdev:
2101         pci_release_selected_regions(pdev,
2102                                      pci_select_bars(pdev, IORESOURCE_MEM));
2103 err_pci_reg:
2104 err_dma:
2105         pci_disable_device(pdev);
2106         return err;
2107 }
2108
2109 /**
2110  * igb_remove - Device Removal Routine
2111  * @pdev: PCI device information struct
2112  *
2113  * igb_remove is called by the PCI subsystem to alert the driver
2114  * that it should release a PCI device.  This could be caused by a
2115  * Hot-Plug event, or because the driver is going to be removed from
2116  * memory.
2117  **/
2118 static void __devexit igb_remove(struct pci_dev *pdev)
2119 {
2120         struct net_device *netdev = pci_get_drvdata(pdev);
2121         struct igb_adapter *adapter = netdev_priv(netdev);
2122         struct e1000_hw *hw = &adapter->hw;
2123
2124         /*
2125          * The watchdog timer may be rescheduled, so explicitly
2126          * disable watchdog from being rescheduled.
2127          */
2128         set_bit(__IGB_DOWN, &adapter->state);
2129         del_timer_sync(&adapter->watchdog_timer);
2130         del_timer_sync(&adapter->phy_info_timer);
2131
2132         cancel_work_sync(&adapter->reset_task);
2133         cancel_work_sync(&adapter->watchdog_task);
2134
2135 #ifdef CONFIG_IGB_DCA
2136         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2137                 dev_info(&pdev->dev, "DCA disabled\n");
2138                 dca_remove_requester(&pdev->dev);
2139                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2140                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2141         }
2142 #endif
2143
2144         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2145          * would have already happened in close and is redundant. */
2146         igb_release_hw_control(adapter);
2147
2148         unregister_netdev(netdev);
2149
2150         igb_clear_interrupt_scheme(adapter);
2151
2152 #ifdef CONFIG_PCI_IOV
2153         /* reclaim resources allocated to VFs */
2154         if (adapter->vf_data) {
2155                 /* disable iov and allow time for transactions to clear */
2156                 pci_disable_sriov(pdev);
2157                 msleep(500);
2158
2159                 kfree(adapter->vf_data);
2160                 adapter->vf_data = NULL;
2161                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2162                 msleep(100);
2163                 dev_info(&pdev->dev, "IOV Disabled\n");
2164         }
2165 #endif
2166
2167         iounmap(hw->hw_addr);
2168         if (hw->flash_address)
2169                 iounmap(hw->flash_address);
2170         pci_release_selected_regions(pdev,
2171                                      pci_select_bars(pdev, IORESOURCE_MEM));
2172
2173         free_netdev(netdev);
2174
2175         pci_disable_pcie_error_reporting(pdev);
2176
2177         pci_disable_device(pdev);
2178 }
2179
2180 /**
2181  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2182  * @adapter: board private structure to initialize
2183  *
2184  * This function initializes the vf specific data storage and then attempts to
2185  * allocate the VFs.  The reason for ordering it this way is that it is much
2186  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2187  * the memory for the VFs.
2188  **/
2189 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2190 {
2191 #ifdef CONFIG_PCI_IOV
2192         struct pci_dev *pdev = adapter->pdev;
2193
2194         if (adapter->vfs_allocated_count) {
2195                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2196                                            sizeof(struct vf_data_storage),
2197                                            GFP_KERNEL);
2198                 /* if allocation failed then we do not support SR-IOV */
2199                 if (!adapter->vf_data) {
2200                         adapter->vfs_allocated_count = 0;
2201                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2202                                 "Data Storage\n");
2203                 }
2204         }
2205
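             /* try to enable SR-IOV; on failure fall back to zero VFs,
              * otherwise give each VF a random MAC address */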
2206         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2207                 kfree(adapter->vf_data);
2208                 adapter->vf_data = NULL;
2209 #endif /* CONFIG_PCI_IOV */
2210                 adapter->vfs_allocated_count = 0;
2211 #ifdef CONFIG_PCI_IOV
2212         } else {
2213                 unsigned char mac_addr[ETH_ALEN];
2214                 int i;
2215                 dev_info(&pdev->dev, "%d vfs allocated\n",
2216                          adapter->vfs_allocated_count);
2217                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2218                         random_ether_addr(mac_addr);
2219                         igb_set_vf_mac(adapter, i, mac_addr);
2220                 }
2221                 /* DMA Coalescing is not supported in IOV mode. */
2222                 if (adapter->flags & IGB_FLAG_DMAC)
2223                         adapter->flags &= ~IGB_FLAG_DMAC;
2224         }
2225 #endif /* CONFIG_PCI_IOV */
2226 }
2227
2228
2229 /**
2230  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2231  * @adapter: board private structure to initialize
2232  *
2233  * igb_init_hw_timer initializes the function pointer and values for the hw
2234  * timer found in hardware.
2235  **/
2236 static void igb_init_hw_timer(struct igb_adapter *adapter)
2237 {
2238         struct e1000_hw *hw = &adapter->hw;
2239
2240         switch (hw->mac.type) {
2241         case e1000_i350:
2242         case e1000_82580:
2243                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2244                 adapter->cycles.read = igb_read_clock;
2245                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2246                 adapter->cycles.mult = 1;
2247                 /*
2248                  * The 82580 timesync updates the system timer by 8ns every 8ns,
2249                  * and the value cannot be shifted.  Instead we need to shift
2250                  * the registers to generate a 64bit timer value.  As a result
2251                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2252                  * 24 in order to generate a larger value for synchronization.
2253                  */
2254                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2255                 /* disable system timer temporarily by setting bit 31 */
2256                 wr32(E1000_TSAUXC, 0x80000000);
2257                 wrfl();
2258
2259                 /* Set registers so that rollover occurs soon to test this. */
2260                 wr32(E1000_SYSTIMR, 0x00000000);
2261                 wr32(E1000_SYSTIML, 0x80000000);
2262                 wr32(E1000_SYSTIMH, 0x000000FF);
2263                 wrfl();
2264
2265                 /* enable system timer by clearing bit 31 */
2266                 wr32(E1000_TSAUXC, 0x0);
2267                 wrfl();
2268
2269                 timecounter_init(&adapter->clock,
2270                                  &adapter->cycles,
2271                                  ktime_to_ns(ktime_get_real()));
2272                 /*
2273                  * Synchronize our NIC clock against system wall clock. NIC
2274                  * time stamp reading requires ~3us per sample, each sample
2275                  * was pretty stable even under load => only require 10
2276                  * samples for each offset comparison.
2277                  */
2278                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2279                 adapter->compare.source = &adapter->clock;
2280                 adapter->compare.target = ktime_get_real;
2281                 adapter->compare.num_samples = 10;
2282                 timecompare_update(&adapter->compare, 0);
2283                 break;
2284         case e1000_82576:
2285                 /*
2286                  * Initialize hardware timer: we keep it running just in case
2287                  * that some program needs it later on.
2288                  */
2289                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2290                 adapter->cycles.read = igb_read_clock;
2291                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2292                 adapter->cycles.mult = 1;
2293                 /*
2294                  * Scale the NIC clock cycle by a large factor so that
2295                  * relatively small clock corrections can be added or
2296                  * subtracted at each clock tick. The drawbacks of a large
2297                  * factor are a) that the clock register overflows more quickly
2298                  * (not such a big deal) and b) that the increment per tick has
2299                  * to fit into 24 bits.  As a result we need to use a shift of
2300                  * 19 so we can fit a value of 16 into the TIMINCA register.
2301                  */
2302                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2303                 wr32(E1000_TIMINCA,
2304                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2305                                 (16 << IGB_82576_TSYNC_SHIFT));
2306
2307                 /* Set registers so that rollover occurs soon to test this. */
2308                 wr32(E1000_SYSTIML, 0x00000000);
2309                 wr32(E1000_SYSTIMH, 0xFF800000);
2310                 wrfl();
2311
2312                 timecounter_init(&adapter->clock,
2313                                  &adapter->cycles,
2314                                  ktime_to_ns(ktime_get_real()));
2315                 /*
2316                  * Synchronize our NIC clock against system wall clock. NIC
2317                  * time stamp reading requires ~3us per sample, each sample
2318                  * was pretty stable even under load => only require 10
2319                  * samples for each offset comparison.
2320                  */
2321                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2322                 adapter->compare.source = &adapter->clock;
2323                 adapter->compare.target = ktime_get_real;
2324                 adapter->compare.num_samples = 10;
2325                 timecompare_update(&adapter->compare, 0);
2326                 break;
2327         case e1000_82575:
2328                 /* 82575 does not support timesync */
2329         default:
2330                 break;
2331         }
2332
2333 }
2334
2335 /**
2336  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2337  * @adapter: board private structure to initialize
2338  *
2339  * igb_sw_init initializes the Adapter private data structure.
2340  * Fields are initialized based on PCI device information and
2341  * OS network device settings (MTU size).
2342  **/
2343 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2344 {
2345         struct e1000_hw *hw = &adapter->hw;
2346         struct net_device *netdev = adapter->netdev;
2347         struct pci_dev *pdev = adapter->pdev;
2348
2349         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2350
2351         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2352         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2353         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2354         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2355
2356         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2357         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2358
2359         spin_lock_init(&adapter->stats64_lock);
2360 #ifdef CONFIG_PCI_IOV
2361         switch (hw->mac.type) {
2362         case e1000_82576:
2363         case e1000_i350:
2364                 if (max_vfs > 7) {
2365                         dev_warn(&pdev->dev,
2366                                  "Maximum of 7 VFs per PF, using max\n");
2367                         adapter->vfs_allocated_count = 7;
2368                 } else
2369                         adapter->vfs_allocated_count = max_vfs;
2370                 break;
2371         default:
2372                 break;
2373         }
2374 #endif /* CONFIG_PCI_IOV */
2375         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2376         /* i350 cannot do RSS and SR-IOV at the same time */
2377         if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2378                 adapter->rss_queues = 1;
2379
2380         /*
2381          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2382          * then we should combine the queues into a queue pair in order to
2383          * conserve interrupts due to limited supply
2384          */
2385         if ((adapter->rss_queues > 4) ||
2386             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2387                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2388
2389         /* This call may decrease the number of queues */
2390         if (igb_init_interrupt_scheme(adapter)) {
2391                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2392                 return -ENOMEM;
2393         }
2394
2395         igb_probe_vfs(adapter);
2396
2397         /* Explicitly disable IRQ since the NIC can be in any state. */
2398         igb_irq_disable(adapter);
2399
2400         if (hw->mac.type == e1000_i350)
2401                 adapter->flags &= ~IGB_FLAG_DMAC;
2402
2403         set_bit(__IGB_DOWN, &adapter->state);
2404         return 0;
2405 }
2406
2407 /**
2408  * igb_open - Called when a network interface is made active
2409  * @netdev: network interface device structure
2410  *
2411  * Returns 0 on success, negative value on failure
2412  *
2413  * The open entry point is called when a network interface is made
2414  * active by the system (IFF_UP).  At this point all resources needed
2415  * for transmit and receive operations are allocated, the interrupt
2416  * handler is registered with the OS, the watchdog timer is started,
2417  * and the stack is notified that the interface is ready.
2418  **/
2419 static int igb_open(struct net_device *netdev)
2420 {
2421         struct igb_adapter *adapter = netdev_priv(netdev);
2422         struct e1000_hw *hw = &adapter->hw;
2423         int err;
2424         int i;
2425
2426         /* disallow open during test */
2427         if (test_bit(__IGB_TESTING, &adapter->state))
2428                 return -EBUSY;
2429
2430         netif_carrier_off(netdev);
2431
2432         /* allocate transmit descriptors */
2433         err = igb_setup_all_tx_resources(adapter);
2434         if (err)
2435                 goto err_setup_tx;
2436
2437         /* allocate receive descriptors */
2438         err = igb_setup_all_rx_resources(adapter);
2439         if (err)
2440                 goto err_setup_rx;
2441
2442         igb_power_up_link(adapter);
2443
2444         /* before we allocate an interrupt, we must be ready to handle it.
2445          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2446          * as soon as we call pci_request_irq, so we have to setup our
2447          * as soon as we call pci_request_irq, so we have to set up our
2448         igb_configure(adapter);
2449
2450         err = igb_request_irq(adapter);
2451         if (err)
2452                 goto err_req_irq;
2453
2454         /* From here on the code is the same as igb_up() */
2455         clear_bit(__IGB_DOWN, &adapter->state);
2456
2457         for (i = 0; i < adapter->num_q_vectors; i++) {
2458                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2459                 napi_enable(&q_vector->napi);
2460         }
2461
2462         /* Clear any pending interrupts. */
2463         rd32(E1000_ICR);
2464
2465         igb_irq_enable(adapter);
2466
2467         /* notify VFs that reset has been completed */
2468         if (adapter->vfs_allocated_count) {
2469                 u32 reg_data = rd32(E1000_CTRL_EXT);
2470                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2471                 wr32(E1000_CTRL_EXT, reg_data);
2472         }
2473
2474         netif_tx_start_all_queues(netdev);
2475
2476         /* start the watchdog. */
2477         hw->mac.get_link_status = 1;
2478         schedule_work(&adapter->watchdog_task);
2479
2480         return 0;
2481
2482 err_req_irq:
2483         igb_release_hw_control(adapter);
2484         igb_power_down_link(adapter);
2485         igb_free_all_rx_resources(adapter);
2486 err_setup_rx:
2487         igb_free_all_tx_resources(adapter);
2488 err_setup_tx:
2489         igb_reset(adapter);
2490
2491         return err;
2492 }
2493
2494 /**
2495  * igb_close - Disables a network interface
2496  * @netdev: network interface device structure
2497  *
2498  * Returns 0, this is not allowed to fail
2499  *
2500  * The close entry point is called when an interface is de-activated
2501  * by the OS.  The hardware is still under the driver's control, but
2502  * needs to be disabled.  A global MAC reset is issued to stop the
2503  * hardware, and all transmit and receive resources are freed.
2504  **/
2505 static int igb_close(struct net_device *netdev)
2506 {
2507         struct igb_adapter *adapter = netdev_priv(netdev);
2508
2509         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2510         igb_down(adapter);
2511
2512         igb_free_irq(adapter);
2513
2514         igb_free_all_tx_resources(adapter);
2515         igb_free_all_rx_resources(adapter);
2516
2517         return 0;
2518 }
2519
2520 /**
2521  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2522  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2523  *
2524  * Return 0 on success, negative on failure
2525  **/
2526 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2527 {
2528         struct device *dev = tx_ring->dev;
2529         int size;
2530
2531         size = sizeof(struct igb_buffer) * tx_ring->count;
2532         tx_ring->buffer_info = vzalloc(size);
2533         if (!tx_ring->buffer_info)
2534                 goto err;
2535
2536         /* round up to nearest 4K */
2537         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2538         tx_ring->size = ALIGN(tx_ring->size, 4096);
2539
2540         tx_ring->desc = dma_alloc_coherent(dev,
2541                                            tx_ring->size,
2542                                            &tx_ring->dma,
2543                                            GFP_KERNEL);
2544
2545         if (!tx_ring->desc)
2546                 goto err;
2547
2548         tx_ring->next_to_use = 0;
2549         tx_ring->next_to_clean = 0;
2550         return 0;
2551
2552 err:
2553         vfree(tx_ring->buffer_info);
2554         dev_err(dev,
2555                 "Unable to allocate memory for the transmit descriptor ring\n");
2556         return -ENOMEM;
2557 }
2558
2559 /**
2560  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2561  *                                (Descriptors) for all queues
2562  * @adapter: board private structure
2563  *
2564  * Return 0 on success, negative on failure
2565  **/
2566 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2567 {
2568         struct pci_dev *pdev = adapter->pdev;
2569         int i, err = 0;
2570
2571         for (i = 0; i < adapter->num_tx_queues; i++) {
2572                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2573                 if (err) {
2574                         dev_err(&pdev->dev,
2575                                 "Allocation for Tx Queue %u failed\n", i);
2576                         for (i--; i >= 0; i--)
2577                                 igb_free_tx_resources(adapter->tx_ring[i]);
2578                         break;
2579                 }
2580         }
2581
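             /* map every absolute Tx queue index onto one of the allocated
              * rings (round-robin) so any queue the stack selects is valid */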
2582         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2583                 int r_idx = i % adapter->num_tx_queues;
2584                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2585         }
2586         return err;
2587 }
2588
2589 /**
2590  * igb_setup_tctl - configure the transmit control registers
2591  * @adapter: Board private structure
2592  **/
2593 void igb_setup_tctl(struct igb_adapter *adapter)
2594 {
2595         struct e1000_hw *hw = &adapter->hw;
2596         u32 tctl;
2597
2598         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2599         wr32(E1000_TXDCTL(0), 0);
2600
2601         /* Program the Transmit Control Register */
2602         tctl = rd32(E1000_TCTL);
2603         tctl &= ~E1000_TCTL_CT;
2604         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2605                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2606
2607         igb_config_collision_dist(hw);
2608
2609         /* Enable transmits */
2610         tctl |= E1000_TCTL_EN;
2611
2612         wr32(E1000_TCTL, tctl);
2613 }
2614
2615 /**
2616  * igb_configure_tx_ring - Configure transmit ring after Reset
2617  * @adapter: board private structure
2618  * @ring: tx ring to configure
2619  *
2620  * Configure a transmit ring after a reset.
2621  **/
2622 void igb_configure_tx_ring(struct igb_adapter *adapter,
2623                            struct igb_ring *ring)
2624 {
2625         struct e1000_hw *hw = &adapter->hw;
2626         u32 txdctl;
2627         u64 tdba = ring->dma;
2628         int reg_idx = ring->reg_idx;
2629
2630         /* disable the queue */
2631         txdctl = rd32(E1000_TXDCTL(reg_idx));
2632         wr32(E1000_TXDCTL(reg_idx),
2633                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2634         wrfl();
2635         mdelay(10);
2636
2637         wr32(E1000_TDLEN(reg_idx),
2638                         ring->count * sizeof(union e1000_adv_tx_desc));
2639         wr32(E1000_TDBAL(reg_idx),
2640                         tdba & 0x00000000ffffffffULL);
2641         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2642
2643         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2644         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2645         writel(0, ring->head);
2646         writel(0, ring->tail);
2647
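             /* program the prefetch, host and write-back thresholds, then
              * re-enable the queue */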
2648         txdctl |= IGB_TX_PTHRESH;
2649         txdctl |= IGB_TX_HTHRESH << 8;
2650         txdctl |= IGB_TX_WTHRESH << 16;
2651
2652         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2653         wr32(E1000_TXDCTL(reg_idx), txdctl);
2654 }
2655
2656 /**
2657  * igb_configure_tx - Configure transmit Unit after Reset
2658  * @adapter: board private structure
2659  *
2660  * Configure the Tx unit of the MAC after a reset.
2661  **/
2662 static void igb_configure_tx(struct igb_adapter *adapter)
2663 {
2664         int i;
2665
2666         for (i = 0; i < adapter->num_tx_queues; i++)
2667                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2668 }
2669
2670 /**
2671  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2672  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2673  *
2674  * Returns 0 on success, negative on failure
2675  **/
2676 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2677 {
2678         struct device *dev = rx_ring->dev;
2679         int size, desc_len;
2680
2681         size = sizeof(struct igb_buffer) * rx_ring->count;
2682         rx_ring->buffer_info = vzalloc(size);
2683         if (!rx_ring->buffer_info)
2684                 goto err;
2685
2686         desc_len = sizeof(union e1000_adv_rx_desc);
2687
2688         /* Round up to nearest 4K */
2689         rx_ring->size = rx_ring->count * desc_len;
2690         rx_ring->size = ALIGN(rx_ring->size, 4096);
2691
2692         rx_ring->desc = dma_alloc_coherent(dev,
2693                                            rx_ring->size,
2694                                            &rx_ring->dma,
2695                                            GFP_KERNEL);
2696
2697         if (!rx_ring->desc)
2698                 goto err;
2699
2700         rx_ring->next_to_clean = 0;
2701         rx_ring->next_to_use = 0;
2702
2703         return 0;
2704
2705 err:
2706         vfree(rx_ring->buffer_info);
2707         rx_ring->buffer_info = NULL;
2708         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2709                 " ring\n");
2710         return -ENOMEM;
2711 }
2712
2713 /**
2714  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2715  *                                (Descriptors) for all queues
2716  * @adapter: board private structure
2717  *
2718  * Return 0 on success, negative on failure
2719  **/
2720 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2721 {
2722         struct pci_dev *pdev = adapter->pdev;
2723         int i, err = 0;
2724
2725         for (i = 0; i < adapter->num_rx_queues; i++) {
2726                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2727                 if (err) {
2728                         dev_err(&pdev->dev,
2729                                 "Allocation for Rx Queue %u failed\n", i);
2730                         for (i--; i >= 0; i--)
2731                                 igb_free_rx_resources(adapter->rx_ring[i]);
2732                         break;
2733                 }
2734         }
2735
2736         return err;
2737 }
2738
2739 /**
2740  * igb_setup_mrqc - configure the multiple receive queue control registers
2741  * @adapter: Board private structure
2742  **/
2743 static void igb_setup_mrqc(struct igb_adapter *adapter)
2744 {
2745         struct e1000_hw *hw = &adapter->hw;
2746         u32 mrqc, rxcsum;
2747         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2748         union e1000_reta {
2749                 u32 dword;
2750                 u8  bytes[4];
2751         } reta;
2752         static const u8 rsshash[40] = {
2753                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2754                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2755                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2756                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2757
2758         /* Fill out hash function seeds */
2759         for (j = 0; j < 10; j++) {
2760                 u32 rsskey = rsshash[(j * 4)];
2761                 rsskey |= rsshash[(j * 4) + 1] << 8;
2762                 rsskey |= rsshash[(j * 4) + 2] << 16;
2763                 rsskey |= rsshash[(j * 4) + 3] << 24;
2764                 array_wr32(E1000_RSSRK(0), j, rsskey);
2765         }
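             /*
              * The 40-byte RSS key above fills the ten 32-bit RSSRK registers,
              * four bytes per register, least-significant byte first; e.g.
              * RSSRK(0) ends up as 0xda565a6d from the first four key bytes.
              */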
2766
2767         num_rx_queues = adapter->rss_queues;
2768
2769         if (adapter->vfs_allocated_count) {
2770         /* 82575 and 82576 support 2 RSS queues for VMDq */
2771                 switch (hw->mac.type) {
2772                 case e1000_i350:
2773                 case e1000_82580:
2774                         num_rx_queues = 1;
2775                         shift = 0;
2776                         break;
2777                 case e1000_82576:
2778                         shift = 3;
2779                         num_rx_queues = 2;
2780                         break;
2781                 case e1000_82575:
2782                         shift = 2;
2783                         shift2 = 6;
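                             /* fall through */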
2784                 default:
2785                         break;
2786                 }
2787         } else {
2788                 if (hw->mac.type == e1000_82575)
2789                         shift = 6;
2790         }
2791
2792         for (j = 0; j < (32 * 4); j++) {
2793                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2794                 if (shift2)
2795                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2796                 if ((j & 3) == 3)
2797                         wr32(E1000_RETA(j >> 2), reta.dword);
2798         }
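             /*
              * The loop above fills the 128-entry redirection table (RETA) one
              * byte per entry, four entries per register write.  With e.g. four
              * RSS queues and shift 0 the entries simply cycle 0,1,2,3,0,1,...;
              * shift/shift2 move the queue index into the bit positions the
              * older MACs expect.
              */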
2799
2800         /*
2801          * Disable raw packet checksumming so that RSS hash is placed in
2802          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2803          * offloads as they are enabled by default
2804          */
2805         rxcsum = rd32(E1000_RXCSUM);
2806         rxcsum |= E1000_RXCSUM_PCSD;
2807
2808         if (adapter->hw.mac.type >= e1000_82576)
2809                 /* Enable Receive Checksum Offload for SCTP */
2810                 rxcsum |= E1000_RXCSUM_CRCOFL;
2811
2812         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2813         wr32(E1000_RXCSUM, rxcsum);
2814
2815         /* If VMDq is enabled then we set the appropriate mode for that, else
2816          * we default to RSS so that an RSS hash is calculated per packet even
2817          * if we are only using one queue */
2818         if (adapter->vfs_allocated_count) {
2819                 if (hw->mac.type > e1000_82575) {
2820                         /* Set the default pool for the PF's first queue */
2821                         u32 vtctl = rd32(E1000_VT_CTL);
2822                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2823                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2824                         vtctl |= adapter->vfs_allocated_count <<
2825                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2826                         wr32(E1000_VT_CTL, vtctl);
2827                 }
2828                 if (adapter->rss_queues > 1)
2829                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2830                 else
2831                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2832         } else {
2833                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2834         }
2835         igb_vmm_control(adapter);
2836
2837         /*
2838          * Generate RSS hash based on TCP port numbers and/or
2839          * IPv4/v6 src and dst addresses since UDP cannot be
2840          * hashed reliably due to IP fragmentation
2841          */
2842         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2843                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2844                 E1000_MRQC_RSS_FIELD_IPV6 |
2845                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2846                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2847
2848         wr32(E1000_MRQC, mrqc);
2849 }
2850
2851 /**
2852  * igb_setup_rctl - configure the receive control registers
2853  * @adapter: Board private structure
2854  **/
2855 void igb_setup_rctl(struct igb_adapter *adapter)
2856 {
2857         struct e1000_hw *hw = &adapter->hw;
2858         u32 rctl;
2859
2860         rctl = rd32(E1000_RCTL);
2861
2862         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2863         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2864
2865         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2866                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2867
2868         /*
2869          * enable stripping of CRC. It's unlikely this will break BMC
2870          * redirection as it did with e1000. Newer features require
2871          * that the HW strips the CRC.
2872          */
2873         rctl |= E1000_RCTL_SECRC;
2874
2875         /* disable store bad packets and clear size bits. */
2876         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2877
2878         /* enable LPE to prevent packets larger than max_frame_size */
2879         rctl |= E1000_RCTL_LPE;
2880
2881         /* disable queue 0 to prevent tail write w/o re-config */
2882         wr32(E1000_RXDCTL(0), 0);
2883
2884         /* Attention!!!  For SR-IOV PF driver operations you must enable
2885          * queue drop for all VF and PF queues to prevent head of line blocking
2886          * if an un-trusted VF does not provide descriptors to hardware.
2887          */
2888         if (adapter->vfs_allocated_count) {
2889                 /* set all queue drop enable bits */
2890                 wr32(E1000_QDE, ALL_QUEUES);
2891         }
2892
2893         wr32(E1000_RCTL, rctl);
2894 }
2895
2896 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2897                                    int vfn)
2898 {
2899         struct e1000_hw *hw = &adapter->hw;
2900         u32 vmolr;
2901
2902         /* if this is a VF (not the PF), check whether VLANs are enabled
2903          * for it and, if so, increase the size to allow for the VLAN tag */
2904         if (vfn < adapter->vfs_allocated_count &&
2905             adapter->vf_data[vfn].vlans_enabled)
2906                 size += VLAN_TAG_SIZE;
2907
2908         vmolr = rd32(E1000_VMOLR(vfn));
2909         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2910         vmolr |= size | E1000_VMOLR_LPE;
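             /*
              * The low bits of VMOLR hold the per-pool maximum frame size
              * (RLPML) in bytes; LPE allows this pool to accept frames larger
              * than the default 1522 bytes.
              */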
2911         wr32(E1000_VMOLR(vfn), vmolr);
2912
2913         return 0;
2914 }
2915
2916 /**
2917  * igb_rlpml_set - set maximum receive packet size
2918  * @adapter: board private structure
2919  *
2920  * Configure maximum receivable packet size.
2921  **/
2922 static void igb_rlpml_set(struct igb_adapter *adapter)
2923 {
2924         u32 max_frame_size = adapter->max_frame_size;
2925         struct e1000_hw *hw = &adapter->hw;
2926         u16 pf_id = adapter->vfs_allocated_count;
2927
2928         if (adapter->vlgrp)
2929                 max_frame_size += VLAN_TAG_SIZE;
2930
2931         /* if vfs are enabled we set RLPML to the largest possible request
2932          * size and set the VMOLR RLPML to the size we need */
2933         if (pf_id) {
2934                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2935                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2936         }
2937
2938         wr32(E1000_RLPML, max_frame_size);
2939 }
2940
2941 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2942                                  int vfn, bool aupe)
2943 {
2944         struct e1000_hw *hw = &adapter->hw;
2945         u32 vmolr;
2946
2947         /*
2948          * This register exists only on 82576 and newer so if we are older then
2949          * we should exit and do nothing
2950          */
2951         if (hw->mac.type < e1000_82576)
2952                 return;
2953
2954         vmolr = rd32(E1000_VMOLR(vfn));
2955         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2956         if (aupe)
2957                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2958         else
2959                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2960
2961         /* clear all bits that might not be set */
2962         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2963
2964         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2965                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2966         /*
2967          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2968          * multicast packets
2969          */
2970         if (vfn <= adapter->vfs_allocated_count)
2971                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2972
2973         wr32(E1000_VMOLR(vfn), vmolr);
2974 }
2975
2976 /**
2977  * igb_configure_rx_ring - Configure a receive ring after Reset
2978  * @adapter: board private structure
2979  * @ring: receive ring to be configured
2980  *
2981  * Configure the Rx unit of the MAC after a reset.
2982  **/
2983 void igb_configure_rx_ring(struct igb_adapter *adapter,
2984                            struct igb_ring *ring)
2985 {
2986         struct e1000_hw *hw = &adapter->hw;
2987         u64 rdba = ring->dma;
2988         int reg_idx = ring->reg_idx;
2989         u32 srrctl, rxdctl;
2990
2991         /* disable the queue */
2992         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2993         wr32(E1000_RXDCTL(reg_idx),
2994                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2995
2996         /* Set DMA base address registers */
2997         wr32(E1000_RDBAL(reg_idx),
2998              rdba & 0x00000000ffffffffULL);
2999         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3000         wr32(E1000_RDLEN(reg_idx),
3001                        ring->count * sizeof(union e1000_adv_rx_desc));
3002
3003         /* initialize head and tail */
3004         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
3005         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3006         writel(0, ring->head);
3007         writel(0, ring->tail);
3008
3009         /* set descriptor configuration */
3010         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
3011                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
3012                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3013 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3014                 srrctl |= IGB_RXBUFFER_16384 >>
3015                           E1000_SRRCTL_BSIZEPKT_SHIFT;
3016 #else
3017                 srrctl |= (PAGE_SIZE / 2) >>
3018                           E1000_SRRCTL_BSIZEPKT_SHIFT;
3019 #endif
3020                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3021         } else {
3022                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
3023                          E1000_SRRCTL_BSIZEPKT_SHIFT;
3024                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3025         }
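             /*
              * SRRCTL sizes are register fields, not raw byte counts: the
              * packet buffer size is programmed in 1 KB units and the header
              * buffer size (for header split) in 64 byte units, which is what
              * the BSIZEPKT/BSIZEHDRSIZE shifts above convert to.
              */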
3026         if (hw->mac.type == e1000_82580)
3027                 srrctl |= E1000_SRRCTL_TIMESTAMP;
3028         /* Only set Drop Enable if we are supporting multiple queues */
3029         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3030                 srrctl |= E1000_SRRCTL_DROP_EN;
3031
3032         wr32(E1000_SRRCTL(reg_idx), srrctl);
3033
3034         /* set filtering for VMDQ pools */
3035         igb_set_vmolr(adapter, reg_idx & 0x7, true);
3036
3037         /* enable receive descriptor fetching */
3038         rxdctl = rd32(E1000_RXDCTL(reg_idx));
3039         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3040         rxdctl &= 0xFFF00000;
3041         rxdctl |= IGB_RX_PTHRESH;
3042         rxdctl |= IGB_RX_HTHRESH << 8;
3043         rxdctl |= IGB_RX_WTHRESH << 16;
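             /*
              * As with TXDCTL, the RX prefetch, host and write-back thresholds
              * (in descriptors) live at bits 0+, 8+ and 16+ of RXDCTL.
              */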
3044         wr32(E1000_RXDCTL(reg_idx), rxdctl);
3045 }
3046
3047 /**
3048  * igb_configure_rx - Configure receive Unit after Reset
3049  * @adapter: board private structure
3050  *
3051  * Configure the Rx unit of the MAC after a reset.
3052  **/
3053 static void igb_configure_rx(struct igb_adapter *adapter)
3054 {
3055         int i;
3056
3057         /* set UTA to appropriate mode */
3058         igb_set_uta(adapter);
3059
3060         /* set the correct pool for the PF default MAC address in entry 0 */
3061         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3062                          adapter->vfs_allocated_count);
3063
3064         /* Setup the HW Rx Head and Tail Descriptor Pointers and
3065          * the Base and Length of the Rx Descriptor Ring */
3066         for (i = 0; i < adapter->num_rx_queues; i++)
3067                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3068 }
3069
3070 /**
3071  * igb_free_tx_resources - Free Tx Resources per Queue
3072  * @tx_ring: Tx descriptor ring for a specific queue
3073  *
3074  * Free all transmit software resources
3075  **/
3076 void igb_free_tx_resources(struct igb_ring *tx_ring)
3077 {
3078         igb_clean_tx_ring(tx_ring);
3079
3080         vfree(tx_ring->buffer_info);
3081         tx_ring->buffer_info = NULL;
3082
3083         /* if not set, then don't free */
3084         if (!tx_ring->desc)
3085                 return;
3086
3087         dma_free_coherent(tx_ring->dev, tx_ring->size,
3088                           tx_ring->desc, tx_ring->dma);
3089
3090         tx_ring->desc = NULL;
3091 }
3092
3093 /**
3094  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3095  * @adapter: board private structure
3096  *
3097  * Free all transmit software resources
3098  **/
3099 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3100 {
3101         int i;
3102
3103         for (i = 0; i < adapter->num_tx_queues; i++)
3104                 igb_free_tx_resources(adapter->tx_ring[i]);
3105 }
3106
3107 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3108                                     struct igb_buffer *buffer_info)
3109 {
3110         if (buffer_info->dma) {
3111                 if (buffer_info->mapped_as_page)
3112                         dma_unmap_page(tx_ring->dev,
3113                                         buffer_info->dma,
3114                                         buffer_info->length,
3115                                         DMA_TO_DEVICE);
3116                 else
3117                         dma_unmap_single(tx_ring->dev,
3118                                         buffer_info->dma,
3119                                         buffer_info->length,
3120                                         DMA_TO_DEVICE);
3121                 buffer_info->dma = 0;
3122         }
3123         if (buffer_info->skb) {
3124                 dev_kfree_skb_any(buffer_info->skb);
3125                 buffer_info->skb = NULL;
3126         }
3127         buffer_info->time_stamp = 0;
3128         buffer_info->length = 0;
3129         buffer_info->next_to_watch = 0;
3130         buffer_info->mapped_as_page = false;
3131 }
3132
3133 /**
3134  * igb_clean_tx_ring - Free Tx Buffers
3135  * @tx_ring: ring to be cleaned
3136  **/
3137 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3138 {
3139         struct igb_buffer *buffer_info;
3140         unsigned long size;
3141         unsigned int i;
3142
3143         if (!tx_ring->buffer_info)
3144                 return;
3145         /* Free all the Tx ring sk_buffs */
3146
3147         for (i = 0; i < tx_ring->count; i++) {
3148                 buffer_info = &tx_ring->buffer_info[i];
3149                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3150         }
3151
3152         size = sizeof(struct igb_buffer) * tx_ring->count;
3153         memset(tx_ring->buffer_info, 0, size);
3154
3155         /* Zero out the descriptor ring */
3156         memset(tx_ring->desc, 0, tx_ring->size);
3157
3158         tx_ring->next_to_use = 0;
3159         tx_ring->next_to_clean = 0;
3160 }
3161
3162 /**
3163  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3164  * @adapter: board private structure
3165  **/
3166 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3167 {
3168         int i;
3169
3170         for (i = 0; i < adapter->num_tx_queues; i++)
3171                 igb_clean_tx_ring(adapter->tx_ring[i]);
3172 }
3173
3174 /**
3175  * igb_free_rx_resources - Free Rx Resources
3176  * @rx_ring: ring to clean the resources from
3177  *
3178  * Free all receive software resources
3179  **/
3180 void igb_free_rx_resources(struct igb_ring *rx_ring)
3181 {
3182         igb_clean_rx_ring(rx_ring);
3183
3184         vfree(rx_ring->buffer_info);
3185         rx_ring->buffer_info = NULL;
3186
3187         /* if not set, then don't free */
3188         if (!rx_ring->desc)
3189                 return;
3190
3191         dma_free_coherent(rx_ring->dev, rx_ring->size,
3192                           rx_ring->desc, rx_ring->dma);
3193
3194         rx_ring->desc = NULL;
3195 }
3196
3197 /**
3198  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3199  * @adapter: board private structure
3200  *
3201  * Free all receive software resources
3202  **/
3203 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3204 {
3205         int i;
3206
3207         for (i = 0; i < adapter->num_rx_queues; i++)
3208                 igb_free_rx_resources(adapter->rx_ring[i]);
3209 }
3210
3211 /**
3212  * igb_clean_rx_ring - Free Rx Buffers per Queue
3213  * @rx_ring: ring to free buffers from
3214  **/
3215 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3216 {
3217         struct igb_buffer *buffer_info;
3218         unsigned long size;
3219         unsigned int i;
3220
3221         if (!rx_ring->buffer_info)
3222                 return;
3223
3224         /* Free all the Rx ring sk_buffs */
3225         for (i = 0; i < rx_ring->count; i++) {
3226                 buffer_info = &rx_ring->buffer_info[i];
3227                 if (buffer_info->dma) {
3228                         dma_unmap_single(rx_ring->dev,
3229                                          buffer_info->dma,
3230                                          rx_ring->rx_buffer_len,
3231                                          DMA_FROM_DEVICE);
3232                         buffer_info->dma = 0;
3233                 }
3234
3235                 if (buffer_info->skb) {
3236                         dev_kfree_skb(buffer_info->skb);
3237                         buffer_info->skb = NULL;
3238                 }
3239                 if (buffer_info->page_dma) {
3240                         dma_unmap_page(rx_ring->dev,
3241                                        buffer_info->page_dma,
3242                                        PAGE_SIZE / 2,
3243                                        DMA_FROM_DEVICE);
3244                         buffer_info->page_dma = 0;
3245                 }
3246                 if (buffer_info->page) {
3247                         put_page(buffer_info->page);
3248                         buffer_info->page = NULL;
3249                         buffer_info->page_offset = 0;
3250                 }
3251         }
3252
3253         size = sizeof(struct igb_buffer) * rx_ring->count;
3254         memset(rx_ring->buffer_info, 0, size);
3255
3256         /* Zero out the descriptor ring */
3257         memset(rx_ring->desc, 0, rx_ring->size);
3258
3259         rx_ring->next_to_clean = 0;
3260         rx_ring->next_to_use = 0;
3261 }
3262
3263 /**
3264  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3265  * @adapter: board private structure
3266  **/
3267 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3268 {
3269         int i;
3270
3271         for (i = 0; i < adapter->num_rx_queues; i++)
3272                 igb_clean_rx_ring(adapter->rx_ring[i]);
3273 }
3274
3275 /**
3276  * igb_set_mac - Change the Ethernet Address of the NIC
3277  * @netdev: network interface device structure
3278  * @p: pointer to an address structure
3279  *
3280  * Returns 0 on success, negative on failure
3281  **/
3282 static int igb_set_mac(struct net_device *netdev, void *p)
3283 {
3284         struct igb_adapter *adapter = netdev_priv(netdev);
3285         struct e1000_hw *hw = &adapter->hw;
3286         struct sockaddr *addr = p;
3287
3288         if (!is_valid_ether_addr(addr->sa_data))
3289                 return -EADDRNOTAVAIL;
3290
3291         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3292         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3293
3294         /* set the correct pool for the new PF MAC address in entry 0 */
3295         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3296                          adapter->vfs_allocated_count);
3297
3298         return 0;
3299 }
3300
3301 /**
3302  * igb_write_mc_addr_list - write multicast addresses to MTA
3303  * @netdev: network interface device structure
3304  *
3305  * Writes multicast address list to the MTA hash table.
3306  * Returns: -ENOMEM on failure
3307  *                0 on no addresses written
3308  *                X on writing X addresses to MTA
3309  **/
3310 static int igb_write_mc_addr_list(struct net_device *netdev)
3311 {
3312         struct igb_adapter *adapter = netdev_priv(netdev);
3313         struct e1000_hw *hw = &adapter->hw;
3314         struct netdev_hw_addr *ha;
3315         u8  *mta_list;
3316         int i;
3317
3318         if (netdev_mc_empty(netdev)) {
3319                 /* nothing to program, so clear mc list */
3320                 igb_update_mc_addr_list(hw, NULL, 0);
3321                 igb_restore_vf_multicasts(adapter);
3322                 return 0;
3323         }
3324
3325         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3326         if (!mta_list)
3327                 return -ENOMEM;
3328
3329         /* The shared function expects a packed array of only addresses. */
3330         i = 0;
3331         netdev_for_each_mc_addr(ha, netdev)
3332                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3333
3334         igb_update_mc_addr_list(hw, mta_list, i);
3335         kfree(mta_list);
3336
3337         return netdev_mc_count(netdev);
3338 }
3339
3340 /**
3341  * igb_write_uc_addr_list - write unicast addresses to RAR table
3342  * @netdev: network interface device structure
3343  *
3344  * Writes unicast address list to the RAR table.
3345  * Returns: -ENOMEM on failure/insufficient address space
3346  *                0 on no addresses written
3347  *                X on writing X addresses to the RAR table
3348  **/
3349 static int igb_write_uc_addr_list(struct net_device *netdev)
3350 {
3351         struct igb_adapter *adapter = netdev_priv(netdev);
3352         struct e1000_hw *hw = &adapter->hw;
3353         unsigned int vfn = adapter->vfs_allocated_count;
3354         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
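             /*
              * One RAR entry is reserved for the PF default MAC (entry 0) and
              * one per allocated VF at the top of the table, so only the
              * remaining entries are usable for extra unicast filters here.
              */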
3355         int count = 0;
3356
3357         /* return ENOMEM indicating insufficient memory for addresses */
3358         if (netdev_uc_count(netdev) > rar_entries)
3359                 return -ENOMEM;
3360
3361         if (!netdev_uc_empty(netdev) && rar_entries) {
3362                 struct netdev_hw_addr *ha;
3363
3364                 netdev_for_each_uc_addr(ha, netdev) {
3365                         if (!rar_entries)
3366                                 break;
3367                         igb_rar_set_qsel(adapter, ha->addr,
3368                                          rar_entries--,
3369                                          vfn);
3370                         count++;
3371                 }
3372         }
3373         /* zero the unused RAR entries; reverse order avoids write combining */
3374         for (; rar_entries > 0 ; rar_entries--) {
3375                 wr32(E1000_RAH(rar_entries), 0);
3376                 wr32(E1000_RAL(rar_entries), 0);
3377         }
3378         wrfl();
3379
3380         return count;
3381 }
3382
3383 /**
3384  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3385  * @netdev: network interface device structure
3386  *
3387  * The set_rx_mode entry point is called whenever the unicast or multicast
3388  * address lists or the network interface flags are updated.  This routine is
3389  * responsible for configuring the hardware for proper unicast, multicast,
3390  * promiscuous mode, and all-multi behavior.
3391  **/
3392 static void igb_set_rx_mode(struct net_device *netdev)
3393 {
3394         struct igb_adapter *adapter = netdev_priv(netdev);
3395         struct e1000_hw *hw = &adapter->hw;
3396         unsigned int vfn = adapter->vfs_allocated_count;
3397         u32 rctl, vmolr = 0;
3398         int count;
3399
3400         /* Check for Promiscuous and All Multicast modes */
3401         rctl = rd32(E1000_RCTL);
3402
3403         /* clear the affected bits */
3404         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3405
3406         if (netdev->flags & IFF_PROMISC) {
3407                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3408                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3409         } else {
3410                 if (netdev->flags & IFF_ALLMULTI) {
3411                         rctl |= E1000_RCTL_MPE;
3412                         vmolr |= E1000_VMOLR_MPME;
3413                 } else {
3414                         /*
3415                          * Write addresses to the MTA, if the attempt fails
3416                          * then we should just turn on multicast promiscuous mode so
3417                          * that we can at least receive multicast traffic
3418                          */
3419                         count = igb_write_mc_addr_list(netdev);
3420                         if (count < 0) {
3421                                 rctl |= E1000_RCTL_MPE;
3422                                 vmolr |= E1000_VMOLR_MPME;
3423                         } else if (count) {
3424                                 vmolr |= E1000_VMOLR_ROMPE;
3425                         }
3426                 }
3427                 /*
3428                  * Write addresses to available RAR registers, if there is not
3429                  * sufficient space to store all the addresses then enable
3430                  * unicast promiscuous mode
3431                  */
3432                 count = igb_write_uc_addr_list(netdev);
3433                 if (count < 0) {
3434                         rctl |= E1000_RCTL_UPE;
3435                         vmolr |= E1000_VMOLR_ROPE;
3436                 }
3437                 rctl |= E1000_RCTL_VFE;
3438         }
3439         wr32(E1000_RCTL, rctl);
3440
3441         /*
3442          * In order to support SR-IOV and eventually VMDq it is necessary to set
3443          * the VMOLR to enable the appropriate modes.  Without this workaround
3444          * we will have issues with VLAN tag stripping not being done for frames
3445          * that are only arriving because we are the default pool
3446          */
3447         if (hw->mac.type < e1000_82576)
3448                 return;
3449
3450         vmolr |= rd32(E1000_VMOLR(vfn)) &
3451                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3452         wr32(E1000_VMOLR(vfn), vmolr);
3453         igb_restore_vf_multicasts(adapter);
3454 }
3455
3456 static void igb_check_wvbr(struct igb_adapter *adapter)
3457 {
3458         struct e1000_hw *hw = &adapter->hw;
3459         u32 wvbr = 0;
3460
3461         switch (hw->mac.type) {
3462         case e1000_82576:
3463         case e1000_i350:
3464                 if (!(wvbr = rd32(E1000_WVBR)))
3465                         return;
3466                 break;
3467         default:
3468                 break;
3469         }
3470
3471         adapter->wvbr |= wvbr;
3472 }
3473
3474 #define IGB_STAGGERED_QUEUE_OFFSET 8
3475
3476 static void igb_spoof_check(struct igb_adapter *adapter)
3477 {
3478         int j;
3479
3480         if (!adapter->wvbr)
3481                 return;
3482
3483         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3484                 if (adapter->wvbr & (1 << j) ||
3485                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3486                         dev_warn(&adapter->pdev->dev,
3487                                 "Spoof event(s) detected on VF %d\n", j);
3488                         adapter->wvbr &=
3489                                 ~((1 << j) |
3490                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3491                 }
3492         }
3493 }
3494
3495 /* Need to wait a few seconds after link up to get diagnostic information from
3496  * the phy */
3497 static void igb_update_phy_info(unsigned long data)
3498 {
3499         struct igb_adapter *adapter = (struct igb_adapter *) data;
3500         igb_get_phy_info(&adapter->hw);
3501 }
3502
3503 /**
3504  * igb_has_link - check shared code for link and determine up/down
3505  * @adapter: pointer to driver private info
3506  **/
3507 bool igb_has_link(struct igb_adapter *adapter)
3508 {
3509         struct e1000_hw *hw = &adapter->hw;
3510         bool link_active = false;
3511         s32 ret_val = 0;
3512
3513         /* get_link_status is set on an LSC (link status change) interrupt
3514          * or an rx sequence error interrupt.  While it is set, link is
3515          * reported as down until check_for_link() re-establishes it;
3516          * this applies to copper adapters ONLY
3517          */
3518         switch (hw->phy.media_type) {
3519         case e1000_media_type_copper:
3520                 if (hw->mac.get_link_status) {
3521                         ret_val = hw->mac.ops.check_for_link(hw);
3522                         link_active = !hw->mac.get_link_status;
3523                 } else {
3524                         link_active = true;
3525                 }
3526                 break;
3527         case e1000_media_type_internal_serdes:
3528                 ret_val = hw->mac.ops.check_for_link(hw);
3529                 link_active = hw->mac.serdes_has_link;
3530                 break;
3531         default:
3532         case e1000_media_type_unknown:
3533                 break;
3534         }
3535
3536         return link_active;
3537 }
3538
3539 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3540 {
3541         bool ret = false;
3542         u32 ctrl_ext, thstat;
3543
3544         /* check for thermal sensor event on i350, copper only */
3545         if (hw->mac.type == e1000_i350) {
3546                 thstat = rd32(E1000_THSTAT);
3547                 ctrl_ext = rd32(E1000_CTRL_EXT);
3548
3549                 if ((hw->phy.media_type == e1000_media_type_copper) &&
3550                     !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3551                         ret = !!(thstat & event);
3552                 }
3553         }
3554
3555         return ret;
3556 }
3557
3558 /**
3559  * igb_watchdog - Timer Call-back
3560  * @data: pointer to adapter cast into an unsigned long
3561  **/
3562 static void igb_watchdog(unsigned long data)
3563 {
3564         struct igb_adapter *adapter = (struct igb_adapter *)data;
3565         /* Do the rest outside of interrupt context */
3566         schedule_work(&adapter->watchdog_task);
3567 }
3568
3569 static void igb_watchdog_task(struct work_struct *work)
3570 {
3571         struct igb_adapter *adapter = container_of(work,
3572                                                    struct igb_adapter,
3573                                                    watchdog_task);
3574         struct e1000_hw *hw = &adapter->hw;
3575         struct net_device *netdev = adapter->netdev;
3576         u32 link;
3577         int i;
3578
3579         link = igb_has_link(adapter);
3580         if (link) {
3581                 if (!netif_carrier_ok(netdev)) {
3582                         u32 ctrl;
3583                         hw->mac.ops.get_speed_and_duplex(hw,
3584                                                          &adapter->link_speed,
3585                                                          &adapter->link_duplex);
3586
3587                         ctrl = rd32(E1000_CTRL);
3588                         /* Link status message must follow this format */
3589                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3590                                  "Flow Control: %s\n",
3591                                netdev->name,
3592                                adapter->link_speed,
3593                                adapter->link_duplex == FULL_DUPLEX ?
3594                                  "Full Duplex" : "Half Duplex",
3595                                ((ctrl & E1000_CTRL_TFCE) &&
3596                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3597                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3598                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3599
3600                         /* check for thermal sensor event */
3601                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3602                                 printk(KERN_INFO "igb: %s The network adapter "
3603                                                  "link speed was downshifted "
3604                                                  "because it overheated.\n",
3605                                                  netdev->name);
3606                         }
3607
3608                         /* adjust timeout factor according to speed/duplex */
3609                         adapter->tx_timeout_factor = 1;
3610                         switch (adapter->link_speed) {
3611                         case SPEED_10:
3612                                 adapter->tx_timeout_factor = 14;
3613                                 break;
3614                         case SPEED_100:
3615                                 /* maybe add some timeout factor ? */
3616                                 break;
3617                         }
3618
3619                         netif_carrier_on(netdev);
3620
3621                         igb_ping_all_vfs(adapter);
3622                         igb_check_vf_rate_limit(adapter);
3623
3624                         /* link state has changed, schedule phy info update */
3625                         if (!test_bit(__IGB_DOWN, &adapter->state))
3626                                 mod_timer(&adapter->phy_info_timer,
3627                                           round_jiffies(jiffies + 2 * HZ));
3628                 }
3629         } else {
3630                 if (netif_carrier_ok(netdev)) {
3631                         adapter->link_speed = 0;
3632                         adapter->link_duplex = 0;
3633
3634                         /* check for thermal sensor event */
3635                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3636                                 printk(KERN_ERR "igb: %s The network adapter "
3637                                                 "was stopped because it "
3638                                                 "overheated.\n",
3639                                                 netdev->name);
3640                         }
3641
3642                         /* Link status message must follow this format */
3643                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3644                                netdev->name);
3645                         netif_carrier_off(netdev);
3646
3647                         igb_ping_all_vfs(adapter);
3648
3649                         /* link state has changed, schedule phy info update */
3650                         if (!test_bit(__IGB_DOWN, &adapter->state))
3651                                 mod_timer(&adapter->phy_info_timer,
3652                                           round_jiffies(jiffies + 2 * HZ));
3653                 }
3654         }
3655
3656         spin_lock(&adapter->stats64_lock);
3657         igb_update_stats(adapter, &adapter->stats64);
3658         spin_unlock(&adapter->stats64_lock);
3659
3660         for (i = 0; i < adapter->num_tx_queues; i++) {
3661                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3662                 if (!netif_carrier_ok(netdev)) {
3663                         /* We've lost link, so the controller stops DMA,
3664                          * but we've got queued Tx work that's never going
3665                          * to get done, so reset controller to flush Tx.
3666                          * (Do the reset outside of interrupt context). */
3667                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3668                                 adapter->tx_timeout_count++;
3669                                 schedule_work(&adapter->reset_task);
3670                                 /* return immediately since reset is imminent */
3671                                 return;
3672                         }
3673                 }
3674
3675                 /* Force detection of hung controller every watchdog period */
3676                 tx_ring->detect_tx_hung = true;
3677         }
3678
3679         /* Cause software interrupt to ensure rx ring is cleaned */
3680         if (adapter->msix_entries) {
3681                 u32 eics = 0;
3682                 for (i = 0; i < adapter->num_q_vectors; i++) {
3683                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3684                         eics |= q_vector->eims_value;
3685                 }
3686                 wr32(E1000_EICS, eics);
3687         } else {
3688                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3689         }
3690
3691         igb_spoof_check(adapter);
3692
3693         /* Reset the timer */
3694         if (!test_bit(__IGB_DOWN, &adapter->state))
3695                 mod_timer(&adapter->watchdog_timer,
3696                           round_jiffies(jiffies + 2 * HZ));
3697 }
3698
3699 enum latency_range {
3700         lowest_latency = 0,
3701         low_latency = 1,
3702         bulk_latency = 2,
3703         latency_invalid = 255
3704 };
3705
3706 /**
3707  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3708  *
3709  *      Stores a new ITR value based strictly on packet size.  This
3710  *      algorithm is less sophisticated than that used in igb_update_itr,
3711  *      due to the difficulty of synchronizing statistics across multiple
3712  *      receive rings.  The divisors and thresholds used by this function
3713  *      were determined based on theoretical maximum wire speed and testing
3714  *      data, in order to minimize response time while increasing bulk
3715  *      throughput.
3716  *      This functionality is controlled by the InterruptThrottleRate module
3717  *      parameter (see igb_param.c)
3718  *      NOTE:  This function is called only when operating in a multiqueue
3719  *             receive environment.
3720  * @q_vector: pointer to q_vector
3721  **/
3722 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3723 {
3724         int new_val = q_vector->itr_val;
3725         int avg_wire_size = 0;
3726         struct igb_adapter *adapter = q_vector->adapter;
3727         struct igb_ring *ring;
3728         unsigned int packets;
3729
3730         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3731          * ints/sec - an ITR value of 976.
3732          */
3733         if (adapter->link_speed != SPEED_1000) {
3734                 new_val = 976;
3735                 goto set_itr_val;
3736         }
3737
3738         ring = q_vector->rx_ring;
3739         if (ring) {
3740                 packets = ACCESS_ONCE(ring->total_packets);
3741
3742                 if (packets)
3743                         avg_wire_size = ring->total_bytes / packets;
3744         }
3745
3746         ring = q_vector->tx_ring;
3747         if (ring) {
3748                 packets = ACCESS_ONCE(ring->total_packets);
3749
3750                 if (packets)
3751                         avg_wire_size = max_t(u32, avg_wire_size,
3752                                               ring->total_bytes / packets);
3753         }
3754
3755         /* if avg_wire_size isn't set no work was done */
3756         if (!avg_wire_size)
3757                 goto clear_counts;
3758
3759         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3760         avg_wire_size += 24;
3761
3762         /* Don't starve jumbo frames */
3763         avg_wire_size = min(avg_wire_size, 3000);
3764
3765         /* Give a little boost to mid-size frames */
3766         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3767                 new_val = avg_wire_size / 3;
3768         else
3769                 new_val = avg_wire_size / 2;
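             /*
              * itr_val here is roughly in 0.25 usec units (see the constants
              * in igb_set_itr: 196 ~ 50 usec ~ 20k ints/sec).  For example,
              * full sized 1500 byte frames give avg_wire_size = 1524, so
              * new_val = 762, i.e. roughly 190 usec between interrupts
              * (~5k ints/sec).
              */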
3770
3771         /* when in itr mode 3 do not exceed 20K ints/sec */
3772         if (adapter->rx_itr_setting == 3 && new_val < 196)
3773                 new_val = 196;
3774
3775 set_itr_val:
3776         if (new_val != q_vector->itr_val) {
3777                 q_vector->itr_val = new_val;
3778                 q_vector->set_itr = 1;
3779         }
3780 clear_counts:
3781         if (q_vector->rx_ring) {
3782                 q_vector->rx_ring->total_bytes = 0;
3783                 q_vector->rx_ring->total_packets = 0;
3784         }
3785         if (q_vector->tx_ring) {
3786                 q_vector->tx_ring->total_bytes = 0;
3787                 q_vector->tx_ring->total_packets = 0;
3788         }
3789 }
3790
3791 /**
3792  * igb_update_itr - update the dynamic ITR value based on statistics
3793  *      Stores a new ITR value based on packets and byte
3794  *      counts during the last interrupt.  The advantage of per interrupt
3795  *      computation is faster updates and more accurate ITR for the current
3796  *      traffic pattern.  Constants in this function were computed
3797  *      based on theoretical maximum wire speed and thresholds were set based
3798  *      on testing data as well as attempting to minimize response time
3799  *      while increasing bulk throughput.
3800  *      this functionality is controlled by the InterruptThrottleRate module
3801  *      parameter (see igb_param.c)
3802  *      NOTE:  These calculations are only valid when operating in a single-
3803  *             queue environment.
3804  * @adapter: pointer to adapter
3805  * @itr_setting: current q_vector->itr_val
3806  * @packets: the number of packets during this measurement interval
3807  * @bytes: the number of bytes during this measurement interval
3808  **/
3809 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3810                                    int packets, int bytes)
3811 {
3812         unsigned int retval = itr_setting;
3813
3814         if (packets == 0)
3815                 goto update_itr_done;
3816
3817         switch (itr_setting) {
3818         case lowest_latency:
3819                 /* handle TSO and jumbo frames */
3820                 if (bytes/packets > 8000)
3821                         retval = bulk_latency;
3822                 else if ((packets < 5) && (bytes > 512))
3823                         retval = low_latency;
3824                 break;
3825         case low_latency:  /* 50 usec aka 20000 ints/s */
3826                 if (bytes > 10000) {
3827                         /* this if handles the TSO accounting */
3828                         if (bytes/packets > 8000) {
3829                                 retval = bulk_latency;
3830                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3831                                 retval = bulk_latency;
3832                         } else if ((packets > 35)) {
3833                                 retval = lowest_latency;
3834                         }
3835                 } else if (bytes/packets > 2000) {
3836                         retval = bulk_latency;
3837                 } else if (packets <= 2 && bytes < 512) {
3838                         retval = lowest_latency;
3839                 }
3840                 break;
3841         case bulk_latency: /* 250 usec aka 4000 ints/s */
3842                 if (bytes > 25000) {
3843                         if (packets > 35)
3844                                 retval = low_latency;
3845                 } else if (bytes < 1500) {
3846                         retval = low_latency;
3847                 }
3848                 break;
3849         }
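             /*
              * As a rough example: 12000 bytes in 40 packets during one
              * interval while at low_latency moves to lowest_latency (small
              * packets at a high rate), whereas the same 12000 bytes in 4
              * TSO-sized packets moves to bulk_latency.
              */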
3850
3851 update_itr_done:
3852         return retval;
3853 }
3854
3855 static void igb_set_itr(struct igb_adapter *adapter)
3856 {
3857         struct igb_q_vector *q_vector = adapter->q_vector[0];
3858         u16 current_itr;
3859         u32 new_itr = q_vector->itr_val;
3860
3861         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3862         if (adapter->link_speed != SPEED_1000) {
3863                 current_itr = 0;
3864                 new_itr = 4000;
3865                 goto set_itr_now;
3866         }
3867
3868         adapter->rx_itr = igb_update_itr(adapter,
3869                                     adapter->rx_itr,
3870                                     q_vector->rx_ring->total_packets,
3871                                     q_vector->rx_ring->total_bytes);
3872
3873         adapter->tx_itr = igb_update_itr(adapter,
3874                                     adapter->tx_itr,
3875                                     q_vector->tx_ring->total_packets,
3876                                     q_vector->tx_ring->total_bytes);
3877         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3878
3879         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3880         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3881                 current_itr = low_latency;
3882
3883         switch (current_itr) {
3884         /* the byte/packet thresholds in igb_update_itr depend on these values */
3885         case lowest_latency:
3886                 new_itr = 56;  /* aka 70,000 ints/sec */
3887                 break;
3888         case low_latency:
3889                 new_itr = 196; /* aka 20,000 ints/sec */
3890                 break;
3891         case bulk_latency:
3892                 new_itr = 980; /* aka 4,000 ints/sec */
3893                 break;
3894         default:
3895                 break;
3896         }
3897
3898 set_itr_now:
3899         q_vector->rx_ring->total_bytes = 0;
3900         q_vector->rx_ring->total_packets = 0;
3901         q_vector->tx_ring->total_bytes = 0;
3902         q_vector->tx_ring->total_packets = 0;
3903
3904         if (new_itr != q_vector->itr_val) {
3905                 /* this attempts to bias the interrupt rate towards Bulk
3906                  * by adding intermediate steps when interrupt rate is
3907                  * increasing */
3908                 new_itr = new_itr > q_vector->itr_val ?
3909                              max((new_itr * q_vector->itr_val) /
3910                                  (new_itr + (q_vector->itr_val >> 2)),
3911                                  new_itr) :
3912                              new_itr;
3913                 /* Don't write the value here; it resets the adapter's
3914                  * internal timer, and causes us to delay far longer than
3915                  * we should between interrupts.  Instead, we write the ITR
3916                  * value at the beginning of the next interrupt so the timing
3917                  * ends up being correct.
3918                  */
3919                 q_vector->itr_val = new_itr;
3920                 q_vector->set_itr = 1;
3921         }
3922 }
3923
3924 #define IGB_TX_FLAGS_CSUM               0x00000001
3925 #define IGB_TX_FLAGS_VLAN               0x00000002
3926 #define IGB_TX_FLAGS_TSO                0x00000004
3927 #define IGB_TX_FLAGS_IPV4               0x00000008
3928 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3929 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3930 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
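     /*
      * tx_flags packs feature bits in the low word and, when IGB_TX_FLAGS_VLAN
      * is set, the 802.1Q tag to insert in the high word (see VLAN_MASK/SHIFT).
      */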
3931
3932 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3933                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3934 {
3935         struct e1000_adv_tx_context_desc *context_desc;
3936         unsigned int i;
3937         int err;
3938         struct igb_buffer *buffer_info;
3939         u32 info = 0, tu_cmd = 0;
3940         u32 mss_l4len_idx;
3941         u8 l4len;
3942
3943         if (skb_header_cloned(skb)) {
3944                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3945                 if (err)
3946                         return err;
3947         }
3948
3949         l4len = tcp_hdrlen(skb);
3950         *hdr_len += l4len;
3951
3952         if (skb->protocol == htons(ETH_P_IP)) {
3953                 struct iphdr *iph = ip_hdr(skb);
3954                 iph->tot_len = 0;
3955                 iph->check = 0;
3956                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3957                                                          iph->daddr, 0,
3958                                                          IPPROTO_TCP,
3959                                                          0);
3960         } else if (skb_is_gso_v6(skb)) {
3961                 ipv6_hdr(skb)->payload_len = 0;
3962                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3963                                                        &ipv6_hdr(skb)->daddr,
3964                                                        0, IPPROTO_TCP, 0);
3965         }
3966
3967         i = tx_ring->next_to_use;
3968
3969         buffer_info = &tx_ring->buffer_info[i];
3970         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3971         /* VLAN MACLEN IPLEN */
3972         if (tx_flags & IGB_TX_FLAGS_VLAN)
3973                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3974         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3975         *hdr_len += skb_network_offset(skb);
3976         info |= skb_network_header_len(skb);
3977         *hdr_len += skb_network_header_len(skb);
3978         context_desc->vlan_macip_lens = cpu_to_le32(info);
3979
3980         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3981         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3982
3983         if (skb->protocol == htons(ETH_P_IP))
3984                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3985         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3986
3987         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3988
3989         /* MSS L4LEN IDX */
3990         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3991         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3992
3993         /* For 82575, context index must be unique per ring. */
3994         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3995                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3996
3997         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3998         context_desc->seqnum_seed = 0;
3999
4000         buffer_info->time_stamp = jiffies;
4001         buffer_info->next_to_watch = i;
4002         buffer_info->dma = 0;
4003         i++;
4004         if (i == tx_ring->count)
4005                 i = 0;
4006
4007         tx_ring->next_to_use = i;
4008
4009         return true;
4010 }
4011
4012 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
4013                                    struct sk_buff *skb, u32 tx_flags)
4014 {
4015         struct e1000_adv_tx_context_desc *context_desc;
4016         struct device *dev = tx_ring->dev;
4017         struct igb_buffer *buffer_info;
4018         u32 info = 0, tu_cmd = 0;
4019         unsigned int i;
4020
4021         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4022             (tx_flags & IGB_TX_FLAGS_VLAN)) {
4023                 i = tx_ring->next_to_use;
4024                 buffer_info = &tx_ring->buffer_info[i];
4025                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4026
4027                 if (tx_flags & IGB_TX_FLAGS_VLAN)
4028                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4029
4030                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4031                 if (skb->ip_summed == CHECKSUM_PARTIAL)
4032                         info |= skb_network_header_len(skb);
4033
4034                 context_desc->vlan_macip_lens = cpu_to_le32(info);
4035
4036                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4037
4038                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
4039                         __be16 protocol;
4040
4041                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4042                                 const struct vlan_ethhdr *vhdr =
4043                                           (const struct vlan_ethhdr*)skb->data;
4044
4045                                 protocol = vhdr->h_vlan_encapsulated_proto;
4046                         } else {
4047                                 protocol = skb->protocol;
4048                         }
4049
4050                         switch (protocol) {
4051                         case cpu_to_be16(ETH_P_IP):
4052                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4053                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4054                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4055                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4056                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4057                                 break;
4058                         case cpu_to_be16(ETH_P_IPV6):
4059                                 /* XXX what about other V6 headers?? */
4060                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4061                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4062                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4063                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4064                                 break;
4065                         default:
4066                                 if (unlikely(net_ratelimit()))
4067                                         dev_warn(dev,
4068                                             "partial checksum but proto=%x!\n",
4069                                             skb->protocol);
4070                                 break;
4071                         }
4072                 }
4073
4074                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4075                 context_desc->seqnum_seed = 0;
4076                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4077                         context_desc->mss_l4len_idx =
4078                                 cpu_to_le32(tx_ring->reg_idx << 4);
4079
4080                 buffer_info->time_stamp = jiffies;
4081                 buffer_info->next_to_watch = i;
4082                 buffer_info->dma = 0;
4083
4084                 i++;
4085                 if (i == tx_ring->count)
4086                         i = 0;
4087                 tx_ring->next_to_use = i;
4088
4089                 return true;
4090         }
4091         return false;
4092 }
4093
4094 #define IGB_MAX_TXD_PWR 16
4095 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
4096
4097 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
4098                                  unsigned int first)
4099 {
4100         struct igb_buffer *buffer_info;
4101         struct device *dev = tx_ring->dev;
4102         unsigned int hlen = skb_headlen(skb);
4103         unsigned int count = 0, i;
4104         unsigned int f;
4105         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4106
4107         i = tx_ring->next_to_use;
4108
4109         buffer_info = &tx_ring->buffer_info[i];
4110         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4111         buffer_info->length = hlen;
4112         /* set time_stamp *before* dma to help avoid a possible race */
4113         buffer_info->time_stamp = jiffies;
4114         buffer_info->next_to_watch = i;
4115         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4116                                           DMA_TO_DEVICE);
4117         if (dma_mapping_error(dev, buffer_info->dma))
4118                 goto dma_error;
4119
4120         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4121                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4122                 unsigned int len = frag->size;
4123
4124                 count++;
4125                 i++;
4126                 if (i == tx_ring->count)
4127                         i = 0;
4128
4129                 buffer_info = &tx_ring->buffer_info[i];
4130                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4131                 buffer_info->length = len;
4132                 buffer_info->time_stamp = jiffies;
4133                 buffer_info->next_to_watch = i;
4134                 buffer_info->mapped_as_page = true;
4135                 buffer_info->dma = dma_map_page(dev,
4136                                                 frag->page,
4137                                                 frag->page_offset,
4138                                                 len,
4139                                                 DMA_TO_DEVICE);
4140                 if (dma_mapping_error(dev, buffer_info->dma))
4141                         goto dma_error;
4142
4143         }
4144
4145         tx_ring->buffer_info[i].skb = skb;
4146         tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4147         /* multiply data chunks by size of headers */
4148         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4149         tx_ring->buffer_info[i].gso_segs = gso_segs;
4150         tx_ring->buffer_info[first].next_to_watch = i;
4151
4152         return ++count;
4153
4154 dma_error:
4155         dev_err(dev, "TX DMA map failed\n");
4156
4157         /* clear timestamp and dma mappings for failed buffer_info mapping */
4158         buffer_info->dma = 0;
4159         buffer_info->time_stamp = 0;
4160         buffer_info->length = 0;
4161         buffer_info->next_to_watch = 0;
4162         buffer_info->mapped_as_page = false;
4163
4164         /* clear timestamp and dma mappings for remaining portion of packet */
4165         while (count--) {
4166                 if (i == 0)
4167                         i = tx_ring->count;
4168                 i--;
4169                 buffer_info = &tx_ring->buffer_info[i];
4170                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4171         }
4172
4173         return 0;
4174 }
4175
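/**
 * igb_tx_queue_adv - write advanced Tx descriptors for a mapped packet
 * @tx_ring: ring the packet will be transmitted on
 * @tx_flags: IGB_TX_FLAGS_* bits for this packet
 * @count: number of descriptors mapped by igb_tx_map_adv
 * @paylen: total packet length
 * @hdr_len: length of the TSO header, 0 if not doing TSO
 *
 * Builds cmd_type_len/olinfo_status from the flags, fills one advanced data
 * descriptor per mapped buffer, marks the last descriptor end-of-packet and
 * then advances next_to_use and writes the new tail to hardware.
 **/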
4176 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4177                                     u32 tx_flags, int count, u32 paylen,
4178                                     u8 hdr_len)
4179 {
4180         union e1000_adv_tx_desc *tx_desc;
4181         struct igb_buffer *buffer_info;
4182         u32 olinfo_status = 0, cmd_type_len;
4183         unsigned int i = tx_ring->next_to_use;
4184
4185         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4186                         E1000_ADVTXD_DCMD_DEXT);
4187
4188         if (tx_flags & IGB_TX_FLAGS_VLAN)
4189                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4190
4191         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4192                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4193
4194         if (tx_flags & IGB_TX_FLAGS_TSO) {
4195                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4196
4197                 /* insert tcp checksum */
4198                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4199
4200                 /* insert ip checksum */
4201                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4202                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4203
4204         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4205                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4206         }
4207
4208         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4209             (tx_flags & (IGB_TX_FLAGS_CSUM |
4210                          IGB_TX_FLAGS_TSO |
4211                          IGB_TX_FLAGS_VLAN)))
4212                 olinfo_status |= tx_ring->reg_idx << 4;
4213
4214         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4215
4216         do {
4217                 buffer_info = &tx_ring->buffer_info[i];
4218                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4219                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4220                 tx_desc->read.cmd_type_len =
4221                         cpu_to_le32(cmd_type_len | buffer_info->length);
4222                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4223                 count--;
4224                 i++;
4225                 if (i == tx_ring->count)
4226                         i = 0;
4227         } while (count > 0);
4228
4229         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4230         /* Force memory writes to complete before letting h/w
4231          * know there are new descriptors to fetch.  (Only
4232          * applicable for weak-ordered memory model archs,
4233          * such as IA-64). */
4234         wmb();
4235
4236         tx_ring->next_to_use = i;
4237         writel(i, tx_ring->tail);
4238         /* we need this if more than one processor can write to our tail
4239          * at a time; it synchronizes IO on IA64/Altix systems */
4240         mmiowb();
4241 }
4242
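/**
 * __igb_maybe_stop_tx - stop the Tx subqueue when the ring is nearly full
 * @tx_ring: ring to check
 * @size: number of descriptors needed
 *
 * Stops the subqueue, then re-checks for free descriptors in case another
 * CPU freed some in the meantime.  Returns -EBUSY if the ring is still too
 * full, otherwise re-wakes the subqueue and returns 0.
 **/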
4243 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4244 {
4245         struct net_device *netdev = tx_ring->netdev;
4246
4247         netif_stop_subqueue(netdev, tx_ring->queue_index);
4248
4249         /* Herbert's original patch had:
4250          *  smp_mb__after_netif_stop_queue();
4251          * but since that doesn't exist yet, just open code it. */
4252         smp_mb();
4253
4254         /* We need to check again in case another CPU has just
4255          * made room available. */
4256         if (igb_desc_unused(tx_ring) < size)
4257                 return -EBUSY;
4258
4259         /* A reprieve! */
4260         netif_wake_subqueue(netdev, tx_ring->queue_index);
4261
4262         u64_stats_update_begin(&tx_ring->tx_syncp2);
4263         tx_ring->tx_stats.restart_queue2++;
4264         u64_stats_update_end(&tx_ring->tx_syncp2);
4265
4266         return 0;
4267 }
4268
4269 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4270 {
4271         if (igb_desc_unused(tx_ring) >= size)
4272                 return 0;
4273         return __igb_maybe_stop_tx(tx_ring, size);
4274 }
4275
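/**
 * igb_xmit_frame_ring_adv - transmit a frame on a specific Tx ring
 * @skb: buffer to transmit
 * @tx_ring: ring the packet will be transmitted on
 *
 * Makes sure enough descriptors are free, sets up hardware timestamping and
 * VLAN flags, performs TSO or checksum offload context setup, maps the skb
 * for DMA and posts the descriptors to hardware.
 **/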
4276 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4277                                     struct igb_ring *tx_ring)
4278 {
4279         int tso = 0, count;
4280         u32 tx_flags = 0;
4281         u16 first;
4282         u8 hdr_len = 0;
4283
4284         /* need: 1 descriptor per page,
4285          *       + 2 desc gap to keep tail from touching head,
4286          *       + 1 desc for skb->data,
4287          *       + 1 desc for context descriptor,
4288          * otherwise try next time */
4289         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4290                 /* this is a hard error */
4291                 return NETDEV_TX_BUSY;
4292         }
4293
4294         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4295                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4296                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4297         }
4298
4299         if (vlan_tx_tag_present(skb)) {
4300                 tx_flags |= IGB_TX_FLAGS_VLAN;
4301                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4302         }
4303
4304         if (skb->protocol == htons(ETH_P_IP))
4305                 tx_flags |= IGB_TX_FLAGS_IPV4;
4306
4307         first = tx_ring->next_to_use;
4308         if (skb_is_gso(skb)) {
4309                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4310
4311                 if (tso < 0) {
4312                         dev_kfree_skb_any(skb);
4313                         return NETDEV_TX_OK;
4314                 }
4315         }
4316
4317         if (tso)
4318                 tx_flags |= IGB_TX_FLAGS_TSO;
4319         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4320                  (skb->ip_summed == CHECKSUM_PARTIAL))
4321                 tx_flags |= IGB_TX_FLAGS_CSUM;
4322
4323         /*
4324          * count reflects descriptors mapped; if 0 or less, a mapping error
4325          * has occurred and we need to rewind the descriptor queue
4326          */
4327         count = igb_tx_map_adv(tx_ring, skb, first);
4328         if (!count) {
4329                 dev_kfree_skb_any(skb);
4330                 tx_ring->buffer_info[first].time_stamp = 0;
4331                 tx_ring->next_to_use = first;
4332                 return NETDEV_TX_OK;
4333         }
4334
4335         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4336
4337         /* Make sure there is space in the ring for the next send. */
4338         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4339
4340         return NETDEV_TX_OK;
4341 }
4342
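/**
 * igb_xmit_frame_adv - ndo_start_xmit entry point
 * @skb: buffer to transmit
 * @netdev: network interface device structure
 *
 * Drops the frame if the adapter is down or the skb is empty, selects a
 * Tx ring from the skb's queue mapping and hands the frame to
 * igb_xmit_frame_ring_adv().
 **/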
4343 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4344                                       struct net_device *netdev)
4345 {
4346         struct igb_adapter *adapter = netdev_priv(netdev);
4347         struct igb_ring *tx_ring;
4348         int r_idx = 0;
4349
4350         if (test_bit(__IGB_DOWN, &adapter->state)) {
4351                 dev_kfree_skb_any(skb);
4352                 return NETDEV_TX_OK;
4353         }
4354
4355         if (skb->len <= 0) {
4356                 dev_kfree_skb_any(skb);
4357                 return NETDEV_TX_OK;
4358         }
4359
4360         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4361         tx_ring = adapter->multi_tx_table[r_idx];
4362
4363         /* This goes back to the question of how to logically map a tx queue
4364          * to a flow.  Right now, performance is slightly degraded when
4365          * using multiple tx queues.  If the stack breaks away from a
4366          * single qdisc implementation, we can look at this again. */
4367         return igb_xmit_frame_ring_adv(skb, tx_ring);
4368 }
4369
4370 /**
4371  * igb_tx_timeout - Respond to a Tx Hang
4372  * @netdev: network interface device structure
4373  **/
4374 static void igb_tx_timeout(struct net_device *netdev)
4375 {
4376         struct igb_adapter *adapter = netdev_priv(netdev);
4377         struct e1000_hw *hw = &adapter->hw;
4378
4379         /* Do the reset outside of interrupt context */
4380         adapter->tx_timeout_count++;
4381
4382         if (hw->mac.type == e1000_82580)
4383                 hw->dev_spec._82575.global_device_reset = true;
4384
4385         schedule_work(&adapter->reset_task);
4386         wr32(E1000_EICS,
4387              (adapter->eims_enable_mask & ~adapter->eims_other));
4388 }
4389
4390 static void igb_reset_task(struct work_struct *work)
4391 {
4392         struct igb_adapter *adapter;
4393         adapter = container_of(work, struct igb_adapter, reset_task);
4394
4395         igb_dump(adapter);
4396         netdev_err(adapter->netdev, "Reset adapter\n");
4397         igb_reinit_locked(adapter);
4398 }
4399
4400 /**
4401  * igb_get_stats64 - Get System Network Statistics
4402  * @netdev: network interface device structure
4403  * @stats: rtnl_link_stats64 pointer
4404  *
4405  **/
4406 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4407                                                  struct rtnl_link_stats64 *stats)
4408 {
4409         struct igb_adapter *adapter = netdev_priv(netdev);
4410
4411         spin_lock(&adapter->stats64_lock);
4412         igb_update_stats(adapter, &adapter->stats64);
4413         memcpy(stats, &adapter->stats64, sizeof(*stats));
4414         spin_unlock(&adapter->stats64_lock);
4415
4416         return stats;
4417 }
4418
4419 /**
4420  * igb_change_mtu - Change the Maximum Transfer Unit
4421  * @netdev: network interface device structure
4422  * @new_mtu: new value for maximum frame size
4423  *
4424  * Returns 0 on success, negative on failure
4425  **/
4426 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4427 {
4428         struct igb_adapter *adapter = netdev_priv(netdev);
4429         struct pci_dev *pdev = adapter->pdev;
4430         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4431         u32 rx_buffer_len, i;
4432
4433         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4434                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4435                 return -EINVAL;
4436         }
4437
4438         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4439                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4440                 return -EINVAL;
4441         }
4442
4443         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4444                 msleep(1);
4445
4446         /* igb_down has a dependency on max_frame_size */
4447         adapter->max_frame_size = max_frame;
4448
4449         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4450          * means we reserve 2 more; this pushes us to allocate from the next
4451          * larger slab size.
4452          * i.e. RXBUFFER_2048 --> size-4096 slab
4453          */
4454
4455         if (adapter->hw.mac.type == e1000_82580)
4456                 max_frame += IGB_TS_HDR_LEN;
4457
4458         if (max_frame <= IGB_RXBUFFER_1024)
4459                 rx_buffer_len = IGB_RXBUFFER_1024;
4460         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4461                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4462         else
4463                 rx_buffer_len = IGB_RXBUFFER_128;
4464
4465         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4466              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4467                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4468
4469         if ((adapter->hw.mac.type == e1000_82580) &&
4470             (rx_buffer_len == IGB_RXBUFFER_128))
4471                 rx_buffer_len += IGB_RXBUFFER_64;
4472
4473         if (netif_running(netdev))
4474                 igb_down(adapter);
4475
4476         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4477                  netdev->mtu, new_mtu);
4478         netdev->mtu = new_mtu;
4479
4480         for (i = 0; i < adapter->num_rx_queues; i++)
4481                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4482
4483         if (netif_running(netdev))
4484                 igb_up(adapter);
4485         else
4486                 igb_reset(adapter);
4487
4488         clear_bit(__IGB_RESETTING, &adapter->state);
4489
4490         return 0;
4491 }
4492
4493 /**
4494  * igb_update_stats - Update the board statistics counters
4495  * @adapter: board private structure
4496  **/
4497
4498 void igb_update_stats(struct igb_adapter *adapter,
4499                       struct rtnl_link_stats64 *net_stats)
4500 {
4501         struct e1000_hw *hw = &adapter->hw;
4502         struct pci_dev *pdev = adapter->pdev;
4503         u32 reg, mpc;
4504         u16 phy_tmp;
4505         int i;
4506         u64 bytes, packets;
4507         unsigned int start;
4508         u64 _bytes, _packets;
4509
4510 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4511
4512         /*
4513          * Prevent stats update while adapter is being reset, or if the pci
4514          * connection is down.
4515          */
4516         if (adapter->link_speed == 0)
4517                 return;
4518         if (pci_channel_offline(pdev))
4519                 return;
4520
4521         bytes = 0;
4522         packets = 0;
4523         for (i = 0; i < adapter->num_rx_queues; i++) {
4524                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4525                 struct igb_ring *ring = adapter->rx_ring[i];
4526
4527                 ring->rx_stats.drops += rqdpc_tmp;
4528                 net_stats->rx_fifo_errors += rqdpc_tmp;
4529
4530                 do {
4531                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4532                         _bytes = ring->rx_stats.bytes;
4533                         _packets = ring->rx_stats.packets;
4534                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4535                 bytes += _bytes;
4536                 packets += _packets;
4537         }
4538
4539         net_stats->rx_bytes = bytes;
4540         net_stats->rx_packets = packets;
4541
4542         bytes = 0;
4543         packets = 0;
4544         for (i = 0; i < adapter->num_tx_queues; i++) {
4545                 struct igb_ring *ring = adapter->tx_ring[i];
4546                 do {
4547                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4548                         _bytes = ring->tx_stats.bytes;
4549                         _packets = ring->tx_stats.packets;
4550                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4551                 bytes += _bytes;
4552                 packets += _packets;
4553         }
4554         net_stats->tx_bytes = bytes;
4555         net_stats->tx_packets = packets;
4556
4557         /* read stats registers */
4558         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4559         adapter->stats.gprc += rd32(E1000_GPRC);
4560         adapter->stats.gorc += rd32(E1000_GORCL);
4561         rd32(E1000_GORCH); /* clear GORCL */
4562         adapter->stats.bprc += rd32(E1000_BPRC);
4563         adapter->stats.mprc += rd32(E1000_MPRC);
4564         adapter->stats.roc += rd32(E1000_ROC);
4565
4566         adapter->stats.prc64 += rd32(E1000_PRC64);
4567         adapter->stats.prc127 += rd32(E1000_PRC127);
4568         adapter->stats.prc255 += rd32(E1000_PRC255);
4569         adapter->stats.prc511 += rd32(E1000_PRC511);
4570         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4571         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4572         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4573         adapter->stats.sec += rd32(E1000_SEC);
4574
4575         mpc = rd32(E1000_MPC);
4576         adapter->stats.mpc += mpc;
4577         net_stats->rx_fifo_errors += mpc;
4578         adapter->stats.scc += rd32(E1000_SCC);
4579         adapter->stats.ecol += rd32(E1000_ECOL);
4580         adapter->stats.mcc += rd32(E1000_MCC);
4581         adapter->stats.latecol += rd32(E1000_LATECOL);
4582         adapter->stats.dc += rd32(E1000_DC);
4583         adapter->stats.rlec += rd32(E1000_RLEC);
4584         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4585         adapter->stats.xontxc += rd32(E1000_XONTXC);
4586         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4587         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4588         adapter->stats.fcruc += rd32(E1000_FCRUC);
4589         adapter->stats.gptc += rd32(E1000_GPTC);
4590         adapter->stats.gotc += rd32(E1000_GOTCL);
4591         rd32(E1000_GOTCH); /* clear GOTCL */
4592         adapter->stats.rnbc += rd32(E1000_RNBC);
4593         adapter->stats.ruc += rd32(E1000_RUC);
4594         adapter->stats.rfc += rd32(E1000_RFC);
4595         adapter->stats.rjc += rd32(E1000_RJC);
4596         adapter->stats.tor += rd32(E1000_TORH);
4597         adapter->stats.tot += rd32(E1000_TOTH);
4598         adapter->stats.tpr += rd32(E1000_TPR);
4599
4600         adapter->stats.ptc64 += rd32(E1000_PTC64);
4601         adapter->stats.ptc127 += rd32(E1000_PTC127);
4602         adapter->stats.ptc255 += rd32(E1000_PTC255);
4603         adapter->stats.ptc511 += rd32(E1000_PTC511);
4604         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4605         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4606
4607         adapter->stats.mptc += rd32(E1000_MPTC);
4608         adapter->stats.bptc += rd32(E1000_BPTC);
4609
4610         adapter->stats.tpt += rd32(E1000_TPT);
4611         adapter->stats.colc += rd32(E1000_COLC);
4612
4613         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4614         /* read internal phy specific stats */
4615         reg = rd32(E1000_CTRL_EXT);
4616         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4617                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4618                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4619         }
4620
4621         adapter->stats.tsctc += rd32(E1000_TSCTC);
4622         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4623
4624         adapter->stats.iac += rd32(E1000_IAC);
4625         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4626         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4627         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4628         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4629         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4630         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4631         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4632         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4633
4634         /* Fill out the OS statistics structure */
4635         net_stats->multicast = adapter->stats.mprc;
4636         net_stats->collisions = adapter->stats.colc;
4637
4638         /* Rx Errors */
4639
4640         /* RLEC on some newer hardware can be incorrect so build
4641          * our own version based on RUC and ROC */
4642         net_stats->rx_errors = adapter->stats.rxerrc +
4643                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4644                 adapter->stats.ruc + adapter->stats.roc +
4645                 adapter->stats.cexterr;
4646         net_stats->rx_length_errors = adapter->stats.ruc +
4647                                       adapter->stats.roc;
4648         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4649         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4650         net_stats->rx_missed_errors = adapter->stats.mpc;
4651
4652         /* Tx Errors */
4653         net_stats->tx_errors = adapter->stats.ecol +
4654                                adapter->stats.latecol;
4655         net_stats->tx_aborted_errors = adapter->stats.ecol;
4656         net_stats->tx_window_errors = adapter->stats.latecol;
4657         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4658
4659         /* Tx Dropped needs to be maintained elsewhere */
4660
4661         /* Phy Stats */
4662         if (hw->phy.media_type == e1000_media_type_copper) {
4663                 if ((adapter->link_speed == SPEED_1000) &&
4664                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4665                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4666                         adapter->phy_stats.idle_errors += phy_tmp;
4667                 }
4668         }
4669
4670         /* Management Stats */
4671         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4672         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4673         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4674
4675         /* OS2BMC Stats */
4676         reg = rd32(E1000_MANC);
4677         if (reg & E1000_MANC_EN_BMC2OS) {
4678                 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4679                 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4680                 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4681                 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4682         }
4683 }
4684
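/**
 * igb_msix_other - handler for the "other" MSI-X causes vector
 * @irq: interrupt number
 * @data: pointer to the adapter structure
 *
 * Handles device reset requests (DRSTA), DMA out-of-sync events, VF mailbox
 * interrupts and link status changes, then re-enables the serviced causes.
 **/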
4685 static irqreturn_t igb_msix_other(int irq, void *data)
4686 {
4687         struct igb_adapter *adapter = data;
4688         struct e1000_hw *hw = &adapter->hw;
4689         u32 icr = rd32(E1000_ICR);
4690         /* reading ICR causes bit 31 of EICR to be cleared */
4691
4692         if (icr & E1000_ICR_DRSTA)
4693                 schedule_work(&adapter->reset_task);
4694
4695         if (icr & E1000_ICR_DOUTSYNC) {
4696                 /* HW is reporting DMA is out of sync */
4697                 adapter->stats.doosync++;
4698                 /* The DMA Out of Sync is also an indication of a spoof event
4699                  * in IOV mode. Check the Wrong VM Behavior register to
4700                  * see if it is really a spoof event. */
4701                 igb_check_wvbr(adapter);
4702         }
4703
4704         /* Check for a mailbox event */
4705         if (icr & E1000_ICR_VMMB)
4706                 igb_msg_task(adapter);
4707
4708         if (icr & E1000_ICR_LSC) {
4709                 hw->mac.get_link_status = 1;
4710                 /* guard against interrupt when we're going down */
4711                 if (!test_bit(__IGB_DOWN, &adapter->state))
4712                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4713         }
4714
4715         if (adapter->vfs_allocated_count)
4716                 wr32(E1000_IMS, E1000_IMS_LSC |
4717                                 E1000_IMS_VMMB |
4718                                 E1000_IMS_DOUTSYNC);
4719         else
4720                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4721         wr32(E1000_EIMS, adapter->eims_other);
4722
4723         return IRQ_HANDLED;
4724 }
4725
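/**
 * igb_write_itr - write a pending interrupt throttle rate to hardware
 * @q_vector: vector whose ITR register should be updated
 **/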
4726 static void igb_write_itr(struct igb_q_vector *q_vector)
4727 {
4728         struct igb_adapter *adapter = q_vector->adapter;
4729         u32 itr_val = q_vector->itr_val & 0x7FFC;
4730
4731         if (!q_vector->set_itr)
4732                 return;
4733
4734         if (!itr_val)
4735                 itr_val = 0x4;
4736
4737         if (adapter->hw.mac.type == e1000_82575)
4738                 itr_val |= itr_val << 16;
4739         else
4740                 itr_val |= 0x8000000;
4741
4742         writel(itr_val, q_vector->itr_register);
4743         q_vector->set_itr = 0;
4744 }
4745
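/**
 * igb_msix_ring - per-queue-vector MSI-X interrupt handler
 * @irq: interrupt number
 * @data: pointer to the q_vector
 *
 * Writes the previously calculated ITR value and schedules NAPI polling.
 **/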
4746 static irqreturn_t igb_msix_ring(int irq, void *data)
4747 {
4748         struct igb_q_vector *q_vector = data;
4749
4750         /* Write the ITR value calculated from the previous interrupt. */
4751         igb_write_itr(q_vector);
4752
4753         napi_schedule(&q_vector->napi);
4754
4755         return IRQ_HANDLED;
4756 }
4757
4758 #ifdef CONFIG_IGB_DCA
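/**
 * igb_update_dca - program DCA tags for a queue vector's rings
 * @q_vector: vector whose Tx/Rx rings should be updated
 *
 * Programs the DCA Tx/Rx control registers so descriptor (and Rx header and
 * payload) writes target the CPU currently servicing this vector.  Does
 * nothing if the vector is already programmed for the current CPU.
 **/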
4759 static void igb_update_dca(struct igb_q_vector *q_vector)
4760 {
4761         struct igb_adapter *adapter = q_vector->adapter;
4762         struct e1000_hw *hw = &adapter->hw;
4763         int cpu = get_cpu();
4764
4765         if (q_vector->cpu == cpu)
4766                 goto out_no_update;
4767
4768         if (q_vector->tx_ring) {
4769                 int q = q_vector->tx_ring->reg_idx;
4770                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4771                 if (hw->mac.type == e1000_82575) {
4772                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4773                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4774                 } else {
4775                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4776                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4777                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4778                 }
4779                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4780                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4781         }
4782         if (q_vector->rx_ring) {
4783                 int q = q_vector->rx_ring->reg_idx;
4784                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4785                 if (hw->mac.type == e1000_82575) {
4786                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4787                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4788                 } else {
4789                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4790                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4791                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4792                 }
4793                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4794                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4795                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4796                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4797         }
4798         q_vector->cpu = cpu;
4799 out_no_update:
4800         put_cpu();
4801 }
4802
4803 static void igb_setup_dca(struct igb_adapter *adapter)
4804 {
4805         struct e1000_hw *hw = &adapter->hw;
4806         int i;
4807
4808         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4809                 return;
4810
4811         /* Always use CB2 mode; the difference is masked in the CB driver. */
4812         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4813
4814         for (i = 0; i < adapter->num_q_vectors; i++) {
4815                 adapter->q_vector[i]->cpu = -1;
4816                 igb_update_dca(adapter->q_vector[i]);
4817         }
4818 }
4819
4820 static int __igb_notify_dca(struct device *dev, void *data)
4821 {
4822         struct net_device *netdev = dev_get_drvdata(dev);
4823         struct igb_adapter *adapter = netdev_priv(netdev);
4824         struct pci_dev *pdev = adapter->pdev;
4825         struct e1000_hw *hw = &adapter->hw;
4826         unsigned long event = *(unsigned long *)data;
4827
4828         switch (event) {
4829         case DCA_PROVIDER_ADD:
4830                 /* if already enabled, don't do it again */
4831                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4832                         break;
4833                 if (dca_add_requester(dev) == 0) {
4834                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4835                         dev_info(&pdev->dev, "DCA enabled\n");
4836                         igb_setup_dca(adapter);
4837                         break;
4838                 }
4839                 /* Fall Through since DCA is disabled. */
4840         case DCA_PROVIDER_REMOVE:
4841                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4842                         /* without this a class_device is left
4843                          * hanging around in the sysfs model */
4844                         dca_remove_requester(dev);
4845                         dev_info(&pdev->dev, "DCA disabled\n");
4846                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4847                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4848                 }
4849                 break;
4850         }
4851
4852         return 0;
4853 }
4854
4855 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4856                           void *p)
4857 {
4858         int ret_val;
4859
4860         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4861                                          __igb_notify_dca);
4862
4863         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4864 }
4865 #endif /* CONFIG_IGB_DCA */
4866
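/**
 * igb_ping_all_vfs - send a control message to every allocated VF
 * @adapter: board private structure
 *
 * Writes a PF control ("ping") message to each VF mailbox, adding the CTS
 * bit for VFs that are currently clear to send.
 **/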
4867 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4868 {
4869         struct e1000_hw *hw = &adapter->hw;
4870         u32 ping;
4871         int i;
4872
4873         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4874                 ping = E1000_PF_CONTROL_MSG;
4875                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4876                         ping |= E1000_VT_MSGTYPE_CTS;
4877                 igb_write_mbx(hw, &ping, 1, i);
4878         }
4879 }
4880
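/**
 * igb_set_vf_promisc - handle a VF promiscuous mode request
 * @adapter: board private structure
 * @msgbuf: mailbox message from the VF
 * @vf: VF that sent the request
 *
 * Updates the VF's VMOLR register and flags according to the requested
 * unicast/multicast promiscuous settings.  Returns -EINVAL if unsupported
 * flag bits remain in the request.
 **/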
4881 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4882 {
4883         struct e1000_hw *hw = &adapter->hw;
4884         u32 vmolr = rd32(E1000_VMOLR(vf));
4885         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4886
4887         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4888                             IGB_VF_FLAG_MULTI_PROMISC);
4889         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4890
4891         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4892                 vmolr |= E1000_VMOLR_MPME;
4893                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4894                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4895         } else {
4896                 /*
4897                  * if we have hashes and we are clearing a multicast promisc
4898                  * flag we need to write the hashes to the MTA as this step
4899                  * was previously skipped
4900                  */
4901                 if (vf_data->num_vf_mc_hashes > 30) {
4902                         vmolr |= E1000_VMOLR_MPME;
4903                 } else if (vf_data->num_vf_mc_hashes) {
4904                         int j;
4905                         vmolr |= E1000_VMOLR_ROMPE;
4906                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4907                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4908                 }
4909         }
4910
4911         wr32(E1000_VMOLR(vf), vmolr);
4912
4913         /* there are flags left unprocessed, likely not supported */
4914         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4915                 return -EINVAL;
4916
4917         return 0;
4918
4919 }
4920
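/**
 * igb_set_vf_multicasts - store a VF's multicast hash list
 * @adapter: board private structure
 * @msgbuf: mailbox message containing the hash list
 * @vf: VF that sent the request
 *
 * Saves up to 30 multicast hash values for the VF and refreshes the MTA
 * via igb_set_rx_mode().
 **/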
4921 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4922                                   u32 *msgbuf, u32 vf)
4923 {
4924         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4925         u16 *hash_list = (u16 *)&msgbuf[1];
4926         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4927         int i;
4928
4929         /* salt away the number of multicast addresses assigned
4930          * to this VF for later use to restore when the PF multicast
4931          * list changes
4932          */
4933         vf_data->num_vf_mc_hashes = n;
4934
4935         /* only up to 30 hash values supported */
4936         if (n > 30)
4937                 n = 30;
4938
4939         /* store the hashes for later use */
4940         for (i = 0; i < n; i++)
4941                 vf_data->vf_mc_hashes[i] = hash_list[i];
4942
4943         /* Flush and reset the mta with the new values */
4944         igb_set_rx_mode(adapter->netdev);
4945
4946         return 0;
4947 }
4948
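/**
 * igb_restore_vf_multicasts - re-program VF multicast filtering
 * @adapter: board private structure
 *
 * Re-writes VMOLR and the stored multicast hashes for every VF after the
 * PF multicast list has changed.
 **/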
4949 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4950 {
4951         struct e1000_hw *hw = &adapter->hw;
4952         struct vf_data_storage *vf_data;
4953         int i, j;
4954
4955         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4956                 u32 vmolr = rd32(E1000_VMOLR(i));
4957                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4958
4959                 vf_data = &adapter->vf_data[i];
4960
4961                 if ((vf_data->num_vf_mc_hashes > 30) ||
4962                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4963                         vmolr |= E1000_VMOLR_MPME;
4964                 } else if (vf_data->num_vf_mc_hashes) {
4965                         vmolr |= E1000_VMOLR_ROMPE;
4966                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4967                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4968                 }
4969                 wr32(E1000_VMOLR(i), vmolr);
4970         }
4971 }
4972
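/**
 * igb_clear_vf_vfta - remove a VF from all VLAN pool filters
 * @adapter: board private structure
 * @vf: VF to remove
 *
 * Clears the VF's pool bit in every VLVF entry; when a pool becomes empty
 * the corresponding VLAN is also removed from the VFTA.
 **/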
4973 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4974 {
4975         struct e1000_hw *hw = &adapter->hw;
4976         u32 pool_mask, reg, vid;
4977         int i;
4978
4979         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4980
4981         /* Find the vlan filter for this id */
4982         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4983                 reg = rd32(E1000_VLVF(i));
4984
4985                 /* remove the vf from the pool */
4986                 reg &= ~pool_mask;
4987
4988                 /* if pool is empty then remove entry from vfta */
4989                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4990                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4991                         vid = reg & E1000_VLVF_VLANID_MASK;
4992                         reg = 0;
4993                         igb_vfta_set(hw, vid, false);
4994                 }
4995
4996                 wr32(E1000_VLVF(i), reg);
4997         }
4998
4999         adapter->vf_data[vf].vlans_enabled = 0;
5000 }
5001
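/**
 * igb_vlvf_set - add or remove a pool from a VLAN filter entry
 * @adapter: board private structure
 * @vid: VLAN id of the filter
 * @add: true to add the pool, false to remove it
 * @vf: pool/VF index to modify
 *
 * Updates the VLVF and VFTA tables for the given VLAN and adjusts the VF's
 * receive packet length (RLPML) to account for the VLAN tag.  Returns 0 on
 * success, or -1 if VLVF is unavailable (pre-82576 hardware or no VFs).
 **/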
5002 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5003 {
5004         struct e1000_hw *hw = &adapter->hw;
5005         u32 reg, i;
5006
5007         /* The vlvf table only exists on 82576 hardware and newer */
5008         if (hw->mac.type < e1000_82576)
5009                 return -1;
5010
5011         /* we only need to do this if VMDq is enabled */
5012         if (!adapter->vfs_allocated_count)
5013                 return -1;
5014
5015         /* Find the vlan filter for this id */
5016         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5017                 reg = rd32(E1000_VLVF(i));
5018                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5019                     vid == (reg & E1000_VLVF_VLANID_MASK))
5020                         break;
5021         }
5022
5023         if (add) {
5024                 if (i == E1000_VLVF_ARRAY_SIZE) {
5025                         /* Did not find a matching VLAN ID entry that was
5026                          * enabled.  Search for a free filter entry, i.e.
5027                          * one without the enable bit set
5028                          */
5029                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5030                                 reg = rd32(E1000_VLVF(i));
5031                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5032                                         break;
5033                         }
5034                 }
5035                 if (i < E1000_VLVF_ARRAY_SIZE) {
5036                         /* Found an enabled/available entry */
5037                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5038
5039                         /* if !enabled we need to set this up in vfta */
5040                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5041                                 /* add VID to filter table */
5042                                 igb_vfta_set(hw, vid, true);
5043                                 reg |= E1000_VLVF_VLANID_ENABLE;
5044                         }
5045                         reg &= ~E1000_VLVF_VLANID_MASK;
5046                         reg |= vid;
5047                         wr32(E1000_VLVF(i), reg);
5048
5049                         /* do not modify RLPML for PF devices */
5050                         if (vf >= adapter->vfs_allocated_count)
5051                                 return 0;
5052
5053                         if (!adapter->vf_data[vf].vlans_enabled) {
5054                                 u32 size;
5055                                 reg = rd32(E1000_VMOLR(vf));
5056                                 size = reg & E1000_VMOLR_RLPML_MASK;
5057                                 size += 4;
5058                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5059                                 reg |= size;
5060                                 wr32(E1000_VMOLR(vf), reg);
5061                         }
5062
5063                         adapter->vf_data[vf].vlans_enabled++;
5064                         return 0;
5065                 }
5066         } else {
5067                 if (i < E1000_VLVF_ARRAY_SIZE) {
5068                         /* remove vf from the pool */
5069                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5070                         /* if pool is empty then remove entry from vfta */
5071                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5072                                 reg = 0;
5073                                 igb_vfta_set(hw, vid, false);
5074                         }
5075                         wr32(E1000_VLVF(i), reg);
5076
5077                         /* do not modify RLPML for PF devices */
5078                         if (vf >= adapter->vfs_allocated_count)
5079                                 return 0;
5080
5081                         adapter->vf_data[vf].vlans_enabled--;
5082                         if (!adapter->vf_data[vf].vlans_enabled) {
5083                                 u32 size;
5084                                 reg = rd32(E1000_VMOLR(vf));
5085                                 size = reg & E1000_VMOLR_RLPML_MASK;
5086                                 size -= 4;
5087                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5088                                 reg |= size;
5089                                 wr32(E1000_VMOLR(vf), reg);
5090                         }
5091                 }
5092         }
5093         return 0;
5094 }
5095
5096 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5097 {
5098         struct e1000_hw *hw = &adapter->hw;
5099
5100         if (vid)
5101                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5102         else
5103                 wr32(E1000_VMVIR(vf), 0);
5104 }
5105
5106 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5107                                int vf, u16 vlan, u8 qos)
5108 {
5109         int err = 0;
5110         struct igb_adapter *adapter = netdev_priv(netdev);
5111
5112         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5113                 return -EINVAL;
5114         if (vlan || qos) {
5115                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5116                 if (err)
5117                         goto out;
5118                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5119                 igb_set_vmolr(adapter, vf, !vlan);
5120                 adapter->vf_data[vf].pf_vlan = vlan;
5121                 adapter->vf_data[vf].pf_qos = qos;
5122                 dev_info(&adapter->pdev->dev,
5123                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5124                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5125                         dev_warn(&adapter->pdev->dev,
5126                                  "The VF VLAN has been set,"
5127                                  " but the PF device is not up.\n");
5128                         dev_warn(&adapter->pdev->dev,
5129                                  "Bring the PF device up before"
5130                                  " attempting to use the VF device.\n");
5131                 }
5132         } else {
5133                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5134                                    false, vf);
5135                 igb_set_vmvir(adapter, vlan, vf);
5136                 igb_set_vmolr(adapter, vf, true);
5137                 adapter->vf_data[vf].pf_vlan = 0;
5138                 adapter->vf_data[vf].pf_qos = 0;
5139         }
5140 out:
5141         return err;
5142 }
5143
5144 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5145 {
5146         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5147         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5148
5149         return igb_vlvf_set(adapter, vid, add, vf);
5150 }
5151
5152 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5153 {
5154         /* clear flags - except flag that indicates PF has set the MAC */
5155         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5156         adapter->vf_data[vf].last_nack = jiffies;
5157
5158         /* reset offloads to defaults */
5159         igb_set_vmolr(adapter, vf, true);
5160
5161         /* reset vlans for device */
5162         igb_clear_vf_vfta(adapter, vf);
5163         if (adapter->vf_data[vf].pf_vlan)
5164                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5165                                     adapter->vf_data[vf].pf_vlan,
5166                                     adapter->vf_data[vf].pf_qos);
5167         else
5168                 igb_clear_vf_vfta(adapter, vf);
5169
5170         /* reset multicast table array for vf */
5171         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5172
5173         /* Flush and reset the mta with the new values */
5174         igb_set_rx_mode(adapter->netdev);
5175 }
5176
5177 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5178 {
5179         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5180
5181         /* generate a new mac address as we were hotplug removed/added */
5182         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5183                 random_ether_addr(vf_mac);
5184
5185         /* process remaining reset events */
5186         igb_vf_reset(adapter, vf);
5187 }
5188
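/**
 * igb_vf_reset_msg - complete a VF-requested function reset
 * @adapter: board private structure
 * @vf: VF that requested the reset
 *
 * Resets the VF's state, programs its MAC address into a RAR entry, enables
 * its transmit and receive paths, and replies with an ACK carrying the MAC.
 **/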
5189 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5190 {
5191         struct e1000_hw *hw = &adapter->hw;
5192         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5193         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5194         u32 reg, msgbuf[3];
5195         u8 *addr = (u8 *)(&msgbuf[1]);
5196
5197         /* process all the same items cleared in a function level reset */
5198         igb_vf_reset(adapter, vf);
5199
5200         /* set vf mac address */
5201         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5202
5203         /* enable transmit and receive for vf */
5204         reg = rd32(E1000_VFTE);
5205         wr32(E1000_VFTE, reg | (1 << vf));
5206         reg = rd32(E1000_VFRE);
5207         wr32(E1000_VFRE, reg | (1 << vf));
5208
5209         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5210
5211         /* reply to reset with ack and vf mac address */
5212         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5213         memcpy(addr, vf_mac, 6);
5214         igb_write_mbx(hw, msgbuf, 3, vf);
5215 }
5216
5217 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5218 {
5219         /*
5220          * The VF MAC Address is stored in a packed array of bytes
5221          * starting at the second 32 bit word of the msg array
5222          */
5223         unsigned char *addr = (unsigned char *)&msg[1];
5224         int err = -1;
5225
5226         if (is_valid_ether_addr(addr))
5227                 err = igb_set_vf_mac(adapter, vf, addr);
5228
5229         return err;
5230 }
5231
5232 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5233 {
5234         struct e1000_hw *hw = &adapter->hw;
5235         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5236         u32 msg = E1000_VT_MSGTYPE_NACK;
5237
5238         /* if device isn't clear to send it shouldn't be reading either */
5239         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5240             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5241                 igb_write_mbx(hw, &msg, 1, vf);
5242                 vf_data->last_nack = jiffies;
5243         }
5244 }
5245
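/**
 * igb_rcv_msg_from_vf - read and dispatch a VF mailbox message
 * @adapter: board private structure
 * @vf: VF that sent the message
 *
 * Reads the mailbox, dispatches the request (reset, MAC, promiscuous,
 * multicast, LPE or VLAN) and replies with an ACK or NACK.
 **/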
5246 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5247 {
5248         struct pci_dev *pdev = adapter->pdev;
5249         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5250         struct e1000_hw *hw = &adapter->hw;
5251         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5252         s32 retval;
5253
5254         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5255
5256         if (retval) {
5257                 /* if receive failed, revoke VF CTS status and restart init */
5258                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5259                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5260                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5261                         return;
5262                 goto out;
5263         }
5264
5265         /* this is a message we already processed, do nothing */
5266         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5267                 return;
5268
5269         /*
5270          * until the vf completes a reset it should not be
5271          * allowed to start any configuration.
5272          */
5273
5274         if (msgbuf[0] == E1000_VF_RESET) {
5275                 igb_vf_reset_msg(adapter, vf);
5276                 return;
5277         }
5278
5279         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5280                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5281                         return;
5282                 retval = -1;
5283                 goto out;
5284         }
5285
5286         switch ((msgbuf[0] & 0xFFFF)) {
5287         case E1000_VF_SET_MAC_ADDR:
5288                 retval = -EINVAL;
5289                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5290                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5291                 else
5292                         dev_warn(&pdev->dev,
5293                                  "VF %d attempted to override administratively "
5294                                  "set MAC address\nReload the VF driver to "
5295                                  "resume operations\n", vf);
5296                 break;
5297         case E1000_VF_SET_PROMISC:
5298                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5299                 break;
5300         case E1000_VF_SET_MULTICAST:
5301                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5302                 break;
5303         case E1000_VF_SET_LPE:
5304                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5305                 break;
5306         case E1000_VF_SET_VLAN:
5307                 retval = -1;
5308                 if (vf_data->pf_vlan)
5309                         dev_warn(&pdev->dev,
5310                                  "VF %d attempted to override administratively "
5311                                  "set VLAN tag\nReload the VF driver to "
5312                                  "resume operations\n", vf);
5313                 else
5314                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5315                 break;
5316         default:
5317                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5318                 retval = -1;
5319                 break;
5320         }
5321
5322         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5323 out:
5324         /* notify the VF of the results of what it sent us */
5325         if (retval)
5326                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5327         else
5328                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5329
5330         igb_write_mbx(hw, msgbuf, 1, vf);
5331 }
5332
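/**
 * igb_msg_task - service all VF mailboxes
 * @adapter: board private structure
 *
 * Polls each allocated VF for pending reset requests, messages and acks,
 * and services them.
 **/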
5333 static void igb_msg_task(struct igb_adapter *adapter)
5334 {
5335         struct e1000_hw *hw = &adapter->hw;
5336         u32 vf;
5337
5338         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5339                 /* process any reset requests */
5340                 if (!igb_check_for_rst(hw, vf))
5341                         igb_vf_reset_event(adapter, vf);
5342
5343                 /* process any messages pending */
5344                 if (!igb_check_for_msg(hw, vf))
5345                         igb_rcv_msg_from_vf(adapter, vf);
5346
5347                 /* process any acks */
5348                 if (!igb_check_for_ack(hw, vf))
5349                         igb_rcv_ack_from_vf(adapter, vf);
5350         }
5351 }
5352
5353 /**
5354  *  igb_set_uta - Set unicast filter table address
5355  *  @adapter: board private structure
5356  *
5357  *  The unicast table address is a register array of 32-bit registers.
5358  *  The table is meant to be used in a way similar to how the MTA is used;
5359  *  however, due to certain limitations in the hardware it is necessary to
5360  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5361  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5362  **/
5363 static void igb_set_uta(struct igb_adapter *adapter)
5364 {
5365         struct e1000_hw *hw = &adapter->hw;
5366         int i;
5367
5368         /* The UTA table only exists on 82576 hardware and newer */
5369         if (hw->mac.type < e1000_82576)
5370                 return;
5371
5372         /* we only need to do this if VMDq is enabled */
5373         if (!adapter->vfs_allocated_count)
5374                 return;
5375
5376         for (i = 0; i < hw->mac.uta_reg_count; i++)
5377                 array_wr32(E1000_UTA, i, ~0);
5378 }
5379
5380 /**
5381  * igb_intr_msi - Interrupt Handler
5382  * @irq: interrupt number
5383  * @data: pointer to a network interface device structure
5384  **/
5385 static irqreturn_t igb_intr_msi(int irq, void *data)
5386 {
5387         struct igb_adapter *adapter = data;
5388         struct igb_q_vector *q_vector = adapter->q_vector[0];
5389         struct e1000_hw *hw = &adapter->hw;
5390         /* read ICR disables interrupts using IAM */
5391         u32 icr = rd32(E1000_ICR);
5392
5393         igb_write_itr(q_vector);
5394
5395         if (icr & E1000_ICR_DRSTA)
5396                 schedule_work(&adapter->reset_task);
5397
5398         if (icr & E1000_ICR_DOUTSYNC) {
5399                 /* HW is reporting DMA is out of sync */
5400                 adapter->stats.doosync++;
5401         }
5402
5403         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5404                 hw->mac.get_link_status = 1;
5405                 if (!test_bit(__IGB_DOWN, &adapter->state))
5406                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5407         }
5408
5409         napi_schedule(&q_vector->napi);
5410
5411         return IRQ_HANDLED;
5412 }
5413
5414 /**
5415  * igb_intr - Legacy Interrupt Handler
5416  * @irq: interrupt number
5417  * @data: pointer to a network interface device structure
5418  **/
5419 static irqreturn_t igb_intr(int irq, void *data)
5420 {
5421         struct igb_adapter *adapter = data;
5422         struct igb_q_vector *q_vector = adapter->q_vector[0];
5423         struct e1000_hw *hw = &adapter->hw;
5424         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5425          * need for the IMC write */
5426         u32 icr = rd32(E1000_ICR);
5427         if (!icr)
5428                 return IRQ_NONE;  /* Not our interrupt */
5429
5430         igb_write_itr(q_vector);
5431
5432         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5433          * not set, then the adapter didn't send an interrupt */
5434         if (!(icr & E1000_ICR_INT_ASSERTED))
5435                 return IRQ_NONE;
5436
5437         if (icr & E1000_ICR_DRSTA)
5438                 schedule_work(&adapter->reset_task);
5439
5440         if (icr & E1000_ICR_DOUTSYNC) {
5441                 /* HW is reporting DMA is out of sync */
5442                 adapter->stats.doosync++;
5443         }
5444
5445         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5446                 hw->mac.get_link_status = 1;
5447                 /* guard against interrupt when we're going down */
5448                 if (!test_bit(__IGB_DOWN, &adapter->state))
5449                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5450         }
5451
5452         napi_schedule(&q_vector->napi);
5453
5454         return IRQ_HANDLED;
5455 }
5456
5457 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5458 {
5459         struct igb_adapter *adapter = q_vector->adapter;
5460         struct e1000_hw *hw = &adapter->hw;
5461
5462         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5463             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5464                 if (!adapter->msix_entries)
5465                         igb_set_itr(adapter);
5466                 else
5467                         igb_update_ring_itr(q_vector);
5468         }
5469
5470         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5471                 if (adapter->msix_entries)
5472                         wr32(E1000_EIMS, q_vector->eims_value);
5473                 else
5474                         igb_irq_enable(adapter);
5475         }
5476 }
5477
5478 /**
5479  * igb_poll - NAPI Rx polling callback
5480  * @napi: napi polling structure
5481  * @budget: count of how many packets we should handle
5482  **/
5483 static int igb_poll(struct napi_struct *napi, int budget)
5484 {
5485         struct igb_q_vector *q_vector = container_of(napi,
5486                                                      struct igb_q_vector,
5487                                                      napi);
5488         int tx_clean_complete = 1, work_done = 0;
5489
5490 #ifdef CONFIG_IGB_DCA
5491         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5492                 igb_update_dca(q_vector);
5493 #endif
5494         if (q_vector->tx_ring)
5495                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5496
5497         if (q_vector->rx_ring)
5498                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5499
5500         if (!tx_clean_complete)
5501                 work_done = budget;
5502
5503         /* If not enough Rx work done, exit the polling mode */
5504         if (work_done < budget) {
5505                 napi_complete(napi);
5506                 igb_ring_irq_enable(q_vector);
5507         }
5508
5509         return work_done;
5510 }
5511
5512 /**
5513  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5514  * @adapter: board private structure
5515  * @shhwtstamps: timestamp structure to update
5516  * @regval: unsigned 64bit system time value.
5517  *
5518  * We need to convert the system time value stored in the RX/TXSTMP registers
5519  * into a hwtstamp which can be used by the upper level timestamping functions
5520  */
5521 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5522                                    struct skb_shared_hwtstamps *shhwtstamps,
5523                                    u64 regval)
5524 {
5525         u64 ns;
5526
5527         /*
5528          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5529          * 24 to match clock shift we setup earlier.
5530          */
5531         if (adapter->hw.mac.type == e1000_82580)
5532                 regval <<= IGB_82580_TSYNC_SHIFT;
5533
5534         ns = timecounter_cyc2time(&adapter->clock, regval);
5535         timecompare_update(&adapter->compare, ns);
5536         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5537         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5538         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5539 }
5540
5541 /**
5542  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5543  * @q_vector: pointer to q_vector containing needed info
5544  * @buffer_info: pointer to igb_buffer structure
5545  *
5546  * If we were asked to do hardware stamping and such a time stamp is
5547  * available, then it must have been for this skb here because we only
5548  * allow only one such packet into the queue.
5549  */
5550 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5551 {
5552         struct igb_adapter *adapter = q_vector->adapter;
5553         struct e1000_hw *hw = &adapter->hw;
5554         struct skb_shared_hwtstamps shhwtstamps;
5555         u64 regval;
5556
5557         /* exit unless the skb requested a hw timestamp and the TX stamp is valid */
5558         if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5559             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5560                 return;
5561
5562         regval = rd32(E1000_TXSTMPL);
5563         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5564
5565         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5566         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5567 }
5568
5569 /**
5570  * igb_clean_tx_irq - Reclaim resources after transmit completes
5571  * @q_vector: pointer to q_vector containing needed info
5572  * returns true if ring is completely cleaned
5573  **/
5574 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5575 {
5576         struct igb_adapter *adapter = q_vector->adapter;
5577         struct igb_ring *tx_ring = q_vector->tx_ring;
5578         struct net_device *netdev = tx_ring->netdev;
5579         struct e1000_hw *hw = &adapter->hw;
5580         struct igb_buffer *buffer_info;
5581         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5582         unsigned int total_bytes = 0, total_packets = 0;
5583         unsigned int i, eop, count = 0;
5584         bool cleaned = false;
5585
5586         i = tx_ring->next_to_clean;
5587         eop = tx_ring->buffer_info[i].next_to_watch;
5588         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5589
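        /* reclaim buffers while the end-of-packet descriptor we are watching
         * has been written back with the DD (descriptor done) bit set */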
5590         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5591                (count < tx_ring->count)) {
5592                 rmb();  /* read buffer_info after eop_desc status */
5593                 for (cleaned = false; !cleaned; count++) {
5594                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5595                         buffer_info = &tx_ring->buffer_info[i];
5596                         cleaned = (i == eop);
5597
5598                         if (buffer_info->skb) {
5599                                 total_bytes += buffer_info->bytecount;
5600                                 /* gso_segs is currently only valid for tcp */
5601                                 total_packets += buffer_info->gso_segs;
5602                                 igb_tx_hwtstamp(q_vector, buffer_info);
5603                         }
5604
5605                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5606                         tx_desc->wb.status = 0;
5607
5608                         i++;
5609                         if (i == tx_ring->count)
5610                                 i = 0;
5611                 }
5612                 eop = tx_ring->buffer_info[i].next_to_watch;
5613                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5614         }
5615
5616         tx_ring->next_to_clean = i;
5617
5618         if (unlikely(count &&
5619                      netif_carrier_ok(netdev) &&
5620                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5621                 /* Make sure that anybody stopping the queue after this
5622                  * sees the new next_to_clean.
5623                  */
5624                 smp_mb();
5625                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5626                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5627                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5628
5629                         u64_stats_update_begin(&tx_ring->tx_syncp);
5630                         tx_ring->tx_stats.restart_queue++;
5631                         u64_stats_update_end(&tx_ring->tx_syncp);
5632                 }
5633         }
5634
5635         if (tx_ring->detect_tx_hung) {
5636                 /* Detect a transmit hang in hardware; this serializes the
5637                  * check with the clearing of time_stamp and movement of i */
5638                 tx_ring->detect_tx_hung = false;
5639                 if (tx_ring->buffer_info[i].time_stamp &&
5640                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5641                                (adapter->tx_timeout_factor * HZ)) &&
5642                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5643
5644                         /* detected Tx unit hang */
5645                         dev_err(tx_ring->dev,
5646                                 "Detected Tx Unit Hang\n"
5647                                 "  Tx Queue             <%d>\n"
5648                                 "  TDH                  <%x>\n"
5649                                 "  TDT                  <%x>\n"
5650                                 "  next_to_use          <%x>\n"
5651                                 "  next_to_clean        <%x>\n"
5652                                 "buffer_info[next_to_clean]\n"
5653                                 "  time_stamp           <%lx>\n"
5654                                 "  next_to_watch        <%x>\n"
5655                                 "  jiffies              <%lx>\n"
5656                                 "  desc.status          <%x>\n",
5657                                 tx_ring->queue_index,
5658                                 readl(tx_ring->head),
5659                                 readl(tx_ring->tail),
5660                                 tx_ring->next_to_use,
5661                                 tx_ring->next_to_clean,
5662                                 tx_ring->buffer_info[eop].time_stamp,
5663                                 eop,
5664                                 jiffies,
5665                                 eop_desc->wb.status);
5666                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5667                 }
5668         }
5669         tx_ring->total_bytes += total_bytes;
5670         tx_ring->total_packets += total_packets;
5671         u64_stats_update_begin(&tx_ring->tx_syncp);
5672         tx_ring->tx_stats.bytes += total_bytes;
5673         tx_ring->tx_stats.packets += total_packets;
5674         u64_stats_update_end(&tx_ring->tx_syncp);
5675         return count < tx_ring->count;
5676 }
5677
5678 /**
5679  * igb_receive_skb - helper function to handle rx indications
5680  * @q_vector: structure containing interrupt and ring information
5681  * @skb: packet to send up
5682  * @vlan_tag: vlan tag for packet
5683  **/
5684 static void igb_receive_skb(struct igb_q_vector *q_vector,
5685                             struct sk_buff *skb,
5686                             u16 vlan_tag)
5687 {
5688         struct igb_adapter *adapter = q_vector->adapter;
5689
5690         if (vlan_tag && adapter->vlgrp)
5691                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5692                                  vlan_tag, skb);
5693         else
5694                 napi_gro_receive(&q_vector->napi, skb);
5695 }
5696
5697 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5698                                        u32 status_err, struct sk_buff *skb)
5699 {
5700         skb_checksum_none_assert(skb);
5701
5702         /* skip when the Ignore Checksum bit is set or checksum is disabled through ethtool */
5703         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5704              (status_err & E1000_RXD_STAT_IXSM))
5705                 return;
5706
5707         /* TCP/UDP checksum error bit is set */
5708         if (status_err &
5709             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5710                 /*
5711                  * work around errata with SCTP packets where the TCPE (aka
5712                  * L4E) bit is set incorrectly on 64 byte (60 byte w/o CRC)
5713                  * packets; let the stack verify the crc32c instead
5714                  */
5715                 if ((skb->len == 60) &&
5716                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5717                         u64_stats_update_begin(&ring->rx_syncp);
5718                         ring->rx_stats.csum_err++;
5719                         u64_stats_update_end(&ring->rx_syncp);
5720                 }
5721                 /* let the stack verify checksum errors */
5722                 return;
5723         }
5724         /* It must be a TCP or UDP packet with a valid checksum */
5725         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5726                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5727
5728         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5729 }
5730
5731 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5732                                    struct sk_buff *skb)
5733 {
5734         struct igb_adapter *adapter = q_vector->adapter;
5735         struct e1000_hw *hw = &adapter->hw;
5736         u64 regval;
5737
5738         /*
5739          * If this bit is set, then the RX registers contain the time stamp. No
5740          * other packet will be time stamped until we read these registers, so
5741          * read the registers to make them available again. Because only one
5742          * packet can be time stamped at a time, we know that the register
5743          * values must belong to this one here and therefore we don't need to
5744          * compare any of the additional attributes stored for it.
5745          *
5746          * If nothing went wrong, then it should have a shared tx_flags that we
5747          * can turn into a skb_shared_hwtstamps.
5748          */
5749         if (staterr & E1000_RXDADV_STAT_TSIP) {
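                /* the hardware prepended the timestamp to the packet data;
                 * read it from the buffer and strip it from the skb */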
5750                 u32 *stamp = (u32 *)skb->data;
5751                 regval = le32_to_cpu(*(stamp + 2));
5752                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5753                 skb_pull(skb, IGB_TS_HDR_LEN);
5754         } else {
5755                 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5756                         return;
5757
5758                 regval = rd32(E1000_RXSTMPL);
5759                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5760         }
5761
5762         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5763 }
5764 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5765                                union e1000_adv_rx_desc *rx_desc)
5766 {
5767         /* HW will not DMA in data larger than the given buffer, even if it
5768          * parses the (NFS, of course) header to be larger.  In that case, it
5769          * fills the header buffer and spills the rest into the page.
5770          */
5771         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5772                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5773         if (hlen > rx_ring->rx_buffer_len)
5774                 hlen = rx_ring->rx_buffer_len;
5775         return hlen;
5776 }
5777
5778 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5779                                  int *work_done, int budget)
5780 {
5781         struct igb_ring *rx_ring = q_vector->rx_ring;
5782         struct net_device *netdev = rx_ring->netdev;
5783         struct device *dev = rx_ring->dev;
5784         union e1000_adv_rx_desc *rx_desc , *next_rxd;
5785         struct igb_buffer *buffer_info , *next_buffer;
5786         struct sk_buff *skb;
5787         bool cleaned = false;
5788         int cleaned_count = 0;
5789         int current_node = numa_node_id();
5790         unsigned int total_bytes = 0, total_packets = 0;
5791         unsigned int i;
5792         u32 staterr;
5793         u16 length;
5794         u16 vlan_tag;
5795
5796         i = rx_ring->next_to_clean;
5797         buffer_info = &rx_ring->buffer_info[i];
5798         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5799         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5800
5801         while (staterr & E1000_RXD_STAT_DD) {
5802                 if (*work_done >= budget)
5803                         break;
5804                 (*work_done)++;
5805                 rmb(); /* read descriptor and rx_buffer_info after status DD */
5806
5807                 skb = buffer_info->skb;
5808                 prefetch(skb->data - NET_IP_ALIGN);
5809                 buffer_info->skb = NULL;
5810
5811                 i++;
5812                 if (i == rx_ring->count)
5813                         i = 0;
5814
5815                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5816                 prefetch(next_rxd);
5817                 next_buffer = &rx_ring->buffer_info[i];
5818
5819                 length = le16_to_cpu(rx_desc->wb.upper.length);
5820                 cleaned = true;
5821                 cleaned_count++;
5822
5823                 if (buffer_info->dma) {
5824                         dma_unmap_single(dev, buffer_info->dma,
5825                                          rx_ring->rx_buffer_len,
5826                                          DMA_FROM_DEVICE);
5827                         buffer_info->dma = 0;
5828                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5829                                 skb_put(skb, length);
5830                                 goto send_up;
5831                         }
5832                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5833                 }
5834
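                /* payload beyond the header buffer was DMA'd into the half-page
                 * buffer; attach that page half to the skb as a fragment */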
5835                 if (length) {
5836                         dma_unmap_page(dev, buffer_info->page_dma,
5837                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5838                         buffer_info->page_dma = 0;
5839
5840                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5841                                                 buffer_info->page,
5842                                                 buffer_info->page_offset,
5843                                                 length);
5844
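                        /* only hang on to the page for reuse if we are its sole
                         * user and it is local to the current NUMA node */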
5845                         if ((page_count(buffer_info->page) != 1) ||
5846                             (page_to_nid(buffer_info->page) != current_node))
5847                                 buffer_info->page = NULL;
5848                         else
5849                                 get_page(buffer_info->page);
5850
5851                         skb->len += length;
5852                         skb->data_len += length;
5853                         skb->truesize += length;
5854                 }
5855
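                /* not the last descriptor of this packet: hand the skb over to
                 * the next buffer and keep gathering fragments */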
5856                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5857                         buffer_info->skb = next_buffer->skb;
5858                         buffer_info->dma = next_buffer->dma;
5859                         next_buffer->skb = skb;
5860                         next_buffer->dma = 0;
5861                         goto next_desc;
5862                 }
5863 send_up:
5864                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5865                         dev_kfree_skb_irq(skb);
5866                         goto next_desc;
5867                 }
5868
5869                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5870                         igb_rx_hwtstamp(q_vector, staterr, skb);
5871                 total_bytes += skb->len;
5872                 total_packets++;
5873
5874                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5875
5876                 skb->protocol = eth_type_trans(skb, netdev);
5877                 skb_record_rx_queue(skb, rx_ring->queue_index);
5878
5879                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5880                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5881
5882                 igb_receive_skb(q_vector, skb, vlan_tag);
5883
5884 next_desc:
5885                 rx_desc->wb.upper.status_error = 0;
5886
5887                 /* return some buffers to hardware; one at a time is too slow */
5888                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5889                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5890                         cleaned_count = 0;
5891                 }
5892
5893                 /* use prefetched values */
5894                 rx_desc = next_rxd;
5895                 buffer_info = next_buffer;
5896                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5897         }
5898
5899         rx_ring->next_to_clean = i;
5900         cleaned_count = igb_desc_unused(rx_ring);
5901
5902         if (cleaned_count)
5903                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5904
5905         rx_ring->total_packets += total_packets;
5906         rx_ring->total_bytes += total_bytes;
5907         u64_stats_update_begin(&rx_ring->rx_syncp);
5908         rx_ring->rx_stats.packets += total_packets;
5909         rx_ring->rx_stats.bytes += total_bytes;
5910         u64_stats_update_end(&rx_ring->rx_syncp);
5911         return cleaned;
5912 }
5913
5914 /**
5915  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5916  * @rx_ring: address of the rx descriptor ring to refill
5917  **/
5918 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5919 {
5920         struct net_device *netdev = rx_ring->netdev;
5921         union e1000_adv_rx_desc *rx_desc;
5922         struct igb_buffer *buffer_info;
5923         struct sk_buff *skb;
5924         unsigned int i;
5925         int bufsz;
5926
5927         i = rx_ring->next_to_use;
5928         buffer_info = &rx_ring->buffer_info[i];
5929
5930         bufsz = rx_ring->rx_buffer_len;
5931
5932         while (cleaned_count--) {
5933                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5934
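                /* with small rx buffers the driver runs in packet-split mode,
                 * so each descriptor also needs a mapped half page for payload */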
5935                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5936                         if (!buffer_info->page) {
5937                                 buffer_info->page = netdev_alloc_page(netdev);
5938                                 if (unlikely(!buffer_info->page)) {
5939                                         u64_stats_update_begin(&rx_ring->rx_syncp);
5940                                         rx_ring->rx_stats.alloc_failed++;
5941                                         u64_stats_update_end(&rx_ring->rx_syncp);
5942                                         goto no_buffers;
5943                                 }
5944                                 buffer_info->page_offset = 0;
5945                         } else {
5946                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5947                         }
5948                         buffer_info->page_dma =
5949                                 dma_map_page(rx_ring->dev, buffer_info->page,
5950                                              buffer_info->page_offset,
5951                                              PAGE_SIZE / 2,
5952                                              DMA_FROM_DEVICE);
5953                         if (dma_mapping_error(rx_ring->dev,
5954                                               buffer_info->page_dma)) {
5955                                 buffer_info->page_dma = 0;
5956                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5957                                 rx_ring->rx_stats.alloc_failed++;
5958                                 u64_stats_update_end(&rx_ring->rx_syncp);
5959                                 goto no_buffers;
5960                         }
5961                 }
5962
5963                 skb = buffer_info->skb;
5964                 if (!skb) {
5965                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5966                         if (unlikely(!skb)) {
5967                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5968                                 rx_ring->rx_stats.alloc_failed++;
5969                                 u64_stats_update_end(&rx_ring->rx_syncp);
5970                                 goto no_buffers;
5971                         }
5972
5973                         buffer_info->skb = skb;
5974                 }
5975                 if (!buffer_info->dma) {
5976                         buffer_info->dma = dma_map_single(rx_ring->dev,
5977                                                           skb->data,
5978                                                           bufsz,
5979                                                           DMA_FROM_DEVICE);
5980                         if (dma_mapping_error(rx_ring->dev,
5981                                               buffer_info->dma)) {
5982                                 buffer_info->dma = 0;
5983                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5984                                 rx_ring->rx_stats.alloc_failed++;
5985                                 u64_stats_update_end(&rx_ring->rx_syncp);
5986                                 goto no_buffers;
5987                         }
5988                 }
5989                 /* Refresh the desc even if buffer_addrs didn't change because
5990                  * each write-back erases this info. */
5991                 if (bufsz < IGB_RXBUFFER_1024) {
5992                         rx_desc->read.pkt_addr =
5993                              cpu_to_le64(buffer_info->page_dma);
5994                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5995                 } else {
5996                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5997                         rx_desc->read.hdr_addr = 0;
5998                 }
5999
6000                 i++;
6001                 if (i == rx_ring->count)
6002                         i = 0;
6003                 buffer_info = &rx_ring->buffer_info[i];
6004         }
6005
6006 no_buffers:
6007         if (rx_ring->next_to_use != i) {
6008                 rx_ring->next_to_use = i;
6009                 if (i == 0)
6010                         i = (rx_ring->count - 1);
6011                 else
6012                         i--;
6013
6014                 /* Force memory writes to complete before letting h/w
6015                  * know there are new descriptors to fetch.  (Only
6016                  * applicable for weak-ordered memory model archs,
6017                  * such as IA-64). */
6018                 wmb();
6019                 writel(i, rx_ring->tail);
6020         }
6021 }
6022
6023 /**
6024  * igb_mii_ioctl - handle MII register ioctls
6025  * @netdev: network interface device structure
6026  * @ifr: interface request structure holding the MII data
6027  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6028  **/
6029 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6030 {
6031         struct igb_adapter *adapter = netdev_priv(netdev);
6032         struct mii_ioctl_data *data = if_mii(ifr);
6033
6034         if (adapter->hw.phy.media_type != e1000_media_type_copper)
6035                 return -EOPNOTSUPP;
6036
6037         switch (cmd) {
6038         case SIOCGMIIPHY:
6039                 data->phy_id = adapter->hw.phy.addr;
6040                 break;
6041         case SIOCGMIIREG:
6042                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6043                                      &data->val_out))
6044                         return -EIO;
6045                 break;
6046         case SIOCSMIIREG:
6047         default:
6048                 return -EOPNOTSUPP;
6049         }
6050         return 0;
6051 }
6052
6053 /**
6054  * igb_hwtstamp_ioctl - control hardware time stamping
6055  * @netdev: network interface device structure
6056  * @ifr: interface request structure holding the hwtstamp_config
6057  * @cmd: ioctl command (SIOCSHWTSTAMP)
6058  *
6059  * Outgoing time stamping can be enabled and disabled. Play nice and
6060  * disable it when requested, although it shouldn't cause any overhead
6061  * when no packet needs it. At most one packet in the queue may be
6062  * marked for time stamping, otherwise it would be impossible to tell
6063  * for sure to which packet the hardware time stamp belongs.
6064  *
6065  * Incoming time stamping has to be configured via the hardware
6066  * filters. Not all combinations are supported, in particular event
6067  * type has to be specified. Matching the kind of event packet is
6068  * not supported, with the exception of "all V2 events regardless of
6069  * level 2 or 4".
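 *
 * A rough userspace sketch of exercising this path (the socket fd and the
 * interface name are illustrative):
 *
 *   struct hwtstamp_config cfg = { 0 };
 *   struct ifreq ifr = { 0 };
 *   cfg.tx_type = HWTSTAMP_TX_ON;
 *   cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 *   strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *   ifr.ifr_data = (void *)&cfg;
 *   ioctl(sock_fd, SIOCSHWTSTAMP, &ifr);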
6070  *
6071  **/
6072 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6073                               struct ifreq *ifr, int cmd)
6074 {
6075         struct igb_adapter *adapter = netdev_priv(netdev);
6076         struct e1000_hw *hw = &adapter->hw;
6077         struct hwtstamp_config config;
6078         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6079         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6080         u32 tsync_rx_cfg = 0;
6081         bool is_l4 = false;
6082         bool is_l2 = false;
6083         u32 regval;
6084
6085         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6086                 return -EFAULT;
6087
6088         /* reserved for future extensions */
6089         if (config.flags)
6090                 return -EINVAL;
6091
6092         switch (config.tx_type) {
6093         case HWTSTAMP_TX_OFF:
6094                 tsync_tx_ctl = 0;
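                /* fall through */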
6095         case HWTSTAMP_TX_ON:
6096                 break;
6097         default:
6098                 return -ERANGE;
6099         }
6100
6101         switch (config.rx_filter) {
6102         case HWTSTAMP_FILTER_NONE:
6103                 tsync_rx_ctl = 0;
6104                 break;
6105         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6106         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6107         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6108         case HWTSTAMP_FILTER_ALL:
6109                 /*
6110                  * register TSYNCRXCFG must be set, therefore it is not
6111                  * possible to time stamp both Sync and Delay_Req messages
6112                  * => fall back to time stamping all packets
6113                  */
6114                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6115                 config.rx_filter = HWTSTAMP_FILTER_ALL;
6116                 break;
6117         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6118                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6119                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6120                 is_l4 = true;
6121                 break;
6122         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6123                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6124                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6125                 is_l4 = true;
6126                 break;
6127         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6128         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6129                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6130                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6131                 is_l2 = true;
6132                 is_l4 = true;
6133                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6134                 break;
6135         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6136         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6137                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6138                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6139                 is_l2 = true;
6140                 is_l4 = true;
6141                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6142                 break;
6143         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6144         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6145         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6146                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6147                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6148                 is_l2 = true;
6149                 break;
6150         default:
6151                 return -ERANGE;
6152         }
6153
6154         if (hw->mac.type == e1000_82575) {
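                /* the 82575 has no hardware timestamping support, so reject
                 * any request other than turning timestamping off */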
6155                 if (tsync_rx_ctl | tsync_tx_ctl)
6156                         return -EINVAL;
6157                 return 0;
6158         }
6159
6160         /*
6161          * Per-packet timestamping only works if all packets are
6162          * timestamped, so enable timestamping in all packets as
6163          * long as one rx filter was configured.
6164          */
6165         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6166                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6167                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6168         }
6169
6170         /* enable/disable TX */
6171         regval = rd32(E1000_TSYNCTXCTL);
6172         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6173         regval |= tsync_tx_ctl;
6174         wr32(E1000_TSYNCTXCTL, regval);
6175
6176         /* enable/disable RX */
6177         regval = rd32(E1000_TSYNCRXCTL);
6178         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6179         regval |= tsync_rx_ctl;
6180         wr32(E1000_TSYNCRXCTL, regval);
6181
6182         /* define which PTP packets are time stamped */
6183         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6184
6185         /* define ethertype filter for timestamped packets */
6186         if (is_l2)
6187                 wr32(E1000_ETQF(3),
6188                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6189                                  E1000_ETQF_1588 | /* enable timestamping */
6190                                  ETH_P_1588));     /* 1588 eth protocol type */
6191         else
6192                 wr32(E1000_ETQF(3), 0);
6193
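/* well-known UDP port for PTP (IEEE 1588) event messages */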
6194 #define PTP_PORT 319
6195         /* L4 Queue Filter[3]: filter by destination port and protocol */
6196         if (is_l4) {
6197                 u32 ftqf = (IPPROTO_UDP /* UDP */
6198                         | E1000_FTQF_VF_BP /* VF not compared */
6199                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6200                         | E1000_FTQF_MASK); /* mask all inputs */
6201                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6202
6203                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6204                 wr32(E1000_IMIREXT(3),
6205                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6206                 if (hw->mac.type == e1000_82576) {
6207                         /* enable source port check */
6208                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6209                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6210                 }
6211                 wr32(E1000_FTQF(3), ftqf);
6212         } else {
6213                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6214         }
6215         wrfl();
6216
6217         adapter->hwtstamp_config = config;
6218
6219         /* clear TX/RX time stamp registers, just to be sure */
6220         regval = rd32(E1000_TXSTMPH);
6221         regval = rd32(E1000_RXSTMPH);
6222
6223         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6224                 -EFAULT : 0;
6225 }
6226
6227 /**
6228  * igb_ioctl - dispatch device-specific ioctls
6229  * @netdev: network interface device structure
6230  * @ifr: interface request structure
6231  * @cmd: ioctl command
6232  **/
6233 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6234 {
6235         switch (cmd) {
6236         case SIOCGMIIPHY:
6237         case SIOCGMIIREG:
6238         case SIOCSMIIREG:
6239                 return igb_mii_ioctl(netdev, ifr, cmd);
6240         case SIOCSHWTSTAMP:
6241                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6242         default:
6243                 return -EOPNOTSUPP;
6244         }
6245 }
6246
6247 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6248 {
6249         struct igb_adapter *adapter = hw->back;
6250         u16 cap_offset;
6251
6252         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6253         if (!cap_offset)
6254                 return -E1000_ERR_CONFIG;
6255
6256         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6257
6258         return 0;
6259 }
6260
6261 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6262 {
6263         struct igb_adapter *adapter = hw->back;
6264         u16 cap_offset;
6265
6266         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6267         if (!cap_offset)
6268                 return -E1000_ERR_CONFIG;
6269
6270         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6271
6272         return 0;
6273 }
6274
6275 static void igb_vlan_rx_register(struct net_device *netdev,
6276                                  struct vlan_group *grp)
6277 {
6278         struct igb_adapter *adapter = netdev_priv(netdev);
6279         struct e1000_hw *hw = &adapter->hw;
6280         u32 ctrl, rctl;
6281
6282         igb_irq_disable(adapter);
6283         adapter->vlgrp = grp;
6284
6285         if (grp) {
6286                 /* enable VLAN tag insert/strip */
6287                 ctrl = rd32(E1000_CTRL);
6288                 ctrl |= E1000_CTRL_VME;
6289                 wr32(E1000_CTRL, ctrl);
6290
6291                 /* Disable CFI check */
6292                 rctl = rd32(E1000_RCTL);
6293                 rctl &= ~E1000_RCTL_CFIEN;
6294                 wr32(E1000_RCTL, rctl);
6295         } else {
6296                 /* disable VLAN tag insert/strip */
6297                 ctrl = rd32(E1000_CTRL);
6298                 ctrl &= ~E1000_CTRL_VME;
6299                 wr32(E1000_CTRL, ctrl);
6300         }
6301
6302         igb_rlpml_set(adapter);
6303
6304         if (!test_bit(__IGB_DOWN, &adapter->state))
6305                 igb_irq_enable(adapter);
6306 }
6307
6308 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6309 {
6310         struct igb_adapter *adapter = netdev_priv(netdev);
6311         struct e1000_hw *hw = &adapter->hw;
6312         int pf_id = adapter->vfs_allocated_count;
6313
6314         /* attempt to add filter to vlvf array */
6315         igb_vlvf_set(adapter, vid, true, pf_id);
6316
6317         /* add the filter since PF can receive vlans w/o entry in vlvf */
6318         igb_vfta_set(hw, vid, true);
6319 }
6320
6321 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6322 {
6323         struct igb_adapter *adapter = netdev_priv(netdev);
6324         struct e1000_hw *hw = &adapter->hw;
6325         int pf_id = adapter->vfs_allocated_count;
6326         s32 err;
6327
6328         igb_irq_disable(adapter);
6329         vlan_group_set_device(adapter->vlgrp, vid, NULL);
6330
6331         if (!test_bit(__IGB_DOWN, &adapter->state))
6332                 igb_irq_enable(adapter);
6333
6334         /* remove vlan from VLVF table array */
6335         err = igb_vlvf_set(adapter, vid, false, pf_id);
6336
6337         /* if vid was not present in VLVF just remove it from table */
6338         if (err)
6339                 igb_vfta_set(hw, vid, false);
6340 }
6341
6342 static void igb_restore_vlan(struct igb_adapter *adapter)
6343 {
6344         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6345
6346         if (adapter->vlgrp) {
6347                 u16 vid;
6348                 for (vid = 0; vid < VLAN_N_VID; vid++) {
6349                         if (!vlan_group_get_device(adapter->vlgrp, vid))
6350                                 continue;
6351                         igb_vlan_rx_add_vid(adapter->netdev, vid);
6352                 }
6353         }
6354 }
6355
6356 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6357 {
6358         struct pci_dev *pdev = adapter->pdev;
6359         struct e1000_mac_info *mac = &adapter->hw.mac;
6360
6361         mac->autoneg = 0;
6362
6363         /* Make sure dplx is at most 1 bit and lsb of speed is not set
6364          * for the switch() below to work */
6365         if ((spd & 1) || (dplx & ~1))
6366                 goto err_inval;
6367
6368         /* Fiber NICs only allow 1000 Mbps full duplex */
6369         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6370             spd != SPEED_1000 &&
6371             dplx != DUPLEX_FULL)
6372                 goto err_inval;
6373
6374         switch (spd + dplx) {
6375         case SPEED_10 + DUPLEX_HALF:
6376                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6377                 break;
6378         case SPEED_10 + DUPLEX_FULL:
6379                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6380                 break;
6381         case SPEED_100 + DUPLEX_HALF:
6382                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6383                 break;
6384         case SPEED_100 + DUPLEX_FULL:
6385                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6386                 break;
6387         case SPEED_1000 + DUPLEX_FULL:
6388                 mac->autoneg = 1;
6389                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6390                 break;
6391         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6392         default:
6393                 goto err_inval;
6394         }
6395         return 0;
6396
6397 err_inval:
6398         dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6399         return -EINVAL;
6400 }
6401
6402 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6403 {
6404         struct net_device *netdev = pci_get_drvdata(pdev);
6405         struct igb_adapter *adapter = netdev_priv(netdev);
6406         struct e1000_hw *hw = &adapter->hw;
6407         u32 ctrl, rctl, status;
6408         u32 wufc = adapter->wol;
6409 #ifdef CONFIG_PM
6410         int retval = 0;
6411 #endif
6412
6413         netif_device_detach(netdev);
6414
6415         if (netif_running(netdev))
6416                 igb_close(netdev);
6417
6418         igb_clear_interrupt_scheme(adapter);
6419
6420 #ifdef CONFIG_PM
6421         retval = pci_save_state(pdev);
6422         if (retval)
6423                 return retval;
6424 #endif
6425
6426         status = rd32(E1000_STATUS);
6427         if (status & E1000_STATUS_LU)
6428                 wufc &= ~E1000_WUFC_LNKC;
6429
6430         if (wufc) {
6431                 igb_setup_rctl(adapter);
6432                 igb_set_rx_mode(netdev);
6433
6434                 /* turn on all-multi mode if wake on multicast is enabled */
6435                 if (wufc & E1000_WUFC_MC) {
6436                         rctl = rd32(E1000_RCTL);
6437                         rctl |= E1000_RCTL_MPE;
6438                         wr32(E1000_RCTL, rctl);
6439                 }
6440
6441                 ctrl = rd32(E1000_CTRL);
6442                 /* advertise wake from D3Cold */
6443                 #define E1000_CTRL_ADVD3WUC 0x00100000
6444                 /* phy power management enable */
6445                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6446                 ctrl |= E1000_CTRL_ADVD3WUC;
6447                 wr32(E1000_CTRL, ctrl);
6448
6449                 /* Allow time for pending master requests to run */
6450                 igb_disable_pcie_master(hw);
6451
6452                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6453                 wr32(E1000_WUFC, wufc);
6454         } else {
6455                 wr32(E1000_WUC, 0);
6456                 wr32(E1000_WUFC, 0);
6457         }
6458
6459         *enable_wake = wufc || adapter->en_mng_pt;
6460         if (!*enable_wake)
6461                 igb_power_down_link(adapter);
6462         else
6463                 igb_power_up_link(adapter);
6464
6465         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6466          * would have already happened in close and is redundant. */
6467         igb_release_hw_control(adapter);
6468
6469         pci_disable_device(pdev);
6470
6471         return 0;
6472 }
6473
6474 #ifdef CONFIG_PM
6475 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6476 {
6477         int retval;
6478         bool wake;
6479
6480         retval = __igb_shutdown(pdev, &wake);
6481         if (retval)
6482                 return retval;
6483
6484         if (wake) {
6485                 pci_prepare_to_sleep(pdev);
6486         } else {
6487                 pci_wake_from_d3(pdev, false);
6488                 pci_set_power_state(pdev, PCI_D3hot);
6489         }
6490
6491         return 0;
6492 }
6493
6494 static int igb_resume(struct pci_dev *pdev)
6495 {
6496         struct net_device *netdev = pci_get_drvdata(pdev);
6497         struct igb_adapter *adapter = netdev_priv(netdev);
6498         struct e1000_hw *hw = &adapter->hw;
6499         u32 err;
6500
6501         pci_set_power_state(pdev, PCI_D0);
6502         pci_restore_state(pdev);
6503         pci_save_state(pdev);
6504
6505         err = pci_enable_device_mem(pdev);
6506         if (err) {
6507                 dev_err(&pdev->dev,
6508                         "igb: Cannot enable PCI device from suspend\n");
6509                 return err;
6510         }
6511         pci_set_master(pdev);
6512
6513         pci_enable_wake(pdev, PCI_D3hot, 0);
6514         pci_enable_wake(pdev, PCI_D3cold, 0);
6515
6516         if (igb_init_interrupt_scheme(adapter)) {
6517                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6518                 return -ENOMEM;
6519         }
6520
6521         igb_reset(adapter);
6522
6523         /* let the f/w know that the h/w is now under the control of the
6524          * driver. */
6525         igb_get_hw_control(adapter);
6526
6527         wr32(E1000_WUS, ~0);
6528
6529         if (netif_running(netdev)) {
6530                 err = igb_open(netdev);
6531                 if (err)
6532                         return err;
6533         }
6534
6535         netif_device_attach(netdev);
6536
6537         return 0;
6538 }
6539 #endif
6540
6541 static void igb_shutdown(struct pci_dev *pdev)
6542 {
6543         bool wake;
6544
6545         __igb_shutdown(pdev, &wake);
6546
6547         if (system_state == SYSTEM_POWER_OFF) {
6548                 pci_wake_from_d3(pdev, wake);
6549                 pci_set_power_state(pdev, PCI_D3hot);
6550         }
6551 }
6552
6553 #ifdef CONFIG_NET_POLL_CONTROLLER
6554 /*
6555  * Polling 'interrupt' - used by things like netconsole to send skbs
6556  * without having to re-enable interrupts. It's not called while
6557  * the interrupt routine is executing.
6558  */
6559 static void igb_netpoll(struct net_device *netdev)
6560 {
6561         struct igb_adapter *adapter = netdev_priv(netdev);
6562         struct e1000_hw *hw = &adapter->hw;
6563         int i;
6564
6565         if (!adapter->msix_entries) {
6566                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6567                 igb_irq_disable(adapter);
6568                 napi_schedule(&q_vector->napi);
6569                 return;
6570         }
6571
6572         for (i = 0; i < adapter->num_q_vectors; i++) {
6573                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6574                 wr32(E1000_EIMC, q_vector->eims_value);
6575                 napi_schedule(&q_vector->napi);
6576         }
6577 }
6578 #endif /* CONFIG_NET_POLL_CONTROLLER */
6579
6580 /**
6581  * igb_io_error_detected - called when PCI error is detected
6582  * @pdev: Pointer to PCI device
6583  * @state: The current pci connection state
6584  *
6585  * This function is called after a PCI bus error affecting
6586  * this device has been detected.
6587  */
6588 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6589                                               pci_channel_state_t state)
6590 {
6591         struct net_device *netdev = pci_get_drvdata(pdev);
6592         struct igb_adapter *adapter = netdev_priv(netdev);
6593
6594         netif_device_detach(netdev);
6595
6596         if (state == pci_channel_io_perm_failure)
6597                 return PCI_ERS_RESULT_DISCONNECT;
6598
6599         if (netif_running(netdev))
6600                 igb_down(adapter);
6601         pci_disable_device(pdev);
6602
6603         /* Request a slot reset. */
6604         return PCI_ERS_RESULT_NEED_RESET;
6605 }
6606
6607 /**
6608  * igb_io_slot_reset - called after the pci bus has been reset.
6609  * @pdev: Pointer to PCI device
6610  *
6611  * Restart the card from scratch, as if from a cold-boot. Implementation
6612  * resembles the first-half of the igb_resume routine.
6613  */
6614 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6615 {
6616         struct net_device *netdev = pci_get_drvdata(pdev);
6617         struct igb_adapter *adapter = netdev_priv(netdev);
6618         struct e1000_hw *hw = &adapter->hw;
6619         pci_ers_result_t result;
6620         int err;
6621
6622         if (pci_enable_device_mem(pdev)) {
6623                 dev_err(&pdev->dev,
6624                         "Cannot re-enable PCI device after reset.\n");
6625                 result = PCI_ERS_RESULT_DISCONNECT;
6626         } else {
6627                 pci_set_master(pdev);
6628                 pci_restore_state(pdev);
6629                 pci_save_state(pdev);
6630
6631                 pci_enable_wake(pdev, PCI_D3hot, 0);
6632                 pci_enable_wake(pdev, PCI_D3cold, 0);
6633
6634                 igb_reset(adapter);
6635                 wr32(E1000_WUS, ~0);
6636                 result = PCI_ERS_RESULT_RECOVERED;
6637         }
6638
6639         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6640         if (err) {
6641                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6642                         "failed 0x%0x\n", err);
6643                 /* non-fatal, continue */
6644         }
6645
6646         return result;
6647 }
6648
6649 /**
6650  * igb_io_resume - called when traffic can start flowing again.
6651  * @pdev: Pointer to PCI device
6652  *
6653  * This callback is called when the error recovery driver tells us that
6654  * it's OK to resume normal operation. Implementation resembles the
6655  * second-half of the igb_resume routine.
6656  */
6657 static void igb_io_resume(struct pci_dev *pdev)
6658 {
6659         struct net_device *netdev = pci_get_drvdata(pdev);
6660         struct igb_adapter *adapter = netdev_priv(netdev);
6661
6662         if (netif_running(netdev)) {
6663                 if (igb_up(adapter)) {
6664                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6665                         return;
6666                 }
6667         }
6668
6669         netif_device_attach(netdev);
6670
6671         /* let the f/w know that the h/w is now under the control of the
6672          * driver. */
6673         igb_get_hw_control(adapter);
6674 }
6675
6676 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6677                              u8 qsel)
6678 {
6679         u32 rar_low, rar_high;
6680         struct e1000_hw *hw = &adapter->hw;
6681
6682         /* HW expects these in little endian so we reverse the byte order
6683          * from network order (big endian) to little endian
6684          */
6685         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6686                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6687         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6688
6689         /* Indicate to hardware the Address is Valid. */
6690         rar_high |= E1000_RAH_AV;
6691
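        /* 82575 encodes the queue/pool number directly in the RAH pool field,
         * while later MACs use one bit per pool */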
6692         if (hw->mac.type == e1000_82575)
6693                 rar_high |= E1000_RAH_POOL_1 * qsel;
6694         else
6695                 rar_high |= E1000_RAH_POOL_1 << qsel;
6696
6697         wr32(E1000_RAL(index), rar_low);
6698         wrfl();
6699         wr32(E1000_RAH(index), rar_high);
6700         wrfl();
6701 }
6702
6703 static int igb_set_vf_mac(struct igb_adapter *adapter,
6704                           int vf, unsigned char *mac_addr)
6705 {
6706         struct e1000_hw *hw = &adapter->hw;
6707         /* VF MAC addresses start at the end of the receive addresses and move
6708          * towards the first, so a collision should not be possible */
6709         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6710
6711         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6712
6713         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6714
6715         return 0;
6716 }
6717
6718 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6719 {
6720         struct igb_adapter *adapter = netdev_priv(netdev);
6721         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6722                 return -EINVAL;
6723         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6724         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6725         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6726                                       " change effective.");
6727         if (test_bit(__IGB_DOWN, &adapter->state)) {
6728                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6729                          " but the PF device is not up.\n");
6730                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6731                          " attempting to use the VF device.\n");
6732         }
6733         return igb_set_vf_mac(adapter, vf, mac);
6734 }
6735
6736 static int igb_link_mbps(int internal_link_speed)
6737 {
6738         switch (internal_link_speed) {
6739         case SPEED_100:
6740                 return 100;
6741         case SPEED_1000:
6742                 return 1000;
6743         default:
6744                 return 0;
6745         }
6746 }
6747
6748 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6749                                   int link_speed)
6750 {
6751         int rf_dec, rf_int;
6752         u32 bcnrc_val;
6753
6754         if (tx_rate != 0) {
6755                 /* Calculate the rate factor values to set */
6756                 rf_int = link_speed / tx_rate;
6757                 rf_dec = (link_speed - (rf_int * tx_rate));
6758                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
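                /*
                 * rf_int is the whole part of link_speed/tx_rate and rf_dec is
                 * the remainder as a 2^E1000_RTTBCNRC_RF_INT_SHIFT fixed-point
                 * fraction, e.g. a 1000 Mbps link with a 400 Mbps limit gives
                 * rf_int = 2 and rf_dec = half of the fixed-point scale.
                 */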
6759
6760                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6761                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6762                                E1000_RTTBCNRC_RF_INT_MASK);
6763                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6764         } else {
6765                 bcnrc_val = 0;
6766         }
6767
6768         wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6769         wr32(E1000_RTTBCNRC, bcnrc_val);
6770 }
6771
6772 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6773 {
6774         int actual_link_speed, i;
6775         bool reset_rate = false;
6776
6777         /* VF TX rate limit was not set or not supported */
6778         if ((adapter->vf_rate_link_speed == 0) ||
6779             (adapter->hw.mac.type != e1000_82576))
6780                 return;
6781
6782         actual_link_speed = igb_link_mbps(adapter->link_speed);
6783         if (actual_link_speed != adapter->vf_rate_link_speed) {
6784                 reset_rate = true;
6785                 adapter->vf_rate_link_speed = 0;
6786                 dev_info(&adapter->pdev->dev,
6787                          "Link speed has been changed. VF Transmit "
6788                          "rate is disabled\n");
6789         }
6790
6791         for (i = 0; i < adapter->vfs_allocated_count; i++) {
6792                 if (reset_rate)
6793                         adapter->vf_data[i].tx_rate = 0;
6794
6795                 igb_set_vf_rate_limit(&adapter->hw, i,
6796                                       adapter->vf_data[i].tx_rate,
6797                                       actual_link_speed);
6798         }
6799 }
6800
6801 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6802 {
6803         struct igb_adapter *adapter = netdev_priv(netdev);
6804         struct e1000_hw *hw = &adapter->hw;
6805         int actual_link_speed;
6806
6807         if (hw->mac.type != e1000_82576)
6808                 return -EOPNOTSUPP;
6809
6810         actual_link_speed = igb_link_mbps(adapter->link_speed);
6811         if ((vf >= adapter->vfs_allocated_count) ||
6812             (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6813             (tx_rate < 0) || (tx_rate > actual_link_speed))
6814                 return -EINVAL;
6815
6816         adapter->vf_rate_link_speed = actual_link_speed;
6817         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6818         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6819
6820         return 0;
6821 }
6822
6823 static int igb_ndo_get_vf_config(struct net_device *netdev,
6824                                  int vf, struct ifla_vf_info *ivi)
6825 {
6826         struct igb_adapter *adapter = netdev_priv(netdev);
6827         if (vf >= adapter->vfs_allocated_count)
6828                 return -EINVAL;
6829         ivi->vf = vf;
6830         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6831         ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6832         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6833         ivi->qos = adapter->vf_data[vf].pf_qos;
6834         return 0;
6835 }
6836
6837 static void igb_vmm_control(struct igb_adapter *adapter)
6838 {
6839         struct e1000_hw *hw = &adapter->hw;
6840         u32 reg;
6841
6842         switch (hw->mac.type) {
6843         case e1000_82575:
6844         default:
6845                 /* replication is not supported for 82575 */
6846                 return;
6847         case e1000_82576:
6848                 /* notify HW that the MAC is adding vlan tags */
6849                 reg = rd32(E1000_DTXCTL);
6850                 reg |= E1000_DTXCTL_VLAN_ADDED;
6851                 wr32(E1000_DTXCTL, reg);
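                /* fall through */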
6852         case e1000_82580:
6853                 /* enable replication vlan tag stripping */
6854                 reg = rd32(E1000_RPLOLR);
6855                 reg |= E1000_RPLOLR_STRVLAN;
6856                 wr32(E1000_RPLOLR, reg);
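                /* fall through */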
6857         case e1000_i350:
6858                 /* none of the above registers are supported by i350 */
6859                 break;
6860         }
6861
6862         if (adapter->vfs_allocated_count) {
6863                 igb_vmdq_set_loopback_pf(hw, true);
6864                 igb_vmdq_set_replication_pf(hw, true);
6865                 igb_vmdq_set_anti_spoofing_pf(hw, true,
6866                                                 adapter->vfs_allocated_count);
6867         } else {
6868                 igb_vmdq_set_loopback_pf(hw, false);
6869                 igb_vmdq_set_replication_pf(hw, false);
6870         }
6871 }
6872
6873 /* igb_main.c */