Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[pandora-kernel.git] / drivers / net / igb / igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2011 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/bitops.h>
32 #include <linux/vmalloc.h>
33 #include <linux/pagemap.h>
34 #include <linux/netdevice.h>
35 #include <linux/ipv6.h>
36 #include <linux/slab.h>
37 #include <net/checksum.h>
38 #include <net/ip6_checksum.h>
39 #include <linux/net_tstamp.h>
40 #include <linux/mii.h>
41 #include <linux/ethtool.h>
42 #include <linux/if_vlan.h>
43 #include <linux/pci.h>
44 #include <linux/pci-aspm.h>
45 #include <linux/delay.h>
46 #include <linux/interrupt.h>
47 #include <linux/if_ether.h>
48 #include <linux/aer.h>
49 #include <linux/prefetch.h>
50 #ifdef CONFIG_IGB_DCA
51 #include <linux/dca.h>
52 #endif
53 #include "igb.h"
54
/* Driver version components; combined into DRV_VERSION as "MAJ.MIN.BUILD-k". */
#define MAJ 3
#define MIN 0
#define BUILD 6
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
__stringify(BUILD) "-k"
/* Non-static: name and version strings are visible outside this file. */
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
/* Banner strings printed once by igb_init_module(). */
static const char igb_driver_string[] =
                                "Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";

/* Board-type index -> hardware description; only board_82575 is populated. */
static const struct e1000_info *igb_info_tbl[] = {
        [board_82575] = &e1000_82575_info,
};
69
/*
 * PCI device IDs handled by this driver.  Every entry carries board_82575
 * as its driver data, which is the only index defined in igb_info_tbl.
 */
static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
        /* required last entry */
        {0, }
};

/* Export the ID table in the module's device table section. */
MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
101
102 void igb_reset(struct igb_adapter *);
103 static int igb_setup_all_tx_resources(struct igb_adapter *);
104 static int igb_setup_all_rx_resources(struct igb_adapter *);
105 static void igb_free_all_tx_resources(struct igb_adapter *);
106 static void igb_free_all_rx_resources(struct igb_adapter *);
107 static void igb_setup_mrqc(struct igb_adapter *);
108 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
109 static void __devexit igb_remove(struct pci_dev *pdev);
110 static void igb_init_hw_timer(struct igb_adapter *adapter);
111 static int igb_sw_init(struct igb_adapter *);
112 static int igb_open(struct net_device *);
113 static int igb_close(struct net_device *);
114 static void igb_configure_tx(struct igb_adapter *);
115 static void igb_configure_rx(struct igb_adapter *);
116 static void igb_clean_all_tx_rings(struct igb_adapter *);
117 static void igb_clean_all_rx_rings(struct igb_adapter *);
118 static void igb_clean_tx_ring(struct igb_ring *);
119 static void igb_clean_rx_ring(struct igb_ring *);
120 static void igb_set_rx_mode(struct net_device *);
121 static void igb_update_phy_info(unsigned long);
122 static void igb_watchdog(unsigned long);
123 static void igb_watchdog_task(struct work_struct *);
124 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
125 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
126                                                  struct rtnl_link_stats64 *stats);
127 static int igb_change_mtu(struct net_device *, int);
128 static int igb_set_mac(struct net_device *, void *);
129 static void igb_set_uta(struct igb_adapter *adapter);
130 static irqreturn_t igb_intr(int irq, void *);
131 static irqreturn_t igb_intr_msi(int irq, void *);
132 static irqreturn_t igb_msix_other(int irq, void *);
133 static irqreturn_t igb_msix_ring(int irq, void *);
134 #ifdef CONFIG_IGB_DCA
135 static void igb_update_dca(struct igb_q_vector *);
136 static void igb_setup_dca(struct igb_adapter *);
137 #endif /* CONFIG_IGB_DCA */
138 static bool igb_clean_tx_irq(struct igb_q_vector *);
139 static int igb_poll(struct napi_struct *, int);
140 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
141 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
142 static void igb_tx_timeout(struct net_device *);
143 static void igb_reset_task(struct work_struct *);
144 static void igb_vlan_mode(struct net_device *netdev, u32 features);
145 static void igb_vlan_rx_add_vid(struct net_device *, u16);
146 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
147 static void igb_restore_vlan(struct igb_adapter *);
148 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
149 static void igb_ping_all_vfs(struct igb_adapter *);
150 static void igb_msg_task(struct igb_adapter *);
151 static void igb_vmm_control(struct igb_adapter *);
152 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
153 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
154 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
155 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
156                                int vf, u16 vlan, u8 qos);
157 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
158 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
159                                  struct ifla_vf_info *ivi);
160 static void igb_check_vf_rate_limit(struct igb_adapter *);
161
#ifdef CONFIG_PM
/* Power-management callbacks; referenced by igb_driver only under CONFIG_PM. */
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
/* Notifier block registered in igb_init_module() and removed in igb_exit_module(). */
static struct notifier_block dca_notifier = {
        .notifier_call  = igb_notify_dca,
        .next           = NULL,
        .priority       = 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
/* Module parameter: upper bound on virtual functions per physical function. */
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */
185
/* PCI error-recovery callbacks, collected in igb_err_handler below. */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
                     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
        .error_detected = igb_io_error_detected,
        .slot_reset = igb_io_slot_reset,
        .resume = igb_io_resume,
};


/*
 * PCI driver description handed to pci_register_driver() /
 * pci_unregister_driver() by the module init and exit routines.
 */
static struct pci_driver igb_driver = {
        .name     = igb_driver_name,
        .id_table = igb_pci_tbl,
        .probe    = igb_probe,
        .remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
        /* Power Management Hooks */
        .suspend  = igb_suspend,
        .resume   = igb_resume,
#endif
        .shutdown = igb_shutdown,
        .err_handler = &igb_err_handler
};

/* Module metadata. */
MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
216
217 struct igb_reg_info {
218         u32 ofs;
219         char *name;
220 };
221
222 static const struct igb_reg_info igb_reg_info_tbl[] = {
223
224         /* General Registers */
225         {E1000_CTRL, "CTRL"},
226         {E1000_STATUS, "STATUS"},
227         {E1000_CTRL_EXT, "CTRL_EXT"},
228
229         /* Interrupt Registers */
230         {E1000_ICR, "ICR"},
231
232         /* RX Registers */
233         {E1000_RCTL, "RCTL"},
234         {E1000_RDLEN(0), "RDLEN"},
235         {E1000_RDH(0), "RDH"},
236         {E1000_RDT(0), "RDT"},
237         {E1000_RXDCTL(0), "RXDCTL"},
238         {E1000_RDBAL(0), "RDBAL"},
239         {E1000_RDBAH(0), "RDBAH"},
240
241         /* TX Registers */
242         {E1000_TCTL, "TCTL"},
243         {E1000_TDBAL(0), "TDBAL"},
244         {E1000_TDBAH(0), "TDBAH"},
245         {E1000_TDLEN(0), "TDLEN"},
246         {E1000_TDH(0), "TDH"},
247         {E1000_TDT(0), "TDT"},
248         {E1000_TXDCTL(0), "TXDCTL"},
249         {E1000_TDFH, "TDFH"},
250         {E1000_TDFT, "TDFT"},
251         {E1000_TDFHS, "TDFHS"},
252         {E1000_TDFPC, "TDFPC"},
253
254         /* List Terminator */
255         {}
256 };
257
258 /*
259  * igb_regdump - register printout routine
260  */
261 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
262 {
263         int n = 0;
264         char rname[16];
265         u32 regs[8];
266
267         switch (reginfo->ofs) {
268         case E1000_RDLEN(0):
269                 for (n = 0; n < 4; n++)
270                         regs[n] = rd32(E1000_RDLEN(n));
271                 break;
272         case E1000_RDH(0):
273                 for (n = 0; n < 4; n++)
274                         regs[n] = rd32(E1000_RDH(n));
275                 break;
276         case E1000_RDT(0):
277                 for (n = 0; n < 4; n++)
278                         regs[n] = rd32(E1000_RDT(n));
279                 break;
280         case E1000_RXDCTL(0):
281                 for (n = 0; n < 4; n++)
282                         regs[n] = rd32(E1000_RXDCTL(n));
283                 break;
284         case E1000_RDBAL(0):
285                 for (n = 0; n < 4; n++)
286                         regs[n] = rd32(E1000_RDBAL(n));
287                 break;
288         case E1000_RDBAH(0):
289                 for (n = 0; n < 4; n++)
290                         regs[n] = rd32(E1000_RDBAH(n));
291                 break;
292         case E1000_TDBAL(0):
293                 for (n = 0; n < 4; n++)
294                         regs[n] = rd32(E1000_RDBAL(n));
295                 break;
296         case E1000_TDBAH(0):
297                 for (n = 0; n < 4; n++)
298                         regs[n] = rd32(E1000_TDBAH(n));
299                 break;
300         case E1000_TDLEN(0):
301                 for (n = 0; n < 4; n++)
302                         regs[n] = rd32(E1000_TDLEN(n));
303                 break;
304         case E1000_TDH(0):
305                 for (n = 0; n < 4; n++)
306                         regs[n] = rd32(E1000_TDH(n));
307                 break;
308         case E1000_TDT(0):
309                 for (n = 0; n < 4; n++)
310                         regs[n] = rd32(E1000_TDT(n));
311                 break;
312         case E1000_TXDCTL(0):
313                 for (n = 0; n < 4; n++)
314                         regs[n] = rd32(E1000_TXDCTL(n));
315                 break;
316         default:
317                 printk(KERN_INFO "%-15s %08x\n",
318                         reginfo->name, rd32(reginfo->ofs));
319                 return;
320         }
321
322         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
323         printk(KERN_INFO "%-15s ", rname);
324         for (n = 0; n < 4; n++)
325                 printk(KERN_CONT "%08x ", regs[n]);
326         printk(KERN_CONT "\n");
327 }
328
329 /*
330  * igb_dump - Print registers, tx-rings and rx-rings
331  */
332 static void igb_dump(struct igb_adapter *adapter)
333 {
334         struct net_device *netdev = adapter->netdev;
335         struct e1000_hw *hw = &adapter->hw;
336         struct igb_reg_info *reginfo;
337         int n = 0;
338         struct igb_ring *tx_ring;
339         union e1000_adv_tx_desc *tx_desc;
340         struct my_u0 { u64 a; u64 b; } *u0;
341         struct igb_buffer *buffer_info;
342         struct igb_ring *rx_ring;
343         union e1000_adv_rx_desc *rx_desc;
344         u32 staterr;
345         int i = 0;
346
347         if (!netif_msg_hw(adapter))
348                 return;
349
350         /* Print netdevice Info */
351         if (netdev) {
352                 dev_info(&adapter->pdev->dev, "Net device Info\n");
353                 printk(KERN_INFO "Device Name     state            "
354                         "trans_start      last_rx\n");
355                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
356                 netdev->name,
357                 netdev->state,
358                 netdev->trans_start,
359                 netdev->last_rx);
360         }
361
362         /* Print Registers */
363         dev_info(&adapter->pdev->dev, "Register Dump\n");
364         printk(KERN_INFO " Register Name   Value\n");
365         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
366              reginfo->name; reginfo++) {
367                 igb_regdump(hw, reginfo);
368         }
369
370         /* Print TX Ring Summary */
371         if (!netdev || !netif_running(netdev))
372                 goto exit;
373
374         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
375         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
376                 " leng ntw timestamp\n");
377         for (n = 0; n < adapter->num_tx_queues; n++) {
378                 tx_ring = adapter->tx_ring[n];
379                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
380                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
381                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
382                            (u64)buffer_info->dma,
383                            buffer_info->length,
384                            buffer_info->next_to_watch,
385                            (u64)buffer_info->time_stamp);
386         }
387
388         /* Print TX Rings */
389         if (!netif_msg_tx_done(adapter))
390                 goto rx_ring_summary;
391
392         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
393
394         /* Transmit Descriptor Formats
395          *
396          * Advanced Transmit Descriptor
397          *   +--------------------------------------------------------------+
398          * 0 |         Buffer Address [63:0]                                |
399          *   +--------------------------------------------------------------+
400          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
401          *   +--------------------------------------------------------------+
402          *   63      46 45    40 39 38 36 35 32 31   24             15       0
403          */
404
405         for (n = 0; n < adapter->num_tx_queues; n++) {
406                 tx_ring = adapter->tx_ring[n];
407                 printk(KERN_INFO "------------------------------------\n");
408                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
409                 printk(KERN_INFO "------------------------------------\n");
410                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
411                         "[PlPOCIStDDM Ln] [bi->dma       ] "
412                         "leng  ntw timestamp        bi->skb\n");
413
414                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
415                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
416                         buffer_info = &tx_ring->buffer_info[i];
417                         u0 = (struct my_u0 *)tx_desc;
418                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
419                                 " %04X  %3X %016llX %p", i,
420                                 le64_to_cpu(u0->a),
421                                 le64_to_cpu(u0->b),
422                                 (u64)buffer_info->dma,
423                                 buffer_info->length,
424                                 buffer_info->next_to_watch,
425                                 (u64)buffer_info->time_stamp,
426                                 buffer_info->skb);
427                         if (i == tx_ring->next_to_use &&
428                                 i == tx_ring->next_to_clean)
429                                 printk(KERN_CONT " NTC/U\n");
430                         else if (i == tx_ring->next_to_use)
431                                 printk(KERN_CONT " NTU\n");
432                         else if (i == tx_ring->next_to_clean)
433                                 printk(KERN_CONT " NTC\n");
434                         else
435                                 printk(KERN_CONT "\n");
436
437                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
438                                 print_hex_dump(KERN_INFO, "",
439                                         DUMP_PREFIX_ADDRESS,
440                                         16, 1, phys_to_virt(buffer_info->dma),
441                                         buffer_info->length, true);
442                 }
443         }
444
445         /* Print RX Rings Summary */
446 rx_ring_summary:
447         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
448         printk(KERN_INFO "Queue [NTU] [NTC]\n");
449         for (n = 0; n < adapter->num_rx_queues; n++) {
450                 rx_ring = adapter->rx_ring[n];
451                 printk(KERN_INFO " %5d %5X %5X\n", n,
452                            rx_ring->next_to_use, rx_ring->next_to_clean);
453         }
454
455         /* Print RX Rings */
456         if (!netif_msg_rx_status(adapter))
457                 goto exit;
458
459         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
460
461         /* Advanced Receive Descriptor (Read) Format
462          *    63                                           1        0
463          *    +-----------------------------------------------------+
464          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
465          *    +----------------------------------------------+------+
466          *  8 |       Header Buffer Address [63:1]           |  DD  |
467          *    +-----------------------------------------------------+
468          *
469          *
470          * Advanced Receive Descriptor (Write-Back) Format
471          *
472          *   63       48 47    32 31  30      21 20 17 16   4 3     0
473          *   +------------------------------------------------------+
474          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
475          *   | Checksum   Ident  |   |           |    | Type | Type |
476          *   +------------------------------------------------------+
477          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
478          *   +------------------------------------------------------+
479          *   63       48 47    32 31            20 19               0
480          */
481
482         for (n = 0; n < adapter->num_rx_queues; n++) {
483                 rx_ring = adapter->rx_ring[n];
484                 printk(KERN_INFO "------------------------------------\n");
485                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
486                 printk(KERN_INFO "------------------------------------\n");
487                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
488                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
489                         "<-- Adv Rx Read format\n");
490                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
491                         "[vl er S cks ln] ---------------- [bi->skb] "
492                         "<-- Adv Rx Write-Back format\n");
493
494                 for (i = 0; i < rx_ring->count; i++) {
495                         buffer_info = &rx_ring->buffer_info[i];
496                         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
497                         u0 = (struct my_u0 *)rx_desc;
498                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
499                         if (staterr & E1000_RXD_STAT_DD) {
500                                 /* Descriptor Done */
501                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
502                                         "%016llX ---------------- %p", i,
503                                         le64_to_cpu(u0->a),
504                                         le64_to_cpu(u0->b),
505                                         buffer_info->skb);
506                         } else {
507                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
508                                         "%016llX %016llX %p", i,
509                                         le64_to_cpu(u0->a),
510                                         le64_to_cpu(u0->b),
511                                         (u64)buffer_info->dma,
512                                         buffer_info->skb);
513
514                                 if (netif_msg_pktdata(adapter)) {
515                                         print_hex_dump(KERN_INFO, "",
516                                                 DUMP_PREFIX_ADDRESS,
517                                                 16, 1,
518                                                 phys_to_virt(buffer_info->dma),
519                                                 rx_ring->rx_buffer_len, true);
520                                         if (rx_ring->rx_buffer_len
521                                                 < IGB_RXBUFFER_1024)
522                                                 print_hex_dump(KERN_INFO, "",
523                                                   DUMP_PREFIX_ADDRESS,
524                                                   16, 1,
525                                                   phys_to_virt(
526                                                     buffer_info->page_dma +
527                                                     buffer_info->page_offset),
528                                                   PAGE_SIZE/2, true);
529                                 }
530                         }
531
532                         if (i == rx_ring->next_to_use)
533                                 printk(KERN_CONT " NTU\n");
534                         else if (i == rx_ring->next_to_clean)
535                                 printk(KERN_CONT " NTC\n");
536                         else
537                                 printk(KERN_CONT "\n");
538
539                 }
540         }
541
542 exit:
543         return;
544 }
545
546
547 /**
548  * igb_read_clock - read raw cycle counter (to be used by time counter)
549  */
550 static cycle_t igb_read_clock(const struct cyclecounter *tc)
551 {
552         struct igb_adapter *adapter =
553                 container_of(tc, struct igb_adapter, cycles);
554         struct e1000_hw *hw = &adapter->hw;
555         u64 stamp = 0;
556         int shift = 0;
557
558         /*
559          * The timestamp latches on lowest register read. For the 82580
560          * the lowest register is SYSTIMR instead of SYSTIML.  However we never
561          * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
562          */
563         if (hw->mac.type == e1000_82580) {
564                 stamp = rd32(E1000_SYSTIMR) >> 8;
565                 shift = IGB_82580_TSYNC_SHIFT;
566         }
567
568         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
569         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
570         return stamp;
571 }
572
573 /**
574  * igb_get_hw_dev - return device
575  * used by hardware layer to print debugging information
576  **/
577 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
578 {
579         struct igb_adapter *adapter = hw->back;
580         return adapter->netdev;
581 }
582
583 /**
584  * igb_init_module - Driver Registration Routine
585  *
586  * igb_init_module is the first routine called when the driver is
587  * loaded. All it does is register with the PCI subsystem.
588  **/
589 static int __init igb_init_module(void)
590 {
591         int ret;
592         printk(KERN_INFO "%s - version %s\n",
593                igb_driver_string, igb_driver_version);
594
595         printk(KERN_INFO "%s\n", igb_copyright);
596
597 #ifdef CONFIG_IGB_DCA
598         dca_register_notify(&dca_notifier);
599 #endif
600         ret = pci_register_driver(&igb_driver);
601         return ret;
602 }
603
604 module_init(igb_init_module);
605
606 /**
607  * igb_exit_module - Driver Exit Cleanup Routine
608  *
609  * igb_exit_module is called just before the driver is removed
610  * from memory.
611  **/
612 static void __exit igb_exit_module(void)
613 {
614 #ifdef CONFIG_IGB_DCA
615         dca_unregister_notify(&dca_notifier);
616 #endif
617         pci_unregister_driver(&igb_driver);
618 }
619
620 module_exit(igb_exit_module);
621
/*
 * Map a ring number to its 82576 queue index: even rings map to i/2, odd
 * rings to 8 + i/2 (0->0, 1->8, 2->1, 3->9, ...).  The argument is
 * parenthesized so that expression arguments expand correctly.
 */
#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
623 /**
624  * igb_cache_ring_register - Descriptor ring to register mapping
625  * @adapter: board private structure to initialize
626  *
627  * Once we know the feature-set enabled for the device, we'll cache
628  * the register offset the descriptor ring is assigned to.
629  **/
630 static void igb_cache_ring_register(struct igb_adapter *adapter)
631 {
632         int i = 0, j = 0;
633         u32 rbase_offset = adapter->vfs_allocated_count;
634
635         switch (adapter->hw.mac.type) {
636         case e1000_82576:
637                 /* The queues are allocated for virtualization such that VF 0
638                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
639                  * In order to avoid collision we start at the first free queue
640                  * and continue consuming queues in the same sequence
641                  */
642                 if (adapter->vfs_allocated_count) {
643                         for (; i < adapter->rss_queues; i++)
644                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
645                                                                Q_IDX_82576(i);
646                 }
647         case e1000_82575:
648         case e1000_82580:
649         case e1000_i350:
650         default:
651                 for (; i < adapter->num_rx_queues; i++)
652                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
653                 for (; j < adapter->num_tx_queues; j++)
654                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
655                 break;
656         }
657 }
658
659 static void igb_free_queues(struct igb_adapter *adapter)
660 {
661         int i;
662
663         for (i = 0; i < adapter->num_tx_queues; i++) {
664                 kfree(adapter->tx_ring[i]);
665                 adapter->tx_ring[i] = NULL;
666         }
667         for (i = 0; i < adapter->num_rx_queues; i++) {
668                 kfree(adapter->rx_ring[i]);
669                 adapter->rx_ring[i] = NULL;
670         }
671         adapter->num_rx_queues = 0;
672         adapter->num_tx_queues = 0;
673 }
674
675 /**
676  * igb_alloc_queues - Allocate memory for all rings
677  * @adapter: board private structure to initialize
678  *
679  * We allocate one ring per queue at run-time since we don't know the
680  * number of queues at compile-time.
681  **/
682 static int igb_alloc_queues(struct igb_adapter *adapter)
683 {
684         struct igb_ring *ring;
685         int i;
686
687         for (i = 0; i < adapter->num_tx_queues; i++) {
688                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
689                 if (!ring)
690                         goto err;
691                 ring->count = adapter->tx_ring_count;
692                 ring->queue_index = i;
693                 ring->dev = &adapter->pdev->dev;
694                 ring->netdev = adapter->netdev;
695                 /* For 82575, context index must be unique per ring. */
696                 if (adapter->hw.mac.type == e1000_82575)
697                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
698                 adapter->tx_ring[i] = ring;
699         }
700
701         for (i = 0; i < adapter->num_rx_queues; i++) {
702                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
703                 if (!ring)
704                         goto err;
705                 ring->count = adapter->rx_ring_count;
706                 ring->queue_index = i;
707                 ring->dev = &adapter->pdev->dev;
708                 ring->netdev = adapter->netdev;
709                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
710                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
711                 /* set flag indicating ring supports SCTP checksum offload */
712                 if (adapter->hw.mac.type >= e1000_82576)
713                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
714                 adapter->rx_ring[i] = ring;
715         }
716
717         igb_cache_ring_register(adapter);
718
719         return 0;
720
721 err:
722         igb_free_queues(adapter);
723
724         return -ENOMEM;
725 }
726
727 #define IGB_N0_QUEUE -1
728 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
729 {
730         u32 msixbm = 0;
731         struct igb_adapter *adapter = q_vector->adapter;
732         struct e1000_hw *hw = &adapter->hw;
733         u32 ivar, index;
734         int rx_queue = IGB_N0_QUEUE;
735         int tx_queue = IGB_N0_QUEUE;
736
737         if (q_vector->rx_ring)
738                 rx_queue = q_vector->rx_ring->reg_idx;
739         if (q_vector->tx_ring)
740                 tx_queue = q_vector->tx_ring->reg_idx;
741
742         switch (hw->mac.type) {
743         case e1000_82575:
744                 /* The 82575 assigns vectors using a bitmask, which matches the
745                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
746                    or more queues to a vector, we write the appropriate bits
747                    into the MSIXBM register for that vector. */
748                 if (rx_queue > IGB_N0_QUEUE)
749                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
750                 if (tx_queue > IGB_N0_QUEUE)
751                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
752                 if (!adapter->msix_entries && msix_vector == 0)
753                         msixbm |= E1000_EIMS_OTHER;
754                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
755                 q_vector->eims_value = msixbm;
756                 break;
757         case e1000_82576:
758                 /* 82576 uses a table-based method for assigning vectors.
759                    Each queue has a single entry in the table to which we write
760                    a vector number along with a "valid" bit.  Sadly, the layout
761                    of the table is somewhat counterintuitive. */
762                 if (rx_queue > IGB_N0_QUEUE) {
763                         index = (rx_queue & 0x7);
764                         ivar = array_rd32(E1000_IVAR0, index);
765                         if (rx_queue < 8) {
766                                 /* vector goes into low byte of register */
767                                 ivar = ivar & 0xFFFFFF00;
768                                 ivar |= msix_vector | E1000_IVAR_VALID;
769                         } else {
770                                 /* vector goes into third byte of register */
771                                 ivar = ivar & 0xFF00FFFF;
772                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
773                         }
774                         array_wr32(E1000_IVAR0, index, ivar);
775                 }
776                 if (tx_queue > IGB_N0_QUEUE) {
777                         index = (tx_queue & 0x7);
778                         ivar = array_rd32(E1000_IVAR0, index);
779                         if (tx_queue < 8) {
780                                 /* vector goes into second byte of register */
781                                 ivar = ivar & 0xFFFF00FF;
782                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
783                         } else {
784                                 /* vector goes into high byte of register */
785                                 ivar = ivar & 0x00FFFFFF;
786                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
787                         }
788                         array_wr32(E1000_IVAR0, index, ivar);
789                 }
790                 q_vector->eims_value = 1 << msix_vector;
791                 break;
792         case e1000_82580:
793         case e1000_i350:
794                 /* 82580 uses the same table-based approach as 82576 but has fewer
795                    entries as a result we carry over for queues greater than 4. */
796                 if (rx_queue > IGB_N0_QUEUE) {
797                         index = (rx_queue >> 1);
798                         ivar = array_rd32(E1000_IVAR0, index);
799                         if (rx_queue & 0x1) {
800                                 /* vector goes into third byte of register */
801                                 ivar = ivar & 0xFF00FFFF;
802                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
803                         } else {
804                                 /* vector goes into low byte of register */
805                                 ivar = ivar & 0xFFFFFF00;
806                                 ivar |= msix_vector | E1000_IVAR_VALID;
807                         }
808                         array_wr32(E1000_IVAR0, index, ivar);
809                 }
810                 if (tx_queue > IGB_N0_QUEUE) {
811                         index = (tx_queue >> 1);
812                         ivar = array_rd32(E1000_IVAR0, index);
813                         if (tx_queue & 0x1) {
814                                 /* vector goes into high byte of register */
815                                 ivar = ivar & 0x00FFFFFF;
816                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
817                         } else {
818                                 /* vector goes into second byte of register */
819                                 ivar = ivar & 0xFFFF00FF;
820                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
821                         }
822                         array_wr32(E1000_IVAR0, index, ivar);
823                 }
824                 q_vector->eims_value = 1 << msix_vector;
825                 break;
826         default:
827                 BUG();
828                 break;
829         }
830
831         /* add q_vector eims value to global eims_enable_mask */
832         adapter->eims_enable_mask |= q_vector->eims_value;
833
834         /* configure q_vector to set itr on first interrupt */
835         q_vector->set_itr = 1;
836 }
837
838 /**
839  * igb_configure_msix - Configure MSI-X hardware
840  *
841  * igb_configure_msix sets up the hardware to properly
842  * generate MSI-X interrupts.
843  **/
844 static void igb_configure_msix(struct igb_adapter *adapter)
845 {
846         u32 tmp;
847         int i, vector = 0;
848         struct e1000_hw *hw = &adapter->hw;
849
850         adapter->eims_enable_mask = 0;
851
852         /* set vector for other causes, i.e. link changes */
853         switch (hw->mac.type) {
854         case e1000_82575:
855                 tmp = rd32(E1000_CTRL_EXT);
856                 /* enable MSI-X PBA support*/
857                 tmp |= E1000_CTRL_EXT_PBA_CLR;
858
859                 /* Auto-Mask interrupts upon ICR read. */
860                 tmp |= E1000_CTRL_EXT_EIAME;
861                 tmp |= E1000_CTRL_EXT_IRCA;
862
863                 wr32(E1000_CTRL_EXT, tmp);
864
865                 /* enable msix_other interrupt */
866                 array_wr32(E1000_MSIXBM(0), vector++,
867                                       E1000_EIMS_OTHER);
868                 adapter->eims_other = E1000_EIMS_OTHER;
869
870                 break;
871
872         case e1000_82576:
873         case e1000_82580:
874         case e1000_i350:
875                 /* Turn on MSI-X capability first, or our settings
876                  * won't stick.  And it will take days to debug. */
877                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
878                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
879                                 E1000_GPIE_NSICR);
880
881                 /* enable msix_other interrupt */
882                 adapter->eims_other = 1 << vector;
883                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
884
885                 wr32(E1000_IVAR_MISC, tmp);
886                 break;
887         default:
888                 /* do nothing, since nothing else supports MSI-X */
889                 break;
890         } /* switch (hw->mac.type) */
891
892         adapter->eims_enable_mask |= adapter->eims_other;
893
894         for (i = 0; i < adapter->num_q_vectors; i++)
895                 igb_assign_vector(adapter->q_vector[i], vector++);
896
897         wrfl();
898 }
899
900 /**
901  * igb_request_msix - Initialize MSI-X interrupts
902  *
903  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
904  * kernel.
905  **/
906 static int igb_request_msix(struct igb_adapter *adapter)
907 {
908         struct net_device *netdev = adapter->netdev;
909         struct e1000_hw *hw = &adapter->hw;
910         int i, err = 0, vector = 0;
911
912         err = request_irq(adapter->msix_entries[vector].vector,
913                           igb_msix_other, 0, netdev->name, adapter);
914         if (err)
915                 goto out;
916         vector++;
917
918         for (i = 0; i < adapter->num_q_vectors; i++) {
919                 struct igb_q_vector *q_vector = adapter->q_vector[i];
920
921                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
922
923                 if (q_vector->rx_ring && q_vector->tx_ring)
924                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
925                                 q_vector->rx_ring->queue_index);
926                 else if (q_vector->tx_ring)
927                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
928                                 q_vector->tx_ring->queue_index);
929                 else if (q_vector->rx_ring)
930                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
931                                 q_vector->rx_ring->queue_index);
932                 else
933                         sprintf(q_vector->name, "%s-unused", netdev->name);
934
935                 err = request_irq(adapter->msix_entries[vector].vector,
936                                   igb_msix_ring, 0, q_vector->name,
937                                   q_vector);
938                 if (err)
939                         goto out;
940                 vector++;
941         }
942
943         igb_configure_msix(adapter);
944         return 0;
945 out:
946         return err;
947 }
948
949 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
950 {
951         if (adapter->msix_entries) {
952                 pci_disable_msix(adapter->pdev);
953                 kfree(adapter->msix_entries);
954                 adapter->msix_entries = NULL;
955         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
956                 pci_disable_msi(adapter->pdev);
957         }
958 }
959
960 /**
961  * igb_free_q_vectors - Free memory allocated for interrupt vectors
962  * @adapter: board private structure to initialize
963  *
964  * This function frees the memory allocated to the q_vectors.  In addition if
965  * NAPI is enabled it will delete any references to the NAPI struct prior
966  * to freeing the q_vector.
967  **/
968 static void igb_free_q_vectors(struct igb_adapter *adapter)
969 {
970         int v_idx;
971
972         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
973                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
974                 adapter->q_vector[v_idx] = NULL;
975                 if (!q_vector)
976                         continue;
977                 netif_napi_del(&q_vector->napi);
978                 kfree(q_vector);
979         }
980         adapter->num_q_vectors = 0;
981 }
982
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.  It does so by freeing the queues, then the
 * q_vectors, and finally disabling MSI-X/MSI via
 * igb_reset_interrupt_capability().
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
        igb_free_queues(adapter);
        igb_free_q_vectors(adapter);
        igb_reset_interrupt_capability(adapter);
}
995
996 /**
997  * igb_set_interrupt_capability - set MSI or MSI-X if supported
998  *
999  * Attempt to configure interrupts using the best available
1000  * capabilities of the hardware and kernel.
1001  **/
1002 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1003 {
1004         int err;
1005         int numvecs, i;
1006
1007         /* Number of supported queues. */
1008         adapter->num_rx_queues = adapter->rss_queues;
1009         if (adapter->vfs_allocated_count)
1010                 adapter->num_tx_queues = 1;
1011         else
1012                 adapter->num_tx_queues = adapter->rss_queues;
1013
1014         /* start with one vector for every rx queue */
1015         numvecs = adapter->num_rx_queues;
1016
1017         /* if tx handler is separate add 1 for every tx queue */
1018         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1019                 numvecs += adapter->num_tx_queues;
1020
1021         /* store the number of vectors reserved for queues */
1022         adapter->num_q_vectors = numvecs;
1023
1024         /* add 1 vector for link status interrupts */
1025         numvecs++;
1026         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1027                                         GFP_KERNEL);
1028         if (!adapter->msix_entries)
1029                 goto msi_only;
1030
1031         for (i = 0; i < numvecs; i++)
1032                 adapter->msix_entries[i].entry = i;
1033
1034         err = pci_enable_msix(adapter->pdev,
1035                               adapter->msix_entries,
1036                               numvecs);
1037         if (err == 0)
1038                 goto out;
1039
1040         igb_reset_interrupt_capability(adapter);
1041
1042         /* If we can't do MSI-X, try MSI */
1043 msi_only:
1044 #ifdef CONFIG_PCI_IOV
1045         /* disable SR-IOV for non MSI-X configurations */
1046         if (adapter->vf_data) {
1047                 struct e1000_hw *hw = &adapter->hw;
1048                 /* disable iov and allow time for transactions to clear */
1049                 pci_disable_sriov(adapter->pdev);
1050                 msleep(500);
1051
1052                 kfree(adapter->vf_data);
1053                 adapter->vf_data = NULL;
1054                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1055                 wrfl();
1056                 msleep(100);
1057                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1058         }
1059 #endif
1060         adapter->vfs_allocated_count = 0;
1061         adapter->rss_queues = 1;
1062         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1063         adapter->num_rx_queues = 1;
1064         adapter->num_tx_queues = 1;
1065         adapter->num_q_vectors = 1;
1066         if (!pci_enable_msi(adapter->pdev))
1067                 adapter->flags |= IGB_FLAG_HAS_MSI;
1068 out:
1069         /* Notify the stack of the (possibly) reduced queue counts. */
1070         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1071         return netif_set_real_num_rx_queues(adapter->netdev,
1072                                             adapter->num_rx_queues);
1073 }
1074
1075 /**
1076  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1077  * @adapter: board private structure to initialize
1078  *
1079  * We allocate one q_vector per queue interrupt.  If allocation fails we
1080  * return -ENOMEM.
1081  **/
1082 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1083 {
1084         struct igb_q_vector *q_vector;
1085         struct e1000_hw *hw = &adapter->hw;
1086         int v_idx;
1087
1088         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1089                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1090                 if (!q_vector)
1091                         goto err_out;
1092                 q_vector->adapter = adapter;
1093                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1094                 q_vector->itr_val = IGB_START_ITR;
1095                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1096                 adapter->q_vector[v_idx] = q_vector;
1097         }
1098         return 0;
1099
1100 err_out:
1101         igb_free_q_vectors(adapter);
1102         return -ENOMEM;
1103 }
1104
1105 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1106                                       int ring_idx, int v_idx)
1107 {
1108         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1109
1110         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1111         q_vector->rx_ring->q_vector = q_vector;
1112         q_vector->itr_val = adapter->rx_itr_setting;
1113         if (q_vector->itr_val && q_vector->itr_val <= 3)
1114                 q_vector->itr_val = IGB_START_ITR;
1115 }
1116
1117 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1118                                       int ring_idx, int v_idx)
1119 {
1120         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1121
1122         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1123         q_vector->tx_ring->q_vector = q_vector;
1124         q_vector->itr_val = adapter->tx_itr_setting;
1125         if (q_vector->itr_val && q_vector->itr_val <= 3)
1126                 q_vector->itr_val = IGB_START_ITR;
1127 }
1128
1129 /**
1130  * igb_map_ring_to_vector - maps allocated queues to vectors
1131  *
1132  * This function maps the recently allocated queues to vectors.
1133  **/
1134 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1135 {
1136         int i;
1137         int v_idx = 0;
1138
1139         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1140             (adapter->num_q_vectors < adapter->num_tx_queues))
1141                 return -ENOMEM;
1142
1143         if (adapter->num_q_vectors >=
1144             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1145                 for (i = 0; i < adapter->num_rx_queues; i++)
1146                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1147                 for (i = 0; i < adapter->num_tx_queues; i++)
1148                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1149         } else {
1150                 for (i = 0; i < adapter->num_rx_queues; i++) {
1151                         if (i < adapter->num_tx_queues)
1152                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1153                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1154                 }
1155                 for (; i < adapter->num_tx_queues; i++)
1156                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1157         }
1158         return 0;
1159 }
1160
1161 /**
1162  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1163  *
1164  * This function initializes the interrupts and allocates all of the queues.
1165  **/
1166 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1167 {
1168         struct pci_dev *pdev = adapter->pdev;
1169         int err;
1170
1171         err = igb_set_interrupt_capability(adapter);
1172         if (err)
1173                 return err;
1174
1175         err = igb_alloc_q_vectors(adapter);
1176         if (err) {
1177                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1178                 goto err_alloc_q_vectors;
1179         }
1180
1181         err = igb_alloc_queues(adapter);
1182         if (err) {
1183                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1184                 goto err_alloc_queues;
1185         }
1186
1187         err = igb_map_ring_to_vector(adapter);
1188         if (err) {
1189                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1190                 goto err_map_queues;
1191         }
1192
1193
1194         return 0;
1195 err_map_queues:
1196         igb_free_queues(adapter);
1197 err_alloc_queues:
1198         igb_free_q_vectors(adapter);
1199 err_alloc_q_vectors:
1200         igb_reset_interrupt_capability(adapter);
1201         return err;
1202 }
1203
1204 /**
1205  * igb_request_irq - initialize interrupts
1206  *
1207  * Attempts to configure interrupts using the best available
1208  * capabilities of the hardware and kernel.
1209  **/
1210 static int igb_request_irq(struct igb_adapter *adapter)
1211 {
1212         struct net_device *netdev = adapter->netdev;
1213         struct pci_dev *pdev = adapter->pdev;
1214         int err = 0;
1215
1216         if (adapter->msix_entries) {
1217                 err = igb_request_msix(adapter);
1218                 if (!err)
1219                         goto request_done;
1220                 /* fall back to MSI */
1221                 igb_clear_interrupt_scheme(adapter);
1222                 if (!pci_enable_msi(adapter->pdev))
1223                         adapter->flags |= IGB_FLAG_HAS_MSI;
1224                 igb_free_all_tx_resources(adapter);
1225                 igb_free_all_rx_resources(adapter);
1226                 adapter->num_tx_queues = 1;
1227                 adapter->num_rx_queues = 1;
1228                 adapter->num_q_vectors = 1;
1229                 err = igb_alloc_q_vectors(adapter);
1230                 if (err) {
1231                         dev_err(&pdev->dev,
1232                                 "Unable to allocate memory for vectors\n");
1233                         goto request_done;
1234                 }
1235                 err = igb_alloc_queues(adapter);
1236                 if (err) {
1237                         dev_err(&pdev->dev,
1238                                 "Unable to allocate memory for queues\n");
1239                         igb_free_q_vectors(adapter);
1240                         goto request_done;
1241                 }
1242                 igb_setup_all_tx_resources(adapter);
1243                 igb_setup_all_rx_resources(adapter);
1244         } else {
1245                 igb_assign_vector(adapter->q_vector[0], 0);
1246         }
1247
1248         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1249                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1250                                   netdev->name, adapter);
1251                 if (!err)
1252                         goto request_done;
1253
1254                 /* fall back to legacy interrupts */
1255                 igb_reset_interrupt_capability(adapter);
1256                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1257         }
1258
1259         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1260                           netdev->name, adapter);
1261
1262         if (err)
1263                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1264                         err);
1265
1266 request_done:
1267         return err;
1268 }
1269
1270 static void igb_free_irq(struct igb_adapter *adapter)
1271 {
1272         if (adapter->msix_entries) {
1273                 int vector = 0, i;
1274
1275                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1276
1277                 for (i = 0; i < adapter->num_q_vectors; i++) {
1278                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1279                         free_irq(adapter->msix_entries[vector++].vector,
1280                                  q_vector);
1281                 }
1282         } else {
1283                 free_irq(adapter->pdev->irq, adapter);
1284         }
1285 }
1286
1287 /**
1288  * igb_irq_disable - Mask off interrupt generation on the NIC
1289  * @adapter: board private structure
1290  **/
1291 static void igb_irq_disable(struct igb_adapter *adapter)
1292 {
1293         struct e1000_hw *hw = &adapter->hw;
1294
1295         /*
1296          * we need to be careful when disabling interrupts.  The VFs are also
1297          * mapped into these registers and so clearing the bits can cause
1298          * issues on the VF drivers so we only need to clear what we set
1299          */
1300         if (adapter->msix_entries) {
1301                 u32 regval = rd32(E1000_EIAM);
1302                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1303                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1304                 regval = rd32(E1000_EIAC);
1305                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1306         }
1307
1308         wr32(E1000_IAM, 0);
1309         wr32(E1000_IMC, ~0);
1310         wrfl();
1311         if (adapter->msix_entries) {
1312                 int i;
1313                 for (i = 0; i < adapter->num_q_vectors; i++)
1314                         synchronize_irq(adapter->msix_entries[i].vector);
1315         } else {
1316                 synchronize_irq(adapter->pdev->irq);
1317         }
1318 }
1319
1320 /**
1321  * igb_irq_enable - Enable default interrupt generation settings
1322  * @adapter: board private structure
1323  **/
1324 static void igb_irq_enable(struct igb_adapter *adapter)
1325 {
1326         struct e1000_hw *hw = &adapter->hw;
1327
1328         if (adapter->msix_entries) {
1329                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1330                 u32 regval = rd32(E1000_EIAC);
1331                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1332                 regval = rd32(E1000_EIAM);
1333                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1334                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1335                 if (adapter->vfs_allocated_count) {
1336                         wr32(E1000_MBVFIMR, 0xFF);
1337                         ims |= E1000_IMS_VMMB;
1338                 }
1339                 if (adapter->hw.mac.type == e1000_82580)
1340                         ims |= E1000_IMS_DRSTA;
1341
1342                 wr32(E1000_IMS, ims);
1343         } else {
1344                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1345                                 E1000_IMS_DRSTA);
1346                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1347                                 E1000_IMS_DRSTA);
1348         }
1349 }
1350
1351 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1352 {
1353         struct e1000_hw *hw = &adapter->hw;
1354         u16 vid = adapter->hw.mng_cookie.vlan_id;
1355         u16 old_vid = adapter->mng_vlan_id;
1356
1357         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1358                 /* add VID to filter table */
1359                 igb_vfta_set(hw, vid, true);
1360                 adapter->mng_vlan_id = vid;
1361         } else {
1362                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1363         }
1364
1365         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1366             (vid != old_vid) &&
1367             !test_bit(old_vid, adapter->active_vlans)) {
1368                 /* remove VID from filter table */
1369                 igb_vfta_set(hw, old_vid, false);
1370         }
1371 }
1372
1373 /**
1374  * igb_release_hw_control - release control of the h/w to f/w
1375  * @adapter: address of board private structure
1376  *
1377  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1378  * For ASF and Pass Through versions of f/w this means that the
1379  * driver is no longer loaded.
1380  *
1381  **/
1382 static void igb_release_hw_control(struct igb_adapter *adapter)
1383 {
1384         struct e1000_hw *hw = &adapter->hw;
1385         u32 ctrl_ext;
1386
1387         /* Let firmware take over control of h/w */
1388         ctrl_ext = rd32(E1000_CTRL_EXT);
1389         wr32(E1000_CTRL_EXT,
1390                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1391 }
1392
1393 /**
1394  * igb_get_hw_control - get control of the h/w from f/w
1395  * @adapter: address of board private structure
1396  *
1397  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1398  * For ASF and Pass Through versions of f/w this means that
1399  * the driver is loaded.
1400  *
1401  **/
1402 static void igb_get_hw_control(struct igb_adapter *adapter)
1403 {
1404         struct e1000_hw *hw = &adapter->hw;
1405         u32 ctrl_ext;
1406
1407         /* Let firmware know the driver has taken over */
1408         ctrl_ext = rd32(E1000_CTRL_EXT);
1409         wr32(E1000_CTRL_EXT,
1410                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1411 }
1412
1413 /**
1414  * igb_configure - configure the hardware for RX and TX
1415  * @adapter: private board structure
1416  **/
1417 static void igb_configure(struct igb_adapter *adapter)
1418 {
1419         struct net_device *netdev = adapter->netdev;
1420         int i;
1421
1422         igb_get_hw_control(adapter);
1423         igb_set_rx_mode(netdev);
1424
1425         igb_restore_vlan(adapter);
1426
1427         igb_setup_tctl(adapter);
1428         igb_setup_mrqc(adapter);
1429         igb_setup_rctl(adapter);
1430
1431         igb_configure_tx(adapter);
1432         igb_configure_rx(adapter);
1433
1434         igb_rx_fifo_flush_82575(&adapter->hw);
1435
1436         /* call igb_desc_unused which always leaves
1437          * at least 1 descriptor unused to make sure
1438          * next_to_use != next_to_clean */
1439         for (i = 0; i < adapter->num_rx_queues; i++) {
1440                 struct igb_ring *ring = adapter->rx_ring[i];
1441                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1442         }
1443 }
1444
1445 /**
1446  * igb_power_up_link - Power up the phy/serdes link
1447  * @adapter: address of board private structure
1448  **/
1449 void igb_power_up_link(struct igb_adapter *adapter)
1450 {
1451         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1452                 igb_power_up_phy_copper(&adapter->hw);
1453         else
1454                 igb_power_up_serdes_link_82575(&adapter->hw);
1455 }
1456
1457 /**
1458  * igb_power_down_link - Power down the phy/serdes link
1459  * @adapter: address of board private structure
1460  */
1461 static void igb_power_down_link(struct igb_adapter *adapter)
1462 {
1463         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1464                 igb_power_down_phy_copper_82575(&adapter->hw);
1465         else
1466                 igb_shutdown_serdes_link_82575(&adapter->hw);
1467 }
1468
1469 /**
1470  * igb_up - Open the interface and prepare it to handle traffic
1471  * @adapter: board private structure
1472  **/
1473 int igb_up(struct igb_adapter *adapter)
1474 {
1475         struct e1000_hw *hw = &adapter->hw;
1476         int i;
1477
1478         /* hardware has been reset, we need to reload some things */
1479         igb_configure(adapter);
1480
1481         clear_bit(__IGB_DOWN, &adapter->state);
1482
1483         for (i = 0; i < adapter->num_q_vectors; i++) {
1484                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1485                 napi_enable(&q_vector->napi);
1486         }
1487         if (adapter->msix_entries)
1488                 igb_configure_msix(adapter);
1489         else
1490                 igb_assign_vector(adapter->q_vector[0], 0);
1491
1492         /* Clear any pending interrupts. */
1493         rd32(E1000_ICR);
1494         igb_irq_enable(adapter);
1495
1496         /* notify VFs that reset has been completed */
1497         if (adapter->vfs_allocated_count) {
1498                 u32 reg_data = rd32(E1000_CTRL_EXT);
1499                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1500                 wr32(E1000_CTRL_EXT, reg_data);
1501         }
1502
1503         netif_tx_start_all_queues(adapter->netdev);
1504
1505         /* start the watchdog. */
1506         hw->mac.get_link_status = 1;
1507         schedule_work(&adapter->watchdog_task);
1508
1509         return 0;
1510 }
1511
1512 void igb_down(struct igb_adapter *adapter)
1513 {
1514         struct net_device *netdev = adapter->netdev;
1515         struct e1000_hw *hw = &adapter->hw;
1516         u32 tctl, rctl;
1517         int i;
1518
1519         /* signal that we're down so the interrupt handler does not
1520          * reschedule our watchdog timer */
1521         set_bit(__IGB_DOWN, &adapter->state);
1522
1523         /* disable receives in the hardware */
1524         rctl = rd32(E1000_RCTL);
1525         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1526         /* flush and sleep below */
1527
1528         netif_tx_stop_all_queues(netdev);
1529
1530         /* disable transmits in the hardware */
1531         tctl = rd32(E1000_TCTL);
1532         tctl &= ~E1000_TCTL_EN;
1533         wr32(E1000_TCTL, tctl);
1534         /* flush both disables and wait for them to finish */
1535         wrfl();
1536         msleep(10);
1537
1538         for (i = 0; i < adapter->num_q_vectors; i++) {
1539                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1540                 napi_disable(&q_vector->napi);
1541         }
1542
1543         igb_irq_disable(adapter);
1544
1545         del_timer_sync(&adapter->watchdog_timer);
1546         del_timer_sync(&adapter->phy_info_timer);
1547
1548         netif_carrier_off(netdev);
1549
1550         /* record the stats before reset*/
1551         spin_lock(&adapter->stats64_lock);
1552         igb_update_stats(adapter, &adapter->stats64);
1553         spin_unlock(&adapter->stats64_lock);
1554
1555         adapter->link_speed = 0;
1556         adapter->link_duplex = 0;
1557
1558         if (!pci_channel_offline(adapter->pdev))
1559                 igb_reset(adapter);
1560         igb_clean_all_tx_rings(adapter);
1561         igb_clean_all_rx_rings(adapter);
1562 #ifdef CONFIG_IGB_DCA
1563
1564         /* since we reset the hardware DCA settings were cleared */
1565         igb_setup_dca(adapter);
1566 #endif
1567 }
1568
1569 void igb_reinit_locked(struct igb_adapter *adapter)
1570 {
1571         WARN_ON(in_interrupt());
1572         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1573                 msleep(1);
1574         igb_down(adapter);
1575         igb_up(adapter);
1576         clear_bit(__IGB_RESETTING, &adapter->state);
1577 }
1578
1579 void igb_reset(struct igb_adapter *adapter)
1580 {
1581         struct pci_dev *pdev = adapter->pdev;
1582         struct e1000_hw *hw = &adapter->hw;
1583         struct e1000_mac_info *mac = &hw->mac;
1584         struct e1000_fc_info *fc = &hw->fc;
1585         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1586         u16 hwm;
1587
1588         /* Repartition Pba for greater than 9k mtu
1589          * To take effect CTRL.RST is required.
1590          */
1591         switch (mac->type) {
1592         case e1000_i350:
1593         case e1000_82580:
1594                 pba = rd32(E1000_RXPBS);
1595                 pba = igb_rxpbs_adjust_82580(pba);
1596                 break;
1597         case e1000_82576:
1598                 pba = rd32(E1000_RXPBS);
1599                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1600                 break;
1601         case e1000_82575:
1602         default:
1603                 pba = E1000_PBA_34K;
1604                 break;
1605         }
1606
1607         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1608             (mac->type < e1000_82576)) {
1609                 /* adjust PBA for jumbo frames */
1610                 wr32(E1000_PBA, pba);
1611
1612                 /* To maintain wire speed transmits, the Tx FIFO should be
1613                  * large enough to accommodate two full transmit packets,
1614                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1615                  * the Rx FIFO should be large enough to accommodate at least
1616                  * one full receive packet and is similarly rounded up and
1617                  * expressed in KB. */
1618                 pba = rd32(E1000_PBA);
1619                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1620                 tx_space = pba >> 16;
1621                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1622                 pba &= 0xffff;
1623                 /* the tx fifo also stores 16 bytes of information about the tx
1624                  * but don't include ethernet FCS because hardware appends it */
1625                 min_tx_space = (adapter->max_frame_size +
1626                                 sizeof(union e1000_adv_tx_desc) -
1627                                 ETH_FCS_LEN) * 2;
1628                 min_tx_space = ALIGN(min_tx_space, 1024);
1629                 min_tx_space >>= 10;
1630                 /* software strips receive CRC, so leave room for it */
1631                 min_rx_space = adapter->max_frame_size;
1632                 min_rx_space = ALIGN(min_rx_space, 1024);
1633                 min_rx_space >>= 10;
1634
1635                 /* If current Tx allocation is less than the min Tx FIFO size,
1636                  * and the min Tx FIFO size is less than the current Rx FIFO
1637                  * allocation, take space away from current Rx allocation */
1638                 if (tx_space < min_tx_space &&
1639                     ((min_tx_space - tx_space) < pba)) {
1640                         pba = pba - (min_tx_space - tx_space);
1641
1642                         /* if short on rx space, rx wins and must trump tx
1643                          * adjustment */
1644                         if (pba < min_rx_space)
1645                                 pba = min_rx_space;
1646                 }
1647                 wr32(E1000_PBA, pba);
1648         }
1649
1650         /* flow control settings */
1651         /* The high water mark must be low enough to fit one full frame
1652          * (or the size used for early receive) above it in the Rx FIFO.
1653          * Set it to the lower of:
1654          * - 90% of the Rx FIFO size, or
1655          * - the full Rx FIFO size minus one full frame */
1656         hwm = min(((pba << 10) * 9 / 10),
1657                         ((pba << 10) - 2 * adapter->max_frame_size));
1658
1659         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1660         fc->low_water = fc->high_water - 16;
1661         fc->pause_time = 0xFFFF;
1662         fc->send_xon = 1;
1663         fc->current_mode = fc->requested_mode;
1664
1665         /* disable receive for all VFs and wait one second */
1666         if (adapter->vfs_allocated_count) {
1667                 int i;
1668                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1669                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1670
1671                 /* ping all the active vfs to let them know we are going down */
1672                 igb_ping_all_vfs(adapter);
1673
1674                 /* disable transmits and receives */
1675                 wr32(E1000_VFRE, 0);
1676                 wr32(E1000_VFTE, 0);
1677         }
1678
1679         /* Allow time for pending master requests to run */
1680         hw->mac.ops.reset_hw(hw);
1681         wr32(E1000_WUC, 0);
1682
1683         if (hw->mac.ops.init_hw(hw))
1684                 dev_err(&pdev->dev, "Hardware Error\n");
1685         if (hw->mac.type > e1000_82580) {
1686                 if (adapter->flags & IGB_FLAG_DMAC) {
1687                         u32 reg;
1688
1689                         /*
1690                          * DMA Coalescing high water mark needs to be higher
1691                          * than * the * Rx threshold.  The Rx threshold is
1692                          * currently * pba - 6, so we * should use a high water
1693                          * mark of pba * - 4. */
1694                         hwm = (pba - 4) << 10;
1695
1696                         reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1697                                & E1000_DMACR_DMACTHR_MASK);
1698
1699                         /* transition to L0x or L1 if available..*/
1700                         reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1701
1702                         /* watchdog timer= +-1000 usec in 32usec intervals */
1703                         reg |= (1000 >> 5);
1704                         wr32(E1000_DMACR, reg);
1705
1706                         /* no lower threshold to disable coalescing(smart fifb)
1707                          * -UTRESH=0*/
1708                         wr32(E1000_DMCRTRH, 0);
1709
1710                         /* set hwm to PBA -  2 * max frame size */
1711                         wr32(E1000_FCRTC, hwm);
1712
1713                         /*
1714                          * This sets the time to wait before requesting tran-
1715                          * sition to * low power state to number of usecs needed
1716                          * to receive 1 512 * byte frame at gigabit line rate
1717                          */
1718                         reg = rd32(E1000_DMCTLX);
1719                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
1720
1721                         /* Delay 255 usec before entering Lx state. */
1722                         reg |= 0xFF;
1723                         wr32(E1000_DMCTLX, reg);
1724
1725                         /* free space in Tx packet buffer to wake from DMAC */
1726                         wr32(E1000_DMCTXTH,
1727                              (IGB_MIN_TXPBSIZE -
1728                              (IGB_TX_BUF_4096 + adapter->max_frame_size))
1729                              >> 6);
1730
1731                         /* make low power state decision controlled by DMAC */
1732                         reg = rd32(E1000_PCIEMISC);
1733                         reg |= E1000_PCIEMISC_LX_DECISION;
1734                         wr32(E1000_PCIEMISC, reg);
1735                 } /* end if IGB_FLAG_DMAC set */
1736         }
1737         if (hw->mac.type == e1000_82580) {
1738                 u32 reg = rd32(E1000_PCIEMISC);
1739                 wr32(E1000_PCIEMISC,
1740                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1741         }
1742         if (!netif_running(adapter->netdev))
1743                 igb_power_down_link(adapter);
1744
1745         igb_update_mng_vlan(adapter);
1746
1747         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1748         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1749
1750         igb_get_phy_info(hw);
1751 }
1752
1753 static u32 igb_fix_features(struct net_device *netdev, u32 features)
1754 {
1755         /*
1756          * Since there is no support for separate rx/tx vlan accel
1757          * enable/disable make sure tx flag is always in same state as rx.
1758          */
1759         if (features & NETIF_F_HW_VLAN_RX)
1760                 features |= NETIF_F_HW_VLAN_TX;
1761         else
1762                 features &= ~NETIF_F_HW_VLAN_TX;
1763
1764         return features;
1765 }
1766
1767 static int igb_set_features(struct net_device *netdev, u32 features)
1768 {
1769         struct igb_adapter *adapter = netdev_priv(netdev);
1770         int i;
1771         u32 changed = netdev->features ^ features;
1772
1773         for (i = 0; i < adapter->num_rx_queues; i++) {
1774                 if (features & NETIF_F_RXCSUM)
1775                         adapter->rx_ring[i]->flags |= IGB_RING_FLAG_RX_CSUM;
1776                 else
1777                         adapter->rx_ring[i]->flags &= ~IGB_RING_FLAG_RX_CSUM;
1778         }
1779
1780         if (changed & NETIF_F_HW_VLAN_RX)
1781                 igb_vlan_mode(netdev, features);
1782
1783         return 0;
1784 }
1785
1786 static const struct net_device_ops igb_netdev_ops = {
1787         .ndo_open               = igb_open,
1788         .ndo_stop               = igb_close,
1789         .ndo_start_xmit         = igb_xmit_frame_adv,
1790         .ndo_get_stats64        = igb_get_stats64,
1791         .ndo_set_rx_mode        = igb_set_rx_mode,
1792         .ndo_set_multicast_list = igb_set_rx_mode,
1793         .ndo_set_mac_address    = igb_set_mac,
1794         .ndo_change_mtu         = igb_change_mtu,
1795         .ndo_do_ioctl           = igb_ioctl,
1796         .ndo_tx_timeout         = igb_tx_timeout,
1797         .ndo_validate_addr      = eth_validate_addr,
1798         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1799         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1800         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1801         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1802         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1803         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1804 #ifdef CONFIG_NET_POLL_CONTROLLER
1805         .ndo_poll_controller    = igb_netpoll,
1806 #endif
1807         .ndo_fix_features       = igb_fix_features,
1808         .ndo_set_features       = igb_set_features,
1809 };
1810
1811 /**
1812  * igb_probe - Device Initialization Routine
1813  * @pdev: PCI device information struct
1814  * @ent: entry in igb_pci_tbl
1815  *
1816  * Returns 0 on success, negative on failure
1817  *
1818  * igb_probe initializes an adapter identified by a pci_dev structure.
1819  * The OS initialization, configuring of the adapter private structure,
1820  * and a hardware reset occur.
1821  **/
1822 static int __devinit igb_probe(struct pci_dev *pdev,
1823                                const struct pci_device_id *ent)
1824 {
1825         struct net_device *netdev;
1826         struct igb_adapter *adapter;
1827         struct e1000_hw *hw;
1828         u16 eeprom_data = 0;
1829         s32 ret_val;
1830         static int global_quad_port_a; /* global quad port a indication */
1831         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1832         unsigned long mmio_start, mmio_len;
1833         int err, pci_using_dac;
1834         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1835         u8 part_str[E1000_PBANUM_LENGTH];
1836
1837         /* Catch broken hardware that put the wrong VF device ID in
1838          * the PCIe SR-IOV capability.
1839          */
1840         if (pdev->is_virtfn) {
1841                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1842                      pci_name(pdev), pdev->vendor, pdev->device);
1843                 return -EINVAL;
1844         }
1845
1846         err = pci_enable_device_mem(pdev);
1847         if (err)
1848                 return err;
1849
1850         pci_using_dac = 0;
1851         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1852         if (!err) {
1853                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1854                 if (!err)
1855                         pci_using_dac = 1;
1856         } else {
1857                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1858                 if (err) {
1859                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1860                         if (err) {
1861                                 dev_err(&pdev->dev, "No usable DMA "
1862                                         "configuration, aborting\n");
1863                                 goto err_dma;
1864                         }
1865                 }
1866         }
1867
1868         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1869                                            IORESOURCE_MEM),
1870                                            igb_driver_name);
1871         if (err)
1872                 goto err_pci_reg;
1873
1874         pci_enable_pcie_error_reporting(pdev);
1875
1876         pci_set_master(pdev);
1877         pci_save_state(pdev);
1878
1879         err = -ENOMEM;
1880         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1881                                    IGB_ABS_MAX_TX_QUEUES);
1882         if (!netdev)
1883                 goto err_alloc_etherdev;
1884
1885         SET_NETDEV_DEV(netdev, &pdev->dev);
1886
1887         pci_set_drvdata(pdev, netdev);
1888         adapter = netdev_priv(netdev);
1889         adapter->netdev = netdev;
1890         adapter->pdev = pdev;
1891         hw = &adapter->hw;
1892         hw->back = adapter;
1893         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1894
1895         mmio_start = pci_resource_start(pdev, 0);
1896         mmio_len = pci_resource_len(pdev, 0);
1897
1898         err = -EIO;
1899         hw->hw_addr = ioremap(mmio_start, mmio_len);
1900         if (!hw->hw_addr)
1901                 goto err_ioremap;
1902
1903         netdev->netdev_ops = &igb_netdev_ops;
1904         igb_set_ethtool_ops(netdev);
1905         netdev->watchdog_timeo = 5 * HZ;
1906
1907         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1908
1909         netdev->mem_start = mmio_start;
1910         netdev->mem_end = mmio_start + mmio_len;
1911
1912         /* PCI config space info */
1913         hw->vendor_id = pdev->vendor;
1914         hw->device_id = pdev->device;
1915         hw->revision_id = pdev->revision;
1916         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1917         hw->subsystem_device_id = pdev->subsystem_device;
1918
1919         /* Copy the default MAC, PHY and NVM function pointers */
1920         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1921         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1922         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1923         /* Initialize skew-specific constants */
1924         err = ei->get_invariants(hw);
1925         if (err)
1926                 goto err_sw_init;
1927
1928         /* setup the private structure */
1929         err = igb_sw_init(adapter);
1930         if (err)
1931                 goto err_sw_init;
1932
1933         igb_get_bus_info_pcie(hw);
1934
1935         hw->phy.autoneg_wait_to_complete = false;
1936
1937         /* Copper options */
1938         if (hw->phy.media_type == e1000_media_type_copper) {
1939                 hw->phy.mdix = AUTO_ALL_MODES;
1940                 hw->phy.disable_polarity_correction = false;
1941                 hw->phy.ms_type = e1000_ms_hw_default;
1942         }
1943
1944         if (igb_check_reset_block(hw))
1945                 dev_info(&pdev->dev,
1946                         "PHY reset is blocked due to SOL/IDER session.\n");
1947
1948         netdev->hw_features = NETIF_F_SG |
1949                            NETIF_F_IP_CSUM |
1950                            NETIF_F_IPV6_CSUM |
1951                            NETIF_F_TSO |
1952                            NETIF_F_TSO6 |
1953                            NETIF_F_RXCSUM |
1954                            NETIF_F_HW_VLAN_RX;
1955
1956         netdev->features = netdev->hw_features |
1957                            NETIF_F_HW_VLAN_TX |
1958                            NETIF_F_HW_VLAN_FILTER;
1959
1960         netdev->vlan_features |= NETIF_F_TSO;
1961         netdev->vlan_features |= NETIF_F_TSO6;
1962         netdev->vlan_features |= NETIF_F_IP_CSUM;
1963         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1964         netdev->vlan_features |= NETIF_F_SG;
1965
1966         if (pci_using_dac) {
1967                 netdev->features |= NETIF_F_HIGHDMA;
1968                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1969         }
1970
1971         if (hw->mac.type >= e1000_82576) {
1972                 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1973                 netdev->features |= NETIF_F_SCTP_CSUM;
1974         }
1975
1976         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1977
1978         /* before reading the NVM, reset the controller to put the device in a
1979          * known good starting state */
1980         hw->mac.ops.reset_hw(hw);
1981
1982         /* make sure the NVM is good */
1983         if (hw->nvm.ops.validate(hw) < 0) {
1984                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1985                 err = -EIO;
1986                 goto err_eeprom;
1987         }
1988
1989         /* copy the MAC address out of the NVM */
1990         if (hw->mac.ops.read_mac_addr(hw))
1991                 dev_err(&pdev->dev, "NVM Read Error\n");
1992
1993         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1994         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1995
1996         if (!is_valid_ether_addr(netdev->perm_addr)) {
1997                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1998                 err = -EIO;
1999                 goto err_eeprom;
2000         }
2001
2002         setup_timer(&adapter->watchdog_timer, igb_watchdog,
2003                     (unsigned long) adapter);
2004         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2005                     (unsigned long) adapter);
2006
2007         INIT_WORK(&adapter->reset_task, igb_reset_task);
2008         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2009
2010         /* Initialize link properties that are user-changeable */
2011         adapter->fc_autoneg = true;
2012         hw->mac.autoneg = true;
2013         hw->phy.autoneg_advertised = 0x2f;
2014
2015         hw->fc.requested_mode = e1000_fc_default;
2016         hw->fc.current_mode = e1000_fc_default;
2017
2018         igb_validate_mdi_setting(hw);
2019
2020         /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
2021          * enable the ACPI Magic Packet filter
2022          */
2023
2024         if (hw->bus.func == 0)
2025                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2026         else if (hw->mac.type >= e1000_82580)
2027                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2028                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2029                                  &eeprom_data);
2030         else if (hw->bus.func == 1)
2031                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2032
2033         if (eeprom_data & eeprom_apme_mask)
2034                 adapter->eeprom_wol |= E1000_WUFC_MAG;
2035
2036         /* now that we have the eeprom settings, apply the special cases where
2037          * the eeprom may be wrong or the board simply won't support wake on
2038          * lan on a particular port */
2039         switch (pdev->device) {
2040         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2041                 adapter->eeprom_wol = 0;
2042                 break;
2043         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2044         case E1000_DEV_ID_82576_FIBER:
2045         case E1000_DEV_ID_82576_SERDES:
2046                 /* Wake events only supported on port A for dual fiber
2047                  * regardless of eeprom setting */
2048                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2049                         adapter->eeprom_wol = 0;
2050                 break;
2051         case E1000_DEV_ID_82576_QUAD_COPPER:
2052         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2053                 /* if quad port adapter, disable WoL on all but port A */
2054                 if (global_quad_port_a != 0)
2055                         adapter->eeprom_wol = 0;
2056                 else
2057                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2058                 /* Reset for multiple quad port adapters */
2059                 if (++global_quad_port_a == 4)
2060                         global_quad_port_a = 0;
2061                 break;
2062         }
2063
2064         /* initialize the wol settings based on the eeprom settings */
2065         adapter->wol = adapter->eeprom_wol;
2066         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2067
2068         /* reset the hardware with the new settings */
2069         igb_reset(adapter);
2070
2071         /* let the f/w know that the h/w is now under the control of the
2072          * driver. */
2073         igb_get_hw_control(adapter);
2074
2075         strcpy(netdev->name, "eth%d");
2076         err = register_netdev(netdev);
2077         if (err)
2078                 goto err_register;
2079
2080         igb_vlan_mode(netdev, netdev->features);
2081
2082         /* carrier off reporting is important to ethtool even BEFORE open */
2083         netif_carrier_off(netdev);
2084
2085 #ifdef CONFIG_IGB_DCA
2086         if (dca_add_requester(&pdev->dev) == 0) {
2087                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2088                 dev_info(&pdev->dev, "DCA enabled\n");
2089                 igb_setup_dca(adapter);
2090         }
2091
2092 #endif
2093         /* do hw tstamp init after resetting */
2094         igb_init_hw_timer(adapter);
2095
2096         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2097         /* print bus type/speed/width info */
2098         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2099                  netdev->name,
2100                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2101                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2102                                                             "unknown"),
2103                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2104                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2105                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2106                    "unknown"),
2107                  netdev->dev_addr);
2108
2109         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2110         if (ret_val)
2111                 strcpy(part_str, "Unknown");
2112         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2113         dev_info(&pdev->dev,
2114                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2115                 adapter->msix_entries ? "MSI-X" :
2116                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2117                 adapter->num_rx_queues, adapter->num_tx_queues);
2118         switch (hw->mac.type) {
2119         case e1000_i350:
2120                 igb_set_eee_i350(hw);
2121                 break;
2122         default:
2123                 break;
2124         }
2125         return 0;
2126
2127 err_register:
2128         igb_release_hw_control(adapter);
2129 err_eeprom:
2130         if (!igb_check_reset_block(hw))
2131                 igb_reset_phy(hw);
2132
2133         if (hw->flash_address)
2134                 iounmap(hw->flash_address);
2135 err_sw_init:
2136         igb_clear_interrupt_scheme(adapter);
2137         iounmap(hw->hw_addr);
2138 err_ioremap:
2139         free_netdev(netdev);
2140 err_alloc_etherdev:
2141         pci_release_selected_regions(pdev,
2142                                      pci_select_bars(pdev, IORESOURCE_MEM));
2143 err_pci_reg:
2144 err_dma:
2145         pci_disable_device(pdev);
2146         return err;
2147 }
2148
2149 /**
2150  * igb_remove - Device Removal Routine
2151  * @pdev: PCI device information struct
2152  *
2153  * igb_remove is called by the PCI subsystem to alert the driver
2154  * that it should release a PCI device.  The could be caused by a
2155  * Hot-Plug event, or because the driver is going to be removed from
2156  * memory.
2157  **/
2158 static void __devexit igb_remove(struct pci_dev *pdev)
2159 {
2160         struct net_device *netdev = pci_get_drvdata(pdev);
2161         struct igb_adapter *adapter = netdev_priv(netdev);
2162         struct e1000_hw *hw = &adapter->hw;
2163
2164         /*
2165          * The watchdog timer may be rescheduled, so explicitly
2166          * disable watchdog from being rescheduled.
2167          */
2168         set_bit(__IGB_DOWN, &adapter->state);
2169         del_timer_sync(&adapter->watchdog_timer);
2170         del_timer_sync(&adapter->phy_info_timer);
2171
2172         cancel_work_sync(&adapter->reset_task);
2173         cancel_work_sync(&adapter->watchdog_task);
2174
2175 #ifdef CONFIG_IGB_DCA
2176         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2177                 dev_info(&pdev->dev, "DCA disabled\n");
2178                 dca_remove_requester(&pdev->dev);
2179                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2180                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2181         }
2182 #endif
2183
2184         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2185          * would have already happened in close and is redundant. */
2186         igb_release_hw_control(adapter);
2187
2188         unregister_netdev(netdev);
2189
2190         igb_clear_interrupt_scheme(adapter);
2191
2192 #ifdef CONFIG_PCI_IOV
2193         /* reclaim resources allocated to VFs */
2194         if (adapter->vf_data) {
2195                 /* disable iov and allow time for transactions to clear */
2196                 pci_disable_sriov(pdev);
2197                 msleep(500);
2198
2199                 kfree(adapter->vf_data);
2200                 adapter->vf_data = NULL;
2201                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2202                 wrfl();
2203                 msleep(100);
2204                 dev_info(&pdev->dev, "IOV Disabled\n");
2205         }
2206 #endif
2207
2208         iounmap(hw->hw_addr);
2209         if (hw->flash_address)
2210                 iounmap(hw->flash_address);
2211         pci_release_selected_regions(pdev,
2212                                      pci_select_bars(pdev, IORESOURCE_MEM));
2213
2214         free_netdev(netdev);
2215
2216         pci_disable_pcie_error_reporting(pdev);
2217
2218         pci_disable_device(pdev);
2219 }
2220
2221 /**
2222  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2223  * @adapter: board private structure to initialize
2224  *
2225  * This function initializes the vf specific data storage and then attempts to
2226  * allocate the VFs.  The reason for ordering it this way is because it is much
2227  * mor expensive time wise to disable SR-IOV than it is to allocate and free
2228  * the memory for the VFs.
2229  **/
2230 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2231 {
2232 #ifdef CONFIG_PCI_IOV
2233         struct pci_dev *pdev = adapter->pdev;
2234
2235         if (adapter->vfs_allocated_count) {
2236                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2237                                            sizeof(struct vf_data_storage),
2238                                            GFP_KERNEL);
2239                 /* if allocation failed then we do not support SR-IOV */
2240                 if (!adapter->vf_data) {
2241                         adapter->vfs_allocated_count = 0;
2242                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2243                                 "Data Storage\n");
2244                 }
2245         }
2246
2247         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2248                 kfree(adapter->vf_data);
2249                 adapter->vf_data = NULL;
2250 #endif /* CONFIG_PCI_IOV */
2251                 adapter->vfs_allocated_count = 0;
2252 #ifdef CONFIG_PCI_IOV
2253         } else {
2254                 unsigned char mac_addr[ETH_ALEN];
2255                 int i;
2256                 dev_info(&pdev->dev, "%d vfs allocated\n",
2257                          adapter->vfs_allocated_count);
2258                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2259                         random_ether_addr(mac_addr);
2260                         igb_set_vf_mac(adapter, i, mac_addr);
2261                 }
2262                 /* DMA Coalescing is not supported in IOV mode. */
2263                 if (adapter->flags & IGB_FLAG_DMAC)
2264                         adapter->flags &= ~IGB_FLAG_DMAC;
2265         }
2266 #endif /* CONFIG_PCI_IOV */
2267 }
2268
2269
2270 /**
2271  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2272  * @adapter: board private structure to initialize
2273  *
2274  * igb_init_hw_timer initializes the function pointer and values for the hw
2275  * timer found in hardware.
2276  **/
2277 static void igb_init_hw_timer(struct igb_adapter *adapter)
2278 {
2279         struct e1000_hw *hw = &adapter->hw;
2280
2281         switch (hw->mac.type) {
2282         case e1000_i350:
2283         case e1000_82580:
2284                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2285                 adapter->cycles.read = igb_read_clock;
2286                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2287                 adapter->cycles.mult = 1;
2288                 /*
2289                  * The 82580 timesync updates the system timer every 8ns by 8ns
2290                  * and the value cannot be shifted.  Instead we need to shift
2291                  * the registers to generate a 64bit timer value.  As a result
2292                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2293                  * 24 in order to generate a larger value for synchronization.
2294                  */
2295                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2296                 /* disable system timer temporarily by setting bit 31 */
2297                 wr32(E1000_TSAUXC, 0x80000000);
2298                 wrfl();
2299
2300                 /* Set registers so that rollover occurs soon to test this. */
2301                 wr32(E1000_SYSTIMR, 0x00000000);
2302                 wr32(E1000_SYSTIML, 0x80000000);
2303                 wr32(E1000_SYSTIMH, 0x000000FF);
2304                 wrfl();
2305
2306                 /* enable system timer by clearing bit 31 */
2307                 wr32(E1000_TSAUXC, 0x0);
2308                 wrfl();
2309
2310                 timecounter_init(&adapter->clock,
2311                                  &adapter->cycles,
2312                                  ktime_to_ns(ktime_get_real()));
2313                 /*
2314                  * Synchronize our NIC clock against system wall clock. NIC
2315                  * time stamp reading requires ~3us per sample, each sample
2316                  * was pretty stable even under load => only require 10
2317                  * samples for each offset comparison.
2318                  */
2319                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2320                 adapter->compare.source = &adapter->clock;
2321                 adapter->compare.target = ktime_get_real;
2322                 adapter->compare.num_samples = 10;
2323                 timecompare_update(&adapter->compare, 0);
2324                 break;
2325         case e1000_82576:
2326                 /*
2327                  * Initialize hardware timer: we keep it running just in case
2328                  * that some program needs it later on.
2329                  */
2330                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2331                 adapter->cycles.read = igb_read_clock;
2332                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2333                 adapter->cycles.mult = 1;
2334                 /**
2335                  * Scale the NIC clock cycle by a large factor so that
2336                  * relatively small clock corrections can be added or
2337                  * subtracted at each clock tick. The drawbacks of a large
2338                  * factor are a) that the clock register overflows more quickly
2339                  * (not such a big deal) and b) that the increment per tick has
2340                  * to fit into 24 bits.  As a result we need to use a shift of
2341                  * 19 so we can fit a value of 16 into the TIMINCA register.
2342                  */
2343                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2344                 wr32(E1000_TIMINCA,
2345                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2346                                 (16 << IGB_82576_TSYNC_SHIFT));
2347
2348                 /* Set registers so that rollover occurs soon to test this. */
2349                 wr32(E1000_SYSTIML, 0x00000000);
2350                 wr32(E1000_SYSTIMH, 0xFF800000);
2351                 wrfl();
2352
2353                 timecounter_init(&adapter->clock,
2354                                  &adapter->cycles,
2355                                  ktime_to_ns(ktime_get_real()));
2356                 /*
2357                  * Synchronize our NIC clock against system wall clock. NIC
2358                  * time stamp reading requires ~3us per sample, each sample
2359                  * was pretty stable even under load => only require 10
2360                  * samples for each offset comparison.
2361                  */
2362                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2363                 adapter->compare.source = &adapter->clock;
2364                 adapter->compare.target = ktime_get_real;
2365                 adapter->compare.num_samples = 10;
2366                 timecompare_update(&adapter->compare, 0);
2367                 break;
2368         case e1000_82575:
2369                 /* 82575 does not support timesync */
2370         default:
2371                 break;
2372         }
2373
2374 }
2375
2376 /**
2377  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2378  * @adapter: board private structure to initialize
2379  *
2380  * igb_sw_init initializes the Adapter private data structure.
2381  * Fields are initialized based on PCI device information and
2382  * OS network device settings (MTU size).
2383  **/
2384 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2385 {
2386         struct e1000_hw *hw = &adapter->hw;
2387         struct net_device *netdev = adapter->netdev;
2388         struct pci_dev *pdev = adapter->pdev;
2389
2390         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2391
2392         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2393         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2394         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2395         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2396
2397         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2398         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2399
2400         spin_lock_init(&adapter->stats64_lock);
2401 #ifdef CONFIG_PCI_IOV
2402         switch (hw->mac.type) {
2403         case e1000_82576:
2404         case e1000_i350:
2405                 if (max_vfs > 7) {
2406                         dev_warn(&pdev->dev,
2407                                  "Maximum of 7 VFs per PF, using max\n");
2408                         adapter->vfs_allocated_count = 7;
2409                 } else
2410                         adapter->vfs_allocated_count = max_vfs;
2411                 break;
2412         default:
2413                 break;
2414         }
2415 #endif /* CONFIG_PCI_IOV */
2416         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2417         /* i350 cannot do RSS and SR-IOV at the same time */
2418         if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2419                 adapter->rss_queues = 1;
2420
2421         /*
2422          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2423          * then we should combine the queues into a queue pair in order to
2424          * conserve interrupts due to limited supply
2425          */
2426         if ((adapter->rss_queues > 4) ||
2427             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2428                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2429
2430         /* This call may decrease the number of queues */
2431         if (igb_init_interrupt_scheme(adapter)) {
2432                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2433                 return -ENOMEM;
2434         }
2435
2436         igb_probe_vfs(adapter);
2437
2438         /* Explicitly disable IRQ since the NIC can be in any state. */
2439         igb_irq_disable(adapter);
2440
2441         if (hw->mac.type == e1000_i350)
2442                 adapter->flags &= ~IGB_FLAG_DMAC;
2443
2444         set_bit(__IGB_DOWN, &adapter->state);
2445         return 0;
2446 }
2447
2448 /**
2449  * igb_open - Called when a network interface is made active
2450  * @netdev: network interface device structure
2451  *
2452  * Returns 0 on success, negative value on failure
2453  *
2454  * The open entry point is called when a network interface is made
2455  * active by the system (IFF_UP).  At this point all resources needed
2456  * for transmit and receive operations are allocated, the interrupt
2457  * handler is registered with the OS, the watchdog timer is started,
2458  * and the stack is notified that the interface is ready.
2459  **/
2460 static int igb_open(struct net_device *netdev)
2461 {
2462         struct igb_adapter *adapter = netdev_priv(netdev);
2463         struct e1000_hw *hw = &adapter->hw;
2464         int err;
2465         int i;
2466
2467         /* disallow open during test */
2468         if (test_bit(__IGB_TESTING, &adapter->state))
2469                 return -EBUSY;
2470
2471         netif_carrier_off(netdev);
2472
2473         /* allocate transmit descriptors */
2474         err = igb_setup_all_tx_resources(adapter);
2475         if (err)
2476                 goto err_setup_tx;
2477
2478         /* allocate receive descriptors */
2479         err = igb_setup_all_rx_resources(adapter);
2480         if (err)
2481                 goto err_setup_rx;
2482
2483         igb_power_up_link(adapter);
2484
2485         /* before we allocate an interrupt, we must be ready to handle it.
2486          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2487          * as soon as we call pci_request_irq, so we have to setup our
2488          * clean_rx handler before we do so.  */
2489         igb_configure(adapter);
2490
2491         err = igb_request_irq(adapter);
2492         if (err)
2493                 goto err_req_irq;
2494
2495         /* From here on the code is the same as igb_up() */
2496         clear_bit(__IGB_DOWN, &adapter->state);
2497
2498         for (i = 0; i < adapter->num_q_vectors; i++) {
2499                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2500                 napi_enable(&q_vector->napi);
2501         }
2502
2503         /* Clear any pending interrupts. */
2504         rd32(E1000_ICR);
2505
2506         igb_irq_enable(adapter);
2507
2508         /* notify VFs that reset has been completed */
2509         if (adapter->vfs_allocated_count) {
2510                 u32 reg_data = rd32(E1000_CTRL_EXT);
2511                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2512                 wr32(E1000_CTRL_EXT, reg_data);
2513         }
2514
2515         netif_tx_start_all_queues(netdev);
2516
2517         /* start the watchdog. */
2518         hw->mac.get_link_status = 1;
2519         schedule_work(&adapter->watchdog_task);
2520
2521         return 0;
2522
2523 err_req_irq:
2524         igb_release_hw_control(adapter);
2525         igb_power_down_link(adapter);
2526         igb_free_all_rx_resources(adapter);
2527 err_setup_rx:
2528         igb_free_all_tx_resources(adapter);
2529 err_setup_tx:
2530         igb_reset(adapter);
2531
2532         return err;
2533 }
2534
2535 /**
2536  * igb_close - Disables a network interface
2537  * @netdev: network interface device structure
2538  *
2539  * Returns 0, this is not allowed to fail
2540  *
2541  * The close entry point is called when an interface is de-activated
2542  * by the OS.  The hardware is still under the driver's control, but
2543  * needs to be disabled.  A global MAC reset is issued to stop the
2544  * hardware, and all transmit and receive resources are freed.
2545  **/
2546 static int igb_close(struct net_device *netdev)
2547 {
2548         struct igb_adapter *adapter = netdev_priv(netdev);
2549
2550         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2551         igb_down(adapter);
2552
2553         igb_free_irq(adapter);
2554
2555         igb_free_all_tx_resources(adapter);
2556         igb_free_all_rx_resources(adapter);
2557
2558         return 0;
2559 }
2560
2561 /**
2562  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2563  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2564  *
2565  * Return 0 on success, negative on failure
2566  **/
2567 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2568 {
2569         struct device *dev = tx_ring->dev;
2570         int size;
2571
2572         size = sizeof(struct igb_buffer) * tx_ring->count;
2573         tx_ring->buffer_info = vzalloc(size);
2574         if (!tx_ring->buffer_info)
2575                 goto err;
2576
2577         /* round up to nearest 4K */
2578         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2579         tx_ring->size = ALIGN(tx_ring->size, 4096);
2580
2581         tx_ring->desc = dma_alloc_coherent(dev,
2582                                            tx_ring->size,
2583                                            &tx_ring->dma,
2584                                            GFP_KERNEL);
2585
2586         if (!tx_ring->desc)
2587                 goto err;
2588
2589         tx_ring->next_to_use = 0;
2590         tx_ring->next_to_clean = 0;
2591         return 0;
2592
2593 err:
2594         vfree(tx_ring->buffer_info);
2595         dev_err(dev,
2596                 "Unable to allocate memory for the transmit descriptor ring\n");
2597         return -ENOMEM;
2598 }
2599
2600 /**
2601  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2602  *                                (Descriptors) for all queues
2603  * @adapter: board private structure
2604  *
2605  * Return 0 on success, negative on failure
2606  **/
2607 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2608 {
2609         struct pci_dev *pdev = adapter->pdev;
2610         int i, err = 0;
2611
2612         for (i = 0; i < adapter->num_tx_queues; i++) {
2613                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2614                 if (err) {
2615                         dev_err(&pdev->dev,
2616                                 "Allocation for Tx Queue %u failed\n", i);
2617                         for (i--; i >= 0; i--)
2618                                 igb_free_tx_resources(adapter->tx_ring[i]);
2619                         break;
2620                 }
2621         }
2622
2623         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2624                 int r_idx = i % adapter->num_tx_queues;
2625                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2626         }
2627         return err;
2628 }
2629
2630 /**
2631  * igb_setup_tctl - configure the transmit control registers
2632  * @adapter: Board private structure
2633  **/
2634 void igb_setup_tctl(struct igb_adapter *adapter)
2635 {
2636         struct e1000_hw *hw = &adapter->hw;
2637         u32 tctl;
2638
2639         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2640         wr32(E1000_TXDCTL(0), 0);
2641
2642         /* Program the Transmit Control Register */
2643         tctl = rd32(E1000_TCTL);
2644         tctl &= ~E1000_TCTL_CT;
2645         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2646                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2647
2648         igb_config_collision_dist(hw);
2649
2650         /* Enable transmits */
2651         tctl |= E1000_TCTL_EN;
2652
2653         wr32(E1000_TCTL, tctl);
2654 }
2655
2656 /**
2657  * igb_configure_tx_ring - Configure transmit ring after Reset
2658  * @adapter: board private structure
2659  * @ring: tx ring to configure
2660  *
2661  * Configure a transmit ring after a reset.
2662  **/
2663 void igb_configure_tx_ring(struct igb_adapter *adapter,
2664                            struct igb_ring *ring)
2665 {
2666         struct e1000_hw *hw = &adapter->hw;
2667         u32 txdctl;
2668         u64 tdba = ring->dma;
2669         int reg_idx = ring->reg_idx;
2670
2671         /* disable the queue */
2672         txdctl = rd32(E1000_TXDCTL(reg_idx));
2673         wr32(E1000_TXDCTL(reg_idx),
2674                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2675         wrfl();
2676         mdelay(10);
2677
2678         wr32(E1000_TDLEN(reg_idx),
2679                         ring->count * sizeof(union e1000_adv_tx_desc));
2680         wr32(E1000_TDBAL(reg_idx),
2681                         tdba & 0x00000000ffffffffULL);
2682         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2683
2684         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2685         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2686         writel(0, ring->head);
2687         writel(0, ring->tail);
2688
2689         txdctl |= IGB_TX_PTHRESH;
2690         txdctl |= IGB_TX_HTHRESH << 8;
2691         txdctl |= IGB_TX_WTHRESH << 16;
2692
2693         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2694         wr32(E1000_TXDCTL(reg_idx), txdctl);
2695 }
2696
2697 /**
2698  * igb_configure_tx - Configure transmit Unit after Reset
2699  * @adapter: board private structure
2700  *
2701  * Configure the Tx unit of the MAC after a reset.
2702  **/
2703 static void igb_configure_tx(struct igb_adapter *adapter)
2704 {
2705         int i;
2706
2707         for (i = 0; i < adapter->num_tx_queues; i++)
2708                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2709 }
2710
2711 /**
2712  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2713  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2714  *
2715  * Returns 0 on success, negative on failure
2716  **/
2717 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2718 {
2719         struct device *dev = rx_ring->dev;
2720         int size, desc_len;
2721
2722         size = sizeof(struct igb_buffer) * rx_ring->count;
2723         rx_ring->buffer_info = vzalloc(size);
2724         if (!rx_ring->buffer_info)
2725                 goto err;
2726
2727         desc_len = sizeof(union e1000_adv_rx_desc);
2728
2729         /* Round up to nearest 4K */
2730         rx_ring->size = rx_ring->count * desc_len;
2731         rx_ring->size = ALIGN(rx_ring->size, 4096);
2732
2733         rx_ring->desc = dma_alloc_coherent(dev,
2734                                            rx_ring->size,
2735                                            &rx_ring->dma,
2736                                            GFP_KERNEL);
2737
2738         if (!rx_ring->desc)
2739                 goto err;
2740
2741         rx_ring->next_to_clean = 0;
2742         rx_ring->next_to_use = 0;
2743
2744         return 0;
2745
2746 err:
2747         vfree(rx_ring->buffer_info);
2748         rx_ring->buffer_info = NULL;
2749         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2750                 " ring\n");
2751         return -ENOMEM;
2752 }
2753
2754 /**
2755  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2756  *                                (Descriptors) for all queues
2757  * @adapter: board private structure
2758  *
2759  * Return 0 on success, negative on failure
2760  **/
2761 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2762 {
2763         struct pci_dev *pdev = adapter->pdev;
2764         int i, err = 0;
2765
2766         for (i = 0; i < adapter->num_rx_queues; i++) {
2767                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2768                 if (err) {
2769                         dev_err(&pdev->dev,
2770                                 "Allocation for Rx Queue %u failed\n", i);
2771                         for (i--; i >= 0; i--)
2772                                 igb_free_rx_resources(adapter->rx_ring[i]);
2773                         break;
2774                 }
2775         }
2776
2777         return err;
2778 }
2779
2780 /**
2781  * igb_setup_mrqc - configure the multiple receive queue control registers
2782  * @adapter: Board private structure
2783  **/
2784 static void igb_setup_mrqc(struct igb_adapter *adapter)
2785 {
2786         struct e1000_hw *hw = &adapter->hw;
2787         u32 mrqc, rxcsum;
2788         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2789         union e1000_reta {
2790                 u32 dword;
2791                 u8  bytes[4];
2792         } reta;
2793         static const u8 rsshash[40] = {
2794                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2795                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2796                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2797                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2798
2799         /* Fill out hash function seeds */
2800         for (j = 0; j < 10; j++) {
2801                 u32 rsskey = rsshash[(j * 4)];
2802                 rsskey |= rsshash[(j * 4) + 1] << 8;
2803                 rsskey |= rsshash[(j * 4) + 2] << 16;
2804                 rsskey |= rsshash[(j * 4) + 3] << 24;
2805                 array_wr32(E1000_RSSRK(0), j, rsskey);
2806         }
2807
2808         num_rx_queues = adapter->rss_queues;
2809
2810         if (adapter->vfs_allocated_count) {
2811                 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2812                 switch (hw->mac.type) {
2813                 case e1000_i350:
2814                 case e1000_82580:
2815                         num_rx_queues = 1;
2816                         shift = 0;
2817                         break;
2818                 case e1000_82576:
2819                         shift = 3;
2820                         num_rx_queues = 2;
2821                         break;
2822                 case e1000_82575:
2823                         shift = 2;
2824                         shift2 = 6;
2825                 default:
2826                         break;
2827                 }
2828         } else {
2829                 if (hw->mac.type == e1000_82575)
2830                         shift = 6;
2831         }
2832
2833         for (j = 0; j < (32 * 4); j++) {
2834                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2835                 if (shift2)
2836                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2837                 if ((j & 3) == 3)
2838                         wr32(E1000_RETA(j >> 2), reta.dword);
2839         }
2840
2841         /*
2842          * Disable raw packet checksumming so that RSS hash is placed in
2843          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2844          * offloads as they are enabled by default
2845          */
2846         rxcsum = rd32(E1000_RXCSUM);
2847         rxcsum |= E1000_RXCSUM_PCSD;
2848
2849         if (adapter->hw.mac.type >= e1000_82576)
2850                 /* Enable Receive Checksum Offload for SCTP */
2851                 rxcsum |= E1000_RXCSUM_CRCOFL;
2852
2853         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2854         wr32(E1000_RXCSUM, rxcsum);
2855
2856         /* If VMDq is enabled then we set the appropriate mode for that, else
2857          * we default to RSS so that an RSS hash is calculated per packet even
2858          * if we are only using one queue */
2859         if (adapter->vfs_allocated_count) {
2860                 if (hw->mac.type > e1000_82575) {
2861                         /* Set the default pool for the PF's first queue */
2862                         u32 vtctl = rd32(E1000_VT_CTL);
2863                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2864                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2865                         vtctl |= adapter->vfs_allocated_count <<
2866                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2867                         wr32(E1000_VT_CTL, vtctl);
2868                 }
2869                 if (adapter->rss_queues > 1)
2870                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2871                 else
2872                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2873         } else {
2874                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2875         }
2876         igb_vmm_control(adapter);
2877
2878         /*
2879          * Generate RSS hash based on TCP port numbers and/or
2880          * IPv4/v6 src and dst addresses since UDP cannot be
2881          * hashed reliably due to IP fragmentation
2882          */
2883         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2884                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2885                 E1000_MRQC_RSS_FIELD_IPV6 |
2886                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2887                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2888
2889         wr32(E1000_MRQC, mrqc);
2890 }
2891
2892 /**
2893  * igb_setup_rctl - configure the receive control registers
2894  * @adapter: Board private structure
2895  **/
2896 void igb_setup_rctl(struct igb_adapter *adapter)
2897 {
2898         struct e1000_hw *hw = &adapter->hw;
2899         u32 rctl;
2900
2901         rctl = rd32(E1000_RCTL);
2902
2903         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2904         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2905
2906         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2907                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2908
2909         /*
2910          * enable stripping of CRC. It's unlikely this will break BMC
2911          * redirection as it did with e1000. Newer features require
2912          * that the HW strips the CRC.
2913          */
2914         rctl |= E1000_RCTL_SECRC;
2915
2916         /* disable store bad packets and clear size bits. */
2917         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2918
2919         /* enable LPE to prevent packets larger than max_frame_size */
2920         rctl |= E1000_RCTL_LPE;
2921
2922         /* disable queue 0 to prevent tail write w/o re-config */
2923         wr32(E1000_RXDCTL(0), 0);
2924
2925         /* Attention!!!  For SR-IOV PF driver operations you must enable
2926          * queue drop for all VF and PF queues to prevent head of line blocking
2927          * if an un-trusted VF does not provide descriptors to hardware.
2928          */
2929         if (adapter->vfs_allocated_count) {
2930                 /* set all queue drop enable bits */
2931                 wr32(E1000_QDE, ALL_QUEUES);
2932         }
2933
2934         wr32(E1000_RCTL, rctl);
2935 }
2936
2937 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2938                                    int vfn)
2939 {
2940         struct e1000_hw *hw = &adapter->hw;
2941         u32 vmolr;
2942
2943         /* if it isn't the PF check to see if VFs are enabled and
2944          * increase the size to support vlan tags */
2945         if (vfn < adapter->vfs_allocated_count &&
2946             adapter->vf_data[vfn].vlans_enabled)
2947                 size += VLAN_TAG_SIZE;
2948
2949         vmolr = rd32(E1000_VMOLR(vfn));
2950         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2951         vmolr |= size | E1000_VMOLR_LPE;
2952         wr32(E1000_VMOLR(vfn), vmolr);
2953
2954         return 0;
2955 }
2956
2957 /**
2958  * igb_rlpml_set - set maximum receive packet size
2959  * @adapter: board private structure
2960  *
2961  * Configure maximum receivable packet size.
2962  **/
2963 static void igb_rlpml_set(struct igb_adapter *adapter)
2964 {
2965         u32 max_frame_size;
2966         struct e1000_hw *hw = &adapter->hw;
2967         u16 pf_id = adapter->vfs_allocated_count;
2968
2969         max_frame_size = adapter->max_frame_size + VLAN_TAG_SIZE;
2970
2971         /* if vfs are enabled we set RLPML to the largest possible request
2972          * size and set the VMOLR RLPML to the size we need */
2973         if (pf_id) {
2974                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2975                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2976         }
2977
2978         wr32(E1000_RLPML, max_frame_size);
2979 }
2980
2981 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2982                                  int vfn, bool aupe)
2983 {
2984         struct e1000_hw *hw = &adapter->hw;
2985         u32 vmolr;
2986
2987         /*
2988          * This register exists only on 82576 and newer so if we are older then
2989          * we should exit and do nothing
2990          */
2991         if (hw->mac.type < e1000_82576)
2992                 return;
2993
2994         vmolr = rd32(E1000_VMOLR(vfn));
2995         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2996         if (aupe)
2997                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2998         else
2999                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3000
3001         /* clear all bits that might not be set */
3002         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3003
3004         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3005                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3006         /*
3007          * for VMDq only allow the VFs and pool 0 to accept broadcast and
3008          * multicast packets
3009          */
3010         if (vfn <= adapter->vfs_allocated_count)
3011                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
3012
3013         wr32(E1000_VMOLR(vfn), vmolr);
3014 }
3015
3016 /**
3017  * igb_configure_rx_ring - Configure a receive ring after Reset
3018  * @adapter: board private structure
3019  * @ring: receive ring to be configured
3020  *
3021  * Configure the Rx unit of the MAC after a reset.
3022  **/
3023 void igb_configure_rx_ring(struct igb_adapter *adapter,
3024                            struct igb_ring *ring)
3025 {
3026         struct e1000_hw *hw = &adapter->hw;
3027         u64 rdba = ring->dma;
3028         int reg_idx = ring->reg_idx;
3029         u32 srrctl, rxdctl;
3030
3031         /* disable the queue */
3032         rxdctl = rd32(E1000_RXDCTL(reg_idx));
3033         wr32(E1000_RXDCTL(reg_idx),
3034                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
3035
3036         /* Set DMA base address registers */
3037         wr32(E1000_RDBAL(reg_idx),
3038              rdba & 0x00000000ffffffffULL);
3039         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3040         wr32(E1000_RDLEN(reg_idx),
3041                        ring->count * sizeof(union e1000_adv_rx_desc));
3042
3043         /* initialize head and tail */
3044         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
3045         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3046         writel(0, ring->head);
3047         writel(0, ring->tail);
3048
3049         /* set descriptor configuration */
3050         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
3051                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
3052                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3053 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3054                 srrctl |= IGB_RXBUFFER_16384 >>
3055                           E1000_SRRCTL_BSIZEPKT_SHIFT;
3056 #else
3057                 srrctl |= (PAGE_SIZE / 2) >>
3058                           E1000_SRRCTL_BSIZEPKT_SHIFT;
3059 #endif
3060                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3061         } else {
3062                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
3063                          E1000_SRRCTL_BSIZEPKT_SHIFT;
3064                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3065         }
3066         if (hw->mac.type == e1000_82580)
3067                 srrctl |= E1000_SRRCTL_TIMESTAMP;
3068         /* Only set Drop Enable if we are supporting multiple queues */
3069         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3070                 srrctl |= E1000_SRRCTL_DROP_EN;
3071
3072         wr32(E1000_SRRCTL(reg_idx), srrctl);
3073
3074         /* set filtering for VMDQ pools */
3075         igb_set_vmolr(adapter, reg_idx & 0x7, true);
3076
3077         /* enable receive descriptor fetching */
3078         rxdctl = rd32(E1000_RXDCTL(reg_idx));
3079         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3080         rxdctl &= 0xFFF00000;
3081         rxdctl |= IGB_RX_PTHRESH;
3082         rxdctl |= IGB_RX_HTHRESH << 8;
3083         rxdctl |= IGB_RX_WTHRESH << 16;
3084         wr32(E1000_RXDCTL(reg_idx), rxdctl);
3085 }
3086
3087 /**
3088  * igb_configure_rx - Configure receive Unit after Reset
3089  * @adapter: board private structure
3090  *
3091  * Configure the Rx unit of the MAC after a reset.
3092  **/
3093 static void igb_configure_rx(struct igb_adapter *adapter)
3094 {
3095         int i;
3096
3097         /* set UTA to appropriate mode */
3098         igb_set_uta(adapter);
3099
3100         /* set the correct pool for the PF default MAC address in entry 0 */
3101         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3102                          adapter->vfs_allocated_count);
3103
3104         /* Setup the HW Rx Head and Tail Descriptor Pointers and
3105          * the Base and Length of the Rx Descriptor Ring */
3106         for (i = 0; i < adapter->num_rx_queues; i++)
3107                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3108 }
3109
3110 /**
3111  * igb_free_tx_resources - Free Tx Resources per Queue
3112  * @tx_ring: Tx descriptor ring for a specific queue
3113  *
3114  * Free all transmit software resources
3115  **/
3116 void igb_free_tx_resources(struct igb_ring *tx_ring)
3117 {
3118         igb_clean_tx_ring(tx_ring);
3119
3120         vfree(tx_ring->buffer_info);
3121         tx_ring->buffer_info = NULL;
3122
3123         /* if not set, then don't free */
3124         if (!tx_ring->desc)
3125                 return;
3126
3127         dma_free_coherent(tx_ring->dev, tx_ring->size,
3128                           tx_ring->desc, tx_ring->dma);
3129
3130         tx_ring->desc = NULL;
3131 }
3132
3133 /**
3134  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3135  * @adapter: board private structure
3136  *
3137  * Free all transmit software resources
3138  **/
3139 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3140 {
3141         int i;
3142
3143         for (i = 0; i < adapter->num_tx_queues; i++)
3144                 igb_free_tx_resources(adapter->tx_ring[i]);
3145 }
3146
3147 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3148                                     struct igb_buffer *buffer_info)
3149 {
3150         if (buffer_info->dma) {
3151                 if (buffer_info->mapped_as_page)
3152                         dma_unmap_page(tx_ring->dev,
3153                                         buffer_info->dma,
3154                                         buffer_info->length,
3155                                         DMA_TO_DEVICE);
3156                 else
3157                         dma_unmap_single(tx_ring->dev,
3158                                         buffer_info->dma,
3159                                         buffer_info->length,
3160                                         DMA_TO_DEVICE);
3161                 buffer_info->dma = 0;
3162         }
3163         if (buffer_info->skb) {
3164                 dev_kfree_skb_any(buffer_info->skb);
3165                 buffer_info->skb = NULL;
3166         }
3167         buffer_info->time_stamp = 0;
3168         buffer_info->length = 0;
3169         buffer_info->next_to_watch = 0;
3170         buffer_info->mapped_as_page = false;
3171 }
3172
3173 /**
3174  * igb_clean_tx_ring - Free Tx Buffers
3175  * @tx_ring: ring to be cleaned
3176  **/
3177 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3178 {
3179         struct igb_buffer *buffer_info;
3180         unsigned long size;
3181         unsigned int i;
3182
3183         if (!tx_ring->buffer_info)
3184                 return;
3185         /* Free all the Tx ring sk_buffs */
3186
3187         for (i = 0; i < tx_ring->count; i++) {
3188                 buffer_info = &tx_ring->buffer_info[i];
3189                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3190         }
3191
3192         size = sizeof(struct igb_buffer) * tx_ring->count;
3193         memset(tx_ring->buffer_info, 0, size);
3194
3195         /* Zero out the descriptor ring */
3196         memset(tx_ring->desc, 0, tx_ring->size);
3197
3198         tx_ring->next_to_use = 0;
3199         tx_ring->next_to_clean = 0;
3200 }
3201
3202 /**
3203  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3204  * @adapter: board private structure
3205  **/
3206 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3207 {
3208         int i;
3209
3210         for (i = 0; i < adapter->num_tx_queues; i++)
3211                 igb_clean_tx_ring(adapter->tx_ring[i]);
3212 }
3213
3214 /**
3215  * igb_free_rx_resources - Free Rx Resources
3216  * @rx_ring: ring to clean the resources from
3217  *
3218  * Free all receive software resources
3219  **/
3220 void igb_free_rx_resources(struct igb_ring *rx_ring)
3221 {
3222         igb_clean_rx_ring(rx_ring);
3223
3224         vfree(rx_ring->buffer_info);
3225         rx_ring->buffer_info = NULL;
3226
3227         /* if not set, then don't free */
3228         if (!rx_ring->desc)
3229                 return;
3230
3231         dma_free_coherent(rx_ring->dev, rx_ring->size,
3232                           rx_ring->desc, rx_ring->dma);
3233
3234         rx_ring->desc = NULL;
3235 }
3236
3237 /**
3238  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3239  * @adapter: board private structure
3240  *
3241  * Free all receive software resources
3242  **/
3243 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3244 {
3245         int i;
3246
3247         for (i = 0; i < adapter->num_rx_queues; i++)
3248                 igb_free_rx_resources(adapter->rx_ring[i]);
3249 }
3250
3251 /**
3252  * igb_clean_rx_ring - Free Rx Buffers per Queue
3253  * @rx_ring: ring to free buffers from
3254  **/
3255 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3256 {
3257         struct igb_buffer *buffer_info;
3258         unsigned long size;
3259         unsigned int i;
3260
3261         if (!rx_ring->buffer_info)
3262                 return;
3263
3264         /* Free all the Rx ring sk_buffs */
3265         for (i = 0; i < rx_ring->count; i++) {
3266                 buffer_info = &rx_ring->buffer_info[i];
3267                 if (buffer_info->dma) {
3268                         dma_unmap_single(rx_ring->dev,
3269                                          buffer_info->dma,
3270                                          rx_ring->rx_buffer_len,
3271                                          DMA_FROM_DEVICE);
3272                         buffer_info->dma = 0;
3273                 }
3274
3275                 if (buffer_info->skb) {
3276                         dev_kfree_skb(buffer_info->skb);
3277                         buffer_info->skb = NULL;
3278                 }
3279                 if (buffer_info->page_dma) {
3280                         dma_unmap_page(rx_ring->dev,
3281                                        buffer_info->page_dma,
3282                                        PAGE_SIZE / 2,
3283                                        DMA_FROM_DEVICE);
3284                         buffer_info->page_dma = 0;
3285                 }
3286                 if (buffer_info->page) {
3287                         put_page(buffer_info->page);
3288                         buffer_info->page = NULL;
3289                         buffer_info->page_offset = 0;
3290                 }
3291         }
3292
3293         size = sizeof(struct igb_buffer) * rx_ring->count;
3294         memset(rx_ring->buffer_info, 0, size);
3295
3296         /* Zero out the descriptor ring */
3297         memset(rx_ring->desc, 0, rx_ring->size);
3298
3299         rx_ring->next_to_clean = 0;
3300         rx_ring->next_to_use = 0;
3301 }
3302
3303 /**
3304  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3305  * @adapter: board private structure
3306  **/
3307 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3308 {
3309         int i;
3310
3311         for (i = 0; i < adapter->num_rx_queues; i++)
3312                 igb_clean_rx_ring(adapter->rx_ring[i]);
3313 }
3314
3315 /**
3316  * igb_set_mac - Change the Ethernet Address of the NIC
3317  * @netdev: network interface device structure
3318  * @p: pointer to an address structure
3319  *
3320  * Returns 0 on success, negative on failure
3321  **/
3322 static int igb_set_mac(struct net_device *netdev, void *p)
3323 {
3324         struct igb_adapter *adapter = netdev_priv(netdev);
3325         struct e1000_hw *hw = &adapter->hw;
3326         struct sockaddr *addr = p;
3327
3328         if (!is_valid_ether_addr(addr->sa_data))
3329                 return -EADDRNOTAVAIL;
3330
3331         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3332         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3333
3334         /* set the correct pool for the new PF MAC address in entry 0 */
3335         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3336                          adapter->vfs_allocated_count);
3337
3338         return 0;
3339 }
3340
3341 /**
3342  * igb_write_mc_addr_list - write multicast addresses to MTA
3343  * @netdev: network interface device structure
3344  *
3345  * Writes multicast address list to the MTA hash table.
3346  * Returns: -ENOMEM on failure
3347  *                0 on no addresses written
3348  *                X on writing X addresses to MTA
3349  **/
3350 static int igb_write_mc_addr_list(struct net_device *netdev)
3351 {
3352         struct igb_adapter *adapter = netdev_priv(netdev);
3353         struct e1000_hw *hw = &adapter->hw;
3354         struct netdev_hw_addr *ha;
3355         u8  *mta_list;
3356         int i;
3357
3358         if (netdev_mc_empty(netdev)) {
3359                 /* nothing to program, so clear mc list */
3360                 igb_update_mc_addr_list(hw, NULL, 0);
3361                 igb_restore_vf_multicasts(adapter);
3362                 return 0;
3363         }
3364
3365         mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3366         if (!mta_list)
3367                 return -ENOMEM;
3368
3369         /* The shared function expects a packed array of only addresses. */
3370         i = 0;
3371         netdev_for_each_mc_addr(ha, netdev)
3372                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3373
3374         igb_update_mc_addr_list(hw, mta_list, i);
3375         kfree(mta_list);
3376
3377         return netdev_mc_count(netdev);
3378 }
3379
3380 /**
3381  * igb_write_uc_addr_list - write unicast addresses to RAR table
3382  * @netdev: network interface device structure
3383  *
3384  * Writes unicast address list to the RAR table.
3385  * Returns: -ENOMEM on failure/insufficient address space
3386  *                0 on no addresses written
3387  *                X on writing X addresses to the RAR table
3388  **/
3389 static int igb_write_uc_addr_list(struct net_device *netdev)
3390 {
3391         struct igb_adapter *adapter = netdev_priv(netdev);
3392         struct e1000_hw *hw = &adapter->hw;
3393         unsigned int vfn = adapter->vfs_allocated_count;
3394         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3395         int count = 0;
3396
3397         /* return ENOMEM indicating insufficient memory for addresses */
3398         if (netdev_uc_count(netdev) > rar_entries)
3399                 return -ENOMEM;
3400
3401         if (!netdev_uc_empty(netdev) && rar_entries) {
3402                 struct netdev_hw_addr *ha;
3403
3404                 netdev_for_each_uc_addr(ha, netdev) {
3405                         if (!rar_entries)
3406                                 break;
3407                         igb_rar_set_qsel(adapter, ha->addr,
3408                                          rar_entries--,
3409                                          vfn);
3410                         count++;
3411                 }
3412         }
3413         /* write the addresses in reverse order to avoid write combining */
3414         for (; rar_entries > 0 ; rar_entries--) {
3415                 wr32(E1000_RAH(rar_entries), 0);
3416                 wr32(E1000_RAL(rar_entries), 0);
3417         }
3418         wrfl();
3419
3420         return count;
3421 }
3422
3423 /**
3424  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3425  * @netdev: network interface device structure
3426  *
3427  * The set_rx_mode entry point is called whenever the unicast or multicast
3428  * address lists or the network interface flags are updated.  This routine is
3429  * responsible for configuring the hardware for proper unicast, multicast,
3430  * promiscuous mode, and all-multi behavior.
3431  **/
3432 static void igb_set_rx_mode(struct net_device *netdev)
3433 {
3434         struct igb_adapter *adapter = netdev_priv(netdev);
3435         struct e1000_hw *hw = &adapter->hw;
3436         unsigned int vfn = adapter->vfs_allocated_count;
3437         u32 rctl, vmolr = 0;
3438         int count;
3439
3440         /* Check for Promiscuous and All Multicast modes */
3441         rctl = rd32(E1000_RCTL);
3442
3443         /* clear the effected bits */
3444         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3445
3446         if (netdev->flags & IFF_PROMISC) {
3447                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3448                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3449         } else {
3450                 if (netdev->flags & IFF_ALLMULTI) {
3451                         rctl |= E1000_RCTL_MPE;
3452                         vmolr |= E1000_VMOLR_MPME;
3453                 } else {
3454                         /*
3455                          * Write addresses to the MTA, if the attempt fails
3456                          * then we should just turn on promiscuous mode so
3457                          * that we can at least receive multicast traffic
3458                          */
3459                         count = igb_write_mc_addr_list(netdev);
3460                         if (count < 0) {
3461                                 rctl |= E1000_RCTL_MPE;
3462                                 vmolr |= E1000_VMOLR_MPME;
3463                         } else if (count) {
3464                                 vmolr |= E1000_VMOLR_ROMPE;
3465                         }
3466                 }
3467                 /*
3468                  * Write addresses to available RAR registers, if there is not
3469                  * sufficient space to store all the addresses then enable
3470                  * unicast promiscuous mode
3471                  */
3472                 count = igb_write_uc_addr_list(netdev);
3473                 if (count < 0) {
3474                         rctl |= E1000_RCTL_UPE;
3475                         vmolr |= E1000_VMOLR_ROPE;
3476                 }
3477                 rctl |= E1000_RCTL_VFE;
3478         }
3479         wr32(E1000_RCTL, rctl);
3480
3481         /*
3482          * In order to support SR-IOV and eventually VMDq it is necessary to set
3483          * the VMOLR to enable the appropriate modes.  Without this workaround
3484          * we will have issues with VLAN tag stripping not being done for frames
3485          * that are only arriving because we are the default pool
3486          */
3487         if (hw->mac.type < e1000_82576)
3488                 return;
3489
3490         vmolr |= rd32(E1000_VMOLR(vfn)) &
3491                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3492         wr32(E1000_VMOLR(vfn), vmolr);
3493         igb_restore_vf_multicasts(adapter);
3494 }
3495
3496 static void igb_check_wvbr(struct igb_adapter *adapter)
3497 {
3498         struct e1000_hw *hw = &adapter->hw;
3499         u32 wvbr = 0;
3500
3501         switch (hw->mac.type) {
3502         case e1000_82576:
3503         case e1000_i350:
3504                 if (!(wvbr = rd32(E1000_WVBR)))
3505                         return;
3506                 break;
3507         default:
3508                 break;
3509         }
3510
3511         adapter->wvbr |= wvbr;
3512 }
3513
3514 #define IGB_STAGGERED_QUEUE_OFFSET 8
3515
3516 static void igb_spoof_check(struct igb_adapter *adapter)
3517 {
3518         int j;
3519
3520         if (!adapter->wvbr)
3521                 return;
3522
3523         for(j = 0; j < adapter->vfs_allocated_count; j++) {
3524                 if (adapter->wvbr & (1 << j) ||
3525                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3526                         dev_warn(&adapter->pdev->dev,
3527                                 "Spoof event(s) detected on VF %d\n", j);
3528                         adapter->wvbr &=
3529                                 ~((1 << j) |
3530                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3531                 }
3532         }
3533 }
3534
3535 /* Need to wait a few seconds after link up to get diagnostic information from
3536  * the phy */
3537 static void igb_update_phy_info(unsigned long data)
3538 {
3539         struct igb_adapter *adapter = (struct igb_adapter *) data;
3540         igb_get_phy_info(&adapter->hw);
3541 }
3542
3543 /**
3544  * igb_has_link - check shared code for link and determine up/down
3545  * @adapter: pointer to driver private info
3546  **/
3547 bool igb_has_link(struct igb_adapter *adapter)
3548 {
3549         struct e1000_hw *hw = &adapter->hw;
3550         bool link_active = false;
3551         s32 ret_val = 0;
3552
3553         /* get_link_status is set on LSC (link status) interrupt or
3554          * rx sequence error interrupt.  get_link_status will stay
3555          * false until the e1000_check_for_link establishes link
3556          * for copper adapters ONLY
3557          */
3558         switch (hw->phy.media_type) {
3559         case e1000_media_type_copper:
3560                 if (hw->mac.get_link_status) {
3561                         ret_val = hw->mac.ops.check_for_link(hw);
3562                         link_active = !hw->mac.get_link_status;
3563                 } else {
3564                         link_active = true;
3565                 }
3566                 break;
3567         case e1000_media_type_internal_serdes:
3568                 ret_val = hw->mac.ops.check_for_link(hw);
3569                 link_active = hw->mac.serdes_has_link;
3570                 break;
3571         default:
3572         case e1000_media_type_unknown:
3573                 break;
3574         }
3575
3576         return link_active;
3577 }
3578
3579 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3580 {
3581         bool ret = false;
3582         u32 ctrl_ext, thstat;
3583
3584         /* check for thermal sensor event on i350, copper only */
3585         if (hw->mac.type == e1000_i350) {
3586                 thstat = rd32(E1000_THSTAT);
3587                 ctrl_ext = rd32(E1000_CTRL_EXT);
3588
3589                 if ((hw->phy.media_type == e1000_media_type_copper) &&
3590                     !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3591                         ret = !!(thstat & event);
3592                 }
3593         }
3594
3595         return ret;
3596 }
3597
3598 /**
3599  * igb_watchdog - Timer Call-back
3600  * @data: pointer to adapter cast into an unsigned long
3601  **/
3602 static void igb_watchdog(unsigned long data)
3603 {
3604         struct igb_adapter *adapter = (struct igb_adapter *)data;
3605         /* Do the rest outside of interrupt context */
3606         schedule_work(&adapter->watchdog_task);
3607 }
3608
/**
 * igb_watchdog_task - deferred half of the watchdog timer
 * @work: the watchdog_task work item embedded in struct igb_adapter
 *
 * Queued by igb_watchdog().  Each run:
 *  - compares igb_has_link() with the netdev carrier state and, on a
 *    transition, logs it, updates the carrier, pings the VFs and arms the
 *    phy_info timer;
 *  - refreshes the statistics under stats64_lock;
 *  - if the carrier is off and a Tx ring still has descriptors in use,
 *    schedules reset_task and returns without re-arming the timer;
 *  - otherwise flags every Tx ring for hang detection, raises a software
 *    interrupt, runs the spoof check and re-arms the watchdog timer about
 *    two seconds out, unless the adapter is marked __IGB_DOWN.
 **/
static void igb_watchdog_task(struct work_struct *work)
{
        struct igb_adapter *adapter = container_of(work,
                                                   struct igb_adapter,
                                                   watchdog_task);
        struct e1000_hw *hw = &adapter->hw;
        struct net_device *netdev = adapter->netdev;
        u32 link;
        int i;

        link = igb_has_link(adapter);
        if (link) {
                /* link is up; act only if the stack still thinks it is down */
                if (!netif_carrier_ok(netdev)) {
                        u32 ctrl;
                        hw->mac.ops.get_speed_and_duplex(hw,
                                                         &adapter->link_speed,
                                                         &adapter->link_duplex);

                        /* CTRL supplies the flow-control bits for the message */
                        ctrl = rd32(E1000_CTRL);
                        /* Links status message must follow this format */
                        printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
                                 "Flow Control: %s\n",
                               netdev->name,
                               adapter->link_speed,
                               adapter->link_duplex == FULL_DUPLEX ?
                                 "Full Duplex" : "Half Duplex",
                               ((ctrl & E1000_CTRL_TFCE) &&
                                (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
                               ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
                               ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));

                        /* check for thermal sensor event */
                        if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
                                printk(KERN_INFO "igb: %s The network adapter "
                                                 "link speed was downshifted "
                                                 "because it overheated.\n",
                                                 netdev->name);
                        }

                        /* adjust timeout factor according to speed/duplex */
                        adapter->tx_timeout_factor = 1;
                        switch (adapter->link_speed) {
                        case SPEED_10:
                                adapter->tx_timeout_factor = 14;
                                break;
                        case SPEED_100:
                                /* maybe add some timeout factor ? */
                                break;
                        }

                        netif_carrier_on(netdev);

                        igb_ping_all_vfs(adapter);
                        igb_check_vf_rate_limit(adapter);

                        /* link state has changed, schedule phy info update */
                        if (!test_bit(__IGB_DOWN, &adapter->state))
                                mod_timer(&adapter->phy_info_timer,
                                          round_jiffies(jiffies + 2 * HZ));
                }
        } else {
                /* link is down; act only if the stack still thinks it is up */
                if (netif_carrier_ok(netdev)) {
                        adapter->link_speed = 0;
                        adapter->link_duplex = 0;

                        /* check for thermal sensor event */
                        if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
                                printk(KERN_ERR "igb: %s The network adapter "
                                                "was stopped because it "
                                                "overheated.\n",
                                                netdev->name);
                        }

                        /* Links status message must follow this format */
                        printk(KERN_INFO "igb: %s NIC Link is Down\n",
                               netdev->name);
                        netif_carrier_off(netdev);

                        igb_ping_all_vfs(adapter);

                        /* link state has changed, schedule phy info update */
                        if (!test_bit(__IGB_DOWN, &adapter->state))
                                mod_timer(&adapter->phy_info_timer,
                                          round_jiffies(jiffies + 2 * HZ));
                }
        }

        /* statistics are refreshed on every run, whatever the link state */
        spin_lock(&adapter->stats64_lock);
        igb_update_stats(adapter, &adapter->stats64);
        spin_unlock(&adapter->stats64_lock);

        for (i = 0; i < adapter->num_tx_queues; i++) {
                struct igb_ring *tx_ring = adapter->tx_ring[i];
                if (!netif_carrier_ok(netdev)) {
                        /* We've lost link, so the controller stops DMA,
                         * but we've got queued Tx work that's never going
                         * to get done, so reset controller to flush Tx.
                         * (Do the reset outside of interrupt context). */
                        if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
                                adapter->tx_timeout_count++;
                                schedule_work(&adapter->reset_task);
                                /* return immediately since reset is imminent */
                                return;
                        }
                }

                /* Force detection of hung controller every watchdog period */
                tx_ring->detect_tx_hung = true;
        }

        /* Cause software interrupt to ensure rx ring is cleaned */
        if (adapter->msix_entries) {
                /* with MSI-X, fire every queue vector by OR-ing their bits */
                u32 eics = 0;
                for (i = 0; i < adapter->num_q_vectors; i++) {
                        struct igb_q_vector *q_vector = adapter->q_vector[i];
                        eics |= q_vector->eims_value;
                }
                wr32(E1000_EICS, eics);
        } else {
                wr32(E1000_ICS, E1000_ICS_RXDMT0);
        }

        /* report any spoof events latched in adapter->wvbr */
        igb_spoof_check(adapter);

        /* Reset the timer */
        if (!test_bit(__IGB_DOWN, &adapter->state))
                mod_timer(&adapter->watchdog_timer,
                          round_jiffies(jiffies + 2 * HZ));
}
3738
/*
 * Interrupt-throttle latency classes used by the dynamic ITR code.
 * Ordered from the most to the least interrupt-intensive class.
 */
enum latency_range {
        lowest_latency,                 /* 0 */
        low_latency,                    /* 1 */
        bulk_latency,                   /* 2 */
        latency_invalid = 255,
};
3745
3746 /**
3747  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3748  *
3749  *      Stores a new ITR value based on strictly on packet size.  This
3750  *      algorithm is less sophisticated than that used in igb_update_itr,
3751  *      due to the difficulty of synchronizing statistics across multiple
3752  *      receive rings.  The divisors and thresholds used by this function
3753  *      were determined based on theoretical maximum wire speed and testing
3754  *      data, in order to minimize response time while increasing bulk
3755  *      throughput.
3756  *      This functionality is controlled by the InterruptThrottleRate module
3757  *      parameter (see igb_param.c)
3758  *      NOTE:  This function is called only when operating in a multiqueue
3759  *             receive environment.
3760  * @q_vector: pointer to q_vector
3761  **/
3762 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3763 {
3764         int new_val = q_vector->itr_val;
3765         int avg_wire_size = 0;
3766         struct igb_adapter *adapter = q_vector->adapter;
3767         struct igb_ring *ring;
3768         unsigned int packets;
3769
3770         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3771          * ints/sec - ITR timer value of 120 ticks.
3772          */
3773         if (adapter->link_speed != SPEED_1000) {
3774                 new_val = 976;
3775                 goto set_itr_val;
3776         }
3777
3778         ring = q_vector->rx_ring;
3779         if (ring) {
3780                 packets = ACCESS_ONCE(ring->total_packets);
3781
3782                 if (packets)
3783                         avg_wire_size = ring->total_bytes / packets;
3784         }
3785
3786         ring = q_vector->tx_ring;
3787         if (ring) {
3788                 packets = ACCESS_ONCE(ring->total_packets);
3789
3790                 if (packets)
3791                         avg_wire_size = max_t(u32, avg_wire_size,
3792                                               ring->total_bytes / packets);
3793         }
3794
3795         /* if avg_wire_size isn't set no work was done */
3796         if (!avg_wire_size)
3797                 goto clear_counts;
3798
3799         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3800         avg_wire_size += 24;
3801
3802         /* Don't starve jumbo frames */
3803         avg_wire_size = min(avg_wire_size, 3000);
3804
3805         /* Give a little boost to mid-size frames */
3806         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3807                 new_val = avg_wire_size / 3;
3808         else
3809                 new_val = avg_wire_size / 2;
3810
3811         /* when in itr mode 3 do not exceed 20K ints/sec */
3812         if (adapter->rx_itr_setting == 3 && new_val < 196)
3813                 new_val = 196;
3814
3815 set_itr_val:
3816         if (new_val != q_vector->itr_val) {
3817                 q_vector->itr_val = new_val;
3818                 q_vector->set_itr = 1;
3819         }
3820 clear_counts:
3821         if (q_vector->rx_ring) {
3822                 q_vector->rx_ring->total_bytes = 0;
3823                 q_vector->rx_ring->total_packets = 0;
3824         }
3825         if (q_vector->tx_ring) {
3826                 q_vector->tx_ring->total_bytes = 0;
3827                 q_vector->tx_ring->total_packets = 0;
3828         }
3829 }
3830
3831 /**
3832  * igb_update_itr - update the dynamic ITR value based on statistics
3833  *      Stores a new ITR value based on packets and byte
3834  *      counts during the last interrupt.  The advantage of per interrupt
3835  *      computation is faster updates and more accurate ITR for the current
3836  *      traffic pattern.  Constants in this function were computed
3837  *      based on theoretical maximum wire speed and thresholds were set based
3838  *      on testing data as well as attempting to minimize response time
3839  *      while increasing bulk throughput.
3840  *      this functionality is controlled by the InterruptThrottleRate module
3841  *      parameter (see igb_param.c)
3842  *      NOTE:  These calculations are only valid when operating in a single-
3843  *             queue environment.
3844  * @adapter: pointer to adapter
3845  * @itr_setting: current q_vector->itr_val
3846  * @packets: the number of packets during this measurement interval
3847  * @bytes: the number of bytes during this measurement interval
3848  **/
3849 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3850                                    int packets, int bytes)
3851 {
3852         unsigned int retval = itr_setting;
3853
3854         if (packets == 0)
3855                 goto update_itr_done;
3856
3857         switch (itr_setting) {
3858         case lowest_latency:
3859                 /* handle TSO and jumbo frames */
3860                 if (bytes/packets > 8000)
3861                         retval = bulk_latency;
3862                 else if ((packets < 5) && (bytes > 512))
3863                         retval = low_latency;
3864                 break;
3865         case low_latency:  /* 50 usec aka 20000 ints/s */
3866                 if (bytes > 10000) {
3867                         /* this if handles the TSO accounting */
3868                         if (bytes/packets > 8000) {
3869                                 retval = bulk_latency;
3870                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3871                                 retval = bulk_latency;
3872                         } else if ((packets > 35)) {
3873                                 retval = lowest_latency;
3874                         }
3875                 } else if (bytes/packets > 2000) {
3876                         retval = bulk_latency;
3877                 } else if (packets <= 2 && bytes < 512) {
3878                         retval = lowest_latency;
3879                 }
3880                 break;
3881         case bulk_latency: /* 250 usec aka 4000 ints/s */
3882                 if (bytes > 25000) {
3883                         if (packets > 35)
3884                                 retval = low_latency;
3885                 } else if (bytes < 1500) {
3886                         retval = low_latency;
3887                 }
3888                 break;
3889         }
3890
3891 update_itr_done:
3892         return retval;
3893 }
3894
3895 static void igb_set_itr(struct igb_adapter *adapter)
3896 {
3897         struct igb_q_vector *q_vector = adapter->q_vector[0];
3898         u16 current_itr;
3899         u32 new_itr = q_vector->itr_val;
3900
3901         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3902         if (adapter->link_speed != SPEED_1000) {
3903                 current_itr = 0;
3904                 new_itr = 4000;
3905                 goto set_itr_now;
3906         }
3907
3908         adapter->rx_itr = igb_update_itr(adapter,
3909                                     adapter->rx_itr,
3910                                     q_vector->rx_ring->total_packets,
3911                                     q_vector->rx_ring->total_bytes);
3912
3913         adapter->tx_itr = igb_update_itr(adapter,
3914                                     adapter->tx_itr,
3915                                     q_vector->tx_ring->total_packets,
3916                                     q_vector->tx_ring->total_bytes);
3917         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3918
3919         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3920         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3921                 current_itr = low_latency;
3922
3923         switch (current_itr) {
3924         /* counts and packets in update_itr are dependent on these numbers */
3925         case lowest_latency:
3926                 new_itr = 56;  /* aka 70,000 ints/sec */
3927                 break;
3928         case low_latency:
3929                 new_itr = 196; /* aka 20,000 ints/sec */
3930                 break;
3931         case bulk_latency:
3932                 new_itr = 980; /* aka 4,000 ints/sec */
3933                 break;
3934         default:
3935                 break;
3936         }
3937
3938 set_itr_now:
3939         q_vector->rx_ring->total_bytes = 0;
3940         q_vector->rx_ring->total_packets = 0;
3941         q_vector->tx_ring->total_bytes = 0;
3942         q_vector->tx_ring->total_packets = 0;
3943
3944         if (new_itr != q_vector->itr_val) {
3945                 /* this attempts to bias the interrupt rate towards Bulk
3946                  * by adding intermediate steps when interrupt rate is
3947                  * increasing */
3948                 new_itr = new_itr > q_vector->itr_val ?
3949                              max((new_itr * q_vector->itr_val) /
3950                                  (new_itr + (q_vector->itr_val >> 2)),
3951                                  new_itr) :
3952                              new_itr;
3953                 /* Don't write the value here; it resets the adapter's
3954                  * internal timer, and causes us to delay far longer than
3955                  * we should between interrupts.  Instead, we write the ITR
3956                  * value at the beginning of the next interrupt so the timing
3957                  * ends up being correct.
3958                  */
3959                 q_vector->itr_val = new_itr;
3960                 q_vector->set_itr = 1;
3961         }
3962 }
3963
3964 #define IGB_TX_FLAGS_CSUM               0x00000001 /* checksum-offload context was queued for a CHECKSUM_PARTIAL skb */
3965 #define IGB_TX_FLAGS_VLAN               0x00000002 /* skb carries a VLAN tag to insert */
3966 #define IGB_TX_FLAGS_TSO                0x00000004 /* frame is segmented by hardware (TSO) */
3967 #define IGB_TX_FLAGS_IPV4               0x00000008 /* skb->protocol is ETH_P_IP */
3968 #define IGB_TX_FLAGS_TSTAMP             0x00000010 /* hardware transmit timestamp requested */
3969 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000 /* upper 16 bits of tx_flags hold the VLAN tag */
3970 #define IGB_TX_FLAGS_VLAN_SHIFT                 16 /* left shift that places the VLAN tag in tx_flags */
3971
3972 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3973                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3974 {
3975         struct e1000_adv_tx_context_desc *context_desc;
3976         unsigned int i;
3977         int err;
3978         struct igb_buffer *buffer_info;
3979         u32 info = 0, tu_cmd = 0;
3980         u32 mss_l4len_idx;
3981         u8 l4len;
3982
3983         if (skb_header_cloned(skb)) {
3984                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3985                 if (err)
3986                         return err;
3987         }
3988
3989         l4len = tcp_hdrlen(skb);
3990         *hdr_len += l4len;
3991
3992         if (skb->protocol == htons(ETH_P_IP)) {
3993                 struct iphdr *iph = ip_hdr(skb);
3994                 iph->tot_len = 0;
3995                 iph->check = 0;
3996                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3997                                                          iph->daddr, 0,
3998                                                          IPPROTO_TCP,
3999                                                          0);
4000         } else if (skb_is_gso_v6(skb)) {
4001                 ipv6_hdr(skb)->payload_len = 0;
4002                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4003                                                        &ipv6_hdr(skb)->daddr,
4004                                                        0, IPPROTO_TCP, 0);
4005         }
4006
4007         i = tx_ring->next_to_use;
4008
4009         buffer_info = &tx_ring->buffer_info[i];
4010         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4011         /* VLAN MACLEN IPLEN */
4012         if (tx_flags & IGB_TX_FLAGS_VLAN)
4013                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4014         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4015         *hdr_len += skb_network_offset(skb);
4016         info |= skb_network_header_len(skb);
4017         *hdr_len += skb_network_header_len(skb);
4018         context_desc->vlan_macip_lens = cpu_to_le32(info);
4019
4020         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4021         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4022
4023         if (skb->protocol == htons(ETH_P_IP))
4024                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4025         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4026
4027         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4028
4029         /* MSS L4LEN IDX */
4030         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
4031         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
4032
4033         /* For 82575, context index must be unique per ring. */
4034         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4035                 mss_l4len_idx |= tx_ring->reg_idx << 4;
4036
4037         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4038         context_desc->seqnum_seed = 0;
4039
4040         buffer_info->time_stamp = jiffies;
4041         buffer_info->next_to_watch = i;
4042         buffer_info->dma = 0;
4043         i++;
4044         if (i == tx_ring->count)
4045                 i = 0;
4046
4047         tx_ring->next_to_use = i;
4048
4049         return true;
4050 }
4051
4052 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
4053                                    struct sk_buff *skb, u32 tx_flags)
4054 {
4055         struct e1000_adv_tx_context_desc *context_desc;
4056         struct device *dev = tx_ring->dev;
4057         struct igb_buffer *buffer_info;
4058         u32 info = 0, tu_cmd = 0;
4059         unsigned int i;
4060
4061         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4062             (tx_flags & IGB_TX_FLAGS_VLAN)) {
4063                 i = tx_ring->next_to_use;
4064                 buffer_info = &tx_ring->buffer_info[i];
4065                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4066
4067                 if (tx_flags & IGB_TX_FLAGS_VLAN)
4068                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4069
4070                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4071                 if (skb->ip_summed == CHECKSUM_PARTIAL)
4072                         info |= skb_network_header_len(skb);
4073
4074                 context_desc->vlan_macip_lens = cpu_to_le32(info);
4075
4076                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4077
4078                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
4079                         __be16 protocol;
4080
4081                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4082                                 const struct vlan_ethhdr *vhdr =
4083                                           (const struct vlan_ethhdr*)skb->data;
4084
4085                                 protocol = vhdr->h_vlan_encapsulated_proto;
4086                         } else {
4087                                 protocol = skb->protocol;
4088                         }
4089
4090                         switch (protocol) {
4091                         case cpu_to_be16(ETH_P_IP):
4092                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4093                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4094                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4095                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4096                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4097                                 break;
4098                         case cpu_to_be16(ETH_P_IPV6):
4099                                 /* XXX what about other V6 headers?? */
4100                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4101                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4102                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4103                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4104                                 break;
4105                         default:
4106                                 if (unlikely(net_ratelimit()))
4107                                         dev_warn(dev,
4108                                             "partial checksum but proto=%x!\n",
4109                                             skb->protocol);
4110                                 break;
4111                         }
4112                 }
4113
4114                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4115                 context_desc->seqnum_seed = 0;
4116                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4117                         context_desc->mss_l4len_idx =
4118                                 cpu_to_le32(tx_ring->reg_idx << 4);
4119
4120                 buffer_info->time_stamp = jiffies;
4121                 buffer_info->next_to_watch = i;
4122                 buffer_info->dma = 0;
4123
4124                 i++;
4125                 if (i == tx_ring->count)
4126                         i = 0;
4127                 tx_ring->next_to_use = i;
4128
4129                 return true;
4130         }
4131         return false;
4132 }
4133
4134 #define IGB_MAX_TXD_PWR 16 /* log2 of IGB_MAX_DATA_PER_TXD */
4135 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR) /* each mapped buffer must be shorter than this (enforced by BUG_ON in igb_tx_map_adv) */
4136
4137 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
4138                                  unsigned int first)
4139 {
4140         struct igb_buffer *buffer_info;
4141         struct device *dev = tx_ring->dev;
4142         unsigned int hlen = skb_headlen(skb);
4143         unsigned int count = 0, i;
4144         unsigned int f;
4145         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4146
4147         i = tx_ring->next_to_use;
4148
4149         buffer_info = &tx_ring->buffer_info[i];
4150         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4151         buffer_info->length = hlen;
4152         /* set time_stamp *before* dma to help avoid a possible race */
4153         buffer_info->time_stamp = jiffies;
4154         buffer_info->next_to_watch = i;
4155         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4156                                           DMA_TO_DEVICE);
4157         if (dma_mapping_error(dev, buffer_info->dma))
4158                 goto dma_error;
4159
4160         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4161                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4162                 unsigned int len = frag->size;
4163
4164                 count++;
4165                 i++;
4166                 if (i == tx_ring->count)
4167                         i = 0;
4168
4169                 buffer_info = &tx_ring->buffer_info[i];
4170                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4171                 buffer_info->length = len;
4172                 buffer_info->time_stamp = jiffies;
4173                 buffer_info->next_to_watch = i;
4174                 buffer_info->mapped_as_page = true;
4175                 buffer_info->dma = dma_map_page(dev,
4176                                                 frag->page,
4177                                                 frag->page_offset,
4178                                                 len,
4179                                                 DMA_TO_DEVICE);
4180                 if (dma_mapping_error(dev, buffer_info->dma))
4181                         goto dma_error;
4182
4183         }
4184
4185         tx_ring->buffer_info[i].skb = skb;
4186         tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4187         /* multiply data chunks by size of headers */
4188         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4189         tx_ring->buffer_info[i].gso_segs = gso_segs;
4190         tx_ring->buffer_info[first].next_to_watch = i;
4191
4192         return ++count;
4193
4194 dma_error:
4195         dev_err(dev, "TX DMA map failed\n");
4196
4197         /* clear timestamp and dma mappings for failed buffer_info mapping */
4198         buffer_info->dma = 0;
4199         buffer_info->time_stamp = 0;
4200         buffer_info->length = 0;
4201         buffer_info->next_to_watch = 0;
4202         buffer_info->mapped_as_page = false;
4203
4204         /* clear timestamp and dma mappings for remaining portion of packet */
4205         while (count--) {
4206                 if (i == 0)
4207                         i = tx_ring->count;
4208                 i--;
4209                 buffer_info = &tx_ring->buffer_info[i];
4210                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4211         }
4212
4213         return 0;
4214 }
4215
4216 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4217                                     u32 tx_flags, int count, u32 paylen,
4218                                     u8 hdr_len)
4219 {
4220         union e1000_adv_tx_desc *tx_desc;
4221         struct igb_buffer *buffer_info;
4222         u32 olinfo_status = 0, cmd_type_len;
4223         unsigned int i = tx_ring->next_to_use;
4224
4225         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4226                         E1000_ADVTXD_DCMD_DEXT);
4227
4228         if (tx_flags & IGB_TX_FLAGS_VLAN)
4229                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4230
4231         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4232                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4233
4234         if (tx_flags & IGB_TX_FLAGS_TSO) {
4235                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4236
4237                 /* insert tcp checksum */
4238                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4239
4240                 /* insert ip checksum */
4241                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4242                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4243
4244         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4245                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4246         }
4247
4248         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4249             (tx_flags & (IGB_TX_FLAGS_CSUM |
4250                          IGB_TX_FLAGS_TSO |
4251                          IGB_TX_FLAGS_VLAN)))
4252                 olinfo_status |= tx_ring->reg_idx << 4;
4253
4254         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4255
4256         do {
4257                 buffer_info = &tx_ring->buffer_info[i];
4258                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4259                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4260                 tx_desc->read.cmd_type_len =
4261                         cpu_to_le32(cmd_type_len | buffer_info->length);
4262                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4263                 count--;
4264                 i++;
4265                 if (i == tx_ring->count)
4266                         i = 0;
4267         } while (count > 0);
4268
4269         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4270         /* Force memory writes to complete before letting h/w
4271          * know there are new descriptors to fetch.  (Only
4272          * applicable for weak-ordered memory model archs,
4273          * such as IA-64). */
4274         wmb();
4275
4276         tx_ring->next_to_use = i;
4277         writel(i, tx_ring->tail);
4278         /* we need this if more than one processor can write to our tail
4279          * at a time, it syncronizes IO on IA64/Altix systems */
4280         mmiowb();
4281 }
4282
4283 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4284 {
4285         struct net_device *netdev = tx_ring->netdev;
4286
4287         netif_stop_subqueue(netdev, tx_ring->queue_index);
4288
4289         /* Herbert's original patch had:
4290          *  smp_mb__after_netif_stop_queue();
4291          * but since that doesn't exist yet, just open code it. */
4292         smp_mb();
4293
4294         /* We need to check again in a case another CPU has just
4295          * made room available. */
4296         if (igb_desc_unused(tx_ring) < size)
4297                 return -EBUSY;
4298
4299         /* A reprieve! */
4300         netif_wake_subqueue(netdev, tx_ring->queue_index);
4301
4302         u64_stats_update_begin(&tx_ring->tx_syncp2);
4303         tx_ring->tx_stats.restart_queue2++;
4304         u64_stats_update_end(&tx_ring->tx_syncp2);
4305
4306         return 0;
4307 }
4308
/**
 * igb_maybe_stop_tx - make sure the ring has room for @size descriptors
 * @tx_ring: ring to check
 * @size: number of free descriptors required
 *
 * Returns 0 when enough descriptors are unused; otherwise returns the
 * result of __igb_maybe_stop_tx().
 **/
static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
{
	/* slow path only when the ring looks too full */
	if (igb_desc_unused(tx_ring) < size)
		return __igb_maybe_stop_tx(tx_ring, size);

	return 0;
}
4315
4316 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4317                                     struct igb_ring *tx_ring)
4318 {
4319         int tso = 0, count;
4320         u32 tx_flags = 0;
4321         u16 first;
4322         u8 hdr_len = 0;
4323
4324         /* need: 1 descriptor per page,
4325          *       + 2 desc gap to keep tail from touching head,
4326          *       + 1 desc for skb->data,
4327          *       + 1 desc for context descriptor,
4328          * otherwise try next time */
4329         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4330                 /* this is a hard error */
4331                 return NETDEV_TX_BUSY;
4332         }
4333
4334         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4335                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4336                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4337         }
4338
4339         if (vlan_tx_tag_present(skb)) {
4340                 tx_flags |= IGB_TX_FLAGS_VLAN;
4341                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4342         }
4343
4344         if (skb->protocol == htons(ETH_P_IP))
4345                 tx_flags |= IGB_TX_FLAGS_IPV4;
4346
4347         first = tx_ring->next_to_use;
4348         if (skb_is_gso(skb)) {
4349                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4350
4351                 if (tso < 0) {
4352                         dev_kfree_skb_any(skb);
4353                         return NETDEV_TX_OK;
4354                 }
4355         }
4356
4357         if (tso)
4358                 tx_flags |= IGB_TX_FLAGS_TSO;
4359         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4360                  (skb->ip_summed == CHECKSUM_PARTIAL))
4361                 tx_flags |= IGB_TX_FLAGS_CSUM;
4362
4363         /*
4364          * count reflects descriptors mapped, if 0 or less then mapping error
4365          * has occurred and we need to rewind the descriptor queue
4366          */
4367         count = igb_tx_map_adv(tx_ring, skb, first);
4368         if (!count) {
4369                 dev_kfree_skb_any(skb);
4370                 tx_ring->buffer_info[first].time_stamp = 0;
4371                 tx_ring->next_to_use = first;
4372                 return NETDEV_TX_OK;
4373         }
4374
4375         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4376
4377         /* Make sure there is space in the ring for the next send. */
4378         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4379
4380         return NETDEV_TX_OK;
4381 }
4382
4383 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4384                                       struct net_device *netdev)
4385 {
4386         struct igb_adapter *adapter = netdev_priv(netdev);
4387         struct igb_ring *tx_ring;
4388         int r_idx = 0;
4389
4390         if (test_bit(__IGB_DOWN, &adapter->state)) {
4391                 dev_kfree_skb_any(skb);
4392                 return NETDEV_TX_OK;
4393         }
4394
4395         if (skb->len <= 0) {
4396                 dev_kfree_skb_any(skb);
4397                 return NETDEV_TX_OK;
4398         }
4399
4400         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4401         tx_ring = adapter->multi_tx_table[r_idx];
4402
4403         /* This goes back to the question of how to logically map a tx queue
4404          * to a flow.  Right now, performance is impacted slightly negatively
4405          * if using multiple tx queues.  If the stack breaks away from a
4406          * single qdisc implementation, we can look at this again. */
4407         return igb_xmit_frame_ring_adv(skb, tx_ring);
4408 }
4409
4410 /**
4411  * igb_tx_timeout - Respond to a Tx Hang
4412  * @netdev: network interface device structure
4413  **/
4414 static void igb_tx_timeout(struct net_device *netdev)
4415 {
4416         struct igb_adapter *adapter = netdev_priv(netdev);
4417         struct e1000_hw *hw = &adapter->hw;
4418
4419         /* Do the reset outside of interrupt context */
4420         adapter->tx_timeout_count++;
4421
4422         if (hw->mac.type == e1000_82580)
4423                 hw->dev_spec._82575.global_device_reset = true;
4424
4425         schedule_work(&adapter->reset_task);
4426         wr32(E1000_EICS,
4427              (adapter->eims_enable_mask & ~adapter->eims_other));
4428 }
4429
4430 static void igb_reset_task(struct work_struct *work)
4431 {
4432         struct igb_adapter *adapter;
4433         adapter = container_of(work, struct igb_adapter, reset_task);
4434
4435         igb_dump(adapter);
4436         netdev_err(adapter->netdev, "Reset adapter\n");
4437         igb_reinit_locked(adapter);
4438 }
4439
4440 /**
4441  * igb_get_stats64 - Get System Network Statistics
4442  * @netdev: network interface device structure
4443  * @stats: rtnl_link_stats64 pointer
4444  *
4445  **/
4446 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4447                                                  struct rtnl_link_stats64 *stats)
4448 {
4449         struct igb_adapter *adapter = netdev_priv(netdev);
4450
4451         spin_lock(&adapter->stats64_lock);
4452         igb_update_stats(adapter, &adapter->stats64);
4453         memcpy(stats, &adapter->stats64, sizeof(*stats));
4454         spin_unlock(&adapter->stats64_lock);
4455
4456         return stats;
4457 }
4458
4459 /**
4460  * igb_change_mtu - Change the Maximum Transfer Unit
4461  * @netdev: network interface device structure
4462  * @new_mtu: new value for maximum frame size
4463  *
4464  * Returns 0 on success, negative on failure
4465  **/
4466 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4467 {
4468         struct igb_adapter *adapter = netdev_priv(netdev);
4469         struct pci_dev *pdev = adapter->pdev;
4470         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4471         u32 rx_buffer_len, i;
4472
4473         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4474                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4475                 return -EINVAL;
4476         }
4477
4478         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4479                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4480                 return -EINVAL;
4481         }
4482
4483         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4484                 msleep(1);
4485
4486         /* igb_down has a dependency on max_frame_size */
4487         adapter->max_frame_size = max_frame;
4488
4489         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4490          * means we reserve 2 more, this pushes us to allocate from the next
4491          * larger slab size.
4492          * i.e. RXBUFFER_2048 --> size-4096 slab
4493          */
4494
4495         if (adapter->hw.mac.type == e1000_82580)
4496                 max_frame += IGB_TS_HDR_LEN;
4497
4498         if (max_frame <= IGB_RXBUFFER_1024)
4499                 rx_buffer_len = IGB_RXBUFFER_1024;
4500         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4501                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4502         else
4503                 rx_buffer_len = IGB_RXBUFFER_128;
4504
4505         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4506              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4507                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4508
4509         if ((adapter->hw.mac.type == e1000_82580) &&
4510             (rx_buffer_len == IGB_RXBUFFER_128))
4511                 rx_buffer_len += IGB_RXBUFFER_64;
4512
4513         if (netif_running(netdev))
4514                 igb_down(adapter);
4515
4516         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4517                  netdev->mtu, new_mtu);
4518         netdev->mtu = new_mtu;
4519
4520         for (i = 0; i < adapter->num_rx_queues; i++)
4521                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4522
4523         if (netif_running(netdev))
4524                 igb_up(adapter);
4525         else
4526                 igb_reset(adapter);
4527
4528         clear_bit(__IGB_RESETTING, &adapter->state);
4529
4530         return 0;
4531 }
4532
4533 /**
4534  * igb_update_stats - Update the board statistics counters
4535  * @adapter: board private structure
4536  **/
4537
4538 void igb_update_stats(struct igb_adapter *adapter,
4539                       struct rtnl_link_stats64 *net_stats)
4540 {
4541         struct e1000_hw *hw = &adapter->hw;
4542         struct pci_dev *pdev = adapter->pdev;
4543         u32 reg, mpc;
4544         u16 phy_tmp;
4545         int i;
4546         u64 bytes, packets;
4547         unsigned int start;
4548         u64 _bytes, _packets;
4549
4550 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4551
4552         /*
4553          * Prevent stats update while adapter is being reset, or if the pci
4554          * connection is down.
4555          */
4556         if (adapter->link_speed == 0)
4557                 return;
4558         if (pci_channel_offline(pdev))
4559                 return;
4560
4561         bytes = 0;
4562         packets = 0;
4563         for (i = 0; i < adapter->num_rx_queues; i++) {
4564                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4565                 struct igb_ring *ring = adapter->rx_ring[i];
4566
4567                 ring->rx_stats.drops += rqdpc_tmp;
4568                 net_stats->rx_fifo_errors += rqdpc_tmp;
4569
4570                 do {
4571                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4572                         _bytes = ring->rx_stats.bytes;
4573                         _packets = ring->rx_stats.packets;
4574                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4575                 bytes += _bytes;
4576                 packets += _packets;
4577         }
4578
4579         net_stats->rx_bytes = bytes;
4580         net_stats->rx_packets = packets;
4581
4582         bytes = 0;
4583         packets = 0;
4584         for (i = 0; i < adapter->num_tx_queues; i++) {
4585                 struct igb_ring *ring = adapter->tx_ring[i];
4586                 do {
4587                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4588                         _bytes = ring->tx_stats.bytes;
4589                         _packets = ring->tx_stats.packets;
4590                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4591                 bytes += _bytes;
4592                 packets += _packets;
4593         }
4594         net_stats->tx_bytes = bytes;
4595         net_stats->tx_packets = packets;
4596
4597         /* read stats registers */
4598         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4599         adapter->stats.gprc += rd32(E1000_GPRC);
4600         adapter->stats.gorc += rd32(E1000_GORCL);
4601         rd32(E1000_GORCH); /* clear GORCL */
4602         adapter->stats.bprc += rd32(E1000_BPRC);
4603         adapter->stats.mprc += rd32(E1000_MPRC);
4604         adapter->stats.roc += rd32(E1000_ROC);
4605
4606         adapter->stats.prc64 += rd32(E1000_PRC64);
4607         adapter->stats.prc127 += rd32(E1000_PRC127);
4608         adapter->stats.prc255 += rd32(E1000_PRC255);
4609         adapter->stats.prc511 += rd32(E1000_PRC511);
4610         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4611         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4612         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4613         adapter->stats.sec += rd32(E1000_SEC);
4614
4615         mpc = rd32(E1000_MPC);
4616         adapter->stats.mpc += mpc;
4617         net_stats->rx_fifo_errors += mpc;
4618         adapter->stats.scc += rd32(E1000_SCC);
4619         adapter->stats.ecol += rd32(E1000_ECOL);
4620         adapter->stats.mcc += rd32(E1000_MCC);
4621         adapter->stats.latecol += rd32(E1000_LATECOL);
4622         adapter->stats.dc += rd32(E1000_DC);
4623         adapter->stats.rlec += rd32(E1000_RLEC);
4624         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4625         adapter->stats.xontxc += rd32(E1000_XONTXC);
4626         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4627         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4628         adapter->stats.fcruc += rd32(E1000_FCRUC);
4629         adapter->stats.gptc += rd32(E1000_GPTC);
4630         adapter->stats.gotc += rd32(E1000_GOTCL);
4631         rd32(E1000_GOTCH); /* clear GOTCL */
4632         adapter->stats.rnbc += rd32(E1000_RNBC);
4633         adapter->stats.ruc += rd32(E1000_RUC);
4634         adapter->stats.rfc += rd32(E1000_RFC);
4635         adapter->stats.rjc += rd32(E1000_RJC);
4636         adapter->stats.tor += rd32(E1000_TORH);
4637         adapter->stats.tot += rd32(E1000_TOTH);
4638         adapter->stats.tpr += rd32(E1000_TPR);
4639
4640         adapter->stats.ptc64 += rd32(E1000_PTC64);
4641         adapter->stats.ptc127 += rd32(E1000_PTC127);
4642         adapter->stats.ptc255 += rd32(E1000_PTC255);
4643         adapter->stats.ptc511 += rd32(E1000_PTC511);
4644         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4645         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4646
4647         adapter->stats.mptc += rd32(E1000_MPTC);
4648         adapter->stats.bptc += rd32(E1000_BPTC);
4649
4650         adapter->stats.tpt += rd32(E1000_TPT);
4651         adapter->stats.colc += rd32(E1000_COLC);
4652
4653         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4654         /* read internal phy specific stats */
4655         reg = rd32(E1000_CTRL_EXT);
4656         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4657                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4658                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4659         }
4660
4661         adapter->stats.tsctc += rd32(E1000_TSCTC);
4662         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4663
4664         adapter->stats.iac += rd32(E1000_IAC);
4665         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4666         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4667         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4668         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4669         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4670         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4671         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4672         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4673
4674         /* Fill out the OS statistics structure */
4675         net_stats->multicast = adapter->stats.mprc;
4676         net_stats->collisions = adapter->stats.colc;
4677
4678         /* Rx Errors */
4679
4680         /* RLEC on some newer hardware can be incorrect so build
4681          * our own version based on RUC and ROC */
4682         net_stats->rx_errors = adapter->stats.rxerrc +
4683                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4684                 adapter->stats.ruc + adapter->stats.roc +
4685                 adapter->stats.cexterr;
4686         net_stats->rx_length_errors = adapter->stats.ruc +
4687                                       adapter->stats.roc;
4688         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4689         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4690         net_stats->rx_missed_errors = adapter->stats.mpc;
4691
4692         /* Tx Errors */
4693         net_stats->tx_errors = adapter->stats.ecol +
4694                                adapter->stats.latecol;
4695         net_stats->tx_aborted_errors = adapter->stats.ecol;
4696         net_stats->tx_window_errors = adapter->stats.latecol;
4697         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4698
4699         /* Tx Dropped needs to be maintained elsewhere */
4700
4701         /* Phy Stats */
4702         if (hw->phy.media_type == e1000_media_type_copper) {
4703                 if ((adapter->link_speed == SPEED_1000) &&
4704                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4705                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4706                         adapter->phy_stats.idle_errors += phy_tmp;
4707                 }
4708         }
4709
4710         /* Management Stats */
4711         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4712         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4713         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4714
4715         /* OS2BMC Stats */
4716         reg = rd32(E1000_MANC);
4717         if (reg & E1000_MANC_EN_BMC2OS) {
4718                 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4719                 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4720                 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4721                 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4722         }
4723 }
4724
4725 static irqreturn_t igb_msix_other(int irq, void *data)
4726 {
4727         struct igb_adapter *adapter = data;
4728         struct e1000_hw *hw = &adapter->hw;
4729         u32 icr = rd32(E1000_ICR);
4730         /* reading ICR causes bit 31 of EICR to be cleared */
4731
4732         if (icr & E1000_ICR_DRSTA)
4733                 schedule_work(&adapter->reset_task);
4734
4735         if (icr & E1000_ICR_DOUTSYNC) {
4736                 /* HW is reporting DMA is out of sync */
4737                 adapter->stats.doosync++;
4738                 /* The DMA Out of Sync is also indication of a spoof event
4739                  * in IOV mode. Check the Wrong VM Behavior register to
4740                  * see if it is really a spoof event. */
4741                 igb_check_wvbr(adapter);
4742         }
4743
4744         /* Check for a mailbox event */
4745         if (icr & E1000_ICR_VMMB)
4746                 igb_msg_task(adapter);
4747
4748         if (icr & E1000_ICR_LSC) {
4749                 hw->mac.get_link_status = 1;
4750                 /* guard against interrupt when we're going down */
4751                 if (!test_bit(__IGB_DOWN, &adapter->state))
4752                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4753         }
4754
4755         if (adapter->vfs_allocated_count)
4756                 wr32(E1000_IMS, E1000_IMS_LSC |
4757                                 E1000_IMS_VMMB |
4758                                 E1000_IMS_DOUTSYNC);
4759         else
4760                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4761         wr32(E1000_EIMS, adapter->eims_other);
4762
4763         return IRQ_HANDLED;
4764 }
4765
4766 static void igb_write_itr(struct igb_q_vector *q_vector)
4767 {
4768         struct igb_adapter *adapter = q_vector->adapter;
4769         u32 itr_val = q_vector->itr_val & 0x7FFC;
4770
4771         if (!q_vector->set_itr)
4772                 return;
4773
4774         if (!itr_val)
4775                 itr_val = 0x4;
4776
4777         if (adapter->hw.mac.type == e1000_82575)
4778                 itr_val |= itr_val << 16;
4779         else
4780                 itr_val |= 0x8000000;
4781
4782         writel(itr_val, q_vector->itr_register);
4783         q_vector->set_itr = 0;
4784 }
4785
4786 static irqreturn_t igb_msix_ring(int irq, void *data)
4787 {
4788         struct igb_q_vector *q_vector = data;
4789
4790         /* Write the ITR value calculated from the previous interrupt. */
4791         igb_write_itr(q_vector);
4792
4793         napi_schedule(&q_vector->napi);
4794
4795         return IRQ_HANDLED;
4796 }
4797
4798 #ifdef CONFIG_IGB_DCA
4799 static void igb_update_dca(struct igb_q_vector *q_vector)
4800 {
4801         struct igb_adapter *adapter = q_vector->adapter;
4802         struct e1000_hw *hw = &adapter->hw;
4803         int cpu = get_cpu();
4804
4805         if (q_vector->cpu == cpu)
4806                 goto out_no_update;
4807
4808         if (q_vector->tx_ring) {
4809                 int q = q_vector->tx_ring->reg_idx;
4810                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4811                 if (hw->mac.type == e1000_82575) {
4812                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4813                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4814                 } else {
4815                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4816                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4817                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4818                 }
4819                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4820                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4821         }
4822         if (q_vector->rx_ring) {
4823                 int q = q_vector->rx_ring->reg_idx;
4824                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4825                 if (hw->mac.type == e1000_82575) {
4826                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4827                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4828                 } else {
4829                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4830                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4831                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4832                 }
4833                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4834                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4835                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4836                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4837         }
4838         q_vector->cpu = cpu;
4839 out_no_update:
4840         put_cpu();
4841 }
4842
4843 static void igb_setup_dca(struct igb_adapter *adapter)
4844 {
4845         struct e1000_hw *hw = &adapter->hw;
4846         int i;
4847
4848         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4849                 return;
4850
4851         /* Always use CB2 mode, difference is masked in the CB driver. */
4852         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4853
4854         for (i = 0; i < adapter->num_q_vectors; i++) {
4855                 adapter->q_vector[i]->cpu = -1;
4856                 igb_update_dca(adapter->q_vector[i]);
4857         }
4858 }
4859
4860 static int __igb_notify_dca(struct device *dev, void *data)
4861 {
4862         struct net_device *netdev = dev_get_drvdata(dev);
4863         struct igb_adapter *adapter = netdev_priv(netdev);
4864         struct pci_dev *pdev = adapter->pdev;
4865         struct e1000_hw *hw = &adapter->hw;
4866         unsigned long event = *(unsigned long *)data;
4867
4868         switch (event) {
4869         case DCA_PROVIDER_ADD:
4870                 /* if already enabled, don't do it again */
4871                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4872                         break;
4873                 if (dca_add_requester(dev) == 0) {
4874                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4875                         dev_info(&pdev->dev, "DCA enabled\n");
4876                         igb_setup_dca(adapter);
4877                         break;
4878                 }
4879                 /* Fall Through since DCA is disabled. */
4880         case DCA_PROVIDER_REMOVE:
4881                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4882                         /* without this a class_device is left
4883                          * hanging around in the sysfs model */
4884                         dca_remove_requester(dev);
4885                         dev_info(&pdev->dev, "DCA disabled\n");
4886                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4887                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4888                 }
4889                 break;
4890         }
4891
4892         return 0;
4893 }
4894
4895 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4896                           void *p)
4897 {
4898         int ret_val;
4899
4900         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4901                                          __igb_notify_dca);
4902
4903         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4904 }
4905 #endif /* CONFIG_IGB_DCA */
4906
4907 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4908 {
4909         struct e1000_hw *hw = &adapter->hw;
4910         u32 ping;
4911         int i;
4912
4913         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4914                 ping = E1000_PF_CONTROL_MSG;
4915                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4916                         ping |= E1000_VT_MSGTYPE_CTS;
4917                 igb_write_mbx(hw, &ping, 1, i);
4918         }
4919 }
4920
4921 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4922 {
4923         struct e1000_hw *hw = &adapter->hw;
4924         u32 vmolr = rd32(E1000_VMOLR(vf));
4925         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4926
4927         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4928                             IGB_VF_FLAG_MULTI_PROMISC);
4929         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4930
4931         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4932                 vmolr |= E1000_VMOLR_MPME;
4933                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4934                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4935         } else {
4936                 /*
4937                  * if we have hashes and we are clearing a multicast promisc
4938                  * flag we need to write the hashes to the MTA as this step
4939                  * was previously skipped
4940                  */
4941                 if (vf_data->num_vf_mc_hashes > 30) {
4942                         vmolr |= E1000_VMOLR_MPME;
4943                 } else if (vf_data->num_vf_mc_hashes) {
4944                         int j;
4945                         vmolr |= E1000_VMOLR_ROMPE;
4946                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4947                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4948                 }
4949         }
4950
4951         wr32(E1000_VMOLR(vf), vmolr);
4952
4953         /* there are flags left unprocessed, likely not supported */
4954         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4955                 return -EINVAL;
4956
4957         return 0;
4958
4959 }
4960
4961 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4962                                   u32 *msgbuf, u32 vf)
4963 {
4964         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4965         u16 *hash_list = (u16 *)&msgbuf[1];
4966         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4967         int i;
4968
4969         /* salt away the number of multicast addresses assigned
4970          * to this VF for later use to restore when the PF multi cast
4971          * list changes
4972          */
4973         vf_data->num_vf_mc_hashes = n;
4974
4975         /* only up to 30 hash values supported */
4976         if (n > 30)
4977                 n = 30;
4978
4979         /* store the hashes for later use */
4980         for (i = 0; i < n; i++)
4981                 vf_data->vf_mc_hashes[i] = hash_list[i];
4982
4983         /* Flush and reset the mta with the new values */
4984         igb_set_rx_mode(adapter->netdev);
4985
4986         return 0;
4987 }
4988
4989 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4990 {
4991         struct e1000_hw *hw = &adapter->hw;
4992         struct vf_data_storage *vf_data;
4993         int i, j;
4994
4995         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4996                 u32 vmolr = rd32(E1000_VMOLR(i));
4997                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4998
4999                 vf_data = &adapter->vf_data[i];
5000
5001                 if ((vf_data->num_vf_mc_hashes > 30) ||
5002                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5003                         vmolr |= E1000_VMOLR_MPME;
5004                 } else if (vf_data->num_vf_mc_hashes) {
5005                         vmolr |= E1000_VMOLR_ROMPE;
5006                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5007                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5008                 }
5009                 wr32(E1000_VMOLR(i), vmolr);
5010         }
5011 }
5012
5013 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5014 {
5015         struct e1000_hw *hw = &adapter->hw;
5016         u32 pool_mask, reg, vid;
5017         int i;
5018
5019         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5020
5021         /* Find the vlan filter for this id */
5022         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5023                 reg = rd32(E1000_VLVF(i));
5024
5025                 /* remove the vf from the pool */
5026                 reg &= ~pool_mask;
5027
5028                 /* if pool is empty then remove entry from vfta */
5029                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5030                     (reg & E1000_VLVF_VLANID_ENABLE)) {
5031                         reg = 0;
5032                         vid = reg & E1000_VLVF_VLANID_MASK;
5033                         igb_vfta_set(hw, vid, false);
5034                 }
5035
5036                 wr32(E1000_VLVF(i), reg);
5037         }
5038
5039         adapter->vf_data[vf].vlans_enabled = 0;
5040 }
5041
5042 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5043 {
5044         struct e1000_hw *hw = &adapter->hw;
5045         u32 reg, i;
5046
5047         /* The vlvf table only exists on 82576 hardware and newer */
5048         if (hw->mac.type < e1000_82576)
5049                 return -1;
5050
5051         /* we only need to do this if VMDq is enabled */
5052         if (!adapter->vfs_allocated_count)
5053                 return -1;
5054
5055         /* Find the vlan filter for this id */
5056         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5057                 reg = rd32(E1000_VLVF(i));
5058                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5059                     vid == (reg & E1000_VLVF_VLANID_MASK))
5060                         break;
5061         }
5062
5063         if (add) {
5064                 if (i == E1000_VLVF_ARRAY_SIZE) {
5065                         /* Did not find a matching VLAN ID entry that was
5066                          * enabled.  Search for a free filter entry, i.e.
5067                          * one without the enable bit set
5068                          */
5069                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5070                                 reg = rd32(E1000_VLVF(i));
5071                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5072                                         break;
5073                         }
5074                 }
5075                 if (i < E1000_VLVF_ARRAY_SIZE) {
5076                         /* Found an enabled/available entry */
5077                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5078
5079                         /* if !enabled we need to set this up in vfta */
5080                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5081                                 /* add VID to filter table */
5082                                 igb_vfta_set(hw, vid, true);
5083                                 reg |= E1000_VLVF_VLANID_ENABLE;
5084                         }
5085                         reg &= ~E1000_VLVF_VLANID_MASK;
5086                         reg |= vid;
5087                         wr32(E1000_VLVF(i), reg);
5088
5089                         /* do not modify RLPML for PF devices */
5090                         if (vf >= adapter->vfs_allocated_count)
5091                                 return 0;
5092
5093                         if (!adapter->vf_data[vf].vlans_enabled) {
5094                                 u32 size;
5095                                 reg = rd32(E1000_VMOLR(vf));
5096                                 size = reg & E1000_VMOLR_RLPML_MASK;
5097                                 size += 4;
5098                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5099                                 reg |= size;
5100                                 wr32(E1000_VMOLR(vf), reg);
5101                         }
5102
5103                         adapter->vf_data[vf].vlans_enabled++;
5104                         return 0;
5105                 }
5106         } else {
5107                 if (i < E1000_VLVF_ARRAY_SIZE) {
5108                         /* remove vf from the pool */
5109                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5110                         /* if pool is empty then remove entry from vfta */
5111                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5112                                 reg = 0;
5113                                 igb_vfta_set(hw, vid, false);
5114                         }
5115                         wr32(E1000_VLVF(i), reg);
5116
5117                         /* do not modify RLPML for PF devices */
5118                         if (vf >= adapter->vfs_allocated_count)
5119                                 return 0;
5120
5121                         adapter->vf_data[vf].vlans_enabled--;
5122                         if (!adapter->vf_data[vf].vlans_enabled) {
5123                                 u32 size;
5124                                 reg = rd32(E1000_VMOLR(vf));
5125                                 size = reg & E1000_VMOLR_RLPML_MASK;
5126                                 size -= 4;
5127                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5128                                 reg |= size;
5129                                 wr32(E1000_VMOLR(vf), reg);
5130                         }
5131                 }
5132         }
5133         return 0;
5134 }
5135
5136 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5137 {
5138         struct e1000_hw *hw = &adapter->hw;
5139
5140         if (vid)
5141                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5142         else
5143                 wr32(E1000_VMVIR(vf), 0);
5144 }
5145
5146 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5147                                int vf, u16 vlan, u8 qos)
5148 {
5149         int err = 0;
5150         struct igb_adapter *adapter = netdev_priv(netdev);
5151
5152         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5153                 return -EINVAL;
5154         if (vlan || qos) {
5155                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5156                 if (err)
5157                         goto out;
5158                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5159                 igb_set_vmolr(adapter, vf, !vlan);
5160                 adapter->vf_data[vf].pf_vlan = vlan;
5161                 adapter->vf_data[vf].pf_qos = qos;
5162                 dev_info(&adapter->pdev->dev,
5163                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5164                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5165                         dev_warn(&adapter->pdev->dev,
5166                                  "The VF VLAN has been set,"
5167                                  " but the PF device is not up.\n");
5168                         dev_warn(&adapter->pdev->dev,
5169                                  "Bring the PF device up before"
5170                                  " attempting to use the VF device.\n");
5171                 }
5172         } else {
5173                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5174                                    false, vf);
5175                 igb_set_vmvir(adapter, vlan, vf);
5176                 igb_set_vmolr(adapter, vf, true);
5177                 adapter->vf_data[vf].pf_vlan = 0;
5178                 adapter->vf_data[vf].pf_qos = 0;
5179        }
5180 out:
5181        return err;
5182 }
5183
5184 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5185 {
5186         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5187         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5188
5189         return igb_vlvf_set(adapter, vid, add, vf);
5190 }
5191
5192 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5193 {
5194         /* clear flags - except flag that indicates PF has set the MAC */
5195         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5196         adapter->vf_data[vf].last_nack = jiffies;
5197
5198         /* reset offloads to defaults */
5199         igb_set_vmolr(adapter, vf, true);
5200
5201         /* reset vlans for device */
5202         igb_clear_vf_vfta(adapter, vf);
5203         if (adapter->vf_data[vf].pf_vlan)
5204                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5205                                     adapter->vf_data[vf].pf_vlan,
5206                                     adapter->vf_data[vf].pf_qos);
5207         else
5208                 igb_clear_vf_vfta(adapter, vf);
5209
5210         /* reset multicast table array for vf */
5211         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5212
5213         /* Flush and reset the mta with the new values */
5214         igb_set_rx_mode(adapter->netdev);
5215 }
5216
5217 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5218 {
5219         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5220
5221         /* generate a new mac address as we were hotplug removed/added */
5222         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5223                 random_ether_addr(vf_mac);
5224
5225         /* process remaining reset events */
5226         igb_vf_reset(adapter, vf);
5227 }
5228
5229 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5230 {
5231         struct e1000_hw *hw = &adapter->hw;
5232         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5233         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5234         u32 reg, msgbuf[3];
5235         u8 *addr = (u8 *)(&msgbuf[1]);
5236
5237         /* process all the same items cleared in a function level reset */
5238         igb_vf_reset(adapter, vf);
5239
5240         /* set vf mac address */
5241         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5242
5243         /* enable transmit and receive for vf */
5244         reg = rd32(E1000_VFTE);
5245         wr32(E1000_VFTE, reg | (1 << vf));
5246         reg = rd32(E1000_VFRE);
5247         wr32(E1000_VFRE, reg | (1 << vf));
5248
5249         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5250
5251         /* reply to reset with ack and vf mac address */
5252         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5253         memcpy(addr, vf_mac, 6);
5254         igb_write_mbx(hw, msgbuf, 3, vf);
5255 }
5256
5257 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5258 {
5259         /*
5260          * The VF MAC Address is stored in a packed array of bytes
5261          * starting at the second 32 bit word of the msg array
5262          */
5263         unsigned char *addr = (char *)&msg[1];
5264         int err = -1;
5265
5266         if (is_valid_ether_addr(addr))
5267                 err = igb_set_vf_mac(adapter, vf, addr);
5268
5269         return err;
5270 }
5271
5272 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5273 {
5274         struct e1000_hw *hw = &adapter->hw;
5275         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5276         u32 msg = E1000_VT_MSGTYPE_NACK;
5277
5278         /* if device isn't clear to send it shouldn't be reading either */
5279         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5280             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5281                 igb_write_mbx(hw, &msg, 1, vf);
5282                 vf_data->last_nack = jiffies;
5283         }
5284 }
5285
5286 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5287 {
5288         struct pci_dev *pdev = adapter->pdev;
5289         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5290         struct e1000_hw *hw = &adapter->hw;
5291         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5292         s32 retval;
5293
5294         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5295
5296         if (retval) {
5297                 /* if receive failed revoke VF CTS stats and restart init */
5298                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5299                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5300                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5301                         return;
5302                 goto out;
5303         }
5304
5305         /* this is a message we already processed, do nothing */
5306         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5307                 return;
5308
5309         /*
5310          * until the vf completes a reset it should not be
5311          * allowed to start any configuration.
5312          */
5313
5314         if (msgbuf[0] == E1000_VF_RESET) {
5315                 igb_vf_reset_msg(adapter, vf);
5316                 return;
5317         }
5318
5319         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5320                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5321                         return;
5322                 retval = -1;
5323                 goto out;
5324         }
5325
5326         switch ((msgbuf[0] & 0xFFFF)) {
5327         case E1000_VF_SET_MAC_ADDR:
5328                 retval = -EINVAL;
5329                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5330                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5331                 else
5332                         dev_warn(&pdev->dev,
5333                                  "VF %d attempted to override administratively "
5334                                  "set MAC address\nReload the VF driver to "
5335                                  "resume operations\n", vf);
5336                 break;
5337         case E1000_VF_SET_PROMISC:
5338                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5339                 break;
5340         case E1000_VF_SET_MULTICAST:
5341                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5342                 break;
5343         case E1000_VF_SET_LPE:
5344                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5345                 break;
5346         case E1000_VF_SET_VLAN:
5347                 retval = -1;
5348                 if (vf_data->pf_vlan)
5349                         dev_warn(&pdev->dev,
5350                                  "VF %d attempted to override administratively "
5351                                  "set VLAN tag\nReload the VF driver to "
5352                                  "resume operations\n", vf);
5353                 else
5354                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5355                 break;
5356         default:
5357                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5358                 retval = -1;
5359                 break;
5360         }
5361
5362         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5363 out:
5364         /* notify the VF of the results of what it sent us */
5365         if (retval)
5366                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5367         else
5368                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5369
5370         igb_write_mbx(hw, msgbuf, 1, vf);
5371 }
5372
5373 static void igb_msg_task(struct igb_adapter *adapter)
5374 {
5375         struct e1000_hw *hw = &adapter->hw;
5376         u32 vf;
5377
5378         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5379                 /* process any reset requests */
5380                 if (!igb_check_for_rst(hw, vf))
5381                         igb_vf_reset_event(adapter, vf);
5382
5383                 /* process any messages pending */
5384                 if (!igb_check_for_msg(hw, vf))
5385                         igb_rcv_msg_from_vf(adapter, vf);
5386
5387                 /* process any acks */
5388                 if (!igb_check_for_ack(hw, vf))
5389                         igb_rcv_ack_from_vf(adapter, vf);
5390         }
5391 }
5392
5393 /**
5394  *  igb_set_uta - Set unicast filter table address
5395  *  @adapter: board private structure
5396  *
5397  *  The unicast table address is a register array of 32-bit registers.
5398  *  The table is meant to be used in a way similar to how the MTA is used
5399  *  however due to certain limitations in the hardware it is necessary to
5400  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5401  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5402  **/
5403 static void igb_set_uta(struct igb_adapter *adapter)
5404 {
5405         struct e1000_hw *hw = &adapter->hw;
5406         int i;
5407
5408         /* The UTA table only exists on 82576 hardware and newer */
5409         if (hw->mac.type < e1000_82576)
5410                 return;
5411
5412         /* we only need to do this if VMDq is enabled */
5413         if (!adapter->vfs_allocated_count)
5414                 return;
5415
5416         for (i = 0; i < hw->mac.uta_reg_count; i++)
5417                 array_wr32(E1000_UTA, i, ~0);
5418 }
5419
5420 /**
5421  * igb_intr_msi - Interrupt Handler
5422  * @irq: interrupt number
5423  * @data: pointer to a network interface device structure
5424  **/
5425 static irqreturn_t igb_intr_msi(int irq, void *data)
5426 {
5427         struct igb_adapter *adapter = data;
5428         struct igb_q_vector *q_vector = adapter->q_vector[0];
5429         struct e1000_hw *hw = &adapter->hw;
5430         /* read ICR disables interrupts using IAM */
5431         u32 icr = rd32(E1000_ICR);
5432
5433         igb_write_itr(q_vector);
5434
5435         if (icr & E1000_ICR_DRSTA)
5436                 schedule_work(&adapter->reset_task);
5437
5438         if (icr & E1000_ICR_DOUTSYNC) {
5439                 /* HW is reporting DMA is out of sync */
5440                 adapter->stats.doosync++;
5441         }
5442
5443         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5444                 hw->mac.get_link_status = 1;
5445                 if (!test_bit(__IGB_DOWN, &adapter->state))
5446                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5447         }
5448
5449         napi_schedule(&q_vector->napi);
5450
5451         return IRQ_HANDLED;
5452 }
5453
5454 /**
5455  * igb_intr - Legacy Interrupt Handler
5456  * @irq: interrupt number
5457  * @data: pointer to a network interface device structure
5458  **/
5459 static irqreturn_t igb_intr(int irq, void *data)
5460 {
5461         struct igb_adapter *adapter = data;
5462         struct igb_q_vector *q_vector = adapter->q_vector[0];
5463         struct e1000_hw *hw = &adapter->hw;
5464         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5465          * need for the IMC write */
5466         u32 icr = rd32(E1000_ICR);
5467         if (!icr)
5468                 return IRQ_NONE;  /* Not our interrupt */
5469
5470         igb_write_itr(q_vector);
5471
5472         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5473          * not set, then the adapter didn't send an interrupt */
5474         if (!(icr & E1000_ICR_INT_ASSERTED))
5475                 return IRQ_NONE;
5476
5477         if (icr & E1000_ICR_DRSTA)
5478                 schedule_work(&adapter->reset_task);
5479
5480         if (icr & E1000_ICR_DOUTSYNC) {
5481                 /* HW is reporting DMA is out of sync */
5482                 adapter->stats.doosync++;
5483         }
5484
5485         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5486                 hw->mac.get_link_status = 1;
5487                 /* guard against interrupt when we're going down */
5488                 if (!test_bit(__IGB_DOWN, &adapter->state))
5489                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5490         }
5491
5492         napi_schedule(&q_vector->napi);
5493
5494         return IRQ_HANDLED;
5495 }
5496
5497 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5498 {
5499         struct igb_adapter *adapter = q_vector->adapter;
5500         struct e1000_hw *hw = &adapter->hw;
5501
5502         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5503             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5504                 if (!adapter->msix_entries)
5505                         igb_set_itr(adapter);
5506                 else
5507                         igb_update_ring_itr(q_vector);
5508         }
5509
5510         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5511                 if (adapter->msix_entries)
5512                         wr32(E1000_EIMS, q_vector->eims_value);
5513                 else
5514                         igb_irq_enable(adapter);
5515         }
5516 }
5517
5518 /**
5519  * igb_poll - NAPI Rx polling callback
5520  * @napi: napi polling structure
5521  * @budget: count of how many packets we should handle
5522  **/
5523 static int igb_poll(struct napi_struct *napi, int budget)
5524 {
5525         struct igb_q_vector *q_vector = container_of(napi,
5526                                                      struct igb_q_vector,
5527                                                      napi);
5528         int tx_clean_complete = 1, work_done = 0;
5529
5530 #ifdef CONFIG_IGB_DCA
5531         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5532                 igb_update_dca(q_vector);
5533 #endif
5534         if (q_vector->tx_ring)
5535                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5536
5537         if (q_vector->rx_ring)
5538                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5539
5540         if (!tx_clean_complete)
5541                 work_done = budget;
5542
5543         /* If not enough Rx work done, exit the polling mode */
5544         if (work_done < budget) {
5545                 napi_complete(napi);
5546                 igb_ring_irq_enable(q_vector);
5547         }
5548
5549         return work_done;
5550 }
5551
5552 /**
5553  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5554  * @adapter: board private structure
5555  * @shhwtstamps: timestamp structure to update
5556  * @regval: unsigned 64bit system time value.
5557  *
5558  * We need to convert the system time value stored in the RX/TXSTMP registers
5559  * into a hwtstamp which can be used by the upper level timestamping functions
5560  */
5561 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5562                                    struct skb_shared_hwtstamps *shhwtstamps,
5563                                    u64 regval)
5564 {
5565         u64 ns;
5566
5567         /*
5568          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5569          * 24 to match clock shift we setup earlier.
5570          */
5571         if (adapter->hw.mac.type == e1000_82580)
5572                 regval <<= IGB_82580_TSYNC_SHIFT;
5573
5574         ns = timecounter_cyc2time(&adapter->clock, regval);
5575         timecompare_update(&adapter->compare, ns);
5576         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5577         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5578         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5579 }
5580
5581 /**
5582  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5583  * @q_vector: pointer to q_vector containing needed info
5584  * @buffer: pointer to igb_buffer structure
5585  *
5586  * If we were asked to do hardware stamping and such a time stamp is
5587  * available, then it must have been for this skb here because we only
5588  * allow only one such packet into the queue.
5589  */
5590 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5591 {
5592         struct igb_adapter *adapter = q_vector->adapter;
5593         struct e1000_hw *hw = &adapter->hw;
5594         struct skb_shared_hwtstamps shhwtstamps;
5595         u64 regval;
5596
5597         /* if skb does not support hw timestamp or TX stamp not valid exit */
5598         if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5599             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5600                 return;
5601
5602         regval = rd32(E1000_TXSTMPL);
5603         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5604
5605         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5606         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5607 }
5608
5609 /**
5610  * igb_clean_tx_irq - Reclaim resources after transmit completes
5611  * @q_vector: pointer to q_vector containing needed info
5612  * returns true if ring is completely cleaned
5613  **/
5614 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5615 {
5616         struct igb_adapter *adapter = q_vector->adapter;
5617         struct igb_ring *tx_ring = q_vector->tx_ring;
5618         struct net_device *netdev = tx_ring->netdev;
5619         struct e1000_hw *hw = &adapter->hw;
5620         struct igb_buffer *buffer_info;
5621         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5622         unsigned int total_bytes = 0, total_packets = 0;
5623         unsigned int i, eop, count = 0;
5624         bool cleaned = false;
5625
5626         i = tx_ring->next_to_clean;
5627         eop = tx_ring->buffer_info[i].next_to_watch;
5628         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5629
5630         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5631                (count < tx_ring->count)) {
5632                 rmb();  /* read buffer_info after eop_desc status */
5633                 for (cleaned = false; !cleaned; count++) {
5634                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5635                         buffer_info = &tx_ring->buffer_info[i];
5636                         cleaned = (i == eop);
5637
5638                         if (buffer_info->skb) {
5639                                 total_bytes += buffer_info->bytecount;
5640                                 /* gso_segs is currently only valid for tcp */
5641                                 total_packets += buffer_info->gso_segs;
5642                                 igb_tx_hwtstamp(q_vector, buffer_info);
5643                         }
5644
5645                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5646                         tx_desc->wb.status = 0;
5647
5648                         i++;
5649                         if (i == tx_ring->count)
5650                                 i = 0;
5651                 }
5652                 eop = tx_ring->buffer_info[i].next_to_watch;
5653                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5654         }
5655
5656         tx_ring->next_to_clean = i;
5657
5658         if (unlikely(count &&
5659                      netif_carrier_ok(netdev) &&
5660                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5661                 /* Make sure that anybody stopping the queue after this
5662                  * sees the new next_to_clean.
5663                  */
5664                 smp_mb();
5665                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5666                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5667                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5668
5669                         u64_stats_update_begin(&tx_ring->tx_syncp);
5670                         tx_ring->tx_stats.restart_queue++;
5671                         u64_stats_update_end(&tx_ring->tx_syncp);
5672                 }
5673         }
5674
5675         if (tx_ring->detect_tx_hung) {
5676                 /* Detect a transmit hang in hardware, this serializes the
5677                  * check with the clearing of time_stamp and movement of i */
5678                 tx_ring->detect_tx_hung = false;
5679                 if (tx_ring->buffer_info[i].time_stamp &&
5680                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5681                                (adapter->tx_timeout_factor * HZ)) &&
5682                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5683
5684                         /* detected Tx unit hang */
5685                         dev_err(tx_ring->dev,
5686                                 "Detected Tx Unit Hang\n"
5687                                 "  Tx Queue             <%d>\n"
5688                                 "  TDH                  <%x>\n"
5689                                 "  TDT                  <%x>\n"
5690                                 "  next_to_use          <%x>\n"
5691                                 "  next_to_clean        <%x>\n"
5692                                 "buffer_info[next_to_clean]\n"
5693                                 "  time_stamp           <%lx>\n"
5694                                 "  next_to_watch        <%x>\n"
5695                                 "  jiffies              <%lx>\n"
5696                                 "  desc.status          <%x>\n",
5697                                 tx_ring->queue_index,
5698                                 readl(tx_ring->head),
5699                                 readl(tx_ring->tail),
5700                                 tx_ring->next_to_use,
5701                                 tx_ring->next_to_clean,
5702                                 tx_ring->buffer_info[eop].time_stamp,
5703                                 eop,
5704                                 jiffies,
5705                                 eop_desc->wb.status);
5706                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5707                 }
5708         }
5709         tx_ring->total_bytes += total_bytes;
5710         tx_ring->total_packets += total_packets;
5711         u64_stats_update_begin(&tx_ring->tx_syncp);
5712         tx_ring->tx_stats.bytes += total_bytes;
5713         tx_ring->tx_stats.packets += total_packets;
5714         u64_stats_update_end(&tx_ring->tx_syncp);
5715         return count < tx_ring->count;
5716 }
5717
5718 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5719                                        u32 status_err, struct sk_buff *skb)
5720 {
5721         skb_checksum_none_assert(skb);
5722
5723         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5724         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5725              (status_err & E1000_RXD_STAT_IXSM))
5726                 return;
5727
5728         /* TCP/UDP checksum error bit is set */
5729         if (status_err &
5730             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5731                 /*
5732                  * work around errata with sctp packets where the TCPE aka
5733                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5734                  * packets, (aka let the stack check the crc32c)
5735                  */
5736                 if ((skb->len == 60) &&
5737                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5738                         u64_stats_update_begin(&ring->rx_syncp);
5739                         ring->rx_stats.csum_err++;
5740                         u64_stats_update_end(&ring->rx_syncp);
5741                 }
5742                 /* let the stack verify checksum errors */
5743                 return;
5744         }
5745         /* It must be a TCP or UDP packet with a valid checksum */
5746         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5747                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5748
5749         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5750 }
5751
5752 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5753                                    struct sk_buff *skb)
5754 {
5755         struct igb_adapter *adapter = q_vector->adapter;
5756         struct e1000_hw *hw = &adapter->hw;
5757         u64 regval;
5758
5759         /*
5760          * If this bit is set, then the RX registers contain the time stamp. No
5761          * other packet will be time stamped until we read these registers, so
5762          * read the registers to make them available again. Because only one
5763          * packet can be time stamped at a time, we know that the register
5764          * values must belong to this one here and therefore we don't need to
5765          * compare any of the additional attributes stored for it.
5766          *
5767          * If nothing went wrong, then it should have a shared tx_flags that we
5768          * can turn into a skb_shared_hwtstamps.
5769          */
5770         if (staterr & E1000_RXDADV_STAT_TSIP) {
5771                 u32 *stamp = (u32 *)skb->data;
5772                 regval = le32_to_cpu(*(stamp + 2));
5773                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5774                 skb_pull(skb, IGB_TS_HDR_LEN);
5775         } else {
5776                 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5777                         return;
5778
5779                 regval = rd32(E1000_RXSTMPL);
5780                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5781         }
5782
5783         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5784 }
5785 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5786                                union e1000_adv_rx_desc *rx_desc)
5787 {
5788         /* HW will not DMA in data larger than the given buffer, even if it
5789          * parses the (NFS, of course) header to be larger.  In that case, it
5790          * fills the header buffer and spills the rest into the page.
5791          */
5792         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5793                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5794         if (hlen > rx_ring->rx_buffer_len)
5795                 hlen = rx_ring->rx_buffer_len;
5796         return hlen;
5797 }
5798
5799 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5800                                  int *work_done, int budget)
5801 {
5802         struct igb_ring *rx_ring = q_vector->rx_ring;
5803         struct net_device *netdev = rx_ring->netdev;
5804         struct device *dev = rx_ring->dev;
5805         union e1000_adv_rx_desc *rx_desc , *next_rxd;
5806         struct igb_buffer *buffer_info , *next_buffer;
5807         struct sk_buff *skb;
5808         bool cleaned = false;
5809         int cleaned_count = 0;
5810         int current_node = numa_node_id();
5811         unsigned int total_bytes = 0, total_packets = 0;
5812         unsigned int i;
5813         u32 staterr;
5814         u16 length;
5815
5816         i = rx_ring->next_to_clean;
5817         buffer_info = &rx_ring->buffer_info[i];
5818         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5819         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5820
5821         while (staterr & E1000_RXD_STAT_DD) {
5822                 if (*work_done >= budget)
5823                         break;
5824                 (*work_done)++;
5825                 rmb(); /* read descriptor and rx_buffer_info after status DD */
5826
5827                 skb = buffer_info->skb;
5828                 prefetch(skb->data - NET_IP_ALIGN);
5829                 buffer_info->skb = NULL;
5830
5831                 i++;
5832                 if (i == rx_ring->count)
5833                         i = 0;
5834
5835                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5836                 prefetch(next_rxd);
5837                 next_buffer = &rx_ring->buffer_info[i];
5838
5839                 length = le16_to_cpu(rx_desc->wb.upper.length);
5840                 cleaned = true;
5841                 cleaned_count++;
5842
5843                 if (buffer_info->dma) {
5844                         dma_unmap_single(dev, buffer_info->dma,
5845                                          rx_ring->rx_buffer_len,
5846                                          DMA_FROM_DEVICE);
5847                         buffer_info->dma = 0;
5848                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5849                                 skb_put(skb, length);
5850                                 goto send_up;
5851                         }
5852                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5853                 }
5854
5855                 if (length) {
5856                         dma_unmap_page(dev, buffer_info->page_dma,
5857                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5858                         buffer_info->page_dma = 0;
5859
5860                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5861                                                 buffer_info->page,
5862                                                 buffer_info->page_offset,
5863                                                 length);
5864
5865                         if ((page_count(buffer_info->page) != 1) ||
5866                             (page_to_nid(buffer_info->page) != current_node))
5867                                 buffer_info->page = NULL;
5868                         else
5869                                 get_page(buffer_info->page);
5870
5871                         skb->len += length;
5872                         skb->data_len += length;
5873                         skb->truesize += length;
5874                 }
5875
5876                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5877                         buffer_info->skb = next_buffer->skb;
5878                         buffer_info->dma = next_buffer->dma;
5879                         next_buffer->skb = skb;
5880                         next_buffer->dma = 0;
5881                         goto next_desc;
5882                 }
5883 send_up:
5884                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5885                         dev_kfree_skb_irq(skb);
5886                         goto next_desc;
5887                 }
5888
5889                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5890                         igb_rx_hwtstamp(q_vector, staterr, skb);
5891                 total_bytes += skb->len;
5892                 total_packets++;
5893
5894                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5895
5896                 skb->protocol = eth_type_trans(skb, netdev);
5897                 skb_record_rx_queue(skb, rx_ring->queue_index);
5898
5899                 if (staterr & E1000_RXD_STAT_VP) {
5900                         u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5901
5902                         __vlan_hwaccel_put_tag(skb, vid);
5903                 }
5904                 napi_gro_receive(&q_vector->napi, skb);
5905
5906 next_desc:
5907                 rx_desc->wb.upper.status_error = 0;
5908
5909                 /* return some buffers to hardware, one at a time is too slow */
5910                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5911                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5912                         cleaned_count = 0;
5913                 }
5914
5915                 /* use prefetched values */
5916                 rx_desc = next_rxd;
5917                 buffer_info = next_buffer;
5918                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5919         }
5920
5921         rx_ring->next_to_clean = i;
5922         cleaned_count = igb_desc_unused(rx_ring);
5923
5924         if (cleaned_count)
5925                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5926
5927         rx_ring->total_packets += total_packets;
5928         rx_ring->total_bytes += total_bytes;
5929         u64_stats_update_begin(&rx_ring->rx_syncp);
5930         rx_ring->rx_stats.packets += total_packets;
5931         rx_ring->rx_stats.bytes += total_bytes;
5932         u64_stats_update_end(&rx_ring->rx_syncp);
5933         return cleaned;
5934 }
5935
5936 /**
5937  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5938  * @adapter: address of board private structure
5939  **/
5940 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5941 {
5942         struct net_device *netdev = rx_ring->netdev;
5943         union e1000_adv_rx_desc *rx_desc;
5944         struct igb_buffer *buffer_info;
5945         struct sk_buff *skb;
5946         unsigned int i;
5947         int bufsz;
5948
5949         i = rx_ring->next_to_use;
5950         buffer_info = &rx_ring->buffer_info[i];
5951
5952         bufsz = rx_ring->rx_buffer_len;
5953
5954         while (cleaned_count--) {
5955                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5956
5957                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5958                         if (!buffer_info->page) {
5959                                 buffer_info->page = netdev_alloc_page(netdev);
5960                                 if (unlikely(!buffer_info->page)) {
5961                                         u64_stats_update_begin(&rx_ring->rx_syncp);
5962                                         rx_ring->rx_stats.alloc_failed++;
5963                                         u64_stats_update_end(&rx_ring->rx_syncp);
5964                                         goto no_buffers;
5965                                 }
5966                                 buffer_info->page_offset = 0;
5967                         } else {
5968                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5969                         }
5970                         buffer_info->page_dma =
5971                                 dma_map_page(rx_ring->dev, buffer_info->page,
5972                                              buffer_info->page_offset,
5973                                              PAGE_SIZE / 2,
5974                                              DMA_FROM_DEVICE);
5975                         if (dma_mapping_error(rx_ring->dev,
5976                                               buffer_info->page_dma)) {
5977                                 buffer_info->page_dma = 0;
5978                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5979                                 rx_ring->rx_stats.alloc_failed++;
5980                                 u64_stats_update_end(&rx_ring->rx_syncp);
5981                                 goto no_buffers;
5982                         }
5983                 }
5984
5985                 skb = buffer_info->skb;
5986                 if (!skb) {
5987                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5988                         if (unlikely(!skb)) {
5989                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5990                                 rx_ring->rx_stats.alloc_failed++;
5991                                 u64_stats_update_end(&rx_ring->rx_syncp);
5992                                 goto no_buffers;
5993                         }
5994
5995                         buffer_info->skb = skb;
5996                 }
5997                 if (!buffer_info->dma) {
5998                         buffer_info->dma = dma_map_single(rx_ring->dev,
5999                                                           skb->data,
6000                                                           bufsz,
6001                                                           DMA_FROM_DEVICE);
6002                         if (dma_mapping_error(rx_ring->dev,
6003                                               buffer_info->dma)) {
6004                                 buffer_info->dma = 0;
6005                                 u64_stats_update_begin(&rx_ring->rx_syncp);
6006                                 rx_ring->rx_stats.alloc_failed++;
6007                                 u64_stats_update_end(&rx_ring->rx_syncp);
6008                                 goto no_buffers;
6009                         }
6010                 }
6011                 /* Refresh the desc even if buffer_addrs didn't change because
6012                  * each write-back erases this info. */
6013                 if (bufsz < IGB_RXBUFFER_1024) {
6014                         rx_desc->read.pkt_addr =
6015                              cpu_to_le64(buffer_info->page_dma);
6016                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
6017                 } else {
6018                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
6019                         rx_desc->read.hdr_addr = 0;
6020                 }
6021
6022                 i++;
6023                 if (i == rx_ring->count)
6024                         i = 0;
6025                 buffer_info = &rx_ring->buffer_info[i];
6026         }
6027
6028 no_buffers:
6029         if (rx_ring->next_to_use != i) {
6030                 rx_ring->next_to_use = i;
6031                 if (i == 0)
6032                         i = (rx_ring->count - 1);
6033                 else
6034                         i--;
6035
6036                 /* Force memory writes to complete before letting h/w
6037                  * know there are new descriptors to fetch.  (Only
6038                  * applicable for weak-ordered memory model archs,
6039                  * such as IA-64). */
6040                 wmb();
6041                 writel(i, rx_ring->tail);
6042         }
6043 }
6044
6045 /**
6046  * igb_mii_ioctl -
6047  * @netdev:
6048  * @ifreq:
6049  * @cmd:
6050  **/
6051 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6052 {
6053         struct igb_adapter *adapter = netdev_priv(netdev);
6054         struct mii_ioctl_data *data = if_mii(ifr);
6055
6056         if (adapter->hw.phy.media_type != e1000_media_type_copper)
6057                 return -EOPNOTSUPP;
6058
6059         switch (cmd) {
6060         case SIOCGMIIPHY:
6061                 data->phy_id = adapter->hw.phy.addr;
6062                 break;
6063         case SIOCGMIIREG:
6064                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6065                                      &data->val_out))
6066                         return -EIO;
6067                 break;
6068         case SIOCSMIIREG:
6069         default:
6070                 return -EOPNOTSUPP;
6071         }
6072         return 0;
6073 }
6074
6075 /**
6076  * igb_hwtstamp_ioctl - control hardware time stamping
6077  * @netdev:
6078  * @ifreq:
6079  * @cmd:
6080  *
6081  * Outgoing time stamping can be enabled and disabled. Play nice and
6082  * disable it when requested, although it shouldn't case any overhead
6083  * when no packet needs it. At most one packet in the queue may be
6084  * marked for time stamping, otherwise it would be impossible to tell
6085  * for sure to which packet the hardware time stamp belongs.
6086  *
6087  * Incoming time stamping has to be configured via the hardware
6088  * filters. Not all combinations are supported, in particular event
6089  * type has to be specified. Matching the kind of event packet is
6090  * not supported, with the exception of "all V2 events regardless of
6091  * level 2 or 4".
6092  *
6093  **/
6094 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6095                               struct ifreq *ifr, int cmd)
6096 {
6097         struct igb_adapter *adapter = netdev_priv(netdev);
6098         struct e1000_hw *hw = &adapter->hw;
6099         struct hwtstamp_config config;
6100         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6101         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6102         u32 tsync_rx_cfg = 0;
6103         bool is_l4 = false;
6104         bool is_l2 = false;
6105         u32 regval;
6106
6107         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6108                 return -EFAULT;
6109
6110         /* reserved for future extensions */
6111         if (config.flags)
6112                 return -EINVAL;
6113
6114         switch (config.tx_type) {
6115         case HWTSTAMP_TX_OFF:
6116                 tsync_tx_ctl = 0;
6117         case HWTSTAMP_TX_ON:
6118                 break;
6119         default:
6120                 return -ERANGE;
6121         }
6122
6123         switch (config.rx_filter) {
6124         case HWTSTAMP_FILTER_NONE:
6125                 tsync_rx_ctl = 0;
6126                 break;
6127         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6128         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6129         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6130         case HWTSTAMP_FILTER_ALL:
6131                 /*
6132                  * register TSYNCRXCFG must be set, therefore it is not
6133                  * possible to time stamp both Sync and Delay_Req messages
6134                  * => fall back to time stamping all packets
6135                  */
6136                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6137                 config.rx_filter = HWTSTAMP_FILTER_ALL;
6138                 break;
6139         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6140                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6141                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6142                 is_l4 = true;
6143                 break;
6144         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6145                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6146                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6147                 is_l4 = true;
6148                 break;
6149         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6150         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6151                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6152                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6153                 is_l2 = true;
6154                 is_l4 = true;
6155                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6156                 break;
6157         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6158         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6159                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6160                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6161                 is_l2 = true;
6162                 is_l4 = true;
6163                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6164                 break;
6165         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6166         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6167         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6168                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6169                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6170                 is_l2 = true;
6171                 break;
6172         default:
6173                 return -ERANGE;
6174         }
6175
6176         if (hw->mac.type == e1000_82575) {
6177                 if (tsync_rx_ctl | tsync_tx_ctl)
6178                         return -EINVAL;
6179                 return 0;
6180         }
6181
6182         /*
6183          * Per-packet timestamping only works if all packets are
6184          * timestamped, so enable timestamping in all packets as
6185          * long as one rx filter was configured.
6186          */
6187         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6188                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6189                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6190         }
6191
6192         /* enable/disable TX */
6193         regval = rd32(E1000_TSYNCTXCTL);
6194         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6195         regval |= tsync_tx_ctl;
6196         wr32(E1000_TSYNCTXCTL, regval);
6197
6198         /* enable/disable RX */
6199         regval = rd32(E1000_TSYNCRXCTL);
6200         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6201         regval |= tsync_rx_ctl;
6202         wr32(E1000_TSYNCRXCTL, regval);
6203
6204         /* define which PTP packets are time stamped */
6205         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6206
6207         /* define ethertype filter for timestamped packets */
6208         if (is_l2)
6209                 wr32(E1000_ETQF(3),
6210                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6211                                  E1000_ETQF_1588 | /* enable timestamping */
6212                                  ETH_P_1588));     /* 1588 eth protocol type */
6213         else
6214                 wr32(E1000_ETQF(3), 0);
6215
6216 #define PTP_PORT 319
6217         /* L4 Queue Filter[3]: filter by destination port and protocol */
6218         if (is_l4) {
6219                 u32 ftqf = (IPPROTO_UDP /* UDP */
6220                         | E1000_FTQF_VF_BP /* VF not compared */
6221                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6222                         | E1000_FTQF_MASK); /* mask all inputs */
6223                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6224
6225                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6226                 wr32(E1000_IMIREXT(3),
6227                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6228                 if (hw->mac.type == e1000_82576) {
6229                         /* enable source port check */
6230                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6231                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6232                 }
6233                 wr32(E1000_FTQF(3), ftqf);
6234         } else {
6235                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6236         }
6237         wrfl();
6238
6239         adapter->hwtstamp_config = config;
6240
6241         /* clear TX/RX time stamp registers, just to be sure */
6242         regval = rd32(E1000_TXSTMPH);
6243         regval = rd32(E1000_RXSTMPH);
6244
6245         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6246                 -EFAULT : 0;
6247 }
6248
6249 /**
6250  * igb_ioctl -
6251  * @netdev:
6252  * @ifreq:
6253  * @cmd:
6254  **/
6255 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6256 {
6257         switch (cmd) {
6258         case SIOCGMIIPHY:
6259         case SIOCGMIIREG:
6260         case SIOCSMIIREG:
6261                 return igb_mii_ioctl(netdev, ifr, cmd);
6262         case SIOCSHWTSTAMP:
6263                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6264         default:
6265                 return -EOPNOTSUPP;
6266         }
6267 }
6268
6269 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6270 {
6271         struct igb_adapter *adapter = hw->back;
6272         u16 cap_offset;
6273
6274         cap_offset = adapter->pdev->pcie_cap;
6275         if (!cap_offset)
6276                 return -E1000_ERR_CONFIG;
6277
6278         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6279
6280         return 0;
6281 }
6282
6283 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6284 {
6285         struct igb_adapter *adapter = hw->back;
6286         u16 cap_offset;
6287
6288         cap_offset = adapter->pdev->pcie_cap;
6289         if (!cap_offset)
6290                 return -E1000_ERR_CONFIG;
6291
6292         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6293
6294         return 0;
6295 }
6296
6297 static void igb_vlan_mode(struct net_device *netdev, u32 features)
6298 {
6299         struct igb_adapter *adapter = netdev_priv(netdev);
6300         struct e1000_hw *hw = &adapter->hw;
6301         u32 ctrl, rctl;
6302
6303         igb_irq_disable(adapter);
6304
6305         if (features & NETIF_F_HW_VLAN_RX) {
6306                 /* enable VLAN tag insert/strip */
6307                 ctrl = rd32(E1000_CTRL);
6308                 ctrl |= E1000_CTRL_VME;
6309                 wr32(E1000_CTRL, ctrl);
6310
6311                 /* Disable CFI check */
6312                 rctl = rd32(E1000_RCTL);
6313                 rctl &= ~E1000_RCTL_CFIEN;
6314                 wr32(E1000_RCTL, rctl);
6315         } else {
6316                 /* disable VLAN tag insert/strip */
6317                 ctrl = rd32(E1000_CTRL);
6318                 ctrl &= ~E1000_CTRL_VME;
6319                 wr32(E1000_CTRL, ctrl);
6320         }
6321
6322         igb_rlpml_set(adapter);
6323
6324         if (!test_bit(__IGB_DOWN, &adapter->state))
6325                 igb_irq_enable(adapter);
6326 }
6327
6328 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6329 {
6330         struct igb_adapter *adapter = netdev_priv(netdev);
6331         struct e1000_hw *hw = &adapter->hw;
6332         int pf_id = adapter->vfs_allocated_count;
6333
6334         /* attempt to add filter to vlvf array */
6335         igb_vlvf_set(adapter, vid, true, pf_id);
6336
6337         /* add the filter since PF can receive vlans w/o entry in vlvf */
6338         igb_vfta_set(hw, vid, true);
6339
6340         set_bit(vid, adapter->active_vlans);
6341 }
6342
6343 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6344 {
6345         struct igb_adapter *adapter = netdev_priv(netdev);
6346         struct e1000_hw *hw = &adapter->hw;
6347         int pf_id = adapter->vfs_allocated_count;
6348         s32 err;
6349
6350         igb_irq_disable(adapter);
6351
6352         if (!test_bit(__IGB_DOWN, &adapter->state))
6353                 igb_irq_enable(adapter);
6354
6355         /* remove vlan from VLVF table array */
6356         err = igb_vlvf_set(adapter, vid, false, pf_id);
6357
6358         /* if vid was not present in VLVF just remove it from table */
6359         if (err)
6360                 igb_vfta_set(hw, vid, false);
6361
6362         clear_bit(vid, adapter->active_vlans);
6363 }
6364
6365 static void igb_restore_vlan(struct igb_adapter *adapter)
6366 {
6367         u16 vid;
6368
6369         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6370                 igb_vlan_rx_add_vid(adapter->netdev, vid);
6371 }
6372
6373 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6374 {
6375         struct pci_dev *pdev = adapter->pdev;
6376         struct e1000_mac_info *mac = &adapter->hw.mac;
6377
6378         mac->autoneg = 0;
6379
6380         /* Make sure dplx is at most 1 bit and lsb of speed is not set
6381          * for the switch() below to work */
6382         if ((spd & 1) || (dplx & ~1))
6383                 goto err_inval;
6384
6385         /* Fiber NIC's only allow 1000 Gbps Full duplex */
6386         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6387             spd != SPEED_1000 &&
6388             dplx != DUPLEX_FULL)
6389                 goto err_inval;
6390
6391         switch (spd + dplx) {
6392         case SPEED_10 + DUPLEX_HALF:
6393                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6394                 break;
6395         case SPEED_10 + DUPLEX_FULL:
6396                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6397                 break;
6398         case SPEED_100 + DUPLEX_HALF:
6399                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6400                 break;
6401         case SPEED_100 + DUPLEX_FULL:
6402                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6403                 break;
6404         case SPEED_1000 + DUPLEX_FULL:
6405                 mac->autoneg = 1;
6406                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6407                 break;
6408         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6409         default:
6410                 goto err_inval;
6411         }
6412         return 0;
6413
6414 err_inval:
6415         dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6416         return -EINVAL;
6417 }
6418
6419 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6420 {
6421         struct net_device *netdev = pci_get_drvdata(pdev);
6422         struct igb_adapter *adapter = netdev_priv(netdev);
6423         struct e1000_hw *hw = &adapter->hw;
6424         u32 ctrl, rctl, status;
6425         u32 wufc = adapter->wol;
6426 #ifdef CONFIG_PM
6427         int retval = 0;
6428 #endif
6429
6430         netif_device_detach(netdev);
6431
6432         if (netif_running(netdev))
6433                 igb_close(netdev);
6434
6435         igb_clear_interrupt_scheme(adapter);
6436
6437 #ifdef CONFIG_PM
6438         retval = pci_save_state(pdev);
6439         if (retval)
6440                 return retval;
6441 #endif
6442
6443         status = rd32(E1000_STATUS);
6444         if (status & E1000_STATUS_LU)
6445                 wufc &= ~E1000_WUFC_LNKC;
6446
6447         if (wufc) {
6448                 igb_setup_rctl(adapter);
6449                 igb_set_rx_mode(netdev);
6450
6451                 /* turn on all-multi mode if wake on multicast is enabled */
6452                 if (wufc & E1000_WUFC_MC) {
6453                         rctl = rd32(E1000_RCTL);
6454                         rctl |= E1000_RCTL_MPE;
6455                         wr32(E1000_RCTL, rctl);
6456                 }
6457
6458                 ctrl = rd32(E1000_CTRL);
6459                 /* advertise wake from D3Cold */
6460                 #define E1000_CTRL_ADVD3WUC 0x00100000
6461                 /* phy power management enable */
6462                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6463                 ctrl |= E1000_CTRL_ADVD3WUC;
6464                 wr32(E1000_CTRL, ctrl);
6465
6466                 /* Allow time for pending master requests to run */
6467                 igb_disable_pcie_master(hw);
6468
6469                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6470                 wr32(E1000_WUFC, wufc);
6471         } else {
6472                 wr32(E1000_WUC, 0);
6473                 wr32(E1000_WUFC, 0);
6474         }
6475
6476         *enable_wake = wufc || adapter->en_mng_pt;
6477         if (!*enable_wake)
6478                 igb_power_down_link(adapter);
6479         else
6480                 igb_power_up_link(adapter);
6481
6482         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6483          * would have already happened in close and is redundant. */
6484         igb_release_hw_control(adapter);
6485
6486         pci_disable_device(pdev);
6487
6488         return 0;
6489 }
6490
6491 #ifdef CONFIG_PM
6492 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6493 {
6494         int retval;
6495         bool wake;
6496
6497         retval = __igb_shutdown(pdev, &wake);
6498         if (retval)
6499                 return retval;
6500
6501         if (wake) {
6502                 pci_prepare_to_sleep(pdev);
6503         } else {
6504                 pci_wake_from_d3(pdev, false);
6505                 pci_set_power_state(pdev, PCI_D3hot);
6506         }
6507
6508         return 0;
6509 }
6510
6511 static int igb_resume(struct pci_dev *pdev)
6512 {
6513         struct net_device *netdev = pci_get_drvdata(pdev);
6514         struct igb_adapter *adapter = netdev_priv(netdev);
6515         struct e1000_hw *hw = &adapter->hw;
6516         u32 err;
6517
6518         pci_set_power_state(pdev, PCI_D0);
6519         pci_restore_state(pdev);
6520         pci_save_state(pdev);
6521
6522         err = pci_enable_device_mem(pdev);
6523         if (err) {
6524                 dev_err(&pdev->dev,
6525                         "igb: Cannot enable PCI device from suspend\n");
6526                 return err;
6527         }
6528         pci_set_master(pdev);
6529
6530         pci_enable_wake(pdev, PCI_D3hot, 0);
6531         pci_enable_wake(pdev, PCI_D3cold, 0);
6532
6533         if (igb_init_interrupt_scheme(adapter)) {
6534                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6535                 return -ENOMEM;
6536         }
6537
6538         igb_reset(adapter);
6539
6540         /* let the f/w know that the h/w is now under the control of the
6541          * driver. */
6542         igb_get_hw_control(adapter);
6543
6544         wr32(E1000_WUS, ~0);
6545
6546         if (netif_running(netdev)) {
6547                 err = igb_open(netdev);
6548                 if (err)
6549                         return err;
6550         }
6551
6552         netif_device_attach(netdev);
6553
6554         return 0;
6555 }
6556 #endif
6557
6558 static void igb_shutdown(struct pci_dev *pdev)
6559 {
6560         bool wake;
6561
6562         __igb_shutdown(pdev, &wake);
6563
6564         if (system_state == SYSTEM_POWER_OFF) {
6565                 pci_wake_from_d3(pdev, wake);
6566                 pci_set_power_state(pdev, PCI_D3hot);
6567         }
6568 }
6569
6570 #ifdef CONFIG_NET_POLL_CONTROLLER
6571 /*
6572  * Polling 'interrupt' - used by things like netconsole to send skbs
6573  * without having to re-enable interrupts. It's not called while
6574  * the interrupt routine is executing.
6575  */
6576 static void igb_netpoll(struct net_device *netdev)
6577 {
6578         struct igb_adapter *adapter = netdev_priv(netdev);
6579         struct e1000_hw *hw = &adapter->hw;
6580         int i;
6581
6582         if (!adapter->msix_entries) {
6583                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6584                 igb_irq_disable(adapter);
6585                 napi_schedule(&q_vector->napi);
6586                 return;
6587         }
6588
6589         for (i = 0; i < adapter->num_q_vectors; i++) {
6590                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6591                 wr32(E1000_EIMC, q_vector->eims_value);
6592                 napi_schedule(&q_vector->napi);
6593         }
6594 }
6595 #endif /* CONFIG_NET_POLL_CONTROLLER */
6596
6597 /**
6598  * igb_io_error_detected - called when PCI error is detected
6599  * @pdev: Pointer to PCI device
6600  * @state: The current pci connection state
6601  *
6602  * This function is called after a PCI bus error affecting
6603  * this device has been detected.
6604  */
6605 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6606                                               pci_channel_state_t state)
6607 {
6608         struct net_device *netdev = pci_get_drvdata(pdev);
6609         struct igb_adapter *adapter = netdev_priv(netdev);
6610
6611         netif_device_detach(netdev);
6612
6613         if (state == pci_channel_io_perm_failure)
6614                 return PCI_ERS_RESULT_DISCONNECT;
6615
6616         if (netif_running(netdev))
6617                 igb_down(adapter);
6618         pci_disable_device(pdev);
6619
6620         /* Request a slot slot reset. */
6621         return PCI_ERS_RESULT_NEED_RESET;
6622 }
6623
6624 /**
6625  * igb_io_slot_reset - called after the pci bus has been reset.
6626  * @pdev: Pointer to PCI device
6627  *
6628  * Restart the card from scratch, as if from a cold-boot. Implementation
6629  * resembles the first-half of the igb_resume routine.
6630  */
6631 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6632 {
6633         struct net_device *netdev = pci_get_drvdata(pdev);
6634         struct igb_adapter *adapter = netdev_priv(netdev);
6635         struct e1000_hw *hw = &adapter->hw;
6636         pci_ers_result_t result;
6637         int err;
6638
6639         if (pci_enable_device_mem(pdev)) {
6640                 dev_err(&pdev->dev,
6641                         "Cannot re-enable PCI device after reset.\n");
6642                 result = PCI_ERS_RESULT_DISCONNECT;
6643         } else {
6644                 pci_set_master(pdev);
6645                 pci_restore_state(pdev);
6646                 pci_save_state(pdev);
6647
6648                 pci_enable_wake(pdev, PCI_D3hot, 0);
6649                 pci_enable_wake(pdev, PCI_D3cold, 0);
6650
6651                 igb_reset(adapter);
6652                 wr32(E1000_WUS, ~0);
6653                 result = PCI_ERS_RESULT_RECOVERED;
6654         }
6655
6656         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6657         if (err) {
6658                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6659                         "failed 0x%0x\n", err);
6660                 /* non-fatal, continue */
6661         }
6662
6663         return result;
6664 }
6665
6666 /**
6667  * igb_io_resume - called when traffic can start flowing again.
6668  * @pdev: Pointer to PCI device
6669  *
6670  * This callback is called when the error recovery driver tells us that
6671  * its OK to resume normal operation. Implementation resembles the
6672  * second-half of the igb_resume routine.
6673  */
6674 static void igb_io_resume(struct pci_dev *pdev)
6675 {
6676         struct net_device *netdev = pci_get_drvdata(pdev);
6677         struct igb_adapter *adapter = netdev_priv(netdev);
6678
6679         if (netif_running(netdev)) {
6680                 if (igb_up(adapter)) {
6681                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6682                         return;
6683                 }
6684         }
6685
6686         netif_device_attach(netdev);
6687
6688         /* let the f/w know that the h/w is now under the control of the
6689          * driver. */
6690         igb_get_hw_control(adapter);
6691 }
6692
6693 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6694                              u8 qsel)
6695 {
6696         u32 rar_low, rar_high;
6697         struct e1000_hw *hw = &adapter->hw;
6698
6699         /* HW expects these in little endian so we reverse the byte order
6700          * from network order (big endian) to little endian
6701          */
6702         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6703                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6704         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6705
6706         /* Indicate to hardware the Address is Valid. */
6707         rar_high |= E1000_RAH_AV;
6708
6709         if (hw->mac.type == e1000_82575)
6710                 rar_high |= E1000_RAH_POOL_1 * qsel;
6711         else
6712                 rar_high |= E1000_RAH_POOL_1 << qsel;
6713
6714         wr32(E1000_RAL(index), rar_low);
6715         wrfl();
6716         wr32(E1000_RAH(index), rar_high);
6717         wrfl();
6718 }
6719
6720 static int igb_set_vf_mac(struct igb_adapter *adapter,
6721                           int vf, unsigned char *mac_addr)
6722 {
6723         struct e1000_hw *hw = &adapter->hw;
6724         /* VF MAC addresses start at end of receive addresses and moves
6725          * torwards the first, as a result a collision should not be possible */
6726         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6727
6728         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6729
6730         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6731
6732         return 0;
6733 }
6734
6735 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6736 {
6737         struct igb_adapter *adapter = netdev_priv(netdev);
6738         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6739                 return -EINVAL;
6740         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6741         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6742         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6743                                       " change effective.");
6744         if (test_bit(__IGB_DOWN, &adapter->state)) {
6745                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6746                          " but the PF device is not up.\n");
6747                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6748                          " attempting to use the VF device.\n");
6749         }
6750         return igb_set_vf_mac(adapter, vf, mac);
6751 }
6752
6753 static int igb_link_mbps(int internal_link_speed)
6754 {
6755         switch (internal_link_speed) {
6756         case SPEED_100:
6757                 return 100;
6758         case SPEED_1000:
6759                 return 1000;
6760         default:
6761                 return 0;
6762         }
6763 }
6764
6765 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6766                                   int link_speed)
6767 {
6768         int rf_dec, rf_int;
6769         u32 bcnrc_val;
6770
6771         if (tx_rate != 0) {
6772                 /* Calculate the rate factor values to set */
6773                 rf_int = link_speed / tx_rate;
6774                 rf_dec = (link_speed - (rf_int * tx_rate));
6775                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6776
6777                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6778                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6779                                E1000_RTTBCNRC_RF_INT_MASK);
6780                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6781         } else {
6782                 bcnrc_val = 0;
6783         }
6784
6785         wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6786         wr32(E1000_RTTBCNRC, bcnrc_val);
6787 }
6788
6789 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6790 {
6791         int actual_link_speed, i;
6792         bool reset_rate = false;
6793
6794         /* VF TX rate limit was not set or not supported */
6795         if ((adapter->vf_rate_link_speed == 0) ||
6796             (adapter->hw.mac.type != e1000_82576))
6797                 return;
6798
6799         actual_link_speed = igb_link_mbps(adapter->link_speed);
6800         if (actual_link_speed != adapter->vf_rate_link_speed) {
6801                 reset_rate = true;
6802                 adapter->vf_rate_link_speed = 0;
6803                 dev_info(&adapter->pdev->dev,
6804                          "Link speed has been changed. VF Transmit "
6805                          "rate is disabled\n");
6806         }
6807
6808         for (i = 0; i < adapter->vfs_allocated_count; i++) {
6809                 if (reset_rate)
6810                         adapter->vf_data[i].tx_rate = 0;
6811
6812                 igb_set_vf_rate_limit(&adapter->hw, i,
6813                                       adapter->vf_data[i].tx_rate,
6814                                       actual_link_speed);
6815         }
6816 }
6817
6818 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6819 {
6820         struct igb_adapter *adapter = netdev_priv(netdev);
6821         struct e1000_hw *hw = &adapter->hw;
6822         int actual_link_speed;
6823
6824         if (hw->mac.type != e1000_82576)
6825                 return -EOPNOTSUPP;
6826
6827         actual_link_speed = igb_link_mbps(adapter->link_speed);
6828         if ((vf >= adapter->vfs_allocated_count) ||
6829             (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6830             (tx_rate < 0) || (tx_rate > actual_link_speed))
6831                 return -EINVAL;
6832
6833         adapter->vf_rate_link_speed = actual_link_speed;
6834         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6835         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6836
6837         return 0;
6838 }
6839
6840 static int igb_ndo_get_vf_config(struct net_device *netdev,
6841                                  int vf, struct ifla_vf_info *ivi)
6842 {
6843         struct igb_adapter *adapter = netdev_priv(netdev);
6844         if (vf >= adapter->vfs_allocated_count)
6845                 return -EINVAL;
6846         ivi->vf = vf;
6847         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6848         ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6849         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6850         ivi->qos = adapter->vf_data[vf].pf_qos;
6851         return 0;
6852 }
6853
6854 static void igb_vmm_control(struct igb_adapter *adapter)
6855 {
6856         struct e1000_hw *hw = &adapter->hw;
6857         u32 reg;
6858
6859         switch (hw->mac.type) {
6860         case e1000_82575:
6861         default:
6862                 /* replication is not supported for 82575 */
6863                 return;
6864         case e1000_82576:
6865                 /* notify HW that the MAC is adding vlan tags */
6866                 reg = rd32(E1000_DTXCTL);
6867                 reg |= E1000_DTXCTL_VLAN_ADDED;
6868                 wr32(E1000_DTXCTL, reg);
6869         case e1000_82580:
6870                 /* enable replication vlan tag stripping */
6871                 reg = rd32(E1000_RPLOLR);
6872                 reg |= E1000_RPLOLR_STRVLAN;
6873                 wr32(E1000_RPLOLR, reg);
6874         case e1000_i350:
6875                 /* none of the above registers are supported by i350 */
6876                 break;
6877         }
6878
6879         if (adapter->vfs_allocated_count) {
6880                 igb_vmdq_set_loopback_pf(hw, true);
6881                 igb_vmdq_set_replication_pf(hw, true);
6882                 igb_vmdq_set_anti_spoofing_pf(hw, true,
6883                                                 adapter->vfs_allocated_count);
6884         } else {
6885                 igb_vmdq_set_loopback_pf(hw, false);
6886                 igb_vmdq_set_replication_pf(hw, false);
6887         }
6888 }
6889
6890 /* igb_main.c */