drivers/net/ethernet/intel/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2011 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/bitops.h>
32 #include <linux/vmalloc.h>
33 #include <linux/pagemap.h>
34 #include <linux/netdevice.h>
35 #include <linux/ipv6.h>
36 #include <linux/slab.h>
37 #include <net/checksum.h>
38 #include <net/ip6_checksum.h>
39 #include <linux/net_tstamp.h>
40 #include <linux/mii.h>
41 #include <linux/ethtool.h>
42 #include <linux/if.h>
43 #include <linux/if_vlan.h>
44 #include <linux/pci.h>
45 #include <linux/pci-aspm.h>
46 #include <linux/delay.h>
47 #include <linux/interrupt.h>
48 #include <linux/if_ether.h>
49 #include <linux/aer.h>
50 #include <linux/prefetch.h>
51 #ifdef CONFIG_IGB_DCA
52 #include <linux/dca.h>
53 #endif
54 #include "igb.h"
55
56 #define MAJ 3
57 #define MIN 0
58 #define BUILD 6
59 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
60 __stringify(BUILD) "-k"
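/* With MAJ = 3, MIN = 0 and BUILD = 6 this expands to the version string "3.0.6-k". */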
61 char igb_driver_name[] = "igb";
62 char igb_driver_version[] = DRV_VERSION;
63 static const char igb_driver_string[] =
64                                 "Intel(R) Gigabit Ethernet Network Driver";
65 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
66
67 static const struct e1000_info *igb_info_tbl[] = {
68         [board_82575] = &e1000_82575_info,
69 };
70
71 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
87         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
88         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
89         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
90         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
91         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
92         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
93         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
94         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
95         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
96         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
97         /* required last entry */
98         {0, }
99 };
100
101 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
102
103 void igb_reset(struct igb_adapter *);
104 static int igb_setup_all_tx_resources(struct igb_adapter *);
105 static int igb_setup_all_rx_resources(struct igb_adapter *);
106 static void igb_free_all_tx_resources(struct igb_adapter *);
107 static void igb_free_all_rx_resources(struct igb_adapter *);
108 static void igb_setup_mrqc(struct igb_adapter *);
109 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
110 static void __devexit igb_remove(struct pci_dev *pdev);
111 static void igb_init_hw_timer(struct igb_adapter *adapter);
112 static int igb_sw_init(struct igb_adapter *);
113 static int igb_open(struct net_device *);
114 static int igb_close(struct net_device *);
115 static void igb_configure_tx(struct igb_adapter *);
116 static void igb_configure_rx(struct igb_adapter *);
117 static void igb_clean_all_tx_rings(struct igb_adapter *);
118 static void igb_clean_all_rx_rings(struct igb_adapter *);
119 static void igb_clean_tx_ring(struct igb_ring *);
120 static void igb_clean_rx_ring(struct igb_ring *);
121 static void igb_set_rx_mode(struct net_device *);
122 static void igb_update_phy_info(unsigned long);
123 static void igb_watchdog(unsigned long);
124 static void igb_watchdog_task(struct work_struct *);
125 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
126 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
127                                                  struct rtnl_link_stats64 *stats);
128 static int igb_change_mtu(struct net_device *, int);
129 static int igb_set_mac(struct net_device *, void *);
130 static void igb_set_uta(struct igb_adapter *adapter);
131 static irqreturn_t igb_intr(int irq, void *);
132 static irqreturn_t igb_intr_msi(int irq, void *);
133 static irqreturn_t igb_msix_other(int irq, void *);
134 static irqreturn_t igb_msix_ring(int irq, void *);
135 #ifdef CONFIG_IGB_DCA
136 static void igb_update_dca(struct igb_q_vector *);
137 static void igb_setup_dca(struct igb_adapter *);
138 #endif /* CONFIG_IGB_DCA */
139 static bool igb_clean_tx_irq(struct igb_q_vector *);
140 static int igb_poll(struct napi_struct *, int);
141 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
142 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
143 static void igb_tx_timeout(struct net_device *);
144 static void igb_reset_task(struct work_struct *);
145 static void igb_vlan_mode(struct net_device *netdev, u32 features);
146 static void igb_vlan_rx_add_vid(struct net_device *, u16);
147 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
148 static void igb_restore_vlan(struct igb_adapter *);
149 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
150 static void igb_ping_all_vfs(struct igb_adapter *);
151 static void igb_msg_task(struct igb_adapter *);
152 static void igb_vmm_control(struct igb_adapter *);
153 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
154 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
155 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
156 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
157                                int vf, u16 vlan, u8 qos);
158 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
159 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
160                                  struct ifla_vf_info *ivi);
161 static void igb_check_vf_rate_limit(struct igb_adapter *);
162
163 #ifdef CONFIG_PM
164 static int igb_suspend(struct pci_dev *, pm_message_t);
165 static int igb_resume(struct pci_dev *);
166 #endif
167 static void igb_shutdown(struct pci_dev *);
168 #ifdef CONFIG_IGB_DCA
169 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
170 static struct notifier_block dca_notifier = {
171         .notifier_call  = igb_notify_dca,
172         .next           = NULL,
173         .priority       = 0
174 };
175 #endif
176 #ifdef CONFIG_NET_POLL_CONTROLLER
177 /* for netdump / net console */
178 static void igb_netpoll(struct net_device *);
179 #endif
180 #ifdef CONFIG_PCI_IOV
181 static unsigned int max_vfs = 0;
182 module_param(max_vfs, uint, 0);
183 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
184                  "per physical function");
185 #endif /* CONFIG_PCI_IOV */
186
187 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
188                      pci_channel_state_t);
189 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
190 static void igb_io_resume(struct pci_dev *);
191
192 static struct pci_error_handlers igb_err_handler = {
193         .error_detected = igb_io_error_detected,
194         .slot_reset = igb_io_slot_reset,
195         .resume = igb_io_resume,
196 };
197
198
199 static struct pci_driver igb_driver = {
200         .name     = igb_driver_name,
201         .id_table = igb_pci_tbl,
202         .probe    = igb_probe,
203         .remove   = __devexit_p(igb_remove),
204 #ifdef CONFIG_PM
205         /* Power Management Hooks */
206         .suspend  = igb_suspend,
207         .resume   = igb_resume,
208 #endif
209         .shutdown = igb_shutdown,
210         .err_handler = &igb_err_handler
211 };
212
213 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
214 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
215 MODULE_LICENSE("GPL");
216 MODULE_VERSION(DRV_VERSION);
217
218 struct igb_reg_info {
219         u32 ofs;
220         char *name;
221 };
222
223 static const struct igb_reg_info igb_reg_info_tbl[] = {
224
225         /* General Registers */
226         {E1000_CTRL, "CTRL"},
227         {E1000_STATUS, "STATUS"},
228         {E1000_CTRL_EXT, "CTRL_EXT"},
229
230         /* Interrupt Registers */
231         {E1000_ICR, "ICR"},
232
233         /* RX Registers */
234         {E1000_RCTL, "RCTL"},
235         {E1000_RDLEN(0), "RDLEN"},
236         {E1000_RDH(0), "RDH"},
237         {E1000_RDT(0), "RDT"},
238         {E1000_RXDCTL(0), "RXDCTL"},
239         {E1000_RDBAL(0), "RDBAL"},
240         {E1000_RDBAH(0), "RDBAH"},
241
242         /* TX Registers */
243         {E1000_TCTL, "TCTL"},
244         {E1000_TDBAL(0), "TDBAL"},
245         {E1000_TDBAH(0), "TDBAH"},
246         {E1000_TDLEN(0), "TDLEN"},
247         {E1000_TDH(0), "TDH"},
248         {E1000_TDT(0), "TDT"},
249         {E1000_TXDCTL(0), "TXDCTL"},
250         {E1000_TDFH, "TDFH"},
251         {E1000_TDFT, "TDFT"},
252         {E1000_TDFHS, "TDFHS"},
253         {E1000_TDFPC, "TDFPC"},
254
255         /* List Terminator */
256         {}
257 };
258
259 /*
260  * igb_regdump - register printout routine
261  */
262 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
263 {
264         int n = 0;
265         char rname[16];
266         u32 regs[8];
267
268         switch (reginfo->ofs) {
269         case E1000_RDLEN(0):
270                 for (n = 0; n < 4; n++)
271                         regs[n] = rd32(E1000_RDLEN(n));
272                 break;
273         case E1000_RDH(0):
274                 for (n = 0; n < 4; n++)
275                         regs[n] = rd32(E1000_RDH(n));
276                 break;
277         case E1000_RDT(0):
278                 for (n = 0; n < 4; n++)
279                         regs[n] = rd32(E1000_RDT(n));
280                 break;
281         case E1000_RXDCTL(0):
282                 for (n = 0; n < 4; n++)
283                         regs[n] = rd32(E1000_RXDCTL(n));
284                 break;
285         case E1000_RDBAL(0):
286                 for (n = 0; n < 4; n++)
287                         regs[n] = rd32(E1000_RDBAL(n));
288                 break;
289         case E1000_RDBAH(0):
290                 for (n = 0; n < 4; n++)
291                         regs[n] = rd32(E1000_RDBAH(n));
292                 break;
293         case E1000_TDBAL(0):
294                 for (n = 0; n < 4; n++)
295                         regs[n] = rd32(E1000_TDBAL(n));
296                 break;
297         case E1000_TDBAH(0):
298                 for (n = 0; n < 4; n++)
299                         regs[n] = rd32(E1000_TDBAH(n));
300                 break;
301         case E1000_TDLEN(0):
302                 for (n = 0; n < 4; n++)
303                         regs[n] = rd32(E1000_TDLEN(n));
304                 break;
305         case E1000_TDH(0):
306                 for (n = 0; n < 4; n++)
307                         regs[n] = rd32(E1000_TDH(n));
308                 break;
309         case E1000_TDT(0):
310                 for (n = 0; n < 4; n++)
311                         regs[n] = rd32(E1000_TDT(n));
312                 break;
313         case E1000_TXDCTL(0):
314                 for (n = 0; n < 4; n++)
315                         regs[n] = rd32(E1000_TXDCTL(n));
316                 break;
317         default:
318                 printk(KERN_INFO "%-15s %08x\n",
319                         reginfo->name, rd32(reginfo->ofs));
320                 return;
321         }
322
323         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
324         printk(KERN_INFO "%-15s ", rname);
325         for (n = 0; n < 4; n++)
326                 printk(KERN_CONT "%08x ", regs[n]);
327         printk(KERN_CONT "\n");
328 }
329
330 /*
331  * igb_dump - Print registers, tx-rings and rx-rings
332  */
333 static void igb_dump(struct igb_adapter *adapter)
334 {
335         struct net_device *netdev = adapter->netdev;
336         struct e1000_hw *hw = &adapter->hw;
337         struct igb_reg_info *reginfo;
338         int n = 0;
339         struct igb_ring *tx_ring;
340         union e1000_adv_tx_desc *tx_desc;
341         struct my_u0 { u64 a; u64 b; } *u0;
342         struct igb_buffer *buffer_info;
343         struct igb_ring *rx_ring;
344         union e1000_adv_rx_desc *rx_desc;
345         u32 staterr;
346         int i = 0;
347
348         if (!netif_msg_hw(adapter))
349                 return;
350
351         /* Print netdevice Info */
352         if (netdev) {
353                 dev_info(&adapter->pdev->dev, "Net device Info\n");
354                 printk(KERN_INFO "Device Name     state            "
355                         "trans_start      last_rx\n");
356                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
357                 netdev->name,
358                 netdev->state,
359                 netdev->trans_start,
360                 netdev->last_rx);
361         }
362
363         /* Print Registers */
364         dev_info(&adapter->pdev->dev, "Register Dump\n");
365         printk(KERN_INFO " Register Name   Value\n");
366         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
367              reginfo->name; reginfo++) {
368                 igb_regdump(hw, reginfo);
369         }
370
371         /* Print TX Ring Summary */
372         if (!netdev || !netif_running(netdev))
373                 goto exit;
374
375         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
376         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
377                 " leng ntw timestamp\n");
378         for (n = 0; n < adapter->num_tx_queues; n++) {
379                 tx_ring = adapter->tx_ring[n];
380                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
381                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
382                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
383                            (u64)buffer_info->dma,
384                            buffer_info->length,
385                            buffer_info->next_to_watch,
386                            (u64)buffer_info->time_stamp);
387         }
388
389         /* Print TX Rings */
390         if (!netif_msg_tx_done(adapter))
391                 goto rx_ring_summary;
392
393         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
394
395         /* Transmit Descriptor Formats
396          *
397          * Advanced Transmit Descriptor
398          *   +--------------------------------------------------------------+
399          * 0 |         Buffer Address [63:0]                                |
400          *   +--------------------------------------------------------------+
401          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
402          *   +--------------------------------------------------------------+
403          *   63      46 45    40 39 38 36 35 32 31   24             15       0
404          */
405
406         for (n = 0; n < adapter->num_tx_queues; n++) {
407                 tx_ring = adapter->tx_ring[n];
408                 printk(KERN_INFO "------------------------------------\n");
409                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
410                 printk(KERN_INFO "------------------------------------\n");
411                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
412                         "[PlPOCIStDDM Ln] [bi->dma       ] "
413                         "leng  ntw timestamp        bi->skb\n");
414
415                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
416                         tx_desc = IGB_TX_DESC(tx_ring, i);
417                         buffer_info = &tx_ring->buffer_info[i];
418                         u0 = (struct my_u0 *)tx_desc;
419                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
420                                 " %04X  %3X %016llX %p", i,
421                                 le64_to_cpu(u0->a),
422                                 le64_to_cpu(u0->b),
423                                 (u64)buffer_info->dma,
424                                 buffer_info->length,
425                                 buffer_info->next_to_watch,
426                                 (u64)buffer_info->time_stamp,
427                                 buffer_info->skb);
428                         if (i == tx_ring->next_to_use &&
429                                 i == tx_ring->next_to_clean)
430                                 printk(KERN_CONT " NTC/U\n");
431                         else if (i == tx_ring->next_to_use)
432                                 printk(KERN_CONT " NTU\n");
433                         else if (i == tx_ring->next_to_clean)
434                                 printk(KERN_CONT " NTC\n");
435                         else
436                                 printk(KERN_CONT "\n");
437
438                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
439                                 print_hex_dump(KERN_INFO, "",
440                                         DUMP_PREFIX_ADDRESS,
441                                         16, 1, phys_to_virt(buffer_info->dma),
442                                         buffer_info->length, true);
443                 }
444         }
445
446         /* Print RX Rings Summary */
447 rx_ring_summary:
448         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
449         printk(KERN_INFO "Queue [NTU] [NTC]\n");
450         for (n = 0; n < adapter->num_rx_queues; n++) {
451                 rx_ring = adapter->rx_ring[n];
452                 printk(KERN_INFO " %5d %5X %5X\n", n,
453                            rx_ring->next_to_use, rx_ring->next_to_clean);
454         }
455
456         /* Print RX Rings */
457         if (!netif_msg_rx_status(adapter))
458                 goto exit;
459
460         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
461
462         /* Advanced Receive Descriptor (Read) Format
463          *    63                                           1        0
464          *    +-----------------------------------------------------+
465          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
466          *    +----------------------------------------------+------+
467          *  8 |       Header Buffer Address [63:1]           |  DD  |
468          *    +-----------------------------------------------------+
469          *
470          *
471          * Advanced Receive Descriptor (Write-Back) Format
472          *
473          *   63       48 47    32 31  30      21 20 17 16   4 3     0
474          *   +------------------------------------------------------+
475          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
476          *   | Checksum   Ident  |   |           |    | Type | Type |
477          *   +------------------------------------------------------+
478          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
479          *   +------------------------------------------------------+
480          *   63       48 47    32 31            20 19               0
481          */
482
483         for (n = 0; n < adapter->num_rx_queues; n++) {
484                 rx_ring = adapter->rx_ring[n];
485                 printk(KERN_INFO "------------------------------------\n");
486                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
487                 printk(KERN_INFO "------------------------------------\n");
488                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
489                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
490                         "<-- Adv Rx Read format\n");
491                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
492                         "[vl er S cks ln] ---------------- [bi->skb] "
493                         "<-- Adv Rx Write-Back format\n");
494
495                 for (i = 0; i < rx_ring->count; i++) {
496                         buffer_info = &rx_ring->buffer_info[i];
497                         rx_desc = IGB_RX_DESC(rx_ring, i);
498                         u0 = (struct my_u0 *)rx_desc;
499                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
500                         if (staterr & E1000_RXD_STAT_DD) {
501                                 /* Descriptor Done */
502                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
503                                         "%016llX ---------------- %p", i,
504                                         le64_to_cpu(u0->a),
505                                         le64_to_cpu(u0->b),
506                                         buffer_info->skb);
507                         } else {
508                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
509                                         "%016llX %016llX %p", i,
510                                         le64_to_cpu(u0->a),
511                                         le64_to_cpu(u0->b),
512                                         (u64)buffer_info->dma,
513                                         buffer_info->skb);
514
515                                 if (netif_msg_pktdata(adapter)) {
516                                         print_hex_dump(KERN_INFO, "",
517                                                 DUMP_PREFIX_ADDRESS,
518                                                 16, 1,
519                                                 phys_to_virt(buffer_info->dma),
520                                                 IGB_RX_HDR_LEN, true);
521                                         print_hex_dump(KERN_INFO, "",
522                                           DUMP_PREFIX_ADDRESS,
523                                           16, 1,
524                                           phys_to_virt(
525                                             buffer_info->page_dma +
526                                             buffer_info->page_offset),
527                                           PAGE_SIZE/2, true);
528                                 }
529                         }
530
531                         if (i == rx_ring->next_to_use)
532                                 printk(KERN_CONT " NTU\n");
533                         else if (i == rx_ring->next_to_clean)
534                                 printk(KERN_CONT " NTC\n");
535                         else
536                                 printk(KERN_CONT "\n");
537
538                 }
539         }
540
541 exit:
542         return;
543 }
544
545
546 /**
547  * igb_read_clock - read raw cycle counter (to be used by time counter)
548  */
549 static cycle_t igb_read_clock(const struct cyclecounter *tc)
550 {
551         struct igb_adapter *adapter =
552                 container_of(tc, struct igb_adapter, cycles);
553         struct e1000_hw *hw = &adapter->hw;
554         u64 stamp = 0;
555         int shift = 0;
556
557         /*
558          * The timestamp latches on the lowest register read. For the 82580
559          * the lowest register is SYSTIMR rather than SYSTIML.  However, we never
560          * adjusted TIMINCA, so SYSTIMR always reads as zero and can be ignored.
561          */
562         if (hw->mac.type == e1000_82580) {
563                 stamp = rd32(E1000_SYSTIMR) >> 8;
564                 shift = IGB_82580_TSYNC_SHIFT;
565         }
566
567         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
568         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
569         return stamp;
570 }
571
572 /**
573  * igb_get_hw_dev - return device
574  * used by hardware layer to print debugging information
575  **/
576 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
577 {
578         struct igb_adapter *adapter = hw->back;
579         return adapter->netdev;
580 }
581
582 /**
583  * igb_init_module - Driver Registration Routine
584  *
585  * igb_init_module is the first routine called when the driver is
586  * loaded. All it does is register with the PCI subsystem.
587  **/
588 static int __init igb_init_module(void)
589 {
590         int ret;
591         printk(KERN_INFO "%s - version %s\n",
592                igb_driver_string, igb_driver_version);
593
594         printk(KERN_INFO "%s\n", igb_copyright);
595
596 #ifdef CONFIG_IGB_DCA
597         dca_register_notify(&dca_notifier);
598 #endif
599         ret = pci_register_driver(&igb_driver);
600         return ret;
601 }
602
603 module_init(igb_init_module);
604
605 /**
606  * igb_exit_module - Driver Exit Cleanup Routine
607  *
608  * igb_exit_module is called just before the driver is removed
609  * from memory.
610  **/
611 static void __exit igb_exit_module(void)
612 {
613 #ifdef CONFIG_IGB_DCA
614         dca_unregister_notify(&dca_notifier);
615 #endif
616         pci_unregister_driver(&igb_driver);
617 }
618
619 module_exit(igb_exit_module);
620
621 #define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
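/*
 * Q_IDX_82576(i) interleaves queue i across the two halves of the 82576
 * register space: even queues map to index i/2, odd queues to 8 + i/2,
 * so queues 0, 1, 2, 3 land on register indices 0, 8, 1, 9.
 */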
622 /**
623  * igb_cache_ring_register - Descriptor ring to register mapping
624  * @adapter: board private structure to initialize
625  *
626  * Once we know the feature-set enabled for the device, we'll cache
627  * the register offset the descriptor ring is assigned to.
628  **/
629 static void igb_cache_ring_register(struct igb_adapter *adapter)
630 {
631         int i = 0, j = 0;
632         u32 rbase_offset = adapter->vfs_allocated_count;
633
634         switch (adapter->hw.mac.type) {
635         case e1000_82576:
636                 /* The queues are allocated for virtualization such that VF 0
637                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
638                  * In order to avoid collision we start at the first free queue
639                  * and continue consuming queues in the same sequence
640                  */
641                 if (adapter->vfs_allocated_count) {
642                         for (; i < adapter->rss_queues; i++)
643                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
644                                                                Q_IDX_82576(i);
645                 }
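                /* Fall through: any remaining queues (and all queues on the
                 * other MAC types) get the default 1:1 ring-to-register
                 * mapping applied below. */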
646         case e1000_82575:
647         case e1000_82580:
648         case e1000_i350:
649         default:
650                 for (; i < adapter->num_rx_queues; i++)
651                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
652                 for (; j < adapter->num_tx_queues; j++)
653                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
654                 break;
655         }
656 }
657
658 static void igb_free_queues(struct igb_adapter *adapter)
659 {
660         int i;
661
662         for (i = 0; i < adapter->num_tx_queues; i++) {
663                 kfree(adapter->tx_ring[i]);
664                 adapter->tx_ring[i] = NULL;
665         }
666         for (i = 0; i < adapter->num_rx_queues; i++) {
667                 kfree(adapter->rx_ring[i]);
668                 adapter->rx_ring[i] = NULL;
669         }
670         adapter->num_rx_queues = 0;
671         adapter->num_tx_queues = 0;
672 }
673
674 /**
675  * igb_alloc_queues - Allocate memory for all rings
676  * @adapter: board private structure to initialize
677  *
678  * We allocate one ring per queue at run-time since we don't know the
679  * number of queues at compile-time.
680  **/
681 static int igb_alloc_queues(struct igb_adapter *adapter)
682 {
683         struct igb_ring *ring;
684         int i;
685
686         for (i = 0; i < adapter->num_tx_queues; i++) {
687                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
688                 if (!ring)
689                         goto err;
690                 ring->count = adapter->tx_ring_count;
691                 ring->queue_index = i;
692                 ring->dev = &adapter->pdev->dev;
693                 ring->netdev = adapter->netdev;
694                 /* For 82575, context index must be unique per ring. */
695                 if (adapter->hw.mac.type == e1000_82575)
696                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
697                 adapter->tx_ring[i] = ring;
698         }
699
700         for (i = 0; i < adapter->num_rx_queues; i++) {
701                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
702                 if (!ring)
703                         goto err;
704                 ring->count = adapter->rx_ring_count;
705                 ring->queue_index = i;
706                 ring->dev = &adapter->pdev->dev;
707                 ring->netdev = adapter->netdev;
708                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
709                 /* set flag indicating ring supports SCTP checksum offload */
710                 if (adapter->hw.mac.type >= e1000_82576)
711                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
712                 adapter->rx_ring[i] = ring;
713         }
714
715         igb_cache_ring_register(adapter);
716
717         return 0;
718
719 err:
720         igb_free_queues(adapter);
721
722         return -ENOMEM;
723 }
724
725 #define IGB_N0_QUEUE -1
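/* IGB_N0_QUEUE is the sentinel meaning "no Rx/Tx queue is attached to this vector". */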
726 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
727 {
728         u32 msixbm = 0;
729         struct igb_adapter *adapter = q_vector->adapter;
730         struct e1000_hw *hw = &adapter->hw;
731         u32 ivar, index;
732         int rx_queue = IGB_N0_QUEUE;
733         int tx_queue = IGB_N0_QUEUE;
734
735         if (q_vector->rx_ring)
736                 rx_queue = q_vector->rx_ring->reg_idx;
737         if (q_vector->tx_ring)
738                 tx_queue = q_vector->tx_ring->reg_idx;
739
740         switch (hw->mac.type) {
741         case e1000_82575:
742                 /* The 82575 assigns vectors using a bitmask, which matches the
743                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
744                    or more queues to a vector, we write the appropriate bits
745                    into the MSIXBM register for that vector. */
746                 if (rx_queue > IGB_N0_QUEUE)
747                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
748                 if (tx_queue > IGB_N0_QUEUE)
749                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
750                 if (!adapter->msix_entries && msix_vector == 0)
751                         msixbm |= E1000_EIMS_OTHER;
752                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
753                 q_vector->eims_value = msixbm;
754                 break;
755         case e1000_82576:
756                 /* 82576 uses a table-based method for assigning vectors.
757                    Each queue has a single entry in the table to which we write
758                    a vector number along with a "valid" bit.  Sadly, the layout
759                    of the table is somewhat counterintuitive. */
760                 if (rx_queue > IGB_N0_QUEUE) {
761                         index = (rx_queue & 0x7);
762                         ivar = array_rd32(E1000_IVAR0, index);
763                         if (rx_queue < 8) {
764                                 /* vector goes into low byte of register */
765                                 ivar = ivar & 0xFFFFFF00;
766                                 ivar |= msix_vector | E1000_IVAR_VALID;
767                         } else {
768                                 /* vector goes into third byte of register */
769                                 ivar = ivar & 0xFF00FFFF;
770                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
771                         }
772                         array_wr32(E1000_IVAR0, index, ivar);
773                 }
774                 if (tx_queue > IGB_N0_QUEUE) {
775                         index = (tx_queue & 0x7);
776                         ivar = array_rd32(E1000_IVAR0, index);
777                         if (tx_queue < 8) {
778                                 /* vector goes into second byte of register */
779                                 ivar = ivar & 0xFFFF00FF;
780                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
781                         } else {
782                                 /* vector goes into high byte of register */
783                                 ivar = ivar & 0x00FFFFFF;
784                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
785                         }
786                         array_wr32(E1000_IVAR0, index, ivar);
787                 }
788                 q_vector->eims_value = 1 << msix_vector;
789                 break;
790         case e1000_82580:
791         case e1000_i350:
792                 /* 82580 and i350 use the same table-based approach as the 82576, but with
793                    fewer IVAR entries; each entry therefore covers a pair of queues. */
794                 if (rx_queue > IGB_N0_QUEUE) {
795                         index = (rx_queue >> 1);
796                         ivar = array_rd32(E1000_IVAR0, index);
797                         if (rx_queue & 0x1) {
798                                 /* vector goes into third byte of register */
799                                 ivar = ivar & 0xFF00FFFF;
800                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
801                         } else {
802                                 /* vector goes into low byte of register */
803                                 ivar = ivar & 0xFFFFFF00;
804                                 ivar |= msix_vector | E1000_IVAR_VALID;
805                         }
806                         array_wr32(E1000_IVAR0, index, ivar);
807                 }
808                 if (tx_queue > IGB_N0_QUEUE) {
809                         index = (tx_queue >> 1);
810                         ivar = array_rd32(E1000_IVAR0, index);
811                         if (tx_queue & 0x1) {
812                                 /* vector goes into high byte of register */
813                                 ivar = ivar & 0x00FFFFFF;
814                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
815                         } else {
816                                 /* vector goes into second byte of register */
817                                 ivar = ivar & 0xFFFF00FF;
818                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
819                         }
820                         array_wr32(E1000_IVAR0, index, ivar);
821                 }
822                 q_vector->eims_value = 1 << msix_vector;
823                 break;
824         default:
825                 BUG();
826                 break;
827         }
828
829         /* add q_vector eims value to global eims_enable_mask */
830         adapter->eims_enable_mask |= q_vector->eims_value;
831
832         /* configure q_vector to set itr on first interrupt */
833         q_vector->set_itr = 1;
834 }
835
836 /**
837  * igb_configure_msix - Configure MSI-X hardware
838  *
839  * igb_configure_msix sets up the hardware to properly
840  * generate MSI-X interrupts.
841  **/
842 static void igb_configure_msix(struct igb_adapter *adapter)
843 {
844         u32 tmp;
845         int i, vector = 0;
846         struct e1000_hw *hw = &adapter->hw;
847
848         adapter->eims_enable_mask = 0;
849
850         /* set vector for other causes, i.e. link changes */
851         switch (hw->mac.type) {
852         case e1000_82575:
853                 tmp = rd32(E1000_CTRL_EXT);
854                 /* enable MSI-X PBA support*/
855                 tmp |= E1000_CTRL_EXT_PBA_CLR;
856
857                 /* Auto-Mask interrupts upon ICR read. */
858                 tmp |= E1000_CTRL_EXT_EIAME;
859                 tmp |= E1000_CTRL_EXT_IRCA;
860
861                 wr32(E1000_CTRL_EXT, tmp);
862
863                 /* enable msix_other interrupt */
864                 array_wr32(E1000_MSIXBM(0), vector++,
865                                       E1000_EIMS_OTHER);
866                 adapter->eims_other = E1000_EIMS_OTHER;
867
868                 break;
869
870         case e1000_82576:
871         case e1000_82580:
872         case e1000_i350:
873                 /* Turn on MSI-X capability first, or our settings
874                  * won't stick.  And it will take days to debug. */
875                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
876                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
877                                 E1000_GPIE_NSICR);
878
879                 /* enable msix_other interrupt */
880                 adapter->eims_other = 1 << vector;
881                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
882
883                 wr32(E1000_IVAR_MISC, tmp);
884                 break;
885         default:
886                 /* do nothing, since nothing else supports MSI-X */
887                 break;
888         } /* switch (hw->mac.type) */
889
890         adapter->eims_enable_mask |= adapter->eims_other;
891
892         for (i = 0; i < adapter->num_q_vectors; i++)
893                 igb_assign_vector(adapter->q_vector[i], vector++);
894
895         wrfl();
896 }
897
898 /**
899  * igb_request_msix - Initialize MSI-X interrupts
900  *
901  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
902  * kernel.
903  **/
904 static int igb_request_msix(struct igb_adapter *adapter)
905 {
906         struct net_device *netdev = adapter->netdev;
907         struct e1000_hw *hw = &adapter->hw;
908         int i, err = 0, vector = 0;
909
910         err = request_irq(adapter->msix_entries[vector].vector,
911                           igb_msix_other, 0, netdev->name, adapter);
912         if (err)
913                 goto out;
914         vector++;
915
916         for (i = 0; i < adapter->num_q_vectors; i++) {
917                 struct igb_q_vector *q_vector = adapter->q_vector[i];
918
919                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
920
921                 if (q_vector->rx_ring && q_vector->tx_ring)
922                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
923                                 q_vector->rx_ring->queue_index);
924                 else if (q_vector->tx_ring)
925                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
926                                 q_vector->tx_ring->queue_index);
927                 else if (q_vector->rx_ring)
928                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
929                                 q_vector->rx_ring->queue_index);
930                 else
931                         sprintf(q_vector->name, "%s-unused", netdev->name);
932
933                 err = request_irq(adapter->msix_entries[vector].vector,
934                                   igb_msix_ring, 0, q_vector->name,
935                                   q_vector);
936                 if (err)
937                         goto out;
938                 vector++;
939         }
940
941         igb_configure_msix(adapter);
942         return 0;
943 out:
944         return err;
945 }
946
947 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
948 {
949         if (adapter->msix_entries) {
950                 pci_disable_msix(adapter->pdev);
951                 kfree(adapter->msix_entries);
952                 adapter->msix_entries = NULL;
953         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
954                 pci_disable_msi(adapter->pdev);
955         }
956 }
957
958 /**
959  * igb_free_q_vectors - Free memory allocated for interrupt vectors
960  * @adapter: board private structure to initialize
961  *
962  * This function frees the memory allocated to the q_vectors.  In addition if
963  * NAPI is enabled it will delete any references to the NAPI struct prior
964  * to freeing the q_vector.
965  **/
966 static void igb_free_q_vectors(struct igb_adapter *adapter)
967 {
968         int v_idx;
969
970         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
971                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
972                 adapter->q_vector[v_idx] = NULL;
973                 if (!q_vector)
974                         continue;
975                 netif_napi_del(&q_vector->napi);
976                 kfree(q_vector);
977         }
978         adapter->num_q_vectors = 0;
979 }
980
981 /**
982  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
983  *
984  * This function resets the device so that it has 0 rx queues, tx queues, and
985  * MSI-X interrupts allocated.
986  */
987 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
988 {
989         igb_free_queues(adapter);
990         igb_free_q_vectors(adapter);
991         igb_reset_interrupt_capability(adapter);
992 }
993
994 /**
995  * igb_set_interrupt_capability - set MSI or MSI-X if supported
996  *
997  * Attempt to configure interrupts using the best available
998  * capabilities of the hardware and kernel.
999  **/
1000 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1001 {
1002         int err;
1003         int numvecs, i;
1004
1005         /* Number of supported queues. */
1006         adapter->num_rx_queues = adapter->rss_queues;
1007         if (adapter->vfs_allocated_count)
1008                 adapter->num_tx_queues = 1;
1009         else
1010                 adapter->num_tx_queues = adapter->rss_queues;
1011
1012         /* start with one vector for every rx queue */
1013         numvecs = adapter->num_rx_queues;
1014
1015         /* if tx handler is separate add 1 for every tx queue */
1016         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1017                 numvecs += adapter->num_tx_queues;
1018
1019         /* store the number of vectors reserved for queues */
1020         adapter->num_q_vectors = numvecs;
1021
1022         /* add 1 vector for link status interrupts */
1023         numvecs++;
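        /* e.g. with 4 RSS queues and no queue pairing this requests
         * 4 Rx + 4 Tx + 1 link/other = 9 MSI-X vectors. */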
1024         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1025                                         GFP_KERNEL);
1026         if (!adapter->msix_entries)
1027                 goto msi_only;
1028
1029         for (i = 0; i < numvecs; i++)
1030                 adapter->msix_entries[i].entry = i;
1031
1032         err = pci_enable_msix(adapter->pdev,
1033                               adapter->msix_entries,
1034                               numvecs);
1035         if (err == 0)
1036                 goto out;
1037
1038         igb_reset_interrupt_capability(adapter);
1039
1040         /* If we can't do MSI-X, try MSI */
1041 msi_only:
1042 #ifdef CONFIG_PCI_IOV
1043         /* disable SR-IOV for non MSI-X configurations */
1044         if (adapter->vf_data) {
1045                 struct e1000_hw *hw = &adapter->hw;
1046                 /* disable iov and allow time for transactions to clear */
1047                 pci_disable_sriov(adapter->pdev);
1048                 msleep(500);
1049
1050                 kfree(adapter->vf_data);
1051                 adapter->vf_data = NULL;
1052                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1053                 wrfl();
1054                 msleep(100);
1055                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1056         }
1057 #endif
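        /* Without MSI-X everything collapses to a single Tx/Rx queue pair
         * served by one q_vector, using MSI if available or legacy INTx. */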
1058         adapter->vfs_allocated_count = 0;
1059         adapter->rss_queues = 1;
1060         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1061         adapter->num_rx_queues = 1;
1062         adapter->num_tx_queues = 1;
1063         adapter->num_q_vectors = 1;
1064         if (!pci_enable_msi(adapter->pdev))
1065                 adapter->flags |= IGB_FLAG_HAS_MSI;
1066 out:
1067         /* Notify the stack of the (possibly) reduced queue counts. */
1068         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1069         return netif_set_real_num_rx_queues(adapter->netdev,
1070                                             adapter->num_rx_queues);
1071 }
1072
1073 /**
1074  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1075  * @adapter: board private structure to initialize
1076  *
1077  * We allocate one q_vector per queue interrupt.  If allocation fails we
1078  * return -ENOMEM.
1079  **/
1080 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1081 {
1082         struct igb_q_vector *q_vector;
1083         struct e1000_hw *hw = &adapter->hw;
1084         int v_idx;
1085
1086         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1087                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1088                 if (!q_vector)
1089                         goto err_out;
1090                 q_vector->adapter = adapter;
1091                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1092                 q_vector->itr_val = IGB_START_ITR;
1093                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1094                 adapter->q_vector[v_idx] = q_vector;
1095         }
1096         return 0;
1097
1098 err_out:
1099         igb_free_q_vectors(adapter);
1100         return -ENOMEM;
1101 }
1102
1103 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1104                                       int ring_idx, int v_idx)
1105 {
1106         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1107
1108         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1109         q_vector->rx_ring->q_vector = q_vector;
1110         q_vector->itr_val = adapter->rx_itr_setting;
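        /* itr settings of 1-3 act as dynamic-ITR mode selectors rather than
         * raw throttle values, so fall back to the default starting ITR. */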
1111         if (q_vector->itr_val && q_vector->itr_val <= 3)
1112                 q_vector->itr_val = IGB_START_ITR;
1113 }
1114
1115 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1116                                       int ring_idx, int v_idx)
1117 {
1118         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1119
1120         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1121         q_vector->tx_ring->q_vector = q_vector;
1122         q_vector->itr_val = adapter->tx_itr_setting;
1123         if (q_vector->itr_val && q_vector->itr_val <= 3)
1124                 q_vector->itr_val = IGB_START_ITR;
1125 }
1126
1127 /**
1128  * igb_map_ring_to_vector - maps allocated queues to vectors
1129  *
1130  * This function maps the recently allocated queues to vectors.
1131  **/
1132 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1133 {
1134         int i;
1135         int v_idx = 0;
1136
1137         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1138             (adapter->num_q_vectors < adapter->num_tx_queues))
1139                 return -ENOMEM;
1140
1141         if (adapter->num_q_vectors >=
1142             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1143                 for (i = 0; i < adapter->num_rx_queues; i++)
1144                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1145                 for (i = 0; i < adapter->num_tx_queues; i++)
1146                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1147         } else {
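                /* Not enough vectors for every ring: pair the Tx and Rx ring
                 * with the same index onto a shared vector, then map any
                 * leftover Tx rings to their own vectors. */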
1148                 for (i = 0; i < adapter->num_rx_queues; i++) {
1149                         if (i < adapter->num_tx_queues)
1150                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1151                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1152                 }
1153                 for (; i < adapter->num_tx_queues; i++)
1154                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1155         }
1156         return 0;
1157 }
1158
1159 /**
1160  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1161  *
1162  * This function initializes the interrupts and allocates all of the queues.
1163  **/
1164 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1165 {
1166         struct pci_dev *pdev = adapter->pdev;
1167         int err;
1168
1169         err = igb_set_interrupt_capability(adapter);
1170         if (err)
1171                 return err;
1172
1173         err = igb_alloc_q_vectors(adapter);
1174         if (err) {
1175                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1176                 goto err_alloc_q_vectors;
1177         }
1178
1179         err = igb_alloc_queues(adapter);
1180         if (err) {
1181                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1182                 goto err_alloc_queues;
1183         }
1184
1185         err = igb_map_ring_to_vector(adapter);
1186         if (err) {
1187                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1188                 goto err_map_queues;
1189         }
1190
1191
1192         return 0;
1193 err_map_queues:
1194         igb_free_queues(adapter);
1195 err_alloc_queues:
1196         igb_free_q_vectors(adapter);
1197 err_alloc_q_vectors:
1198         igb_reset_interrupt_capability(adapter);
1199         return err;
1200 }
1201
1202 /**
1203  * igb_request_irq - initialize interrupts
1204  *
1205  * Attempts to configure interrupts using the best available
1206  * capabilities of the hardware and kernel.
1207  **/
1208 static int igb_request_irq(struct igb_adapter *adapter)
1209 {
1210         struct net_device *netdev = adapter->netdev;
1211         struct pci_dev *pdev = adapter->pdev;
1212         int err = 0;
1213
1214         if (adapter->msix_entries) {
1215                 err = igb_request_msix(adapter);
1216                 if (!err)
1217                         goto request_done;
1218                 /* fall back to MSI */
1219                 igb_clear_interrupt_scheme(adapter);
1220                 if (!pci_enable_msi(adapter->pdev))
1221                         adapter->flags |= IGB_FLAG_HAS_MSI;
1222                 igb_free_all_tx_resources(adapter);
1223                 igb_free_all_rx_resources(adapter);
1224                 adapter->num_tx_queues = 1;
1225                 adapter->num_rx_queues = 1;
1226                 adapter->num_q_vectors = 1;
1227                 err = igb_alloc_q_vectors(adapter);
1228                 if (err) {
1229                         dev_err(&pdev->dev,
1230                                 "Unable to allocate memory for vectors\n");
1231                         goto request_done;
1232                 }
1233                 err = igb_alloc_queues(adapter);
1234                 if (err) {
1235                         dev_err(&pdev->dev,
1236                                 "Unable to allocate memory for queues\n");
1237                         igb_free_q_vectors(adapter);
1238                         goto request_done;
1239                 }
1240                 igb_setup_all_tx_resources(adapter);
1241                 igb_setup_all_rx_resources(adapter);
1242         } else {
1243                 igb_assign_vector(adapter->q_vector[0], 0);
1244         }
1245
1246         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1247                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1248                                   netdev->name, adapter);
1249                 if (!err)
1250                         goto request_done;
1251
1252                 /* fall back to legacy interrupts */
1253                 igb_reset_interrupt_capability(adapter);
1254                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1255         }
1256
1257         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1258                           netdev->name, adapter);
1259
1260         if (err)
1261                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1262                         err);
1263
1264 request_done:
1265         return err;
1266 }
1267
1268 static void igb_free_irq(struct igb_adapter *adapter)
1269 {
1270         if (adapter->msix_entries) {
1271                 int vector = 0, i;
1272
1273                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1274
1275                 for (i = 0; i < adapter->num_q_vectors; i++) {
1276                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1277                         free_irq(adapter->msix_entries[vector++].vector,
1278                                  q_vector);
1279                 }
1280         } else {
1281                 free_irq(adapter->pdev->irq, adapter);
1282         }
1283 }
1284
1285 /**
1286  * igb_irq_disable - Mask off interrupt generation on the NIC
1287  * @adapter: board private structure
1288  **/
1289 static void igb_irq_disable(struct igb_adapter *adapter)
1290 {
1291         struct e1000_hw *hw = &adapter->hw;
1292
1293         /*
1294          * We need to be careful when disabling interrupts.  The VFs are also
1295          * mapped into these registers, so clearing bits blindly can cause
1296          * issues for the VF drivers; only clear the bits we actually set.
1297          */
1298         if (adapter->msix_entries) {
1299                 u32 regval = rd32(E1000_EIAM);
1300                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1301                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1302                 regval = rd32(E1000_EIAC);
1303                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1304         }
1305
1306         wr32(E1000_IAM, 0);
1307         wr32(E1000_IMC, ~0);
1308         wrfl();
1309         if (adapter->msix_entries) {
1310                 int i;
1311                 for (i = 0; i < adapter->num_q_vectors; i++)
1312                         synchronize_irq(adapter->msix_entries[i].vector);
1313         } else {
1314                 synchronize_irq(adapter->pdev->irq);
1315         }
1316 }
1317
1318 /**
1319  * igb_irq_enable - Enable default interrupt generation settings
1320  * @adapter: board private structure
1321  **/
1322 static void igb_irq_enable(struct igb_adapter *adapter)
1323 {
1324         struct e1000_hw *hw = &adapter->hw;
1325
1326         if (adapter->msix_entries) {
1327                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1328                 u32 regval = rd32(E1000_EIAC);
1329                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1330                 regval = rd32(E1000_EIAM);
1331                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1332                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1333                 if (adapter->vfs_allocated_count) {
1334                         wr32(E1000_MBVFIMR, 0xFF);
1335                         ims |= E1000_IMS_VMMB;
1336                 }
1337                 if (adapter->hw.mac.type == e1000_82580)
1338                         ims |= E1000_IMS_DRSTA;
1339
1340                 wr32(E1000_IMS, ims);
1341         } else {
1342                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1343                                 E1000_IMS_DRSTA);
1344                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1345                                 E1000_IMS_DRSTA);
1346         }
1347 }
1348
1349 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1350 {
1351         struct e1000_hw *hw = &adapter->hw;
1352         u16 vid = adapter->hw.mng_cookie.vlan_id;
1353         u16 old_vid = adapter->mng_vlan_id;
1354
1355         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1356                 /* add VID to filter table */
1357                 igb_vfta_set(hw, vid, true);
1358                 adapter->mng_vlan_id = vid;
1359         } else {
1360                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1361         }
1362
1363         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1364             (vid != old_vid) &&
1365             !test_bit(old_vid, adapter->active_vlans)) {
1366                 /* remove VID from filter table */
1367                 igb_vfta_set(hw, old_vid, false);
1368         }
1369 }
1370
1371 /**
1372  * igb_release_hw_control - release control of the h/w to f/w
1373  * @adapter: address of board private structure
1374  *
1375  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1376  * For ASF and Pass Through versions of f/w this means that the
1377  * driver is no longer loaded.
1378  *
1379  **/
1380 static void igb_release_hw_control(struct igb_adapter *adapter)
1381 {
1382         struct e1000_hw *hw = &adapter->hw;
1383         u32 ctrl_ext;
1384
1385         /* Let firmware take over control of h/w */
1386         ctrl_ext = rd32(E1000_CTRL_EXT);
1387         wr32(E1000_CTRL_EXT,
1388                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1389 }
1390
1391 /**
1392  * igb_get_hw_control - get control of the h/w from f/w
1393  * @adapter: address of board private structure
1394  *
1395  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1396  * For ASF and Pass Through versions of f/w this means that
1397  * the driver is loaded.
1398  *
1399  **/
1400 static void igb_get_hw_control(struct igb_adapter *adapter)
1401 {
1402         struct e1000_hw *hw = &adapter->hw;
1403         u32 ctrl_ext;
1404
1405         /* Let firmware know the driver has taken over */
1406         ctrl_ext = rd32(E1000_CTRL_EXT);
1407         wr32(E1000_CTRL_EXT,
1408                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1409 }
1410
1411 /**
1412  * igb_configure - configure the hardware for RX and TX
1413  * @adapter: private board structure
1414  **/
1415 static void igb_configure(struct igb_adapter *adapter)
1416 {
1417         struct net_device *netdev = adapter->netdev;
1418         int i;
1419
1420         igb_get_hw_control(adapter);
1421         igb_set_rx_mode(netdev);
1422
1423         igb_restore_vlan(adapter);
1424
1425         igb_setup_tctl(adapter);
1426         igb_setup_mrqc(adapter);
1427         igb_setup_rctl(adapter);
1428
1429         igb_configure_tx(adapter);
1430         igb_configure_rx(adapter);
1431
1432         igb_rx_fifo_flush_82575(&adapter->hw);
1433
1434         /* call igb_desc_unused which always leaves
1435          * at least 1 descriptor unused to make sure
1436          * next_to_use != next_to_clean */
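             /*
              * For example, assuming the default ring size of 256 descriptors
              * and next_to_use == next_to_clean == 0, igb_desc_unused()
              * reports 255 free slots, so one entry always stays empty and a
              * completely full ring is never mistaken for an empty one.
              */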
1437         for (i = 0; i < adapter->num_rx_queues; i++) {
1438                 struct igb_ring *ring = adapter->rx_ring[i];
1439                 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1440         }
1441 }
1442
1443 /**
1444  * igb_power_up_link - Power up the phy/serdes link
1445  * @adapter: address of board private structure
1446  **/
1447 void igb_power_up_link(struct igb_adapter *adapter)
1448 {
1449         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1450                 igb_power_up_phy_copper(&adapter->hw);
1451         else
1452                 igb_power_up_serdes_link_82575(&adapter->hw);
1453 }
1454
1455 /**
1456  * igb_power_down_link - Power down the phy/serdes link
1457  * @adapter: address of board private structure
1458  */
1459 static void igb_power_down_link(struct igb_adapter *adapter)
1460 {
1461         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1462                 igb_power_down_phy_copper_82575(&adapter->hw);
1463         else
1464                 igb_shutdown_serdes_link_82575(&adapter->hw);
1465 }
1466
1467 /**
1468  * igb_up - Open the interface and prepare it to handle traffic
1469  * @adapter: board private structure
1470  **/
1471 int igb_up(struct igb_adapter *adapter)
1472 {
1473         struct e1000_hw *hw = &adapter->hw;
1474         int i;
1475
1476         /* hardware has been reset, we need to reload some things */
1477         igb_configure(adapter);
1478
1479         clear_bit(__IGB_DOWN, &adapter->state);
1480
1481         for (i = 0; i < adapter->num_q_vectors; i++) {
1482                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1483                 napi_enable(&q_vector->napi);
1484         }
1485         if (adapter->msix_entries)
1486                 igb_configure_msix(adapter);
1487         else
1488                 igb_assign_vector(adapter->q_vector[0], 0);
1489
1490         /* Clear any pending interrupts. */
1491         rd32(E1000_ICR);
1492         igb_irq_enable(adapter);
1493
1494         /* notify VFs that reset has been completed */
1495         if (adapter->vfs_allocated_count) {
1496                 u32 reg_data = rd32(E1000_CTRL_EXT);
1497                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1498                 wr32(E1000_CTRL_EXT, reg_data);
1499         }
1500
1501         netif_tx_start_all_queues(adapter->netdev);
1502
1503         /* start the watchdog. */
1504         hw->mac.get_link_status = 1;
1505         schedule_work(&adapter->watchdog_task);
1506
1507         return 0;
1508 }
1509
1510 void igb_down(struct igb_adapter *adapter)
1511 {
1512         struct net_device *netdev = adapter->netdev;
1513         struct e1000_hw *hw = &adapter->hw;
1514         u32 tctl, rctl;
1515         int i;
1516
1517         /* signal that we're down so the interrupt handler does not
1518          * reschedule our watchdog timer */
1519         set_bit(__IGB_DOWN, &adapter->state);
1520
1521         /* disable receives in the hardware */
1522         rctl = rd32(E1000_RCTL);
1523         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1524         /* flush and sleep below */
1525
1526         netif_tx_stop_all_queues(netdev);
1527
1528         /* disable transmits in the hardware */
1529         tctl = rd32(E1000_TCTL);
1530         tctl &= ~E1000_TCTL_EN;
1531         wr32(E1000_TCTL, tctl);
1532         /* flush both disables and wait for them to finish */
1533         wrfl();
1534         msleep(10);
1535
1536         for (i = 0; i < adapter->num_q_vectors; i++) {
1537                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1538                 napi_disable(&q_vector->napi);
1539         }
1540
1541         igb_irq_disable(adapter);
1542
1543         del_timer_sync(&adapter->watchdog_timer);
1544         del_timer_sync(&adapter->phy_info_timer);
1545
1546         netif_carrier_off(netdev);
1547
1548         /* record the stats before reset */
1549         spin_lock(&adapter->stats64_lock);
1550         igb_update_stats(adapter, &adapter->stats64);
1551         spin_unlock(&adapter->stats64_lock);
1552
1553         adapter->link_speed = 0;
1554         adapter->link_duplex = 0;
1555
1556         if (!pci_channel_offline(adapter->pdev))
1557                 igb_reset(adapter);
1558         igb_clean_all_tx_rings(adapter);
1559         igb_clean_all_rx_rings(adapter);
1560 #ifdef CONFIG_IGB_DCA
1561
1562         /* since we reset the hardware, DCA settings were cleared */
1563         igb_setup_dca(adapter);
1564 #endif
1565 }
1566
1567 void igb_reinit_locked(struct igb_adapter *adapter)
1568 {
1569         WARN_ON(in_interrupt());
1570         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1571                 msleep(1);
1572         igb_down(adapter);
1573         igb_up(adapter);
1574         clear_bit(__IGB_RESETTING, &adapter->state);
1575 }
1576
1577 void igb_reset(struct igb_adapter *adapter)
1578 {
1579         struct pci_dev *pdev = adapter->pdev;
1580         struct e1000_hw *hw = &adapter->hw;
1581         struct e1000_mac_info *mac = &hw->mac;
1582         struct e1000_fc_info *fc = &hw->fc;
1583         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1584         u16 hwm;
1585
1586         /* Repartition PBA for MTUs greater than 9K.
1587          * CTRL.RST must be asserted for the change to take effect.
1588          */
1589         switch (mac->type) {
1590         case e1000_i350:
1591         case e1000_82580:
1592                 pba = rd32(E1000_RXPBS);
1593                 pba = igb_rxpbs_adjust_82580(pba);
1594                 break;
1595         case e1000_82576:
1596                 pba = rd32(E1000_RXPBS);
1597                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1598                 break;
1599         case e1000_82575:
1600         default:
1601                 pba = E1000_PBA_34K;
1602                 break;
1603         }
1604
1605         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1606             (mac->type < e1000_82576)) {
1607                 /* adjust PBA for jumbo frames */
1608                 wr32(E1000_PBA, pba);
1609
1610                 /* To maintain wire speed transmits, the Tx FIFO should be
1611                  * large enough to accommodate two full transmit packets,
1612                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1613                  * the Rx FIFO should be large enough to accommodate at least
1614                  * one full receive packet and is similarly rounded up and
1615                  * expressed in KB. */
1616                 pba = rd32(E1000_PBA);
1617                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1618                 tx_space = pba >> 16;
1619                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1620                 pba &= 0xffff;
1621                 /* the Tx FIFO also stores 16 bytes of descriptor info per packet,
1622                  * but does not include the Ethernet FCS because hardware appends it */
1623                 min_tx_space = (adapter->max_frame_size +
1624                                 sizeof(union e1000_adv_tx_desc) -
1625                                 ETH_FCS_LEN) * 2;
1626                 min_tx_space = ALIGN(min_tx_space, 1024);
1627                 min_tx_space >>= 10;
1628                 /* software strips receive CRC, so leave room for it */
1629                 min_rx_space = adapter->max_frame_size;
1630                 min_rx_space = ALIGN(min_rx_space, 1024);
1631                 min_rx_space >>= 10;
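                     /*
                      * As a worked example, assuming a standard 1500-byte MTU
                      * (max_frame_size = 1522) and 16-byte advanced Tx
                      * descriptors: min_tx_space = (1522 + 16 - 4) * 2 = 3068,
                      * rounded up to 3072 bytes = 3 KB, while min_rx_space =
                      * 1522 rounded up to 2048 bytes = 2 KB.
                      */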
1632
1633                 /* If current Tx allocation is less than the min Tx FIFO size,
1634                  * and the min Tx FIFO size is less than the current Rx FIFO
1635                  * allocation, take space away from current Rx allocation */
1636                 if (tx_space < min_tx_space &&
1637                     ((min_tx_space - tx_space) < pba)) {
1638                         pba = pba - (min_tx_space - tx_space);
1639
1640                         /* if short on rx space, rx wins and must trump tx
1641                          * adjustment */
1642                         if (pba < min_rx_space)
1643                                 pba = min_rx_space;
1644                 }
1645                 wr32(E1000_PBA, pba);
1646         }
1647
1648         /* flow control settings */
1649         /* The high water mark must be low enough to fit one full frame
1650          * (or the size used for early receive) above it in the Rx FIFO.
1651          * Set it to the lower of:
1652          * - 90% of the Rx FIFO size, or
1653          * - the full Rx FIFO size minus one full frame */
1654         hwm = min(((pba << 10) * 9 / 10),
1655                         ((pba << 10) - 2 * adapter->max_frame_size));
1656
1657         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1658         fc->low_water = fc->high_water - 16;
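             /*
              * As a rough worked example, assuming the 82575 default
              * E1000_PBA_34K (34 KB) and a 1522-byte max frame:
              * hwm = min(34816 * 9 / 10, 34816 - 2 * 1522)
              *     = min(31334, 31772) = 31334,
              * so high_water = 31334 & 0xFFF0 = 31328 and low_water = 31312.
              */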
1659         fc->pause_time = 0xFFFF;
1660         fc->send_xon = 1;
1661         fc->current_mode = fc->requested_mode;
1662
1663         /* disable transmit and receive for all VFs before the reset */
1664         if (adapter->vfs_allocated_count) {
1665                 int i;
1666                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1667                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1668
1669                 /* ping all the active vfs to let them know we are going down */
1670                 igb_ping_all_vfs(adapter);
1671
1672                 /* disable transmits and receives */
1673                 wr32(E1000_VFRE, 0);
1674                 wr32(E1000_VFTE, 0);
1675         }
1676
1677         /* Allow time for pending master requests to run */
1678         hw->mac.ops.reset_hw(hw);
1679         wr32(E1000_WUC, 0);
1680
1681         if (hw->mac.ops.init_hw(hw))
1682                 dev_err(&pdev->dev, "Hardware Error\n");
1683         if (hw->mac.type > e1000_82580) {
1684                 if (adapter->flags & IGB_FLAG_DMAC) {
1685                         u32 reg;
1686
1687                         /*
1688                          * DMA Coalescing high water mark needs to be higher
1689                          * than the Rx threshold.  The Rx threshold is
1690                          * currently pba - 6, so we should use a high water
1691                          * mark of pba - 4. */
1692                         hwm = (pba - 4) << 10;
1693
1694                         reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1695                                & E1000_DMACR_DMACTHR_MASK);
1696
1697                         /* transition to L0s or L1 if available */
1698                         reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1699
1700                         /* watchdog timer = ~1000 usec, in 32 usec intervals */
1701                         reg |= (1000 >> 5);
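                             /* 1000 >> 5 = 31 intervals of 32 usec each,
                              * i.e. a watchdog period of roughly 992 usec */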
1702                         wr32(E1000_DMACR, reg);
1703
1704                         /* no lower threshold to disable coalescing (smart fifo);
1705                          * UTRESH = 0 */
1706                         wr32(E1000_DMCRTRH, 0);
1707
1708                         /* set hwm to PBA -  2 * max frame size */
1709                         wr32(E1000_FCRTC, hwm);
1710
1711                         /*
1712                          * This sets the time to wait before requesting a transition
1713                          * to a low power state to the number of usecs needed to
1714                          * receive one 512 byte frame at gigabit line rate
1715                          */
1716                         reg = rd32(E1000_DMCTLX);
1717                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
1718
1719                         /* Delay 255 usec before entering Lx state. */
1720                         reg |= 0xFF;
1721                         wr32(E1000_DMCTLX, reg);
1722
1723                         /* free space in Tx packet buffer to wake from DMAC */
1724                         wr32(E1000_DMCTXTH,
1725                              (IGB_MIN_TXPBSIZE -
1726                              (IGB_TX_BUF_4096 + adapter->max_frame_size))
1727                              >> 6);
1728
1729                         /* make low power state decision controlled by DMAC */
1730                         reg = rd32(E1000_PCIEMISC);
1731                         reg |= E1000_PCIEMISC_LX_DECISION;
1732                         wr32(E1000_PCIEMISC, reg);
1733                 } /* end if IGB_FLAG_DMAC set */
1734         }
1735         if (hw->mac.type == e1000_82580) {
1736                 u32 reg = rd32(E1000_PCIEMISC);
1737                 wr32(E1000_PCIEMISC,
1738                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1739         }
1740         if (!netif_running(adapter->netdev))
1741                 igb_power_down_link(adapter);
1742
1743         igb_update_mng_vlan(adapter);
1744
1745         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1746         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1747
1748         igb_get_phy_info(hw);
1749 }
1750
1751 static u32 igb_fix_features(struct net_device *netdev, u32 features)
1752 {
1753         /*
1754          * Since there is no support for separate rx/tx vlan accel
1755          * enable/disable make sure tx flag is always in same state as rx.
1756          */
1757         if (features & NETIF_F_HW_VLAN_RX)
1758                 features |= NETIF_F_HW_VLAN_TX;
1759         else
1760                 features &= ~NETIF_F_HW_VLAN_TX;
1761
1762         return features;
1763 }
1764
1765 static int igb_set_features(struct net_device *netdev, u32 features)
1766 {
1767         struct igb_adapter *adapter = netdev_priv(netdev);
1768         int i;
1769         u32 changed = netdev->features ^ features;
1770
1771         for (i = 0; i < adapter->num_rx_queues; i++) {
1772                 if (features & NETIF_F_RXCSUM)
1773                         adapter->rx_ring[i]->flags |= IGB_RING_FLAG_RX_CSUM;
1774                 else
1775                         adapter->rx_ring[i]->flags &= ~IGB_RING_FLAG_RX_CSUM;
1776         }
1777
1778         if (changed & NETIF_F_HW_VLAN_RX)
1779                 igb_vlan_mode(netdev, features);
1780
1781         return 0;
1782 }
1783
1784 static const struct net_device_ops igb_netdev_ops = {
1785         .ndo_open               = igb_open,
1786         .ndo_stop               = igb_close,
1787         .ndo_start_xmit         = igb_xmit_frame,
1788         .ndo_get_stats64        = igb_get_stats64,
1789         .ndo_set_rx_mode        = igb_set_rx_mode,
1790         .ndo_set_mac_address    = igb_set_mac,
1791         .ndo_change_mtu         = igb_change_mtu,
1792         .ndo_do_ioctl           = igb_ioctl,
1793         .ndo_tx_timeout         = igb_tx_timeout,
1794         .ndo_validate_addr      = eth_validate_addr,
1795         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1796         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1797         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1798         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1799         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1800         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1801 #ifdef CONFIG_NET_POLL_CONTROLLER
1802         .ndo_poll_controller    = igb_netpoll,
1803 #endif
1804         .ndo_fix_features       = igb_fix_features,
1805         .ndo_set_features       = igb_set_features,
1806 };
1807
1808 /**
1809  * igb_probe - Device Initialization Routine
1810  * @pdev: PCI device information struct
1811  * @ent: entry in igb_pci_tbl
1812  *
1813  * Returns 0 on success, negative on failure
1814  *
1815  * igb_probe initializes an adapter identified by a pci_dev structure.
1816  * The OS initialization, configuring of the adapter private structure,
1817  * and a hardware reset occur.
1818  **/
1819 static int __devinit igb_probe(struct pci_dev *pdev,
1820                                const struct pci_device_id *ent)
1821 {
1822         struct net_device *netdev;
1823         struct igb_adapter *adapter;
1824         struct e1000_hw *hw;
1825         u16 eeprom_data = 0;
1826         s32 ret_val;
1827         static int global_quad_port_a; /* global quad port a indication */
1828         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1829         unsigned long mmio_start, mmio_len;
1830         int err, pci_using_dac;
1831         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1832         u8 part_str[E1000_PBANUM_LENGTH];
1833
1834         /* Catch broken hardware that put the wrong VF device ID in
1835          * the PCIe SR-IOV capability.
1836          */
1837         if (pdev->is_virtfn) {
1838                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1839                      pci_name(pdev), pdev->vendor, pdev->device);
1840                 return -EINVAL;
1841         }
1842
1843         err = pci_enable_device_mem(pdev);
1844         if (err)
1845                 return err;
1846
1847         pci_using_dac = 0;
1848         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1849         if (!err) {
1850                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1851                 if (!err)
1852                         pci_using_dac = 1;
1853         } else {
1854                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1855                 if (err) {
1856                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1857                         if (err) {
1858                                 dev_err(&pdev->dev, "No usable DMA "
1859                                         "configuration, aborting\n");
1860                                 goto err_dma;
1861                         }
1862                 }
1863         }
1864
1865         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1866                                            IORESOURCE_MEM),
1867                                            igb_driver_name);
1868         if (err)
1869                 goto err_pci_reg;
1870
1871         pci_enable_pcie_error_reporting(pdev);
1872
1873         pci_set_master(pdev);
1874         pci_save_state(pdev);
1875
1876         err = -ENOMEM;
1877         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1878                                    IGB_MAX_TX_QUEUES);
1879         if (!netdev)
1880                 goto err_alloc_etherdev;
1881
1882         SET_NETDEV_DEV(netdev, &pdev->dev);
1883
1884         pci_set_drvdata(pdev, netdev);
1885         adapter = netdev_priv(netdev);
1886         adapter->netdev = netdev;
1887         adapter->pdev = pdev;
1888         hw = &adapter->hw;
1889         hw->back = adapter;
1890         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1891
1892         mmio_start = pci_resource_start(pdev, 0);
1893         mmio_len = pci_resource_len(pdev, 0);
1894
1895         err = -EIO;
1896         hw->hw_addr = ioremap(mmio_start, mmio_len);
1897         if (!hw->hw_addr)
1898                 goto err_ioremap;
1899
1900         netdev->netdev_ops = &igb_netdev_ops;
1901         igb_set_ethtool_ops(netdev);
1902         netdev->watchdog_timeo = 5 * HZ;
1903
1904         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1905
1906         netdev->mem_start = mmio_start;
1907         netdev->mem_end = mmio_start + mmio_len;
1908
1909         /* PCI config space info */
1910         hw->vendor_id = pdev->vendor;
1911         hw->device_id = pdev->device;
1912         hw->revision_id = pdev->revision;
1913         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1914         hw->subsystem_device_id = pdev->subsystem_device;
1915
1916         /* Copy the default MAC, PHY and NVM function pointers */
1917         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1918         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1919         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1920         /* Initialize skew-specific constants */
1921         err = ei->get_invariants(hw);
1922         if (err)
1923                 goto err_sw_init;
1924
1925         /* setup the private structure */
1926         err = igb_sw_init(adapter);
1927         if (err)
1928                 goto err_sw_init;
1929
1930         igb_get_bus_info_pcie(hw);
1931
1932         hw->phy.autoneg_wait_to_complete = false;
1933
1934         /* Copper options */
1935         if (hw->phy.media_type == e1000_media_type_copper) {
1936                 hw->phy.mdix = AUTO_ALL_MODES;
1937                 hw->phy.disable_polarity_correction = false;
1938                 hw->phy.ms_type = e1000_ms_hw_default;
1939         }
1940
1941         if (igb_check_reset_block(hw))
1942                 dev_info(&pdev->dev,
1943                         "PHY reset is blocked due to SOL/IDER session.\n");
1944
1945         netdev->hw_features = NETIF_F_SG |
1946                            NETIF_F_IP_CSUM |
1947                            NETIF_F_IPV6_CSUM |
1948                            NETIF_F_TSO |
1949                            NETIF_F_TSO6 |
1950                            NETIF_F_RXCSUM |
1951                            NETIF_F_HW_VLAN_RX;
1952
1953         netdev->features = netdev->hw_features |
1954                            NETIF_F_HW_VLAN_TX |
1955                            NETIF_F_HW_VLAN_FILTER;
1956
1957         netdev->vlan_features |= NETIF_F_TSO;
1958         netdev->vlan_features |= NETIF_F_TSO6;
1959         netdev->vlan_features |= NETIF_F_IP_CSUM;
1960         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1961         netdev->vlan_features |= NETIF_F_SG;
1962
1963         if (pci_using_dac) {
1964                 netdev->features |= NETIF_F_HIGHDMA;
1965                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1966         }
1967
1968         if (hw->mac.type >= e1000_82576) {
1969                 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1970                 netdev->features |= NETIF_F_SCTP_CSUM;
1971         }
1972
1973         netdev->priv_flags |= IFF_UNICAST_FLT;
1974
1975         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1976
1977         /* before reading the NVM, reset the controller to put the device in a
1978          * known good starting state */
1979         hw->mac.ops.reset_hw(hw);
1980
1981         /* make sure the NVM is good */
1982         if (hw->nvm.ops.validate(hw) < 0) {
1983                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1984                 err = -EIO;
1985                 goto err_eeprom;
1986         }
1987
1988         /* copy the MAC address out of the NVM */
1989         if (hw->mac.ops.read_mac_addr(hw))
1990                 dev_err(&pdev->dev, "NVM Read Error\n");
1991
1992         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1993         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1994
1995         if (!is_valid_ether_addr(netdev->perm_addr)) {
1996                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1997                 err = -EIO;
1998                 goto err_eeprom;
1999         }
2000
2001         setup_timer(&adapter->watchdog_timer, igb_watchdog,
2002                     (unsigned long) adapter);
2003         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2004                     (unsigned long) adapter);
2005
2006         INIT_WORK(&adapter->reset_task, igb_reset_task);
2007         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2008
2009         /* Initialize link properties that are user-changeable */
2010         adapter->fc_autoneg = true;
2011         hw->mac.autoneg = true;
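             /*
              * The 0x2f below is presumably the usual e1000 advertisement mask
              * (ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF |
              *  ADVERTISE_100_FULL | ADVERTISE_1000_FULL), i.e. every
              * speed/duplex combination except 1000 Mb/s half duplex.
              */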
2012         hw->phy.autoneg_advertised = 0x2f;
2013
2014         hw->fc.requested_mode = e1000_fc_default;
2015         hw->fc.current_mode = e1000_fc_default;
2016
2017         igb_validate_mdi_setting(hw);
2018
2019         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2020          * enable the ACPI Magic Packet filter
2021          */
2022
2023         if (hw->bus.func == 0)
2024                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2025         else if (hw->mac.type >= e1000_82580)
2026                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2027                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2028                                  &eeprom_data);
2029         else if (hw->bus.func == 1)
2030                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2031
2032         if (eeprom_data & eeprom_apme_mask)
2033                 adapter->eeprom_wol |= E1000_WUFC_MAG;
2034
2035         /* now that we have the eeprom settings, apply the special cases where
2036          * the eeprom may be wrong or the board simply won't support wake on
2037          * lan on a particular port */
2038         switch (pdev->device) {
2039         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2040                 adapter->eeprom_wol = 0;
2041                 break;
2042         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2043         case E1000_DEV_ID_82576_FIBER:
2044         case E1000_DEV_ID_82576_SERDES:
2045                 /* Wake events only supported on port A for dual fiber
2046                  * regardless of eeprom setting */
2047                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2048                         adapter->eeprom_wol = 0;
2049                 break;
2050         case E1000_DEV_ID_82576_QUAD_COPPER:
2051         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2052                 /* if quad port adapter, disable WoL on all but port A */
2053                 if (global_quad_port_a != 0)
2054                         adapter->eeprom_wol = 0;
2055                 else
2056                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2057                 /* Reset for multiple quad port adapters */
2058                 if (++global_quad_port_a == 4)
2059                         global_quad_port_a = 0;
2060                 break;
2061         }
2062
2063         /* initialize the wol settings based on the eeprom settings */
2064         adapter->wol = adapter->eeprom_wol;
2065         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2066
2067         /* reset the hardware with the new settings */
2068         igb_reset(adapter);
2069
2070         /* let the f/w know that the h/w is now under the control of the
2071          * driver. */
2072         igb_get_hw_control(adapter);
2073
2074         strcpy(netdev->name, "eth%d");
2075         err = register_netdev(netdev);
2076         if (err)
2077                 goto err_register;
2078
2079         igb_vlan_mode(netdev, netdev->features);
2080
2081         /* carrier off reporting is important to ethtool even BEFORE open */
2082         netif_carrier_off(netdev);
2083
2084 #ifdef CONFIG_IGB_DCA
2085         if (dca_add_requester(&pdev->dev) == 0) {
2086                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2087                 dev_info(&pdev->dev, "DCA enabled\n");
2088                 igb_setup_dca(adapter);
2089         }
2090
2091 #endif
2092         /* do hw tstamp init after resetting */
2093         igb_init_hw_timer(adapter);
2094
2095         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2096         /* print bus type/speed/width info */
2097         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2098                  netdev->name,
2099                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2100                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2101                                                             "unknown"),
2102                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2103                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2104                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2105                    "unknown"),
2106                  netdev->dev_addr);
2107
2108         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2109         if (ret_val)
2110                 strcpy(part_str, "Unknown");
2111         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2112         dev_info(&pdev->dev,
2113                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2114                 adapter->msix_entries ? "MSI-X" :
2115                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2116                 adapter->num_rx_queues, adapter->num_tx_queues);
2117         switch (hw->mac.type) {
2118         case e1000_i350:
2119                 igb_set_eee_i350(hw);
2120                 break;
2121         default:
2122                 break;
2123         }
2124         return 0;
2125
2126 err_register:
2127         igb_release_hw_control(adapter);
2128 err_eeprom:
2129         if (!igb_check_reset_block(hw))
2130                 igb_reset_phy(hw);
2131
2132         if (hw->flash_address)
2133                 iounmap(hw->flash_address);
2134 err_sw_init:
2135         igb_clear_interrupt_scheme(adapter);
2136         iounmap(hw->hw_addr);
2137 err_ioremap:
2138         free_netdev(netdev);
2139 err_alloc_etherdev:
2140         pci_release_selected_regions(pdev,
2141                                      pci_select_bars(pdev, IORESOURCE_MEM));
2142 err_pci_reg:
2143 err_dma:
2144         pci_disable_device(pdev);
2145         return err;
2146 }
2147
2148 /**
2149  * igb_remove - Device Removal Routine
2150  * @pdev: PCI device information struct
2151  *
2152  * igb_remove is called by the PCI subsystem to alert the driver
2153  * that it should release a PCI device.  This could be caused by a
2154  * Hot-Plug event, or because the driver is going to be removed from
2155  * memory.
2156  **/
2157 static void __devexit igb_remove(struct pci_dev *pdev)
2158 {
2159         struct net_device *netdev = pci_get_drvdata(pdev);
2160         struct igb_adapter *adapter = netdev_priv(netdev);
2161         struct e1000_hw *hw = &adapter->hw;
2162
2163         /*
2164          * The watchdog timer may be rescheduled, so explicitly
2165          * prevent it from being rescheduled.
2166          */
2167         set_bit(__IGB_DOWN, &adapter->state);
2168         del_timer_sync(&adapter->watchdog_timer);
2169         del_timer_sync(&adapter->phy_info_timer);
2170
2171         cancel_work_sync(&adapter->reset_task);
2172         cancel_work_sync(&adapter->watchdog_task);
2173
2174 #ifdef CONFIG_IGB_DCA
2175         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2176                 dev_info(&pdev->dev, "DCA disabled\n");
2177                 dca_remove_requester(&pdev->dev);
2178                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2179                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2180         }
2181 #endif
2182
2183         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2184          * would have already happened in close and is redundant. */
2185         igb_release_hw_control(adapter);
2186
2187         unregister_netdev(netdev);
2188
2189         igb_clear_interrupt_scheme(adapter);
2190
2191 #ifdef CONFIG_PCI_IOV
2192         /* reclaim resources allocated to VFs */
2193         if (adapter->vf_data) {
2194                 /* disable iov and allow time for transactions to clear */
2195                 pci_disable_sriov(pdev);
2196                 msleep(500);
2197
2198                 kfree(adapter->vf_data);
2199                 adapter->vf_data = NULL;
2200                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2201                 wrfl();
2202                 msleep(100);
2203                 dev_info(&pdev->dev, "IOV Disabled\n");
2204         }
2205 #endif
2206
2207         iounmap(hw->hw_addr);
2208         if (hw->flash_address)
2209                 iounmap(hw->flash_address);
2210         pci_release_selected_regions(pdev,
2211                                      pci_select_bars(pdev, IORESOURCE_MEM));
2212
2213         free_netdev(netdev);
2214
2215         pci_disable_pcie_error_reporting(pdev);
2216
2217         pci_disable_device(pdev);
2218 }
2219
2220 /**
2221  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2222  * @adapter: board private structure to initialize
2223  *
2224  * This function initializes the vf specific data storage and then attempts to
2225  * allocate the VFs.  The reason for ordering it this way is because it is much
2226  * mor expensive time wise to disable SR-IOV than it is to allocate and free
2227  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2228  **/
2229 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2230 {
2231 #ifdef CONFIG_PCI_IOV
2232         struct pci_dev *pdev = adapter->pdev;
2233
2234         if (adapter->vfs_allocated_count) {
2235                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2236                                            sizeof(struct vf_data_storage),
2237                                            GFP_KERNEL);
2238                 /* if allocation failed then we do not support SR-IOV */
2239                 if (!adapter->vf_data) {
2240                         adapter->vfs_allocated_count = 0;
2241                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2242                                 "Data Storage\n");
2243                 }
2244         }
2245
2246         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2247                 kfree(adapter->vf_data);
2248                 adapter->vf_data = NULL;
2249 #endif /* CONFIG_PCI_IOV */
2250                 adapter->vfs_allocated_count = 0;
2251 #ifdef CONFIG_PCI_IOV
2252         } else {
2253                 unsigned char mac_addr[ETH_ALEN];
2254                 int i;
2255                 dev_info(&pdev->dev, "%d vfs allocated\n",
2256                          adapter->vfs_allocated_count);
2257                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2258                         random_ether_addr(mac_addr);
2259                         igb_set_vf_mac(adapter, i, mac_addr);
2260                 }
2261                 /* DMA Coalescing is not supported in IOV mode. */
2262                 if (adapter->flags & IGB_FLAG_DMAC)
2263                         adapter->flags &= ~IGB_FLAG_DMAC;
2264         }
2265 #endif /* CONFIG_PCI_IOV */
2266 }
2267
2268
2269 /**
2270  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2271  * @adapter: board private structure to initialize
2272  *
2273  * igb_init_hw_timer initializes the function pointer and values for the hw
2274  * timer found in hardware.
2275  **/
2276 static void igb_init_hw_timer(struct igb_adapter *adapter)
2277 {
2278         struct e1000_hw *hw = &adapter->hw;
2279
2280         switch (hw->mac.type) {
2281         case e1000_i350:
2282         case e1000_82580:
2283                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2284                 adapter->cycles.read = igb_read_clock;
2285                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2286                 adapter->cycles.mult = 1;
2287                 /*
2288                  * The 82580 timesync updates the system timer in 8ns increments
2289                  * and the value cannot be shifted.  Instead we need to shift
2290                  * the registers to generate a 64bit timer value.  As a result
2291                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2292                  * 24 in order to generate a larger value for synchronization.
2293                  */
2294                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2295                 /* disable system timer temporarily by setting bit 31 */
2296                 wr32(E1000_TSAUXC, 0x80000000);
2297                 wrfl();
2298
2299                 /* Set registers so that rollover occurs soon to test this. */
2300                 wr32(E1000_SYSTIMR, 0x00000000);
2301                 wr32(E1000_SYSTIML, 0x80000000);
2302                 wr32(E1000_SYSTIMH, 0x000000FF);
2303                 wrfl();
2304
2305                 /* enable system timer by clearing bit 31 */
2306                 wr32(E1000_TSAUXC, 0x0);
2307                 wrfl();
2308
2309                 timecounter_init(&adapter->clock,
2310                                  &adapter->cycles,
2311                                  ktime_to_ns(ktime_get_real()));
2312                 /*
2313                  * Synchronize our NIC clock against system wall clock. NIC
2314                  * time stamp reading requires ~3us per sample, each sample
2315                  * was pretty stable even under load => only require 10
2316                  * samples for each offset comparison.
2317                  */
2318                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2319                 adapter->compare.source = &adapter->clock;
2320                 adapter->compare.target = ktime_get_real;
2321                 adapter->compare.num_samples = 10;
2322                 timecompare_update(&adapter->compare, 0);
2323                 break;
2324         case e1000_82576:
2325                 /*
2326                  * Initialize hardware timer: we keep it running just in case
2327                  * that some program needs it later on.
2328                  */
2329                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2330                 adapter->cycles.read = igb_read_clock;
2331                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2332                 adapter->cycles.mult = 1;
2333                 /*
2334                  * Scale the NIC clock cycle by a large factor so that
2335                  * relatively small clock corrections can be added or
2336                  * subtracted at each clock tick. The drawbacks of a large
2337                  * factor are a) that the clock register overflows more quickly
2338                  * (not such a big deal) and b) that the increment per tick has
2339                  * to fit into 24 bits.  As a result we need to use a shift of
2340                  * 19 so we can fit a value of 16 into the TIMINCA register.
2341                  */
2342                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2343                 wr32(E1000_TIMINCA,
2344                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2345                                 (16 << IGB_82576_TSYNC_SHIFT));
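                     /*
                      * Sanity check of the scaling, assuming
                      * IGB_82576_TSYNC_SHIFT is the shift of 19 mentioned
                      * above: the increment programmed into TIMINCA is
                      * 16 << 19 = 8388608 counter units every 16 ns, and
                      * converting back with mult = 1, shift = 19 yields
                      * 8388608 >> 19 = 16 ns, as expected.
                      */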
2346
2347                 /* Set registers so that rollover occurs soon to test this. */
2348                 wr32(E1000_SYSTIML, 0x00000000);
2349                 wr32(E1000_SYSTIMH, 0xFF800000);
2350                 wrfl();
2351
2352                 timecounter_init(&adapter->clock,
2353                                  &adapter->cycles,
2354                                  ktime_to_ns(ktime_get_real()));
2355                 /*
2356                  * Synchronize our NIC clock against system wall clock. NIC
2357                  * time stamp reading requires ~3us per sample, each sample
2358                  * was pretty stable even under load => only require 10
2359                  * samples for each offset comparison.
2360                  */
2361                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2362                 adapter->compare.source = &adapter->clock;
2363                 adapter->compare.target = ktime_get_real;
2364                 adapter->compare.num_samples = 10;
2365                 timecompare_update(&adapter->compare, 0);
2366                 break;
2367         case e1000_82575:
2368                 /* 82575 does not support timesync */
2369         default:
2370                 break;
2371         }
2372
2373 }
2374
2375 /**
2376  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2377  * @adapter: board private structure to initialize
2378  *
2379  * igb_sw_init initializes the Adapter private data structure.
2380  * Fields are initialized based on PCI device information and
2381  * OS network device settings (MTU size).
2382  **/
2383 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2384 {
2385         struct e1000_hw *hw = &adapter->hw;
2386         struct net_device *netdev = adapter->netdev;
2387         struct pci_dev *pdev = adapter->pdev;
2388
2389         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2390
2391         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2392         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2393         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2394         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2395
2396         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2397                                   VLAN_HLEN;
2398         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
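             /*
              * With the standard 1500-byte MTU this works out to
              * max_frame_size = 1500 + 14 + 4 + 4 = 1522 bytes and
              * min_frame_size = 60 + 4 = 64 bytes.
              */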
2399
2400         spin_lock_init(&adapter->stats64_lock);
2401 #ifdef CONFIG_PCI_IOV
2402         switch (hw->mac.type) {
2403         case e1000_82576:
2404         case e1000_i350:
2405                 if (max_vfs > 7) {
2406                         dev_warn(&pdev->dev,
2407                                  "Maximum of 7 VFs per PF, using max\n");
2408                         adapter->vfs_allocated_count = 7;
2409                 } else
2410                         adapter->vfs_allocated_count = max_vfs;
2411                 break;
2412         default:
2413                 break;
2414         }
2415 #endif /* CONFIG_PCI_IOV */
2416         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2417         /* i350 cannot do RSS and SR-IOV at the same time */
2418         if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2419                 adapter->rss_queues = 1;
2420
2421         /*
2422          * if rss_queues > 4, or if many VFs are allocated while more than one
2423          * RSS queue is in use, then combine the queues into queue pairs in
2424          * order to conserve the limited supply of interrupt vectors
2425          */
2426         if ((adapter->rss_queues > 4) ||
2427             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2428                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
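             /*
              * For instance, if rss_queues comes out as 8 on a many-core
              * system, it exceeds 4 and queue pairing is enabled, so each
              * Tx/Rx ring pair is expected to share one q_vector (and thus
              * one MSI-X vector).
              */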
2429
2430         /* This call may decrease the number of queues */
2431         if (igb_init_interrupt_scheme(adapter)) {
2432                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2433                 return -ENOMEM;
2434         }
2435
2436         igb_probe_vfs(adapter);
2437
2438         /* Explicitly disable IRQ since the NIC can be in any state. */
2439         igb_irq_disable(adapter);
2440
2441         if (hw->mac.type == e1000_i350)
2442                 adapter->flags &= ~IGB_FLAG_DMAC;
2443
2444         set_bit(__IGB_DOWN, &adapter->state);
2445         return 0;
2446 }
2447
2448 /**
2449  * igb_open - Called when a network interface is made active
2450  * @netdev: network interface device structure
2451  *
2452  * Returns 0 on success, negative value on failure
2453  *
2454  * The open entry point is called when a network interface is made
2455  * active by the system (IFF_UP).  At this point all resources needed
2456  * for transmit and receive operations are allocated, the interrupt
2457  * handler is registered with the OS, the watchdog timer is started,
2458  * and the stack is notified that the interface is ready.
2459  **/
2460 static int igb_open(struct net_device *netdev)
2461 {
2462         struct igb_adapter *adapter = netdev_priv(netdev);
2463         struct e1000_hw *hw = &adapter->hw;
2464         int err;
2465         int i;
2466
2467         /* disallow open during test */
2468         if (test_bit(__IGB_TESTING, &adapter->state))
2469                 return -EBUSY;
2470
2471         netif_carrier_off(netdev);
2472
2473         /* allocate transmit descriptors */
2474         err = igb_setup_all_tx_resources(adapter);
2475         if (err)
2476                 goto err_setup_tx;
2477
2478         /* allocate receive descriptors */
2479         err = igb_setup_all_rx_resources(adapter);
2480         if (err)
2481                 goto err_setup_rx;
2482
2483         igb_power_up_link(adapter);
2484
2485         /* before we allocate an interrupt, we must be ready to handle it.
2486          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2487          * as soon as we call pci_request_irq, so we have to set up our
2488          * clean_rx handler before we do so.  */
2489         igb_configure(adapter);
2490
2491         err = igb_request_irq(adapter);
2492         if (err)
2493                 goto err_req_irq;
2494
2495         /* From here on the code is the same as igb_up() */
2496         clear_bit(__IGB_DOWN, &adapter->state);
2497
2498         for (i = 0; i < adapter->num_q_vectors; i++) {
2499                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2500                 napi_enable(&q_vector->napi);
2501         }
2502
2503         /* Clear any pending interrupts. */
2504         rd32(E1000_ICR);
2505
2506         igb_irq_enable(adapter);
2507
2508         /* notify VFs that reset has been completed */
2509         if (adapter->vfs_allocated_count) {
2510                 u32 reg_data = rd32(E1000_CTRL_EXT);
2511                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2512                 wr32(E1000_CTRL_EXT, reg_data);
2513         }
2514
2515         netif_tx_start_all_queues(netdev);
2516
2517         /* start the watchdog. */
2518         hw->mac.get_link_status = 1;
2519         schedule_work(&adapter->watchdog_task);
2520
2521         return 0;
2522
2523 err_req_irq:
2524         igb_release_hw_control(adapter);
2525         igb_power_down_link(adapter);
2526         igb_free_all_rx_resources(adapter);
2527 err_setup_rx:
2528         igb_free_all_tx_resources(adapter);
2529 err_setup_tx:
2530         igb_reset(adapter);
2531
2532         return err;
2533 }
2534
2535 /**
2536  * igb_close - Disables a network interface
2537  * @netdev: network interface device structure
2538  *
2539  * Returns 0, this is not allowed to fail
2540  *
2541  * The close entry point is called when an interface is de-activated
2542  * by the OS.  The hardware is still under the driver's control, but
2543  * needs to be disabled.  A global MAC reset is issued to stop the
2544  * hardware, and all transmit and receive resources are freed.
2545  **/
2546 static int igb_close(struct net_device *netdev)
2547 {
2548         struct igb_adapter *adapter = netdev_priv(netdev);
2549
2550         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2551         igb_down(adapter);
2552
2553         igb_free_irq(adapter);
2554
2555         igb_free_all_tx_resources(adapter);
2556         igb_free_all_rx_resources(adapter);
2557
2558         return 0;
2559 }
2560
2561 /**
2562  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2563  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2564  *
2565  * Return 0 on success, negative on failure
2566  **/
2567 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2568 {
2569         struct device *dev = tx_ring->dev;
2570         int size;
2571
2572         size = sizeof(struct igb_buffer) * tx_ring->count;
2573         tx_ring->buffer_info = vzalloc(size);
2574         if (!tx_ring->buffer_info)
2575                 goto err;
2576
2577         /* round up to nearest 4K */
2578         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2579         tx_ring->size = ALIGN(tx_ring->size, 4096);
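             /*
              * Assuming the default of 256 descriptors at 16 bytes each this
              * is exactly 4096 bytes, so the ALIGN above is a no-op; smaller
              * rings are padded up to the next 4 KB boundary.
              */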
2580
2581         tx_ring->desc = dma_alloc_coherent(dev,
2582                                            tx_ring->size,
2583                                            &tx_ring->dma,
2584                                            GFP_KERNEL);
2585
2586         if (!tx_ring->desc)
2587                 goto err;
2588
2589         tx_ring->next_to_use = 0;
2590         tx_ring->next_to_clean = 0;
2591         return 0;
2592
2593 err:
2594         vfree(tx_ring->buffer_info);
2595         dev_err(dev,
2596                 "Unable to allocate memory for the transmit descriptor ring\n");
2597         return -ENOMEM;
2598 }
2599
2600 /**
2601  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2602  *                                (Descriptors) for all queues
2603  * @adapter: board private structure
2604  *
2605  * Return 0 on success, negative on failure
2606  **/
2607 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2608 {
2609         struct pci_dev *pdev = adapter->pdev;
2610         int i, err = 0;
2611
2612         for (i = 0; i < adapter->num_tx_queues; i++) {
2613                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2614                 if (err) {
2615                         dev_err(&pdev->dev,
2616                                 "Allocation for Tx Queue %u failed\n", i);
2617                         for (i--; i >= 0; i--)
2618                                 igb_free_tx_resources(adapter->tx_ring[i]);
2619                         break;
2620                 }
2621         }
2622
2623         return err;
2624 }
2625
2626 /**
2627  * igb_setup_tctl - configure the transmit control registers
2628  * @adapter: Board private structure
2629  **/
2630 void igb_setup_tctl(struct igb_adapter *adapter)
2631 {
2632         struct e1000_hw *hw = &adapter->hw;
2633         u32 tctl;
2634
2635         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2636         wr32(E1000_TXDCTL(0), 0);
2637
2638         /* Program the Transmit Control Register */
2639         tctl = rd32(E1000_TCTL);
2640         tctl &= ~E1000_TCTL_CT;
2641         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2642                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
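             /*
              * E1000_COLLISION_THRESHOLD is presumably the customary e1000
              * value of 15, i.e. up to 15 retransmission attempts on
              * collision in half-duplex mode, shifted into the CT field of
              * TCTL by E1000_CT_SHIFT.
              */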
2643
2644         igb_config_collision_dist(hw);
2645
2646         /* Enable transmits */
2647         tctl |= E1000_TCTL_EN;
2648
2649         wr32(E1000_TCTL, tctl);
2650 }
2651
2652 /**
2653  * igb_configure_tx_ring - Configure transmit ring after Reset
2654  * @adapter: board private structure
2655  * @ring: tx ring to configure
2656  *
2657  * Configure a transmit ring after a reset.
2658  **/
2659 void igb_configure_tx_ring(struct igb_adapter *adapter,
2660                            struct igb_ring *ring)
2661 {
2662         struct e1000_hw *hw = &adapter->hw;
2663         u32 txdctl = 0;
2664         u64 tdba = ring->dma;
2665         int reg_idx = ring->reg_idx;
2666
2667         /* disable the queue */
2668         wr32(E1000_TXDCTL(reg_idx), 0);
2669         wrfl();
2670         mdelay(10);
2671
2672         wr32(E1000_TDLEN(reg_idx),
2673                         ring->count * sizeof(union e1000_adv_tx_desc));
2674         wr32(E1000_TDBAL(reg_idx),
2675                         tdba & 0x00000000ffffffffULL);
2676         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2677
2678         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2679         wr32(E1000_TDH(reg_idx), 0);
2680         writel(0, ring->tail);
2681
2682         txdctl |= IGB_TX_PTHRESH;
2683         txdctl |= IGB_TX_HTHRESH << 8;
2684         txdctl |= IGB_TX_WTHRESH << 16;
2685
2686         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2687         wr32(E1000_TXDCTL(reg_idx), txdctl);
2688 }
2689
2690 /**
2691  * igb_configure_tx - Configure transmit Unit after Reset
2692  * @adapter: board private structure
2693  *
2694  * Configure the Tx unit of the MAC after a reset.
2695  **/
2696 static void igb_configure_tx(struct igb_adapter *adapter)
2697 {
2698         int i;
2699
2700         for (i = 0; i < adapter->num_tx_queues; i++)
2701                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2702 }
2703
2704 /**
2705  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2706  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2707  *
2708  * Returns 0 on success, negative on failure
2709  **/
2710 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2711 {
2712         struct device *dev = rx_ring->dev;
2713         int size, desc_len;
2714
2715         size = sizeof(struct igb_buffer) * rx_ring->count;
2716         rx_ring->buffer_info = vzalloc(size);
2717         if (!rx_ring->buffer_info)
2718                 goto err;
2719
2720         desc_len = sizeof(union e1000_adv_rx_desc);
2721
2722         /* Round up to nearest 4K */
2723         rx_ring->size = rx_ring->count * desc_len;
2724         rx_ring->size = ALIGN(rx_ring->size, 4096);
2725
2726         rx_ring->desc = dma_alloc_coherent(dev,
2727                                            rx_ring->size,
2728                                            &rx_ring->dma,
2729                                            GFP_KERNEL);
2730
2731         if (!rx_ring->desc)
2732                 goto err;
2733
2734         rx_ring->next_to_clean = 0;
2735         rx_ring->next_to_use = 0;
2736
2737         return 0;
2738
2739 err:
2740         vfree(rx_ring->buffer_info);
2741         rx_ring->buffer_info = NULL;
2742         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2743                 " ring\n");
2744         return -ENOMEM;
2745 }
2746
2747 /**
2748  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2749  *                                (Descriptors) for all queues
2750  * @adapter: board private structure
2751  *
2752  * Return 0 on success, negative on failure
2753  **/
2754 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2755 {
2756         struct pci_dev *pdev = adapter->pdev;
2757         int i, err = 0;
2758
2759         for (i = 0; i < adapter->num_rx_queues; i++) {
2760                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2761                 if (err) {
2762                         dev_err(&pdev->dev,
2763                                 "Allocation for Rx Queue %u failed\n", i);
2764                         for (i--; i >= 0; i--)
2765                                 igb_free_rx_resources(adapter->rx_ring[i]);
2766                         break;
2767                 }
2768         }
2769
2770         return err;
2771 }
2772
2773 /**
2774  * igb_setup_mrqc - configure the multiple receive queue control registers
2775  * @adapter: Board private structure
2776  **/
2777 static void igb_setup_mrqc(struct igb_adapter *adapter)
2778 {
2779         struct e1000_hw *hw = &adapter->hw;
2780         u32 mrqc, rxcsum;
2781         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2782         union e1000_reta {
2783                 u32 dword;
2784                 u8  bytes[4];
2785         } reta;
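        /*
         * Default 40-byte RSS (Toeplitz) hash key; this appears to be the
         * same well-known example key from the Microsoft RSS verification
         * suite that other drivers also use as their default.
         */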
2786         static const u8 rsshash[40] = {
2787                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2788                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2789                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2790                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2791
2792         /* Fill out hash function seeds */
2793         for (j = 0; j < 10; j++) {
2794                 u32 rsskey = rsshash[(j * 4)];
2795                 rsskey |= rsshash[(j * 4) + 1] << 8;
2796                 rsskey |= rsshash[(j * 4) + 2] << 16;
2797                 rsskey |= rsshash[(j * 4) + 3] << 24;
2798                 array_wr32(E1000_RSSRK(0), j, rsskey);
2799         }
2800
2801         num_rx_queues = adapter->rss_queues;
2802
2803         if (adapter->vfs_allocated_count) {
2804                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2805                 switch (hw->mac.type) {
2806                 case e1000_i350:
2807                 case e1000_82580:
2808                         num_rx_queues = 1;
2809                         shift = 0;
2810                         break;
2811                 case e1000_82576:
2812                         shift = 3;
2813                         num_rx_queues = 2;
2814                         break;
2815                 case e1000_82575:
2816                         shift = 2;
2817                         shift2 = 6;
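                        /* fall through */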
2818                 default:
2819                         break;
2820                 }
2821         } else {
2822                 if (hw->mac.type == e1000_82575)
2823                         shift = 6;
2824         }
2825
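        /*
         * Fill the 128-entry redirection table, four 8-bit entries per
         * RETA register.  Entries simply cycle through the RSS queues;
         * e.g. with 4 queues and shift == 0 the table reads 0,1,2,3,0,...
         * The shift moves the queue index into the bit position this MAC
         * expects when pools/VFs are in use.
         */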
2826         for (j = 0; j < (32 * 4); j++) {
2827                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2828                 if (shift2)
2829                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2830                 if ((j & 3) == 3)
2831                         wr32(E1000_RETA(j >> 2), reta.dword);
2832         }
2833
2834         /*
2835          * Disable raw packet checksumming so that RSS hash is placed in
2836          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2837          * offloads as they are enabled by default
2838          */
2839         rxcsum = rd32(E1000_RXCSUM);
2840         rxcsum |= E1000_RXCSUM_PCSD;
2841
2842         if (adapter->hw.mac.type >= e1000_82576)
2843                 /* Enable Receive Checksum Offload for SCTP */
2844                 rxcsum |= E1000_RXCSUM_CRCOFL;
2845
2846         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2847         wr32(E1000_RXCSUM, rxcsum);
2848
2849         /* If VMDq is enabled then we set the appropriate mode for that, else
2850          * we default to RSS so that an RSS hash is calculated per packet even
2851          * if we are only using one queue */
2852         if (adapter->vfs_allocated_count) {
2853                 if (hw->mac.type > e1000_82575) {
2854                         /* Set the default pool for the PF's first queue */
2855                         u32 vtctl = rd32(E1000_VT_CTL);
2856                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2857                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2858                         vtctl |= adapter->vfs_allocated_count <<
2859                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2860                         wr32(E1000_VT_CTL, vtctl);
2861                 }
2862                 if (adapter->rss_queues > 1)
2863                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2864                 else
2865                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2866         } else {
2867                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2868         }
2869         igb_vmm_control(adapter);
2870
2871         /*
2872          * Generate RSS hash based on TCP port numbers and/or
2873          * IPv4/v6 src and dst addresses since UDP cannot be
2874          * hashed reliably due to IP fragmentation
2875          */
2876         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2877                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2878                 E1000_MRQC_RSS_FIELD_IPV6 |
2879                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2880                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2881
2882         wr32(E1000_MRQC, mrqc);
2883 }
2884
2885 /**
2886  * igb_setup_rctl - configure the receive control registers
2887  * @adapter: Board private structure
2888  **/
2889 void igb_setup_rctl(struct igb_adapter *adapter)
2890 {
2891         struct e1000_hw *hw = &adapter->hw;
2892         u32 rctl;
2893
2894         rctl = rd32(E1000_RCTL);
2895
2896         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2897         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2898
2899         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2900                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2901
2902         /*
2903          * enable stripping of CRC. It's unlikely this will break BMC
2904          * redirection as it did with e1000. Newer features require
2905          * that the HW strips the CRC.
2906          */
2907         rctl |= E1000_RCTL_SECRC;
2908
2909         /* disable store bad packets and clear size bits. */
2910         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2911
2912         /* enable LPE to prevent packets larger than max_frame_size */
2913         rctl |= E1000_RCTL_LPE;
2914
2915         /* disable queue 0 to prevent tail write w/o re-config */
2916         wr32(E1000_RXDCTL(0), 0);
2917
2918         /* Attention!!!  For SR-IOV PF driver operations you must enable
2919          * queue drop for all VF and PF queues to prevent head of line blocking
2920          * if an un-trusted VF does not provide descriptors to hardware.
2921          */
2922         if (adapter->vfs_allocated_count) {
2923                 /* set all queue drop enable bits */
2924                 wr32(E1000_QDE, ALL_QUEUES);
2925         }
2926
2927         wr32(E1000_RCTL, rctl);
2928 }
2929
2930 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2931                                    int vfn)
2932 {
2933         struct e1000_hw *hw = &adapter->hw;
2934         u32 vmolr;
2935
2936         /* if it isn't the PF, check whether the VF has VLANs enabled and,
2937          * if so, increase the size to allow for the VLAN tag */
2938         if (vfn < adapter->vfs_allocated_count &&
2939             adapter->vf_data[vfn].vlans_enabled)
2940                 size += VLAN_TAG_SIZE;
2941
2942         vmolr = rd32(E1000_VMOLR(vfn));
2943         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2944         vmolr |= size | E1000_VMOLR_LPE;
2945         wr32(E1000_VMOLR(vfn), vmolr);
2946
2947         return 0;
2948 }
2949
2950 /**
2951  * igb_rlpml_set - set maximum receive packet size
2952  * @adapter: board private structure
2953  *
2954  * Configure maximum receivable packet size.
2955  **/
2956 static void igb_rlpml_set(struct igb_adapter *adapter)
2957 {
2958         u32 max_frame_size = adapter->max_frame_size;
2959         struct e1000_hw *hw = &adapter->hw;
2960         u16 pf_id = adapter->vfs_allocated_count;
2961
2962         if (pf_id) {
2963                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2964                 /*
2965                  * If we're in VMDQ or SR-IOV mode, then set global RLPML
2966                  * to our max jumbo frame size, in case we need to enable
2967                  * jumbo frames on one of the rings later.
2968                  * This will not pass over-length frames into the default
2969                  * queue because it's gated by the VMOLR.RLPML.
2970                  */
2971                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2972         }
2973
2974         wr32(E1000_RLPML, max_frame_size);
2975 }
2976
2977 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2978                                  int vfn, bool aupe)
2979 {
2980         struct e1000_hw *hw = &adapter->hw;
2981         u32 vmolr;
2982
2983         /*
2984          * This register exists only on 82576 and newer so if we are older then
2985          * we should exit and do nothing
2986          */
2987         if (hw->mac.type < e1000_82576)
2988                 return;
2989
2990         vmolr = rd32(E1000_VMOLR(vfn));
2991         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2992         if (aupe)
2993                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2994         else
2995                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2996
2997         /* clear all bits that might not be set */
2998         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2999
3000         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3001                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3002         /*
3003          * for VMDq only allow the VFs and pool 0 to accept broadcast and
3004          * multicast packets
3005          */
3006         if (vfn <= adapter->vfs_allocated_count)
3007                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
3008
3009         wr32(E1000_VMOLR(vfn), vmolr);
3010 }
3011
3012 /**
3013  * igb_configure_rx_ring - Configure a receive ring after Reset
3014  * @adapter: board private structure
3015  * @ring: receive ring to be configured
3016  *
3017  * Configure the Rx unit of the MAC after a reset.
3018  **/
3019 void igb_configure_rx_ring(struct igb_adapter *adapter,
3020                            struct igb_ring *ring)
3021 {
3022         struct e1000_hw *hw = &adapter->hw;
3023         u64 rdba = ring->dma;
3024         int reg_idx = ring->reg_idx;
3025         u32 srrctl = 0, rxdctl = 0;
3026
3027         /* disable the queue */
3028         wr32(E1000_RXDCTL(reg_idx), 0);
3029
3030         /* Set DMA base address registers */
3031         wr32(E1000_RDBAL(reg_idx),
3032              rdba & 0x00000000ffffffffULL);
3033         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3034         wr32(E1000_RDLEN(reg_idx),
3035                        ring->count * sizeof(union e1000_adv_rx_desc));
3036
3037         /* initialize head and tail */
3038         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3039         wr32(E1000_RDH(reg_idx), 0);
3040         writel(0, ring->tail);
3041
3042         /* set descriptor configuration */
3043         srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3044 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3045         srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3046 #else
3047         srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3048 #endif
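        /*
         * Header split layout: packet headers go to the small buffer
         * sized above while the payload lands in a half-page buffer, so
         * two receive buffers can share one page (igb_clean_rx_ring()
         * unmaps IGB_RX_HDR_LEN and PAGE_SIZE / 2 accordingly).
         */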
3049         srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3050         if (hw->mac.type == e1000_82580)
3051                 srrctl |= E1000_SRRCTL_TIMESTAMP;
3052         /* Only set Drop Enable if we are supporting multiple queues */
3053         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3054                 srrctl |= E1000_SRRCTL_DROP_EN;
3055
3056         wr32(E1000_SRRCTL(reg_idx), srrctl);
3057
3058         /* set filtering for VMDQ pools */
3059         igb_set_vmolr(adapter, reg_idx & 0x7, true);
3060
3061         rxdctl |= IGB_RX_PTHRESH;
3062         rxdctl |= IGB_RX_HTHRESH << 8;
3063         rxdctl |= IGB_RX_WTHRESH << 16;
3064
3065         /* enable receive descriptor fetching */
3066         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3067         wr32(E1000_RXDCTL(reg_idx), rxdctl);
3068 }
3069
3070 /**
3071  * igb_configure_rx - Configure receive Unit after Reset
3072  * @adapter: board private structure
3073  *
3074  * Configure the Rx unit of the MAC after a reset.
3075  **/
3076 static void igb_configure_rx(struct igb_adapter *adapter)
3077 {
3078         int i;
3079
3080         /* set UTA to appropriate mode */
3081         igb_set_uta(adapter);
3082
3083         /* set the correct pool for the PF default MAC address in entry 0 */
3084         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3085                          adapter->vfs_allocated_count);
3086
3087         /* Setup the HW Rx Head and Tail Descriptor Pointers and
3088          * the Base and Length of the Rx Descriptor Ring */
3089         for (i = 0; i < adapter->num_rx_queues; i++)
3090                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3091 }
3092
3093 /**
3094  * igb_free_tx_resources - Free Tx Resources per Queue
3095  * @tx_ring: Tx descriptor ring for a specific queue
3096  *
3097  * Free all transmit software resources
3098  **/
3099 void igb_free_tx_resources(struct igb_ring *tx_ring)
3100 {
3101         igb_clean_tx_ring(tx_ring);
3102
3103         vfree(tx_ring->buffer_info);
3104         tx_ring->buffer_info = NULL;
3105
3106         /* if not set, then don't free */
3107         if (!tx_ring->desc)
3108                 return;
3109
3110         dma_free_coherent(tx_ring->dev, tx_ring->size,
3111                           tx_ring->desc, tx_ring->dma);
3112
3113         tx_ring->desc = NULL;
3114 }
3115
3116 /**
3117  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3118  * @adapter: board private structure
3119  *
3120  * Free all transmit software resources
3121  **/
3122 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3123 {
3124         int i;
3125
3126         for (i = 0; i < adapter->num_tx_queues; i++)
3127                 igb_free_tx_resources(adapter->tx_ring[i]);
3128 }
3129
3130 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3131                                     struct igb_buffer *buffer_info)
3132 {
3133         if (buffer_info->dma) {
3134                 if (buffer_info->mapped_as_page)
3135                         dma_unmap_page(tx_ring->dev,
3136                                         buffer_info->dma,
3137                                         buffer_info->length,
3138                                         DMA_TO_DEVICE);
3139                 else
3140                         dma_unmap_single(tx_ring->dev,
3141                                         buffer_info->dma,
3142                                         buffer_info->length,
3143                                         DMA_TO_DEVICE);
3144                 buffer_info->dma = 0;
3145         }
3146         if (buffer_info->skb) {
3147                 dev_kfree_skb_any(buffer_info->skb);
3148                 buffer_info->skb = NULL;
3149         }
3150         buffer_info->time_stamp = 0;
3151         buffer_info->length = 0;
3152         buffer_info->next_to_watch = 0;
3153         buffer_info->mapped_as_page = false;
3154 }
3155
3156 /**
3157  * igb_clean_tx_ring - Free Tx Buffers
3158  * @tx_ring: ring to be cleaned
3159  **/
3160 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3161 {
3162         struct igb_buffer *buffer_info;
3163         unsigned long size;
3164         unsigned int i;
3165
3166         if (!tx_ring->buffer_info)
3167                 return;
3168
3169         /* Free all the Tx ring sk_buffs */
3170         for (i = 0; i < tx_ring->count; i++) {
3171                 buffer_info = &tx_ring->buffer_info[i];
3172                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3173         }
3174
3175         size = sizeof(struct igb_buffer) * tx_ring->count;
3176         memset(tx_ring->buffer_info, 0, size);
3177
3178         /* Zero out the descriptor ring */
3179         memset(tx_ring->desc, 0, tx_ring->size);
3180
3181         tx_ring->next_to_use = 0;
3182         tx_ring->next_to_clean = 0;
3183 }
3184
3185 /**
3186  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3187  * @adapter: board private structure
3188  **/
3189 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3190 {
3191         int i;
3192
3193         for (i = 0; i < adapter->num_tx_queues; i++)
3194                 igb_clean_tx_ring(adapter->tx_ring[i]);
3195 }
3196
3197 /**
3198  * igb_free_rx_resources - Free Rx Resources
3199  * @rx_ring: ring to clean the resources from
3200  *
3201  * Free all receive software resources
3202  **/
3203 void igb_free_rx_resources(struct igb_ring *rx_ring)
3204 {
3205         igb_clean_rx_ring(rx_ring);
3206
3207         vfree(rx_ring->buffer_info);
3208         rx_ring->buffer_info = NULL;
3209
3210         /* if not set, then don't free */
3211         if (!rx_ring->desc)
3212                 return;
3213
3214         dma_free_coherent(rx_ring->dev, rx_ring->size,
3215                           rx_ring->desc, rx_ring->dma);
3216
3217         rx_ring->desc = NULL;
3218 }
3219
3220 /**
3221  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3222  * @adapter: board private structure
3223  *
3224  * Free all receive software resources
3225  **/
3226 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3227 {
3228         int i;
3229
3230         for (i = 0; i < adapter->num_rx_queues; i++)
3231                 igb_free_rx_resources(adapter->rx_ring[i]);
3232 }
3233
3234 /**
3235  * igb_clean_rx_ring - Free Rx Buffers per Queue
3236  * @rx_ring: ring to free buffers from
3237  **/
3238 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3239 {
3240         unsigned long size;
3241         u16 i;
3242
3243         if (!rx_ring->buffer_info)
3244                 return;
3245
3246         /* Free all the Rx ring sk_buffs */
3247         for (i = 0; i < rx_ring->count; i++) {
3248                 struct igb_buffer *buffer_info = &rx_ring->buffer_info[i];
3249                 if (buffer_info->dma) {
3250                         dma_unmap_single(rx_ring->dev,
3251                                          buffer_info->dma,
3252                                          IGB_RX_HDR_LEN,
3253                                          DMA_FROM_DEVICE);
3254                         buffer_info->dma = 0;
3255                 }
3256
3257                 if (buffer_info->skb) {
3258                         dev_kfree_skb(buffer_info->skb);
3259                         buffer_info->skb = NULL;
3260                 }
3261                 if (buffer_info->page_dma) {
3262                         dma_unmap_page(rx_ring->dev,
3263                                        buffer_info->page_dma,
3264                                        PAGE_SIZE / 2,
3265                                        DMA_FROM_DEVICE);
3266                         buffer_info->page_dma = 0;
3267                 }
3268                 if (buffer_info->page) {
3269                         put_page(buffer_info->page);
3270                         buffer_info->page = NULL;
3271                         buffer_info->page_offset = 0;
3272                 }
3273         }
3274
3275         size = sizeof(struct igb_buffer) * rx_ring->count;
3276         memset(rx_ring->buffer_info, 0, size);
3277
3278         /* Zero out the descriptor ring */
3279         memset(rx_ring->desc, 0, rx_ring->size);
3280
3281         rx_ring->next_to_clean = 0;
3282         rx_ring->next_to_use = 0;
3283 }
3284
3285 /**
3286  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3287  * @adapter: board private structure
3288  **/
3289 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3290 {
3291         int i;
3292
3293         for (i = 0; i < adapter->num_rx_queues; i++)
3294                 igb_clean_rx_ring(adapter->rx_ring[i]);
3295 }
3296
3297 /**
3298  * igb_set_mac - Change the Ethernet Address of the NIC
3299  * @netdev: network interface device structure
3300  * @p: pointer to an address structure
3301  *
3302  * Returns 0 on success, negative on failure
3303  **/
3304 static int igb_set_mac(struct net_device *netdev, void *p)
3305 {
3306         struct igb_adapter *adapter = netdev_priv(netdev);
3307         struct e1000_hw *hw = &adapter->hw;
3308         struct sockaddr *addr = p;
3309
3310         if (!is_valid_ether_addr(addr->sa_data))
3311                 return -EADDRNOTAVAIL;
3312
3313         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3314         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3315
3316         /* set the correct pool for the new PF MAC address in entry 0 */
3317         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3318                          adapter->vfs_allocated_count);
3319
3320         return 0;
3321 }
3322
3323 /**
3324  * igb_write_mc_addr_list - write multicast addresses to MTA
3325  * @netdev: network interface device structure
3326  *
3327  * Writes multicast address list to the MTA hash table.
3328  * Returns: -ENOMEM on failure
3329  *                0 on no addresses written
3330  *                X on writing X addresses to MTA
3331  **/
3332 static int igb_write_mc_addr_list(struct net_device *netdev)
3333 {
3334         struct igb_adapter *adapter = netdev_priv(netdev);
3335         struct e1000_hw *hw = &adapter->hw;
3336         struct netdev_hw_addr *ha;
3337         u8  *mta_list;
3338         int i;
3339
3340         if (netdev_mc_empty(netdev)) {
3341                 /* nothing to program, so clear mc list */
3342                 igb_update_mc_addr_list(hw, NULL, 0);
3343                 igb_restore_vf_multicasts(adapter);
3344                 return 0;
3345         }
3346
3347         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3348         if (!mta_list)
3349                 return -ENOMEM;
3350
3351         /* The shared function expects a packed array of only addresses. */
3352         i = 0;
3353         netdev_for_each_mc_addr(ha, netdev)
3354                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3355
3356         igb_update_mc_addr_list(hw, mta_list, i);
3357         kfree(mta_list);
3358
3359         return netdev_mc_count(netdev);
3360 }
3361
3362 /**
3363  * igb_write_uc_addr_list - write unicast addresses to RAR table
3364  * @netdev: network interface device structure
3365  *
3366  * Writes unicast address list to the RAR table.
3367  * Returns: -ENOMEM on failure/insufficient address space
3368  *                0 on no addresses written
3369  *                X on writing X addresses to the RAR table
3370  **/
3371 static int igb_write_uc_addr_list(struct net_device *netdev)
3372 {
3373         struct igb_adapter *adapter = netdev_priv(netdev);
3374         struct e1000_hw *hw = &adapter->hw;
3375         unsigned int vfn = adapter->vfs_allocated_count;
3376         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3377         int count = 0;
3378
3379         /* return ENOMEM indicating insufficient memory for addresses */
3380         if (netdev_uc_count(netdev) > rar_entries)
3381                 return -ENOMEM;
3382
3383         if (!netdev_uc_empty(netdev) && rar_entries) {
3384                 struct netdev_hw_addr *ha;
3385
3386                 netdev_for_each_uc_addr(ha, netdev) {
3387                         if (!rar_entries)
3388                                 break;
3389                         igb_rar_set_qsel(adapter, ha->addr,
3390                                          rar_entries--,
3391                                          vfn);
3392                         count++;
3393                 }
3394         }
3395         /* clear the unused RAR entries, in reverse order to avoid write combining */
3396         for (; rar_entries > 0 ; rar_entries--) {
3397                 wr32(E1000_RAH(rar_entries), 0);
3398                 wr32(E1000_RAL(rar_entries), 0);
3399         }
3400         wrfl();
3401
3402         return count;
3403 }
3404
3405 /**
3406  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3407  * @netdev: network interface device structure
3408  *
3409  * The set_rx_mode entry point is called whenever the unicast or multicast
3410  * address lists or the network interface flags are updated.  This routine is
3411  * responsible for configuring the hardware for proper unicast, multicast,
3412  * promiscuous mode, and all-multi behavior.
3413  **/
3414 static void igb_set_rx_mode(struct net_device *netdev)
3415 {
3416         struct igb_adapter *adapter = netdev_priv(netdev);
3417         struct e1000_hw *hw = &adapter->hw;
3418         unsigned int vfn = adapter->vfs_allocated_count;
3419         u32 rctl, vmolr = 0;
3420         int count;
3421
3422         /* Check for Promiscuous and All Multicast modes */
3423         rctl = rd32(E1000_RCTL);
3424
3425         /* clear the affected bits */
3426         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3427
3428         if (netdev->flags & IFF_PROMISC) {
3429                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3430                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3431         } else {
3432                 if (netdev->flags & IFF_ALLMULTI) {
3433                         rctl |= E1000_RCTL_MPE;
3434                         vmolr |= E1000_VMOLR_MPME;
3435                 } else {
3436                         /*
3437                          * Write addresses to the MTA, if the attempt fails
3438                          * then we should just turn on promiscuous mode so
3439                          * that we can at least receive multicast traffic
3440                          */
3441                         count = igb_write_mc_addr_list(netdev);
3442                         if (count < 0) {
3443                                 rctl |= E1000_RCTL_MPE;
3444                                 vmolr |= E1000_VMOLR_MPME;
3445                         } else if (count) {
3446                                 vmolr |= E1000_VMOLR_ROMPE;
3447                         }
3448                 }
3449                 /*
3450                  * Write addresses to available RAR registers, if there is not
3451                  * sufficient space to store all the addresses then enable
3452                  * unicast promiscuous mode
3453                  */
3454                 count = igb_write_uc_addr_list(netdev);
3455                 if (count < 0) {
3456                         rctl |= E1000_RCTL_UPE;
3457                         vmolr |= E1000_VMOLR_ROPE;
3458                 }
3459                 rctl |= E1000_RCTL_VFE;
3460         }
3461         wr32(E1000_RCTL, rctl);
3462
3463         /*
3464          * In order to support SR-IOV and eventually VMDq it is necessary to set
3465          * the VMOLR to enable the appropriate modes.  Without this workaround
3466          * we will have issues with VLAN tag stripping not being done for frames
3467          * that are only arriving because we are the default pool
3468          */
3469         if (hw->mac.type < e1000_82576)
3470                 return;
3471
3472         vmolr |= rd32(E1000_VMOLR(vfn)) &
3473                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3474         wr32(E1000_VMOLR(vfn), vmolr);
3475         igb_restore_vf_multicasts(adapter);
3476 }
3477
3478 static void igb_check_wvbr(struct igb_adapter *adapter)
3479 {
3480         struct e1000_hw *hw = &adapter->hw;
3481         u32 wvbr = 0;
3482
3483         switch (hw->mac.type) {
3484         case e1000_82576:
3485         case e1000_i350:
3486                 if (!(wvbr = rd32(E1000_WVBR)))
3487                         return;
3488                 break;
3489         default:
3490                 break;
3491         }
3492
3493         adapter->wvbr |= wvbr;
3494 }
3495
3496 #define IGB_STAGGERED_QUEUE_OFFSET 8
3497
3498 static void igb_spoof_check(struct igb_adapter *adapter)
3499 {
3500         int j;
3501
3502         if (!adapter->wvbr)
3503                 return;
3504
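        /*
         * WVBR appears to carry two bits per VF, one for each queue of
         * the VF's queue pair, with the second set offset by
         * IGB_STAGGERED_QUEUE_OFFSET; check and clear both.
         */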
3505         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3506                 if (adapter->wvbr & (1 << j) ||
3507                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3508                         dev_warn(&adapter->pdev->dev,
3509                                 "Spoof event(s) detected on VF %d\n", j);
3510                         adapter->wvbr &=
3511                                 ~((1 << j) |
3512                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3513                 }
3514         }
3515 }
3516
3517 /* Need to wait a few seconds after link up to get diagnostic information from
3518  * the phy */
3519 static void igb_update_phy_info(unsigned long data)
3520 {
3521         struct igb_adapter *adapter = (struct igb_adapter *) data;
3522         igb_get_phy_info(&adapter->hw);
3523 }
3524
3525 /**
3526  * igb_has_link - check shared code for link and determine up/down
3527  * @adapter: pointer to driver private info
3528  **/
3529 bool igb_has_link(struct igb_adapter *adapter)
3530 {
3531         struct e1000_hw *hw = &adapter->hw;
3532         bool link_active = false;
3533         s32 ret_val = 0;
3534
3535         /* get_link_status is set on LSC (link status) interrupt or
3536          * rx sequence error interrupt.  It remains set until
3537          * check_for_link establishes link, so for copper adapters
3538          * link is only reported as active once that has happened.
3539          */
3540         switch (hw->phy.media_type) {
3541         case e1000_media_type_copper:
3542                 if (hw->mac.get_link_status) {
3543                         ret_val = hw->mac.ops.check_for_link(hw);
3544                         link_active = !hw->mac.get_link_status;
3545                 } else {
3546                         link_active = true;
3547                 }
3548                 break;
3549         case e1000_media_type_internal_serdes:
3550                 ret_val = hw->mac.ops.check_for_link(hw);
3551                 link_active = hw->mac.serdes_has_link;
3552                 break;
3553         default:
3554         case e1000_media_type_unknown:
3555                 break;
3556         }
3557
3558         return link_active;
3559 }
3560
3561 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3562 {
3563         bool ret = false;
3564         u32 ctrl_ext, thstat;
3565
3566         /* check for thermal sensor event on i350, copper only */
3567         if (hw->mac.type == e1000_i350) {
3568                 thstat = rd32(E1000_THSTAT);
3569                 ctrl_ext = rd32(E1000_CTRL_EXT);
3570
3571                 if ((hw->phy.media_type == e1000_media_type_copper) &&
3572                     !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3573                         ret = !!(thstat & event);
3574                 }
3575         }
3576
3577         return ret;
3578 }
3579
3580 /**
3581  * igb_watchdog - Timer Call-back
3582  * @data: pointer to adapter cast into an unsigned long
3583  **/
3584 static void igb_watchdog(unsigned long data)
3585 {
3586         struct igb_adapter *adapter = (struct igb_adapter *)data;
3587         /* Do the rest outside of interrupt context */
3588         schedule_work(&adapter->watchdog_task);
3589 }
3590
3591 static void igb_watchdog_task(struct work_struct *work)
3592 {
3593         struct igb_adapter *adapter = container_of(work,
3594                                                    struct igb_adapter,
3595                                                    watchdog_task);
3596         struct e1000_hw *hw = &adapter->hw;
3597         struct net_device *netdev = adapter->netdev;
3598         u32 link;
3599         int i;
3600
3601         link = igb_has_link(adapter);
3602         if (link) {
3603                 if (!netif_carrier_ok(netdev)) {
3604                         u32 ctrl;
3605                         hw->mac.ops.get_speed_and_duplex(hw,
3606                                                          &adapter->link_speed,
3607                                                          &adapter->link_duplex);
3608
3609                         ctrl = rd32(E1000_CTRL);
3610                         /* Link status message must follow this format */
3611                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3612                                  "Flow Control: %s\n",
3613                                netdev->name,
3614                                adapter->link_speed,
3615                                adapter->link_duplex == FULL_DUPLEX ?
3616                                  "Full Duplex" : "Half Duplex",
3617                                ((ctrl & E1000_CTRL_TFCE) &&
3618                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3619                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3620                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3621
3622                         /* check for thermal sensor event */
3623                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3624                                 printk(KERN_INFO "igb: %s The network adapter "
3625                                                  "link speed was downshifted "
3626                                                  "because it overheated.\n",
3627                                                  netdev->name);
3628                         }
3629
3630                         /* adjust timeout factor according to speed/duplex */
3631                         adapter->tx_timeout_factor = 1;
3632                         switch (adapter->link_speed) {
3633                         case SPEED_10:
3634                                 adapter->tx_timeout_factor = 14;
3635                                 break;
3636                         case SPEED_100:
3637                                 /* maybe add some timeout factor ? */
3638                                 break;
3639                         }
3640
3641                         netif_carrier_on(netdev);
3642
3643                         igb_ping_all_vfs(adapter);
3644                         igb_check_vf_rate_limit(adapter);
3645
3646                         /* link state has changed, schedule phy info update */
3647                         if (!test_bit(__IGB_DOWN, &adapter->state))
3648                                 mod_timer(&adapter->phy_info_timer,
3649                                           round_jiffies(jiffies + 2 * HZ));
3650                 }
3651         } else {
3652                 if (netif_carrier_ok(netdev)) {
3653                         adapter->link_speed = 0;
3654                         adapter->link_duplex = 0;
3655
3656                         /* check for thermal sensor event */
3657                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3658                                 printk(KERN_ERR "igb: %s The network adapter "
3659                                                 "was stopped because it "
3660                                                 "overheated.\n",
3661                                                 netdev->name);
3662                         }
3663
3664                         /* Link status message must follow this format */
3665                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3666                                netdev->name);
3667                         netif_carrier_off(netdev);
3668
3669                         igb_ping_all_vfs(adapter);
3670
3671                         /* link state has changed, schedule phy info update */
3672                         if (!test_bit(__IGB_DOWN, &adapter->state))
3673                                 mod_timer(&adapter->phy_info_timer,
3674                                           round_jiffies(jiffies + 2 * HZ));
3675                 }
3676         }
3677
3678         spin_lock(&adapter->stats64_lock);
3679         igb_update_stats(adapter, &adapter->stats64);
3680         spin_unlock(&adapter->stats64_lock);
3681
3682         for (i = 0; i < adapter->num_tx_queues; i++) {
3683                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3684                 if (!netif_carrier_ok(netdev)) {
3685                         /* We've lost link, so the controller stops DMA,
3686                          * but we've got queued Tx work that's never going
3687                          * to get done, so reset controller to flush Tx.
3688                          * (Do the reset outside of interrupt context). */
3689                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3690                                 adapter->tx_timeout_count++;
3691                                 schedule_work(&adapter->reset_task);
3692                                 /* return immediately since reset is imminent */
3693                                 return;
3694                         }
3695                 }
3696
3697                 /* Force detection of hung controller every watchdog period */
3698                 tx_ring->detect_tx_hung = true;
3699         }
3700
3701         /* Cause software interrupt to ensure rx ring is cleaned */
3702         if (adapter->msix_entries) {
3703                 u32 eics = 0;
3704                 for (i = 0; i < adapter->num_q_vectors; i++) {
3705                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3706                         eics |= q_vector->eims_value;
3707                 }
3708                 wr32(E1000_EICS, eics);
3709         } else {
3710                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3711         }
3712
3713         igb_spoof_check(adapter);
3714
3715         /* Reset the timer */
3716         if (!test_bit(__IGB_DOWN, &adapter->state))
3717                 mod_timer(&adapter->watchdog_timer,
3718                           round_jiffies(jiffies + 2 * HZ));
3719 }
3720
3721 enum latency_range {
3722         lowest_latency = 0,
3723         low_latency = 1,
3724         bulk_latency = 2,
3725         latency_invalid = 255
3726 };
3727
3728 /**
3729  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3730  *
3731  *      Stores a new ITR value based strictly on packet size.  This
3732  *      algorithm is less sophisticated than that used in igb_update_itr,
3733  *      due to the difficulty of synchronizing statistics across multiple
3734  *      receive rings.  The divisors and thresholds used by this function
3735  *      were determined based on theoretical maximum wire speed and testing
3736  *      data, in order to minimize response time while increasing bulk
3737  *      throughput.
3738  *      This functionality is controlled by the InterruptThrottleRate module
3739  *      parameter (see igb_param.c)
3740  *      NOTE:  This function is called only when operating in a multiqueue
3741  *             receive environment.
3742  * @q_vector: pointer to q_vector
3743  **/
3744 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3745 {
3746         int new_val = q_vector->itr_val;
3747         int avg_wire_size = 0;
3748         struct igb_adapter *adapter = q_vector->adapter;
3749         struct igb_ring *ring;
3750         unsigned int packets;
3751
3752         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3753          * ints/sec - an ITR value of 976 (roughly 250 usec between interrupts).
3754          */
3755         if (adapter->link_speed != SPEED_1000) {
3756                 new_val = 976;
3757                 goto set_itr_val;
3758         }
3759
3760         ring = q_vector->rx_ring;
3761         if (ring) {
3762                 packets = ACCESS_ONCE(ring->total_packets);
3763
3764                 if (packets)
3765                         avg_wire_size = ring->total_bytes / packets;
3766         }
3767
3768         ring = q_vector->tx_ring;
3769         if (ring) {
3770                 packets = ACCESS_ONCE(ring->total_packets);
3771
3772                 if (packets)
3773                         avg_wire_size = max_t(u32, avg_wire_size,
3774                                               ring->total_bytes / packets);
3775         }
3776
3777         /* if avg_wire_size isn't set no work was done */
3778         if (!avg_wire_size)
3779                 goto clear_counts;
3780
3781         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3782         avg_wire_size += 24;
3783
3784         /* Don't starve jumbo frames */
3785         avg_wire_size = min(avg_wire_size, 3000);
3786
3787         /* Give a little boost to mid-size frames */
3788         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3789                 new_val = avg_wire_size / 3;
3790         else
3791                 new_val = avg_wire_size / 2;
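        /*
         * Example: full-size 1500 byte frames give an avg_wire_size of
         * about 1524, so new_val is about 762, i.e. roughly 5000 ints/sec
         * given the ~4 ITR counts per usec implied by the other values
         * used in this file.
         */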
3792
3793         /* when in itr mode 3 do not exceed 20K ints/sec */
3794         if (adapter->rx_itr_setting == 3 && new_val < 196)
3795                 new_val = 196;
3796
3797 set_itr_val:
3798         if (new_val != q_vector->itr_val) {
3799                 q_vector->itr_val = new_val;
3800                 q_vector->set_itr = 1;
3801         }
3802 clear_counts:
3803         if (q_vector->rx_ring) {
3804                 q_vector->rx_ring->total_bytes = 0;
3805                 q_vector->rx_ring->total_packets = 0;
3806         }
3807         if (q_vector->tx_ring) {
3808                 q_vector->tx_ring->total_bytes = 0;
3809                 q_vector->tx_ring->total_packets = 0;
3810         }
3811 }
3812
3813 /**
3814  * igb_update_itr - update the dynamic ITR value based on statistics
3815  *      Returns an updated latency range based on the packet and byte
3816  *      counts from the last interrupt.  The advantage of per interrupt
3817  *      computation is faster updates and more accurate ITR for the current
3818  *      traffic pattern.  Constants in this function were computed
3819  *      based on theoretical maximum wire speed and thresholds were set based
3820  *      on testing data as well as attempting to minimize response time
3821  *      while increasing bulk throughput.
3822  *      this functionality is controlled by the InterruptThrottleRate module
3823  *      parameter (see igb_param.c)
3824  *      NOTE:  These calculations are only valid when operating in a single-
3825  *             queue environment.
3826  * @adapter: pointer to adapter
3827  * @itr_setting: current q_vector->itr_val
3828  * @packets: the number of packets during this measurement interval
3829  * @bytes: the number of bytes during this measurement interval
3830  **/
3831 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3832                                    int packets, int bytes)
3833 {
3834         unsigned int retval = itr_setting;
3835
3836         if (packets == 0)
3837                 goto update_itr_done;
3838
3839         switch (itr_setting) {
3840         case lowest_latency:
3841                 /* handle TSO and jumbo frames */
3842                 if (bytes/packets > 8000)
3843                         retval = bulk_latency;
3844                 else if ((packets < 5) && (bytes > 512))
3845                         retval = low_latency;
3846                 break;
3847         case low_latency:  /* 50 usec aka 20000 ints/s */
3848                 if (bytes > 10000) {
3849                         /* this if handles the TSO accounting */
3850                         if (bytes/packets > 8000) {
3851                                 retval = bulk_latency;
3852                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3853                                 retval = bulk_latency;
3854                         } else if ((packets > 35)) {
3855                                 retval = lowest_latency;
3856                         }
3857                 } else if (bytes/packets > 2000) {
3858                         retval = bulk_latency;
3859                 } else if (packets <= 2 && bytes < 512) {
3860                         retval = lowest_latency;
3861                 }
3862                 break;
3863         case bulk_latency: /* 250 usec aka 4000 ints/s */
3864                 if (bytes > 25000) {
3865                         if (packets > 35)
3866                                 retval = low_latency;
3867                 } else if (bytes < 1500) {
3868                         retval = low_latency;
3869                 }
3870                 break;
3871         }
3872
3873 update_itr_done:
3874         return retval;
3875 }
3876
3877 static void igb_set_itr(struct igb_adapter *adapter)
3878 {
3879         struct igb_q_vector *q_vector = adapter->q_vector[0];
3880         u16 current_itr;
3881         u32 new_itr = q_vector->itr_val;
3882
3883         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3884         if (adapter->link_speed != SPEED_1000) {
3885                 current_itr = 0;
3886                 new_itr = 980; /* ~4000 ints/sec in the ITR units used below */
3887                 goto set_itr_now;
3888         }
3889
3890         adapter->rx_itr = igb_update_itr(adapter,
3891                                     adapter->rx_itr,
3892                                     q_vector->rx_ring->total_packets,
3893                                     q_vector->rx_ring->total_bytes);
3894
3895         adapter->tx_itr = igb_update_itr(adapter,
3896                                     adapter->tx_itr,
3897                                     q_vector->tx_ring->total_packets,
3898                                     q_vector->tx_ring->total_bytes);
3899         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3900
3901         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3902         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3903                 current_itr = low_latency;
3904
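        /*
         * The values below are hardware ITR counts of roughly 250 ns each
         * (about 4 counts per usec), which is how 56, 196 and 980 map to
         * approximately 70000, 20000 and 4000 interrupts per second.
         */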
3905         switch (current_itr) {
3906         /* counts and packets in update_itr are dependent on these numbers */
3907         case lowest_latency:
3908                 new_itr = 56;  /* aka 70,000 ints/sec */
3909                 break;
3910         case low_latency:
3911                 new_itr = 196; /* aka 20,000 ints/sec */
3912                 break;
3913         case bulk_latency:
3914                 new_itr = 980; /* aka 4,000 ints/sec */
3915                 break;
3916         default:
3917                 break;
3918         }
3919
3920 set_itr_now:
3921         q_vector->rx_ring->total_bytes = 0;
3922         q_vector->rx_ring->total_packets = 0;
3923         q_vector->tx_ring->total_bytes = 0;
3924         q_vector->tx_ring->total_packets = 0;
3925
3926         if (new_itr != q_vector->itr_val) {
3927                 /* this attempts to bias the interrupt rate towards Bulk
3928                  * by adding intermediate steps when interrupt rate is
3929                  * increasing */
3930                 new_itr = new_itr > q_vector->itr_val ?
3931                              max((new_itr * q_vector->itr_val) /
3932                                  (new_itr + (q_vector->itr_val >> 2)),
3933                                  new_itr) :
3934                              new_itr;
3935                 /* Don't write the value here; it resets the adapter's
3936                  * internal timer, and causes us to delay far longer than
3937                  * we should between interrupts.  Instead, we write the ITR
3938                  * value at the beginning of the next interrupt so the timing
3939                  * ends up being correct.
3940                  */
3941                 q_vector->itr_val = new_itr;
3942                 q_vector->set_itr = 1;
3943         }
3944 }
3945
3946 #define IGB_TX_FLAGS_CSUM               0x00000001
3947 #define IGB_TX_FLAGS_VLAN               0x00000002
3948 #define IGB_TX_FLAGS_TSO                0x00000004
3949 #define IGB_TX_FLAGS_IPV4               0x00000008
3950 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3951 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3952 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3953
3954 static inline int igb_tso(struct igb_ring *tx_ring,
3955                           struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3956 {
3957         struct e1000_adv_tx_context_desc *context_desc;
3958         unsigned int i;
3959         int err;
3960         struct igb_buffer *buffer_info;
3961         u32 info = 0, tu_cmd = 0;
3962         u32 mss_l4len_idx;
3963         u8 l4len;
3964
3965         if (skb_header_cloned(skb)) {
3966                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3967                 if (err)
3968                         return err;
3969         }
3970
3971         l4len = tcp_hdrlen(skb);
3972         *hdr_len += l4len;
3973
3974         if (skb->protocol == htons(ETH_P_IP)) {
3975                 struct iphdr *iph = ip_hdr(skb);
3976                 iph->tot_len = 0;
3977                 iph->check = 0;
3978                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3979                                                          iph->daddr, 0,
3980                                                          IPPROTO_TCP,
3981                                                          0);
3982         } else if (skb_is_gso_v6(skb)) {
3983                 ipv6_hdr(skb)->payload_len = 0;
3984                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3985                                                        &ipv6_hdr(skb)->daddr,
3986                                                        0, IPPROTO_TCP, 0);
3987         }
3988
3989         i = tx_ring->next_to_use;
3990
3991         buffer_info = &tx_ring->buffer_info[i];
3992         context_desc = IGB_TX_CTXTDESC(tx_ring, i);
3993         /* VLAN MACLEN IPLEN */
3994         if (tx_flags & IGB_TX_FLAGS_VLAN)
3995                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3996         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3997         *hdr_len += skb_network_offset(skb);
3998         info |= skb_network_header_len(skb);
3999         *hdr_len += skb_network_header_len(skb);
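        /*
         * *hdr_len now covers the L2, L3 and L4 headers; igb_tx_queue()
         * uses it together with the total frame length to report the TSO
         * payload size in the descriptor.
         */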
4000         context_desc->vlan_macip_lens = cpu_to_le32(info);
4001
4002         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4003         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4004
4005         if (skb->protocol == htons(ETH_P_IP))
4006                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4007         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4008
4009         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4010
4011         /* MSS L4LEN IDX */
4012         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
4013         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
4014
4015         /* For 82575, context index must be unique per ring. */
4016         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4017                 mss_l4len_idx |= tx_ring->reg_idx << 4;
4018
4019         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4020         context_desc->seqnum_seed = 0;
4021
4022         buffer_info->time_stamp = jiffies;
4023         buffer_info->next_to_watch = i;
4024         buffer_info->dma = 0;
4025         i++;
4026         if (i == tx_ring->count)
4027                 i = 0;
4028
4029         tx_ring->next_to_use = i;
4030
4031         return true;
4032 }
4033
4034 static inline bool igb_tx_csum(struct igb_ring *tx_ring,
4035                                struct sk_buff *skb, u32 tx_flags)
4036 {
4037         struct e1000_adv_tx_context_desc *context_desc;
4038         struct device *dev = tx_ring->dev;
4039         struct igb_buffer *buffer_info;
4040         u32 info = 0, tu_cmd = 0;
4041         unsigned int i;
4042
4043         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4044             (tx_flags & IGB_TX_FLAGS_VLAN)) {
4045                 i = tx_ring->next_to_use;
4046                 buffer_info = &tx_ring->buffer_info[i];
4047                 context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4048
4049                 if (tx_flags & IGB_TX_FLAGS_VLAN)
4050                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4051
4052                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4053                 if (skb->ip_summed == CHECKSUM_PARTIAL)
4054                         info |= skb_network_header_len(skb);
4055
4056                 context_desc->vlan_macip_lens = cpu_to_le32(info);
4057
4058                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4059
4060                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
4061                         __be16 protocol;
4062
4063                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4064                                 const struct vlan_ethhdr *vhdr =
4065                                           (const struct vlan_ethhdr*)skb->data;
4066
4067                                 protocol = vhdr->h_vlan_encapsulated_proto;
4068                         } else {
4069                                 protocol = skb->protocol;
4070                         }
4071
4072                         switch (protocol) {
4073                         case cpu_to_be16(ETH_P_IP):
4074                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4075                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4076                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4077                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4078                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4079                                 break;
4080                         case cpu_to_be16(ETH_P_IPV6):
4081                                 /* XXX what about other V6 headers?? */
4082                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4083                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4084                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4085                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4086                                 break;
4087                         default:
4088                                 if (unlikely(net_ratelimit()))
4089                                         dev_warn(dev,
4090                                             "partial checksum but proto=%x!\n",
4091                                             skb->protocol);
4092                                 break;
4093                         }
4094                 }
4095
4096                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4097                 context_desc->seqnum_seed = 0;
4098                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4099                         context_desc->mss_l4len_idx =
4100                                 cpu_to_le32(tx_ring->reg_idx << 4);
4101
4102                 buffer_info->time_stamp = jiffies;
4103                 buffer_info->next_to_watch = i;
4104                 buffer_info->dma = 0;
4105
4106                 i++;
4107                 if (i == tx_ring->count)
4108                         i = 0;
4109                 tx_ring->next_to_use = i;
4110
4111                 return true;
4112         }
4113         return false;
4114 }
4115
4116 #define IGB_MAX_TXD_PWR 16
4117 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
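/*
 * A single data descriptor can map just under 64KB, so igb_tx_map() below
 * BUG()s if any individual buffer reaches IGB_MAX_DATA_PER_TXD.
 */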
4118
4119 static inline int igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb,
4120                              unsigned int first)
4121 {
4122         struct igb_buffer *buffer_info;
4123         struct device *dev = tx_ring->dev;
4124         unsigned int hlen = skb_headlen(skb);
4125         unsigned int count = 0, i;
4126         unsigned int f;
4127         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4128
4129         i = tx_ring->next_to_use;
4130
4131         buffer_info = &tx_ring->buffer_info[i];
4132         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4133         buffer_info->length = hlen;
4134         /* set time_stamp *before* dma to help avoid a possible race */
4135         buffer_info->time_stamp = jiffies;
4136         buffer_info->next_to_watch = i;
4137         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4138                                           DMA_TO_DEVICE);
4139         if (dma_mapping_error(dev, buffer_info->dma))
4140                 goto dma_error;
4141
4142         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4143                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4144                 unsigned int len = frag->size;
4145
4146                 count++;
4147                 i++;
4148                 if (i == tx_ring->count)
4149                         i = 0;
4150
4151                 buffer_info = &tx_ring->buffer_info[i];
4152                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4153                 buffer_info->length = len;
4154                 buffer_info->time_stamp = jiffies;
4155                 buffer_info->next_to_watch = i;
4156                 buffer_info->mapped_as_page = true;
4157                 buffer_info->dma = skb_frag_dma_map(dev, frag, 0, len,
4158                                                 DMA_TO_DEVICE);
4159                 if (dma_mapping_error(dev, buffer_info->dma))
4160                         goto dma_error;
4161
4162         }
4163
4164         tx_ring->buffer_info[i].skb = skb;
4165         tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4166         /* bytecount: skb->len plus one header copy for each extra gso segment */
4167         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4168         tx_ring->buffer_info[i].gso_segs = gso_segs;
4169         tx_ring->buffer_info[first].next_to_watch = i;
4170
4171         return ++count;
4172
4173 dma_error:
4174         dev_err(dev, "TX DMA map failed\n");
4175
4176         /* clear timestamp and dma mappings for failed buffer_info mapping */
4177         buffer_info->dma = 0;
4178         buffer_info->time_stamp = 0;
4179         buffer_info->length = 0;
4180         buffer_info->next_to_watch = 0;
4181         buffer_info->mapped_as_page = false;
4182
4183         /* clear timestamp and dma mappings for remaining portion of packet */
4184         while (count--) {
4185                 if (i == 0)
4186                         i = tx_ring->count;
4187                 i--;
4188                 buffer_info = &tx_ring->buffer_info[i];
4189                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4190         }
4191
4192         return 0;
4193 }
4194
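/**
 * igb_tx_queue - write the mapped buffers as advanced Tx descriptors
 * @tx_ring: ring to place the descriptors on
 * @tx_flags: IGB_TX_FLAGS_* collected for this skb
 * @count: number of descriptors to write (as returned by igb_tx_map)
 * @paylen: total length of the skb
 * @hdr_len: length of the protocol headers (non-zero only for TSO)
 *
 * Builds the cmd_type_len and olinfo_status words once, then walks the
 * buffer_info entries writing one descriptor per mapped buffer.  The
 * last descriptor additionally gets the IGB_ADVTXD_DCMD bits, and the
 * tail register is written to hand the new descriptors to hardware.
 **/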
4195 static inline void igb_tx_queue(struct igb_ring *tx_ring,
4196                                 u32 tx_flags, int count, u32 paylen,
4197                                 u8 hdr_len)
4198 {
4199         union e1000_adv_tx_desc *tx_desc;
4200         struct igb_buffer *buffer_info;
4201         u32 olinfo_status = 0, cmd_type_len;
4202         unsigned int i = tx_ring->next_to_use;
4203
4204         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4205                         E1000_ADVTXD_DCMD_DEXT);
4206
4207         if (tx_flags & IGB_TX_FLAGS_VLAN)
4208                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4209
4210         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4211                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4212
4213         if (tx_flags & IGB_TX_FLAGS_TSO) {
4214                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4215
4216                 /* insert tcp checksum */
4217                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4218
4219                 /* insert ip checksum */
4220                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4221                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4222
4223         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4224                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4225         }
4226
4227         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4228             (tx_flags & (IGB_TX_FLAGS_CSUM |
4229                          IGB_TX_FLAGS_TSO |
4230                          IGB_TX_FLAGS_VLAN)))
4231                 olinfo_status |= tx_ring->reg_idx << 4;
4232
4233         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4234
4235         do {
4236                 buffer_info = &tx_ring->buffer_info[i];
4237                 tx_desc = IGB_TX_DESC(tx_ring, i);
4238                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4239                 tx_desc->read.cmd_type_len =
4240                         cpu_to_le32(cmd_type_len | buffer_info->length);
4241                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4242                 count--;
4243                 i++;
4244                 if (i == tx_ring->count)
4245                         i = 0;
4246         } while (count > 0);
4247
4248         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4249         /* Force memory writes to complete before letting h/w
4250          * know there are new descriptors to fetch.  (Only
4251          * applicable for weak-ordered memory model archs,
4252          * such as IA-64). */
4253         wmb();
4254
4255         tx_ring->next_to_use = i;
4256         writel(i, tx_ring->tail);
4257         /* we need this if more than one processor can write to our tail
4258          * at a time; it synchronizes IO on IA64/Altix systems */
4259         mmiowb();
4260 }
4261
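/**
 * __igb_maybe_stop_tx - slow path of igb_maybe_stop_tx
 * @tx_ring: ring to check for available descriptors
 * @size: number of descriptors needed
 *
 * Stops the subqueue and then re-checks the free descriptor count after
 * the memory barrier; if another CPU freed descriptors in the meantime
 * the queue is woken again and 0 is returned, otherwise -EBUSY.
 **/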
4262 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4263 {
4264         struct net_device *netdev = tx_ring->netdev;
4265
4266         netif_stop_subqueue(netdev, tx_ring->queue_index);
4267
4268         /* Herbert's original patch had:
4269          *  smp_mb__after_netif_stop_queue();
4270          * but since that doesn't exist yet, just open code it. */
4271         smp_mb();
4272
4273         /* We need to check again in case another CPU has just
4274          * made room available. */
4275         if (igb_desc_unused(tx_ring) < size)
4276                 return -EBUSY;
4277
4278         /* A reprieve! */
4279         netif_wake_subqueue(netdev, tx_ring->queue_index);
4280
4281         u64_stats_update_begin(&tx_ring->tx_syncp2);
4282         tx_ring->tx_stats.restart_queue2++;
4283         u64_stats_update_end(&tx_ring->tx_syncp2);
4284
4285         return 0;
4286 }
4287
4288 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4289 {
4290         if (igb_desc_unused(tx_ring) >= size)
4291                 return 0;
4292         return __igb_maybe_stop_tx(tx_ring, size);
4293 }
4294
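/**
 * igb_xmit_frame_ring - transmit an skb on a specific Tx ring
 * @skb: socket buffer to transmit
 * @tx_ring: ring to place the frame on
 *
 * Reserves ring space, picks up hardware timestamp/VLAN/checksum/TSO
 * offload flags, maps the buffers and queues the descriptors.  A worst
 * case skb needs nr_frags + 1 (head) + 1 (context) + 2 (gap) descriptors,
 * hence the "+ 4" in the igb_maybe_stop_tx() calls below.
 **/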
4295 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4296                                 struct igb_ring *tx_ring)
4297 {
4298         int tso = 0, count;
4299         u32 tx_flags = 0;
4300         u16 first;
4301         u8 hdr_len = 0;
4302
4303         /* need: 1 descriptor per page,
4304          *       + 2 desc gap to keep tail from touching head,
4305          *       + 1 desc for skb->data,
4306          *       + 1 desc for context descriptor,
4307          * otherwise try next time */
4308         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4309                 /* this is a hard error */
4310                 return NETDEV_TX_BUSY;
4311         }
4312
4313         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4314                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4315                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4316         }
4317
4318         if (vlan_tx_tag_present(skb)) {
4319                 tx_flags |= IGB_TX_FLAGS_VLAN;
4320                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4321         }
4322
4323         if (skb->protocol == htons(ETH_P_IP))
4324                 tx_flags |= IGB_TX_FLAGS_IPV4;
4325
4326         first = tx_ring->next_to_use;
4327         if (skb_is_gso(skb)) {
4328                 tso = igb_tso(tx_ring, skb, tx_flags, &hdr_len);
4329
4330                 if (tso < 0) {
4331                         dev_kfree_skb_any(skb);
4332                         return NETDEV_TX_OK;
4333                 }
4334         }
4335
4336         if (tso)
4337                 tx_flags |= IGB_TX_FLAGS_TSO;
4338         else if (igb_tx_csum(tx_ring, skb, tx_flags) &&
4339                  (skb->ip_summed == CHECKSUM_PARTIAL))
4340                 tx_flags |= IGB_TX_FLAGS_CSUM;
4341
4342         /*
4343          * count reflects descriptors mapped; if 0 or less, a mapping error
4344          * has occurred and we need to rewind the descriptor queue
4345          */
4346         count = igb_tx_map(tx_ring, skb, first);
4347         if (!count) {
4348                 dev_kfree_skb_any(skb);
4349                 tx_ring->buffer_info[first].time_stamp = 0;
4350                 tx_ring->next_to_use = first;
4351                 return NETDEV_TX_OK;
4352         }
4353
4354         igb_tx_queue(tx_ring, tx_flags, count, skb->len, hdr_len);
4355
4356         /* Make sure there is space in the ring for the next send. */
4357         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4358
4359         return NETDEV_TX_OK;
4360 }
4361
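/**
 * igb_tx_queue_mapping - map an skb's queue index to a Tx ring
 * @adapter: board private structure
 * @skb: skb selected for transmit
 *
 * Uses skb->queue_mapping directly when it is in range, otherwise wraps
 * it with a modulo so we never index past num_tx_queues.
 **/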
4362 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4363                                                     struct sk_buff *skb)
4364 {
4365         unsigned int r_idx = skb->queue_mapping;
4366
4367         if (r_idx >= adapter->num_tx_queues)
4368                 r_idx = r_idx % adapter->num_tx_queues;
4369
4370         return adapter->tx_ring[r_idx];
4371 }
4372
4373 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4374                                   struct net_device *netdev)
4375 {
4376         struct igb_adapter *adapter = netdev_priv(netdev);
4377
4378         if (test_bit(__IGB_DOWN, &adapter->state)) {
4379                 dev_kfree_skb_any(skb);
4380                 return NETDEV_TX_OK;
4381         }
4382
4383         if (skb->len <= 0) {
4384                 dev_kfree_skb_any(skb);
4385                 return NETDEV_TX_OK;
4386         }
4387
4388         /*
4389          * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4390          * in order to meet this minimum size requirement.
4391          */
4392         if (skb->len < 17) {
4393                 if (skb_padto(skb, 17))
4394                         return NETDEV_TX_OK;
4395                 skb->len = 17;
4396         }
4397
4398         return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4399 }
4400
4401 /**
4402  * igb_tx_timeout - Respond to a Tx Hang
4403  * @netdev: network interface device structure
4404  **/
4405 static void igb_tx_timeout(struct net_device *netdev)
4406 {
4407         struct igb_adapter *adapter = netdev_priv(netdev);
4408         struct e1000_hw *hw = &adapter->hw;
4409
4410         /* Do the reset outside of interrupt context */
4411         adapter->tx_timeout_count++;
4412
4413         if (hw->mac.type == e1000_82580)
4414                 hw->dev_spec._82575.global_device_reset = true;
4415
4416         schedule_work(&adapter->reset_task);
4417         wr32(E1000_EICS,
4418              (adapter->eims_enable_mask & ~adapter->eims_other));
4419 }
4420
4421 static void igb_reset_task(struct work_struct *work)
4422 {
4423         struct igb_adapter *adapter;
4424         adapter = container_of(work, struct igb_adapter, reset_task);
4425
4426         igb_dump(adapter);
4427         netdev_err(adapter->netdev, "Reset adapter\n");
4428         igb_reinit_locked(adapter);
4429 }
4430
4431 /**
4432  * igb_get_stats64 - Get System Network Statistics
4433  * @netdev: network interface device structure
4434  * @stats: rtnl_link_stats64 pointer
4435  *
4436  **/
4437 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4438                                                  struct rtnl_link_stats64 *stats)
4439 {
4440         struct igb_adapter *adapter = netdev_priv(netdev);
4441
4442         spin_lock(&adapter->stats64_lock);
4443         igb_update_stats(adapter, &adapter->stats64);
4444         memcpy(stats, &adapter->stats64, sizeof(*stats));
4445         spin_unlock(&adapter->stats64_lock);
4446
4447         return stats;
4448 }
4449
4450 /**
4451  * igb_change_mtu - Change the Maximum Transfer Unit
4452  * @netdev: network interface device structure
4453  * @new_mtu: new value for maximum frame size
4454  *
4455  * Returns 0 on success, negative on failure
4456  **/
4457 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4458 {
4459         struct igb_adapter *adapter = netdev_priv(netdev);
4460         struct pci_dev *pdev = adapter->pdev;
4461         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4462
4463         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4464                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4465                 return -EINVAL;
4466         }
4467
4468 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4469         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4470                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4471                 return -EINVAL;
4472         }
4473
4474         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4475                 msleep(1);
4476
4477         /* igb_down has a dependency on max_frame_size */
4478         adapter->max_frame_size = max_frame;
4479
4480         if (netif_running(netdev))
4481                 igb_down(adapter);
4482
4483         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4484                  netdev->mtu, new_mtu);
4485         netdev->mtu = new_mtu;
4486
4487         if (netif_running(netdev))
4488                 igb_up(adapter);
4489         else
4490                 igb_reset(adapter);
4491
4492         clear_bit(__IGB_RESETTING, &adapter->state);
4493
4494         return 0;
4495 }
4496
4497 /**
4498  * igb_update_stats - Update the board statistics counters
4499  * @adapter: board private structure
4500  **/
4501
4502 void igb_update_stats(struct igb_adapter *adapter,
4503                       struct rtnl_link_stats64 *net_stats)
4504 {
4505         struct e1000_hw *hw = &adapter->hw;
4506         struct pci_dev *pdev = adapter->pdev;
4507         u32 reg, mpc;
4508         u16 phy_tmp;
4509         int i;
4510         u64 bytes, packets;
4511         unsigned int start;
4512         u64 _bytes, _packets;
4513
4514 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4515
4516         /*
4517          * Prevent stats update while adapter is being reset, or if the pci
4518          * connection is down.
4519          */
4520         if (adapter->link_speed == 0)
4521                 return;
4522         if (pci_channel_offline(pdev))
4523                 return;
4524
4525         bytes = 0;
4526         packets = 0;
4527         for (i = 0; i < adapter->num_rx_queues; i++) {
4528                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4529                 struct igb_ring *ring = adapter->rx_ring[i];
4530
4531                 ring->rx_stats.drops += rqdpc_tmp;
4532                 net_stats->rx_fifo_errors += rqdpc_tmp;
4533
4534                 do {
4535                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4536                         _bytes = ring->rx_stats.bytes;
4537                         _packets = ring->rx_stats.packets;
4538                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4539                 bytes += _bytes;
4540                 packets += _packets;
4541         }
4542
4543         net_stats->rx_bytes = bytes;
4544         net_stats->rx_packets = packets;
4545
4546         bytes = 0;
4547         packets = 0;
4548         for (i = 0; i < adapter->num_tx_queues; i++) {
4549                 struct igb_ring *ring = adapter->tx_ring[i];
4550                 do {
4551                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4552                         _bytes = ring->tx_stats.bytes;
4553                         _packets = ring->tx_stats.packets;
4554                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4555                 bytes += _bytes;
4556                 packets += _packets;
4557         }
4558         net_stats->tx_bytes = bytes;
4559         net_stats->tx_packets = packets;
4560
4561         /* read stats registers */
4562         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4563         adapter->stats.gprc += rd32(E1000_GPRC);
4564         adapter->stats.gorc += rd32(E1000_GORCL);
4565         rd32(E1000_GORCH); /* clear GORCL */
4566         adapter->stats.bprc += rd32(E1000_BPRC);
4567         adapter->stats.mprc += rd32(E1000_MPRC);
4568         adapter->stats.roc += rd32(E1000_ROC);
4569
4570         adapter->stats.prc64 += rd32(E1000_PRC64);
4571         adapter->stats.prc127 += rd32(E1000_PRC127);
4572         adapter->stats.prc255 += rd32(E1000_PRC255);
4573         adapter->stats.prc511 += rd32(E1000_PRC511);
4574         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4575         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4576         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4577         adapter->stats.sec += rd32(E1000_SEC);
4578
4579         mpc = rd32(E1000_MPC);
4580         adapter->stats.mpc += mpc;
4581         net_stats->rx_fifo_errors += mpc;
4582         adapter->stats.scc += rd32(E1000_SCC);
4583         adapter->stats.ecol += rd32(E1000_ECOL);
4584         adapter->stats.mcc += rd32(E1000_MCC);
4585         adapter->stats.latecol += rd32(E1000_LATECOL);
4586         adapter->stats.dc += rd32(E1000_DC);
4587         adapter->stats.rlec += rd32(E1000_RLEC);
4588         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4589         adapter->stats.xontxc += rd32(E1000_XONTXC);
4590         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4591         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4592         adapter->stats.fcruc += rd32(E1000_FCRUC);
4593         adapter->stats.gptc += rd32(E1000_GPTC);
4594         adapter->stats.gotc += rd32(E1000_GOTCL);
4595         rd32(E1000_GOTCH); /* clear GOTCL */
4596         adapter->stats.rnbc += rd32(E1000_RNBC);
4597         adapter->stats.ruc += rd32(E1000_RUC);
4598         adapter->stats.rfc += rd32(E1000_RFC);
4599         adapter->stats.rjc += rd32(E1000_RJC);
4600         adapter->stats.tor += rd32(E1000_TORH);
4601         adapter->stats.tot += rd32(E1000_TOTH);
4602         adapter->stats.tpr += rd32(E1000_TPR);
4603
4604         adapter->stats.ptc64 += rd32(E1000_PTC64);
4605         adapter->stats.ptc127 += rd32(E1000_PTC127);
4606         adapter->stats.ptc255 += rd32(E1000_PTC255);
4607         adapter->stats.ptc511 += rd32(E1000_PTC511);
4608         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4609         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4610
4611         adapter->stats.mptc += rd32(E1000_MPTC);
4612         adapter->stats.bptc += rd32(E1000_BPTC);
4613
4614         adapter->stats.tpt += rd32(E1000_TPT);
4615         adapter->stats.colc += rd32(E1000_COLC);
4616
4617         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4618         /* read internal phy specific stats */
4619         reg = rd32(E1000_CTRL_EXT);
4620         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4621                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4622                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4623         }
4624
4625         adapter->stats.tsctc += rd32(E1000_TSCTC);
4626         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4627
4628         adapter->stats.iac += rd32(E1000_IAC);
4629         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4630         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4631         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4632         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4633         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4634         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4635         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4636         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4637
4638         /* Fill out the OS statistics structure */
4639         net_stats->multicast = adapter->stats.mprc;
4640         net_stats->collisions = adapter->stats.colc;
4641
4642         /* Rx Errors */
4643
4644         /* RLEC on some newer hardware can be incorrect so build
4645          * our own version based on RUC and ROC */
4646         net_stats->rx_errors = adapter->stats.rxerrc +
4647                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4648                 adapter->stats.ruc + adapter->stats.roc +
4649                 adapter->stats.cexterr;
4650         net_stats->rx_length_errors = adapter->stats.ruc +
4651                                       adapter->stats.roc;
4652         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4653         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4654         net_stats->rx_missed_errors = adapter->stats.mpc;
4655
4656         /* Tx Errors */
4657         net_stats->tx_errors = adapter->stats.ecol +
4658                                adapter->stats.latecol;
4659         net_stats->tx_aborted_errors = adapter->stats.ecol;
4660         net_stats->tx_window_errors = adapter->stats.latecol;
4661         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4662
4663         /* Tx Dropped needs to be maintained elsewhere */
4664
4665         /* Phy Stats */
4666         if (hw->phy.media_type == e1000_media_type_copper) {
4667                 if ((adapter->link_speed == SPEED_1000) &&
4668                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4669                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4670                         adapter->phy_stats.idle_errors += phy_tmp;
4671                 }
4672         }
4673
4674         /* Management Stats */
4675         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4676         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4677         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4678
4679         /* OS2BMC Stats */
4680         reg = rd32(E1000_MANC);
4681         if (reg & E1000_MANC_EN_BMC2OS) {
4682                 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4683                 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4684                 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4685                 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4686         }
4687 }
4688
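/**
 * igb_msix_other - MSI-X handler for the "other" causes vector
 * @irq: interrupt number
 * @data: pointer to our adapter structure
 *
 * Handles the non-queue interrupt causes: device reset requests, DMA
 * out-of-sync events (and the associated VF spoof check), VF mailbox
 * messages and link state changes, then re-enables those causes.
 **/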
4689 static irqreturn_t igb_msix_other(int irq, void *data)
4690 {
4691         struct igb_adapter *adapter = data;
4692         struct e1000_hw *hw = &adapter->hw;
4693         u32 icr = rd32(E1000_ICR);
4694         /* reading ICR causes bit 31 of EICR to be cleared */
4695
4696         if (icr & E1000_ICR_DRSTA)
4697                 schedule_work(&adapter->reset_task);
4698
4699         if (icr & E1000_ICR_DOUTSYNC) {
4700                 /* HW is reporting DMA is out of sync */
4701                 adapter->stats.doosync++;
4702                 /* The DMA Out of Sync is also an indication of a spoof event
4703                  * in IOV mode. Check the Wrong VM Behavior register to
4704                  * see if it is really a spoof event. */
4705                 igb_check_wvbr(adapter);
4706         }
4707
4708         /* Check for a mailbox event */
4709         if (icr & E1000_ICR_VMMB)
4710                 igb_msg_task(adapter);
4711
4712         if (icr & E1000_ICR_LSC) {
4713                 hw->mac.get_link_status = 1;
4714                 /* guard against interrupt when we're going down */
4715                 if (!test_bit(__IGB_DOWN, &adapter->state))
4716                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4717         }
4718
4719         if (adapter->vfs_allocated_count)
4720                 wr32(E1000_IMS, E1000_IMS_LSC |
4721                                 E1000_IMS_VMMB |
4722                                 E1000_IMS_DOUTSYNC);
4723         else
4724                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4725         wr32(E1000_EIMS, adapter->eims_other);
4726
4727         return IRQ_HANDLED;
4728 }
4729
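/**
 * igb_write_itr - update the hardware interrupt throttle rate
 * @q_vector: queue vector holding the new itr_val
 *
 * Writes the interrupt throttle value calculated by the dynamic ITR
 * code to the vector's ITR register, but only when set_itr indicates
 * a new value is pending.
 **/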
4730 static void igb_write_itr(struct igb_q_vector *q_vector)
4731 {
4732         struct igb_adapter *adapter = q_vector->adapter;
4733         u32 itr_val = q_vector->itr_val & 0x7FFC;
4734
4735         if (!q_vector->set_itr)
4736                 return;
4737
4738         if (!itr_val)
4739                 itr_val = 0x4;
4740
4741         if (adapter->hw.mac.type == e1000_82575)
4742                 itr_val |= itr_val << 16;
4743         else
4744                 itr_val |= 0x8000000;
4745
4746         writel(itr_val, q_vector->itr_register);
4747         q_vector->set_itr = 0;
4748 }
4749
4750 static irqreturn_t igb_msix_ring(int irq, void *data)
4751 {
4752         struct igb_q_vector *q_vector = data;
4753
4754         /* Write the ITR value calculated from the previous interrupt. */
4755         igb_write_itr(q_vector);
4756
4757         napi_schedule(&q_vector->napi);
4758
4759         return IRQ_HANDLED;
4760 }
4761
4762 #ifdef CONFIG_IGB_DCA
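/**
 * igb_update_dca - program DCA tags for a q_vector's rings
 * @q_vector: vector whose Tx/Rx rings should be updated
 *
 * If the vector has moved to a different CPU, rewrite the DCA_TXCTRL
 * and DCA_RXCTRL registers so descriptor (and Rx header/data) writes
 * are tagged for the CPU currently running this vector.
 **/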
4763 static void igb_update_dca(struct igb_q_vector *q_vector)
4764 {
4765         struct igb_adapter *adapter = q_vector->adapter;
4766         struct e1000_hw *hw = &adapter->hw;
4767         int cpu = get_cpu();
4768
4769         if (q_vector->cpu == cpu)
4770                 goto out_no_update;
4771
4772         if (q_vector->tx_ring) {
4773                 int q = q_vector->tx_ring->reg_idx;
4774                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4775                 if (hw->mac.type == e1000_82575) {
4776                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4777                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4778                 } else {
4779                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4780                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4781                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4782                 }
4783                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4784                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4785         }
4786         if (q_vector->rx_ring) {
4787                 int q = q_vector->rx_ring->reg_idx;
4788                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4789                 if (hw->mac.type == e1000_82575) {
4790                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4791                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4792                 } else {
4793                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4794                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4795                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4796                 }
4797                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4798                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4799                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4800                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4801         }
4802         q_vector->cpu = cpu;
4803 out_no_update:
4804         put_cpu();
4805 }
4806
4807 static void igb_setup_dca(struct igb_adapter *adapter)
4808 {
4809         struct e1000_hw *hw = &adapter->hw;
4810         int i;
4811
4812         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4813                 return;
4814
4815         /* Always use CB2 mode; the difference is masked in the CB driver. */
4816         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4817
4818         for (i = 0; i < adapter->num_q_vectors; i++) {
4819                 adapter->q_vector[i]->cpu = -1;
4820                 igb_update_dca(adapter->q_vector[i]);
4821         }
4822 }
4823
4824 static int __igb_notify_dca(struct device *dev, void *data)
4825 {
4826         struct net_device *netdev = dev_get_drvdata(dev);
4827         struct igb_adapter *adapter = netdev_priv(netdev);
4828         struct pci_dev *pdev = adapter->pdev;
4829         struct e1000_hw *hw = &adapter->hw;
4830         unsigned long event = *(unsigned long *)data;
4831
4832         switch (event) {
4833         case DCA_PROVIDER_ADD:
4834                 /* if already enabled, don't do it again */
4835                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4836                         break;
4837                 if (dca_add_requester(dev) == 0) {
4838                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4839                         dev_info(&pdev->dev, "DCA enabled\n");
4840                         igb_setup_dca(adapter);
4841                         break;
4842                 }
4843                 /* Fall Through since DCA is disabled. */
4844         case DCA_PROVIDER_REMOVE:
4845                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4846                         /* without this a class_device is left
4847                          * hanging around in the sysfs model */
4848                         dca_remove_requester(dev);
4849                         dev_info(&pdev->dev, "DCA disabled\n");
4850                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4851                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4852                 }
4853                 break;
4854         }
4855
4856         return 0;
4857 }
4858
4859 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4860                           void *p)
4861 {
4862         int ret_val;
4863
4864         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4865                                          __igb_notify_dca);
4866
4867         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4868 }
4869 #endif /* CONFIG_IGB_DCA */
4870
4871 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4872 {
4873         struct e1000_hw *hw = &adapter->hw;
4874         u32 ping;
4875         int i;
4876
4877         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4878                 ping = E1000_PF_CONTROL_MSG;
4879                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4880                         ping |= E1000_VT_MSGTYPE_CTS;
4881                 igb_write_mbx(hw, &ping, 1, i);
4882         }
4883 }
4884
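/**
 * igb_set_vf_promisc - handle a VF request to change promiscuous modes
 * @adapter: board private structure
 * @msgbuf: mailbox message from the VF
 * @vf: VF index the request came from
 *
 * Multicast promiscuous mode is granted via the VMOLR MPME bit; when it
 * is being cleared, any stored multicast hashes are written back to the
 * MTA (or MPME is kept if the VF has more than 30 hashes).  Requests for
 * any other promiscuous flags are rejected with -EINVAL.
 **/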
4885 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4886 {
4887         struct e1000_hw *hw = &adapter->hw;
4888         u32 vmolr = rd32(E1000_VMOLR(vf));
4889         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4890
4891         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4892                             IGB_VF_FLAG_MULTI_PROMISC);
4893         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4894
4895         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4896                 vmolr |= E1000_VMOLR_MPME;
4897                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4898                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4899         } else {
4900                 /*
4901                  * if we have hashes and we are clearing a multicast promisc
4902                  * flag, we need to write the hashes to the MTA as this step
4903                  * was previously skipped
4904                  */
4905                 if (vf_data->num_vf_mc_hashes > 30) {
4906                         vmolr |= E1000_VMOLR_MPME;
4907                 } else if (vf_data->num_vf_mc_hashes) {
4908                         int j;
4909                         vmolr |= E1000_VMOLR_ROMPE;
4910                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4911                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4912                 }
4913         }
4914
4915         wr32(E1000_VMOLR(vf), vmolr);
4916
4917         /* any flags left unprocessed are likely not supported; reject the request */
4918         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4919                 return -EINVAL;
4920
4921         return 0;
4922
4923 }
4924
4925 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4926                                   u32 *msgbuf, u32 vf)
4927 {
4928         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4929         u16 *hash_list = (u16 *)&msgbuf[1];
4930         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4931         int i;
4932
4933         /* salt away the number of multicast addresses assigned
4934          * to this VF for later use to restore when the PF multicast
4935          * list changes
4936          */
4937         vf_data->num_vf_mc_hashes = n;
4938
4939         /* only up to 30 hash values supported */
4940         if (n > 30)
4941                 n = 30;
4942
4943         /* store the hashes for later use */
4944         for (i = 0; i < n; i++)
4945                 vf_data->vf_mc_hashes[i] = hash_list[i];
4946
4947         /* Flush and reset the mta with the new values */
4948         igb_set_rx_mode(adapter->netdev);
4949
4950         return 0;
4951 }
4952
4953 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4954 {
4955         struct e1000_hw *hw = &adapter->hw;
4956         struct vf_data_storage *vf_data;
4957         int i, j;
4958
4959         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4960                 u32 vmolr = rd32(E1000_VMOLR(i));
4961                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4962
4963                 vf_data = &adapter->vf_data[i];
4964
4965                 if ((vf_data->num_vf_mc_hashes > 30) ||
4966                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4967                         vmolr |= E1000_VMOLR_MPME;
4968                 } else if (vf_data->num_vf_mc_hashes) {
4969                         vmolr |= E1000_VMOLR_ROMPE;
4970                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4971                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4972                 }
4973                 wr32(E1000_VMOLR(i), vmolr);
4974         }
4975 }
4976
4977 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4978 {
4979         struct e1000_hw *hw = &adapter->hw;
4980         u32 pool_mask, reg, vid;
4981         int i;
4982
4983         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4984
4985         /* Find the vlan filter for this id */
4986         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4987                 reg = rd32(E1000_VLVF(i));
4988
4989                 /* remove the vf from the pool */
4990                 reg &= ~pool_mask;
4991
4992                 /* if pool is empty then remove entry from vfta */
4993                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4994                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4995                         vid = reg & E1000_VLVF_VLANID_MASK;
4996                         igb_vfta_set(hw, vid, false);
4997                         reg = 0;
4998                 }
4999
5000                 wr32(E1000_VLVF(i), reg);
5001         }
5002
5003         adapter->vf_data[vf].vlans_enabled = 0;
5004 }
5005
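/**
 * igb_vlvf_set - add or remove a pool from a VLVF VLAN filter entry
 * @adapter: board private structure
 * @vid: VLAN id to add or remove
 * @add: true to add the VF pool to the filter, false to remove it
 * @vf: pool/VF index to modify
 *
 * Finds (or, when adding, allocates) the VLVF entry for @vid, updates
 * its pool-select bits and keeps the VFTA in sync.  The per-VF RLPML
 * limit is also grown or shrunk by 4 bytes as the first VLAN is added
 * or the last one removed, so a tagged max-size frame still fits.
 **/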
5006 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5007 {
5008         struct e1000_hw *hw = &adapter->hw;
5009         u32 reg, i;
5010
5011         /* The vlvf table only exists on 82576 hardware and newer */
5012         if (hw->mac.type < e1000_82576)
5013                 return -1;
5014
5015         /* we only need to do this if VMDq is enabled */
5016         if (!adapter->vfs_allocated_count)
5017                 return -1;
5018
5019         /* Find the vlan filter for this id */
5020         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5021                 reg = rd32(E1000_VLVF(i));
5022                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5023                     vid == (reg & E1000_VLVF_VLANID_MASK))
5024                         break;
5025         }
5026
5027         if (add) {
5028                 if (i == E1000_VLVF_ARRAY_SIZE) {
5029                         /* Did not find a matching VLAN ID entry that was
5030                          * enabled.  Search for a free filter entry, i.e.
5031                          * one without the enable bit set
5032                          */
5033                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5034                                 reg = rd32(E1000_VLVF(i));
5035                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5036                                         break;
5037                         }
5038                 }
5039                 if (i < E1000_VLVF_ARRAY_SIZE) {
5040                         /* Found an enabled/available entry */
5041                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5042
5043                         /* if !enabled we need to set this up in vfta */
5044                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5045                                 /* add VID to filter table */
5046                                 igb_vfta_set(hw, vid, true);
5047                                 reg |= E1000_VLVF_VLANID_ENABLE;
5048                         }
5049                         reg &= ~E1000_VLVF_VLANID_MASK;
5050                         reg |= vid;
5051                         wr32(E1000_VLVF(i), reg);
5052
5053                         /* do not modify RLPML for PF devices */
5054                         if (vf >= adapter->vfs_allocated_count)
5055                                 return 0;
5056
5057                         if (!adapter->vf_data[vf].vlans_enabled) {
5058                                 u32 size;
5059                                 reg = rd32(E1000_VMOLR(vf));
5060                                 size = reg & E1000_VMOLR_RLPML_MASK;
5061                                 size += 4;
5062                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5063                                 reg |= size;
5064                                 wr32(E1000_VMOLR(vf), reg);
5065                         }
5066
5067                         adapter->vf_data[vf].vlans_enabled++;
5068                         return 0;
5069                 }
5070         } else {
5071                 if (i < E1000_VLVF_ARRAY_SIZE) {
5072                         /* remove vf from the pool */
5073                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5074                         /* if pool is empty then remove entry from vfta */
5075                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5076                                 reg = 0;
5077                                 igb_vfta_set(hw, vid, false);
5078                         }
5079                         wr32(E1000_VLVF(i), reg);
5080
5081                         /* do not modify RLPML for PF devices */
5082                         if (vf >= adapter->vfs_allocated_count)
5083                                 return 0;
5084
5085                         adapter->vf_data[vf].vlans_enabled--;
5086                         if (!adapter->vf_data[vf].vlans_enabled) {
5087                                 u32 size;
5088                                 reg = rd32(E1000_VMOLR(vf));
5089                                 size = reg & E1000_VMOLR_RLPML_MASK;
5090                                 size -= 4;
5091                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5092                                 reg |= size;
5093                                 wr32(E1000_VMOLR(vf), reg);
5094                         }
5095                 }
5096         }
5097         return 0;
5098 }
5099
5100 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5101 {
5102         struct e1000_hw *hw = &adapter->hw;
5103
5104         if (vid)
5105                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5106         else
5107                 wr32(E1000_VMVIR(vf), 0);
5108 }
5109
5110 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5111                                int vf, u16 vlan, u8 qos)
5112 {
5113         int err = 0;
5114         struct igb_adapter *adapter = netdev_priv(netdev);
5115
5116         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5117                 return -EINVAL;
5118         if (vlan || qos) {
5119                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5120                 if (err)
5121                         goto out;
5122                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5123                 igb_set_vmolr(adapter, vf, !vlan);
5124                 adapter->vf_data[vf].pf_vlan = vlan;
5125                 adapter->vf_data[vf].pf_qos = qos;
5126                 dev_info(&adapter->pdev->dev,
5127                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5128                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5129                         dev_warn(&adapter->pdev->dev,
5130                                  "The VF VLAN has been set,"
5131                                  " but the PF device is not up.\n");
5132                         dev_warn(&adapter->pdev->dev,
5133                                  "Bring the PF device up before"
5134                                  " attempting to use the VF device.\n");
5135                 }
5136         } else {
5137                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5138                                    false, vf);
5139                 igb_set_vmvir(adapter, vlan, vf);
5140                 igb_set_vmolr(adapter, vf, true);
5141                 adapter->vf_data[vf].pf_vlan = 0;
5142                 adapter->vf_data[vf].pf_qos = 0;
5143         }
5144 out:
5145         return err;
5146 }
5147
5148 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5149 {
5150         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5151         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5152
5153         return igb_vlvf_set(adapter, vid, add, vf);
5154 }
5155
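/**
 * igb_vf_reset - reset the software state for a VF
 * @adapter: board private structure
 * @vf: VF index to reset
 *
 * Clears the VF flags (except the PF-set-MAC flag), restores default
 * offloads, drops the VF's VLAN filters (re-applying any PF-assigned
 * VLAN) and empties its multicast hash list before refreshing the MTA.
 **/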
5156 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5157 {
5158         /* clear flags - except flag that indicates PF has set the MAC */
5159         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5160         adapter->vf_data[vf].last_nack = jiffies;
5161
5162         /* reset offloads to defaults */
5163         igb_set_vmolr(adapter, vf, true);
5164
5165         /* reset vlans for device */
5166         igb_clear_vf_vfta(adapter, vf);
5167         if (adapter->vf_data[vf].pf_vlan)
5168                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5169                                     adapter->vf_data[vf].pf_vlan,
5170                                     adapter->vf_data[vf].pf_qos);
5171         else
5172                 igb_clear_vf_vfta(adapter, vf);
5173
5174         /* reset multicast table array for vf */
5175         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5176
5177         /* Flush and reset the mta with the new values */
5178         igb_set_rx_mode(adapter->netdev);
5179 }
5180
5181 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5182 {
5183         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5184
5185         /* generate a new mac address as we were hotplug removed/added */
5186         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5187                 random_ether_addr(vf_mac);
5188
5189         /* process remaining reset events */
5190         igb_vf_reset(adapter, vf);
5191 }
5192
5193 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5194 {
5195         struct e1000_hw *hw = &adapter->hw;
5196         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5197         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5198         u32 reg, msgbuf[3];
5199         u8 *addr = (u8 *)(&msgbuf[1]);
5200
5201         /* process all the same items cleared in a function level reset */
5202         igb_vf_reset(adapter, vf);
5203
5204         /* set vf mac address */
5205         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5206
5207         /* enable transmit and receive for vf */
5208         reg = rd32(E1000_VFTE);
5209         wr32(E1000_VFTE, reg | (1 << vf));
5210         reg = rd32(E1000_VFRE);
5211         wr32(E1000_VFRE, reg | (1 << vf));
5212
5213         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5214
5215         /* reply to reset with ack and vf mac address */
5216         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5217         memcpy(addr, vf_mac, 6);
5218         igb_write_mbx(hw, msgbuf, 3, vf);
5219 }
5220
5221 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5222 {
5223         /*
5224          * The VF MAC Address is stored in a packed array of bytes
5225          * starting at the second 32 bit word of the msg array
5226          */
5227         unsigned char *addr = (unsigned char *)&msg[1];
5228         int err = -1;
5229
5230         if (is_valid_ether_addr(addr))
5231                 err = igb_set_vf_mac(adapter, vf, addr);
5232
5233         return err;
5234 }
5235
5236 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5237 {
5238         struct e1000_hw *hw = &adapter->hw;
5239         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5240         u32 msg = E1000_VT_MSGTYPE_NACK;
5241
5242         /* if device isn't clear to send it shouldn't be reading either */
5243         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5244             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5245                 igb_write_mbx(hw, &msg, 1, vf);
5246                 vf_data->last_nack = jiffies;
5247         }
5248 }
5249
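/**
 * igb_rcv_msg_from_vf - read and dispatch one mailbox message from a VF
 * @adapter: board private structure
 * @vf: VF index the message came from
 *
 * Reads the message from the VF mailbox and dispatches it to the
 * appropriate handler (reset, MAC address, multicast list, promiscuous
 * mode, RLPML or VLAN configuration).  The result is reported back to
 * the VF as an ACK or NACK.
 **/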
5250 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5251 {
5252         struct pci_dev *pdev = adapter->pdev;
5253         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5254         struct e1000_hw *hw = &adapter->hw;
5255         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5256         s32 retval;
5257
5258         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5259
5260         if (retval) {
5261                 /* if receive failed revoke VF CTS stats and restart init */
5262                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5263                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5264                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5265                         return;
5266                 goto out;
5267         }
5268
5269         /* this is a message we already processed, do nothing */
5270         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5271                 return;
5272
5273         /*
5274          * until the vf completes a reset it should not be
5275          * allowed to start any configuration.
5276          */
5277
5278         if (msgbuf[0] == E1000_VF_RESET) {
5279                 igb_vf_reset_msg(adapter, vf);
5280                 return;
5281         }
5282
5283         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5284                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5285                         return;
5286                 retval = -1;
5287                 goto out;
5288         }
5289
5290         switch ((msgbuf[0] & 0xFFFF)) {
5291         case E1000_VF_SET_MAC_ADDR:
5292                 retval = -EINVAL;
5293                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5294                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5295                 else
5296                         dev_warn(&pdev->dev,
5297                                  "VF %d attempted to override administratively "
5298                                  "set MAC address\nReload the VF driver to "
5299                                  "resume operations\n", vf);
5300                 break;
5301         case E1000_VF_SET_PROMISC:
5302                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5303                 break;
5304         case E1000_VF_SET_MULTICAST:
5305                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5306                 break;
5307         case E1000_VF_SET_LPE:
5308                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5309                 break;
5310         case E1000_VF_SET_VLAN:
5311                 retval = -1;
5312                 if (vf_data->pf_vlan)
5313                         dev_warn(&pdev->dev,
5314                                  "VF %d attempted to override administratively "
5315                                  "set VLAN tag\nReload the VF driver to "
5316                                  "resume operations\n", vf);
5317                 else
5318                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5319                 break;
5320         default:
5321                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5322                 retval = -1;
5323                 break;
5324         }
5325
5326         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5327 out:
5328         /* notify the VF of the results of what it sent us */
5329         if (retval)
5330                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5331         else
5332                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5333
5334         igb_write_mbx(hw, msgbuf, 1, vf);
5335 }
5336
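/**
 * igb_msg_task - process pending mailbox events for all VFs
 * @adapter: board private structure
 *
 * Polls every allocated VF for reset requests, pending messages and
 * acks, and services each one that is outstanding.
 **/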
5337 static void igb_msg_task(struct igb_adapter *adapter)
5338 {
5339         struct e1000_hw *hw = &adapter->hw;
5340         u32 vf;
5341
5342         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5343                 /* process any reset requests */
5344                 if (!igb_check_for_rst(hw, vf))
5345                         igb_vf_reset_event(adapter, vf);
5346
5347                 /* process any messages pending */
5348                 if (!igb_check_for_msg(hw, vf))
5349                         igb_rcv_msg_from_vf(adapter, vf);
5350
5351                 /* process any acks */
5352                 if (!igb_check_for_ack(hw, vf))
5353                         igb_rcv_ack_from_vf(adapter, vf);
5354         }
5355 }
5356
5357 /**
5358  *  igb_set_uta - Set unicast filter table address
5359  *  @adapter: board private structure
5360  *
5361  *  The unicast table address is a register array of 32-bit registers.
5362  *  The table is meant to be used in a way similar to how the MTA is used;
5363  *  however, due to certain limitations in the hardware, it is necessary to
5364  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5365  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5366  **/
5367 static void igb_set_uta(struct igb_adapter *adapter)
5368 {
5369         struct e1000_hw *hw = &adapter->hw;
5370         int i;
5371
5372         /* The UTA table only exists on 82576 hardware and newer */
5373         if (hw->mac.type < e1000_82576)
5374                 return;
5375
5376         /* we only need to do this if VMDq is enabled */
5377         if (!adapter->vfs_allocated_count)
5378                 return;
5379
5380         for (i = 0; i < hw->mac.uta_reg_count; i++)
5381                 array_wr32(E1000_UTA, i, ~0);
5382 }
5383
5384 /**
5385  * igb_intr_msi - Interrupt Handler
5386  * @irq: interrupt number
5387  * @data: pointer to a network interface device structure
5388  **/
5389 static irqreturn_t igb_intr_msi(int irq, void *data)
5390 {
5391         struct igb_adapter *adapter = data;
5392         struct igb_q_vector *q_vector = adapter->q_vector[0];
5393         struct e1000_hw *hw = &adapter->hw;
5394         /* read ICR disables interrupts using IAM */
5395         u32 icr = rd32(E1000_ICR);
5396
5397         igb_write_itr(q_vector);
5398
5399         if (icr & E1000_ICR_DRSTA)
5400                 schedule_work(&adapter->reset_task);
5401
5402         if (icr & E1000_ICR_DOUTSYNC) {
5403                 /* HW is reporting DMA is out of sync */
5404                 adapter->stats.doosync++;
5405         }
5406
5407         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5408                 hw->mac.get_link_status = 1;
5409                 if (!test_bit(__IGB_DOWN, &adapter->state))
5410                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5411         }
5412
5413         napi_schedule(&q_vector->napi);
5414
5415         return IRQ_HANDLED;
5416 }
5417
5418 /**
5419  * igb_intr - Legacy Interrupt Handler
5420  * @irq: interrupt number
5421  * @data: pointer to a network interface device structure
5422  **/
5423 static irqreturn_t igb_intr(int irq, void *data)
5424 {
5425         struct igb_adapter *adapter = data;
5426         struct igb_q_vector *q_vector = adapter->q_vector[0];
5427         struct e1000_hw *hw = &adapter->hw;
5428         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5429          * need for the IMC write */
5430         u32 icr = rd32(E1000_ICR);
5431         if (!icr)
5432                 return IRQ_NONE;  /* Not our interrupt */
5433
5434         igb_write_itr(q_vector);
5435
5436         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5437          * not set, then the adapter didn't send an interrupt */
5438         if (!(icr & E1000_ICR_INT_ASSERTED))
5439                 return IRQ_NONE;
5440
5441         if (icr & E1000_ICR_DRSTA)
5442                 schedule_work(&adapter->reset_task);
5443
5444         if (icr & E1000_ICR_DOUTSYNC) {
5445                 /* HW is reporting DMA is out of sync */
5446                 adapter->stats.doosync++;
5447         }
5448
5449         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5450                 hw->mac.get_link_status = 1;
5451                 /* guard against interrupt when we're going down */
5452                 if (!test_bit(__IGB_DOWN, &adapter->state))
5453                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5454         }
5455
5456         napi_schedule(&q_vector->napi);
5457
5458         return IRQ_HANDLED;
5459 }
5460
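/**
 * igb_ring_irq_enable - re-enable interrupts for a q_vector after polling
 * @q_vector: vector to re-arm
 *
 * Updates the adaptive ITR value if dynamic moderation is enabled and,
 * provided the adapter is not going down, re-enables the vector's EIMS
 * bit (MSI-X) or the global interrupt mask (MSI/legacy).
 **/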
5461 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5462 {
5463         struct igb_adapter *adapter = q_vector->adapter;
5464         struct e1000_hw *hw = &adapter->hw;
5465
5466         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5467             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5468                 if (!adapter->msix_entries)
5469                         igb_set_itr(adapter);
5470                 else
5471                         igb_update_ring_itr(q_vector);
5472         }
5473
5474         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5475                 if (adapter->msix_entries)
5476                         wr32(E1000_EIMS, q_vector->eims_value);
5477                 else
5478                         igb_irq_enable(adapter);
5479         }
5480 }
5481
5482 /**
5483  * igb_poll - NAPI Rx polling callback
5484  * @napi: napi polling structure
5485  * @budget: count of how many packets we should handle
5486  **/
5487 static int igb_poll(struct napi_struct *napi, int budget)
5488 {
5489         struct igb_q_vector *q_vector = container_of(napi,
5490                                                      struct igb_q_vector,
5491                                                      napi);
5492         bool clean_complete = true;
5493
5494 #ifdef CONFIG_IGB_DCA
5495         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5496                 igb_update_dca(q_vector);
5497 #endif
5498         if (q_vector->tx_ring)
5499                 clean_complete = !!igb_clean_tx_irq(q_vector);
5500
5501         if (q_vector->rx_ring)
5502                 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5503
5504         /* If all work not completed, return budget and keep polling */
5505         if (!clean_complete)
5506                 return budget;
5507
5508         /* If not enough Rx work done, exit the polling mode */
5509         napi_complete(napi);
5510         igb_ring_irq_enable(q_vector);
5511
5512         return 0;
5513 }
5514
5515 /**
5516  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5517  * @adapter: board private structure
5518  * @shhwtstamps: timestamp structure to update
5519  * @regval: unsigned 64bit system time value.
5520  *
5521  * We need to convert the system time value stored in the RX/TXSTMP registers
5522  * into a hwtstamp which can be used by the upper level timestamping functions
5523  */
5524 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5525                                    struct skb_shared_hwtstamps *shhwtstamps,
5526                                    u64 regval)
5527 {
5528         u64 ns;
5529
5530         /*
5531          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL; shift this up by
5532          * 24 bits to match the clock shift we set up earlier.
5533          */
5534         if (adapter->hw.mac.type == e1000_82580)
5535                 regval <<= IGB_82580_TSYNC_SHIFT;
5536
5537         ns = timecounter_cyc2time(&adapter->clock, regval);
5538         timecompare_update(&adapter->compare, ns);
5539         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5540         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5541         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5542 }
5543
5544 /**
5545  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5546  * @q_vector: pointer to q_vector containing needed info
5547  * @buffer_info: pointer to the igb_buffer holding the skb to time stamp
5548  *
5549  * If we were asked to do hardware stamping and such a time stamp is
5550  * available, then it must have been for this skb here because we only
5551  * allow one such packet into the queue.
5552  */
5553 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5554 {
5555         struct igb_adapter *adapter = q_vector->adapter;
5556         struct e1000_hw *hw = &adapter->hw;
5557         struct skb_shared_hwtstamps shhwtstamps;
5558         u64 regval;
5559
5560         /* if skb does not support hw timestamp or TX stamp not valid exit */
5561         if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5562             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5563                 return;
5564
5565         regval = rd32(E1000_TXSTMPL);
5566         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5567
5568         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5569         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5570 }
5571
5572 /**
5573  * igb_clean_tx_irq - Reclaim resources after transmit completes
5574  * @q_vector: pointer to q_vector containing needed info
5575  * returns true if ring is completely cleaned
5576  **/
5577 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5578 {
5579         struct igb_adapter *adapter = q_vector->adapter;
5580         struct igb_ring *tx_ring = q_vector->tx_ring;
5581         struct net_device *netdev = tx_ring->netdev;
5582         struct e1000_hw *hw = &adapter->hw;
5583         struct igb_buffer *buffer_info;
5584         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5585         unsigned int total_bytes = 0, total_packets = 0;
5586         unsigned int i, eop, count = 0;
5587         bool cleaned = false;
5588
5589         i = tx_ring->next_to_clean;
5590         eop = tx_ring->buffer_info[i].next_to_watch;
5591         eop_desc = IGB_TX_DESC(tx_ring, eop);
5592
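             /*
              * Reclaim completed descriptors: the outer loop advances from
              * one end-of-packet (eop) descriptor to the next while its DD
              * bit is set; the inner loop unmaps and frees every buffer up
              * to and including eop and accumulates the byte/packet counts
              * for the stats updated below.
              */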
5593         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5594                (count < tx_ring->count)) {
5595                 rmb();  /* read buffer_info after eop_desc status */
5596                 for (cleaned = false; !cleaned; count++) {
5597                         tx_desc = IGB_TX_DESC(tx_ring, i);
5598                         buffer_info = &tx_ring->buffer_info[i];
5599                         cleaned = (i == eop);
5600
5601                         if (buffer_info->skb) {
5602                                 total_bytes += buffer_info->bytecount;
5603                                 /* gso_segs is currently only valid for tcp */
5604                                 total_packets += buffer_info->gso_segs;
5605                                 igb_tx_hwtstamp(q_vector, buffer_info);
5606                         }
5607
5608                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5609                         tx_desc->wb.status = 0;
5610
5611                         i++;
5612                         if (i == tx_ring->count)
5613                                 i = 0;
5614                 }
5615                 eop = tx_ring->buffer_info[i].next_to_watch;
5616                 eop_desc = IGB_TX_DESC(tx_ring, eop);
5617         }
5618
5619         tx_ring->next_to_clean = i;
5620
5621         if (unlikely(count &&
5622                      netif_carrier_ok(netdev) &&
5623                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5624                 /* Make sure that anybody stopping the queue after this
5625                  * sees the new next_to_clean.
5626                  */
5627                 smp_mb();
5628                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5629                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5630                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5631
5632                         u64_stats_update_begin(&tx_ring->tx_syncp);
5633                         tx_ring->tx_stats.restart_queue++;
5634                         u64_stats_update_end(&tx_ring->tx_syncp);
5635                 }
5636         }
5637
5638         if (tx_ring->detect_tx_hung) {
5639                 /* Detect a transmit hang in hardware; this serializes the
5640                  * check with the clearing of time_stamp and the movement of i */
5641                 tx_ring->detect_tx_hung = false;
5642                 if (tx_ring->buffer_info[i].time_stamp &&
5643                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5644                                (adapter->tx_timeout_factor * HZ)) &&
5645                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5646
5647                         /* detected Tx unit hang */
5648                         dev_err(tx_ring->dev,
5649                                 "Detected Tx Unit Hang\n"
5650                                 "  Tx Queue             <%d>\n"
5651                                 "  TDH                  <%x>\n"
5652                                 "  TDT                  <%x>\n"
5653                                 "  next_to_use          <%x>\n"
5654                                 "  next_to_clean        <%x>\n"
5655                                 "buffer_info[next_to_clean]\n"
5656                                 "  time_stamp           <%lx>\n"
5657                                 "  next_to_watch        <%x>\n"
5658                                 "  jiffies              <%lx>\n"
5659                                 "  desc.status          <%x>\n",
5660                                 tx_ring->queue_index,
5661                                 rd32(E1000_TDH(tx_ring->reg_idx)),
5662                                 readl(tx_ring->tail),
5663                                 tx_ring->next_to_use,
5664                                 tx_ring->next_to_clean,
5665                                 tx_ring->buffer_info[eop].time_stamp,
5666                                 eop,
5667                                 jiffies,
5668                                 eop_desc->wb.status);
5669                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5670                 }
5671         }
5672         tx_ring->total_bytes += total_bytes;
5673         tx_ring->total_packets += total_packets;
5674         u64_stats_update_begin(&tx_ring->tx_syncp);
5675         tx_ring->tx_stats.bytes += total_bytes;
5676         tx_ring->tx_stats.packets += total_packets;
5677         u64_stats_update_end(&tx_ring->tx_syncp);
5678         return count < tx_ring->count;
5679 }
5680
5681 static inline void igb_rx_checksum(struct igb_ring *ring,
5682                                    u32 status_err, struct sk_buff *skb)
5683 {
5684         skb_checksum_none_assert(skb);
5685
5686         /* skip checksum if the Ignore Checksum bit is set or offload is disabled via ethtool */
5687         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5688              (status_err & E1000_RXD_STAT_IXSM))
5689                 return;
5690
5691         /* TCP/UDP checksum error bit is set */
5692         if (status_err &
5693             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5694                 /*
5695                  * work around errata with SCTP packets where the TCPE (aka
5696                  * L4E) bit is set incorrectly on 64 byte (60 byte w/o CRC)
5697                  * packets; let the stack verify the crc32c instead
5698                  */
5699                 if ((skb->len == 60) &&
5700                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5701                         u64_stats_update_begin(&ring->rx_syncp);
5702                         ring->rx_stats.csum_err++;
5703                         u64_stats_update_end(&ring->rx_syncp);
5704                 }
5705                 /* let the stack verify checksum errors */
5706                 return;
5707         }
5708         /* It must be a TCP or UDP packet with a valid checksum */
5709         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5710                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5711
5712         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5713 }
5714
5715 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5716                                    struct sk_buff *skb)
5717 {
5718         struct igb_adapter *adapter = q_vector->adapter;
5719         struct e1000_hw *hw = &adapter->hw;
5720         u64 regval;
5721
5722         /*
5723          * If this bit is set, then the RX registers contain the time stamp. No
5724          * other packet will be time stamped until we read these registers, so
5725          * read the registers to make them available again. Because only one
5726          * packet can be time stamped at a time, we know that the register
5727          * values must belong to this one here and therefore we don't need to
5728          * compare any of the additional attributes stored for it.
5729          *
5730          * If nothing went wrong, then it should have a shared tx_flags that we
5731          * can turn into a skb_shared_hwtstamps.
5732          */
5733         if (staterr & E1000_RXDADV_STAT_TSIP) {
5734                 u32 *stamp = (u32 *)skb->data;
5735                 regval = le32_to_cpu(*(stamp + 2));
5736                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5737                 skb_pull(skb, IGB_TS_HDR_LEN);
5738         } else {
5739                 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5740                         return;
5741
5742                 regval = rd32(E1000_RXSTMPL);
5743                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5744         }
5745
5746         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5747 }

5748 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5749 {
5750         /* HW will not DMA in data larger than the given buffer, even if it
5751          * parses the (NFS, of course) header to be larger.  In that case, it
5752          * fills the header buffer and spills the rest into the page.
5753          */
5754         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5755                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5756         if (hlen > IGB_RX_HDR_LEN)
5757                 hlen = IGB_RX_HDR_LEN;
5758         return hlen;
5759 }
5760
5761 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5762 {
5763         struct igb_ring *rx_ring = q_vector->rx_ring;
5764         union e1000_adv_rx_desc *rx_desc;
5765         const int current_node = numa_node_id();
5766         unsigned int total_bytes = 0, total_packets = 0;
5767         u32 staterr;
5768         u16 cleaned_count = igb_desc_unused(rx_ring);
5769         u16 i = rx_ring->next_to_clean;
5770
5771         rx_desc = IGB_RX_DESC(rx_ring, i);
5772         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5773
5774         while (staterr & E1000_RXD_STAT_DD) {
5775                 struct igb_buffer *buffer_info = &rx_ring->buffer_info[i];
5776                 struct sk_buff *skb = buffer_info->skb;
5777                 union e1000_adv_rx_desc *next_rxd;
5778
5779                 buffer_info->skb = NULL;
5780                 prefetch(skb->data);
5781
5782                 i++;
5783                 if (i == rx_ring->count)
5784                         i = 0;
5785
5786                 next_rxd = IGB_RX_DESC(rx_ring, i);
5787                 prefetch(next_rxd);
5788
5789                 /*
5790                  * This memory barrier is needed to keep us from reading
5791                  * any other fields out of the rx_desc until we know the
5792                  * RXD_STAT_DD bit is set
5793                  */
5794                 rmb();
5795
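                     /*
                      * Packet-split receive: the hardware DMAs up to
                      * IGB_RX_HDR_LEN bytes of header into skb->data and
                      * spills any remaining payload into a half-page
                      * fragment, which is attached to the skb below.
                      */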
5796                 if (!skb_is_nonlinear(skb)) {
5797                         __skb_put(skb, igb_get_hlen(rx_desc));
5798                         dma_unmap_single(rx_ring->dev, buffer_info->dma,
5799                                          IGB_RX_HDR_LEN,
5800                                          DMA_FROM_DEVICE);
5801                         buffer_info->dma = 0;
5802                 }
5803
5804                 if (rx_desc->wb.upper.length) {
5805                         u16 length = le16_to_cpu(rx_desc->wb.upper.length);
5806
5807                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5808                                                 buffer_info->page,
5809                                                 buffer_info->page_offset,
5810                                                 length);
5811
5812                         skb->len += length;
5813                         skb->data_len += length;
5814                         skb->truesize += length;
5815
5816                         if ((page_count(buffer_info->page) != 1) ||
5817                             (page_to_nid(buffer_info->page) != current_node))
5818                                 buffer_info->page = NULL;
5819                         else
5820                                 get_page(buffer_info->page);
5821
5822                         dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
5823                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5824                         buffer_info->page_dma = 0;
5825                 }
5826
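                     /*
                      * Packet spans multiple descriptors: hand the partly
                      * built skb over to the next buffer_info entry so the
                      * following iterations keep appending fragments until
                      * the descriptor with EOP set is reached.
                      */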
5827                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5828                         struct igb_buffer *next_buffer;
5829                         next_buffer = &rx_ring->buffer_info[i];
5830                         buffer_info->skb = next_buffer->skb;
5831                         buffer_info->dma = next_buffer->dma;
5832                         next_buffer->skb = skb;
5833                         next_buffer->dma = 0;
5834                         goto next_desc;
5835                 }
5836
5837                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5838                         dev_kfree_skb_any(skb);
5839                         goto next_desc;
5840                 }
5841
5842                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5843                         igb_rx_hwtstamp(q_vector, staterr, skb);
5844                 total_bytes += skb->len;
5845                 total_packets++;
5846
5847                 igb_rx_checksum(rx_ring, staterr, skb);
5848
5849                 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
5850
5851                 if (staterr & E1000_RXD_STAT_VP) {
5852                         u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5853
5854                         __vlan_hwaccel_put_tag(skb, vid);
5855                 }
5856                 napi_gro_receive(&q_vector->napi, skb);
5857
5858                 budget--;
5859 next_desc:
5860                 if (!budget)
5861                         break;
5862
5863                 cleaned_count++;
5864                 /* return some buffers to hardware, one at a time is too slow */
5865                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5866                         igb_alloc_rx_buffers(rx_ring, cleaned_count);
5867                         cleaned_count = 0;
5868                 }
5869
5870                 /* use prefetched values */
5871                 rx_desc = next_rxd;
5872                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5873         }
5874
5875         rx_ring->next_to_clean = i;
5876         u64_stats_update_begin(&rx_ring->rx_syncp);
5877         rx_ring->rx_stats.packets += total_packets;
5878         rx_ring->rx_stats.bytes += total_bytes;
5879         u64_stats_update_end(&rx_ring->rx_syncp);
5880         rx_ring->total_packets += total_packets;
5881         rx_ring->total_bytes += total_bytes;
5882
5883         if (cleaned_count)
5884                 igb_alloc_rx_buffers(rx_ring, cleaned_count);
5885
5886         return !!budget;
5887 }
5888
5889 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
5890                                  struct igb_buffer *bi)
5891 {
5892         struct sk_buff *skb = bi->skb;
5893         dma_addr_t dma = bi->dma;
5894
5895         if (dma)
5896                 return true;
5897
5898         if (likely(!skb)) {
5899                 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
5900                                                 IGB_RX_HDR_LEN);
5901                 bi->skb = skb;
5902                 if (!skb) {
5903                         rx_ring->rx_stats.alloc_failed++;
5904                         return false;
5905                 }
5906
5907                 /* initialize skb for ring */
5908                 skb_record_rx_queue(skb, rx_ring->queue_index);
5909         }
5910
5911         dma = dma_map_single(rx_ring->dev, skb->data,
5912                              IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
5913
5914         if (dma_mapping_error(rx_ring->dev, dma)) {
5915                 rx_ring->rx_stats.alloc_failed++;
5916                 return false;
5917         }
5918
5919         bi->dma = dma;
5920         return true;
5921 }
5922
5923 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
5924                                   struct igb_buffer *bi)
5925 {
5926         struct page *page = bi->page;
5927         dma_addr_t page_dma = bi->page_dma;
5928         unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
5929
5930         if (page_dma)
5931                 return true;
5932
5933         if (!page) {
5934                 page = netdev_alloc_page(rx_ring->netdev);
5935                 bi->page = page;
5936                 if (unlikely(!page)) {
5937                         rx_ring->rx_stats.alloc_failed++;
5938                         return false;
5939                 }
5940         }
5941
5942         page_dma = dma_map_page(rx_ring->dev, page,
5943                                 page_offset, PAGE_SIZE / 2,
5944                                 DMA_FROM_DEVICE);
5945
5946         if (dma_mapping_error(rx_ring->dev, page_dma)) {
5947                 rx_ring->rx_stats.alloc_failed++;
5948                 return false;
5949         }
5950
5951         bi->page_dma = page_dma;
5952         bi->page_offset = page_offset;
5953         return true;
5954 }
5955
5956 /**
5957  * igb_alloc_rx_buffers - Replace used receive buffers; packet split
5958  * @rx_ring: rx descriptor ring to place new buffers on
      * @cleaned_count: number of buffers to replace
5959  **/
5960 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
5961 {
5962         union e1000_adv_rx_desc *rx_desc;
5963         struct igb_buffer *bi;
5964         u16 i = rx_ring->next_to_use;
5965
5966         rx_desc = IGB_RX_DESC(rx_ring, i);
5967         bi = &rx_ring->buffer_info[i];
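             /*
              * Bias the index negative by the ring size so that the wrap
              * test below is a simple check for i reaching zero; the bias
              * is removed again before next_to_use is written back.
              */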
5968         i -= rx_ring->count;
5969
5970         while (cleaned_count--) {
5971                 if (!igb_alloc_mapped_skb(rx_ring, bi))
5972                         break;
5973
5974                 /* Refresh the desc even if buffer_addrs didn't change
5975                  * because each write-back erases this info. */
5976                 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
5977
5978                 if (!igb_alloc_mapped_page(rx_ring, bi))
5979                         break;
5980
5981                 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
5982
5983                 rx_desc++;
5984                 bi++;
5985                 i++;
5986                 if (unlikely(!i)) {
5987                         rx_desc = IGB_RX_DESC(rx_ring, 0);
5988                         bi = rx_ring->buffer_info;
5989                         i -= rx_ring->count;
5990                 }
5991
5992                 /* clear the hdr_addr for the next_to_use descriptor */
5993                 rx_desc->read.hdr_addr = 0;
5994         }
5995
5996         i += rx_ring->count;
5997
5998         if (rx_ring->next_to_use != i) {
5999                 rx_ring->next_to_use = i;
6000
6001                 /* Force memory writes to complete before letting h/w
6002                  * know there are new descriptors to fetch.  (Only
6003                  * applicable for weak-ordered memory model archs,
6004                  * such as IA-64). */
6005                 wmb();
6006                 writel(i, rx_ring->tail);
6007         }
6008 }
6009
6010 /**
6011  * igb_mii_ioctl - handle MII related ioctls
6012  * @netdev: network interface device structure
6013  * @ifr: interface request structure with the MII register data
6014  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6015  **/
6016 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6017 {
6018         struct igb_adapter *adapter = netdev_priv(netdev);
6019         struct mii_ioctl_data *data = if_mii(ifr);
6020
6021         if (adapter->hw.phy.media_type != e1000_media_type_copper)
6022                 return -EOPNOTSUPP;
6023
6024         switch (cmd) {
6025         case SIOCGMIIPHY:
6026                 data->phy_id = adapter->hw.phy.addr;
6027                 break;
6028         case SIOCGMIIREG:
6029                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6030                                      &data->val_out))
6031                         return -EIO;
6032                 break;
6033         case SIOCSMIIREG:
6034         default:
6035                 return -EOPNOTSUPP;
6036         }
6037         return 0;
6038 }
6039
6040 /**
6041  * igb_hwtstamp_ioctl - control hardware time stamping
6042  * @netdev: network interface device structure
6043  * @ifr: interface request structure holding a hwtstamp_config
6044  * @cmd: ioctl command (SIOCSHWTSTAMP)
6045  *
6046  * Outgoing time stamping can be enabled and disabled. Play nice and
6047  * disable it when requested, although it shouldn't cause any overhead
6048  * when no packet needs it. At most one packet in the queue may be
6049  * marked for time stamping, otherwise it would be impossible to tell
6050  * for sure to which packet the hardware time stamp belongs.
6051  *
6052  * Incoming time stamping has to be configured via the hardware
6053  * filters. Not all combinations are supported, in particular event
6054  * type has to be specified. Matching the kind of event packet is
6055  * not supported, with the exception of "all V2 events regardless of
6056  * layer 2 or 4".
6057  *
6058  **/
6059 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6060                               struct ifreq *ifr, int cmd)
6061 {
6062         struct igb_adapter *adapter = netdev_priv(netdev);
6063         struct e1000_hw *hw = &adapter->hw;
6064         struct hwtstamp_config config;
6065         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6066         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6067         u32 tsync_rx_cfg = 0;
6068         bool is_l4 = false;
6069         bool is_l2 = false;
6070         u32 regval;
6071
6072         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6073                 return -EFAULT;
6074
6075         /* reserved for future extensions */
6076         if (config.flags)
6077                 return -EINVAL;
6078
6079         switch (config.tx_type) {
6080         case HWTSTAMP_TX_OFF:
6081                 tsync_tx_ctl = 0;
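                     /* fall through - HWTSTAMP_TX_ON needs no further setup */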
6082         case HWTSTAMP_TX_ON:
6083                 break;
6084         default:
6085                 return -ERANGE;
6086         }
6087
6088         switch (config.rx_filter) {
6089         case HWTSTAMP_FILTER_NONE:
6090                 tsync_rx_ctl = 0;
6091                 break;
6092         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6093         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6094         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6095         case HWTSTAMP_FILTER_ALL:
6096                 /*
6097                  * register TSYNCRXCFG must be set, therefore it is not
6098                  * possible to time stamp both Sync and Delay_Req messages
6099                  * => fall back to time stamping all packets
6100                  */
6101                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6102                 config.rx_filter = HWTSTAMP_FILTER_ALL;
6103                 break;
6104         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6105                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6106                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6107                 is_l4 = true;
6108                 break;
6109         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6110                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6111                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6112                 is_l4 = true;
6113                 break;
6114         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6115         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6116                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6117                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6118                 is_l2 = true;
6119                 is_l4 = true;
6120                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6121                 break;
6122         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6123         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6124                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6125                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6126                 is_l2 = true;
6127                 is_l4 = true;
6128                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6129                 break;
6130         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6131         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6132         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6133                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6134                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6135                 is_l2 = true;
6136                 break;
6137         default:
6138                 return -ERANGE;
6139         }
6140
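             /*
              * 82575 silicon has no hardware time stamping support, so only
              * a request to disable it can succeed.
              */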
6141         if (hw->mac.type == e1000_82575) {
6142                 if (tsync_rx_ctl | tsync_tx_ctl)
6143                         return -EINVAL;
6144                 return 0;
6145         }
6146
6147         /*
6148          * Per-packet timestamping only works if all packets are
6149          * timestamped, so enable timestamping in all packets as
6150          * long as one rx filter was configured.
6151          */
6152         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6153                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6154                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6155         }
6156
6157         /* enable/disable TX */
6158         regval = rd32(E1000_TSYNCTXCTL);
6159         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6160         regval |= tsync_tx_ctl;
6161         wr32(E1000_TSYNCTXCTL, regval);
6162
6163         /* enable/disable RX */
6164         regval = rd32(E1000_TSYNCRXCTL);
6165         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6166         regval |= tsync_rx_ctl;
6167         wr32(E1000_TSYNCRXCTL, regval);
6168
6169         /* define which PTP packets are time stamped */
6170         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6171
6172         /* define ethertype filter for timestamped packets */
6173         if (is_l2)
6174                 wr32(E1000_ETQF(3),
6175                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6176                                  E1000_ETQF_1588 | /* enable timestamping */
6177                                  ETH_P_1588));     /* 1588 eth protocol type */
6178         else
6179                 wr32(E1000_ETQF(3), 0);
6180
6181 #define PTP_PORT 319
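     /* 319 is the IANA-assigned UDP port for PTP event messages (Sync/Delay_Req) */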
6182         /* L4 Queue Filter[3]: filter by destination port and protocol */
6183         if (is_l4) {
6184                 u32 ftqf = (IPPROTO_UDP /* UDP */
6185                         | E1000_FTQF_VF_BP /* VF not compared */
6186                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6187                         | E1000_FTQF_MASK); /* mask all inputs */
6188                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6189
6190                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6191                 wr32(E1000_IMIREXT(3),
6192                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6193                 if (hw->mac.type == e1000_82576) {
6194                         /* enable source port check */
6195                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6196                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6197                 }
6198                 wr32(E1000_FTQF(3), ftqf);
6199         } else {
6200                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6201         }
6202         wrfl();
6203
6204         adapter->hwtstamp_config = config;
6205
6206         /* clear TX/RX time stamp registers, just to be sure */
6207         regval = rd32(E1000_TXSTMPH);
6208         regval = rd32(E1000_RXSTMPH);
6209
6210         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6211                 -EFAULT : 0;
6212 }
6213
6214 /**
6215  * igb_ioctl - handle device specific ioctls
6216  * @netdev: network interface device structure
6217  * @ifr: interface request structure
6218  * @cmd: ioctl command
6219  **/
6220 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6221 {
6222         switch (cmd) {
6223         case SIOCGMIIPHY:
6224         case SIOCGMIIREG:
6225         case SIOCSMIIREG:
6226                 return igb_mii_ioctl(netdev, ifr, cmd);
6227         case SIOCSHWTSTAMP:
6228                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6229         default:
6230                 return -EOPNOTSUPP;
6231         }
6232 }
6233
6234 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6235 {
6236         struct igb_adapter *adapter = hw->back;
6237         u16 cap_offset;
6238
6239         cap_offset = adapter->pdev->pcie_cap;
6240         if (!cap_offset)
6241                 return -E1000_ERR_CONFIG;
6242
6243         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6244
6245         return 0;
6246 }
6247
6248 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6249 {
6250         struct igb_adapter *adapter = hw->back;
6251         u16 cap_offset;
6252
6253         cap_offset = adapter->pdev->pcie_cap;
6254         if (!cap_offset)
6255                 return -E1000_ERR_CONFIG;
6256
6257         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6258
6259         return 0;
6260 }
6261
6262 static void igb_vlan_mode(struct net_device *netdev, u32 features)
6263 {
6264         struct igb_adapter *adapter = netdev_priv(netdev);
6265         struct e1000_hw *hw = &adapter->hw;
6266         u32 ctrl, rctl;
6267
6268         igb_irq_disable(adapter);
6269
6270         if (features & NETIF_F_HW_VLAN_RX) {
6271                 /* enable VLAN tag insert/strip */
6272                 ctrl = rd32(E1000_CTRL);
6273                 ctrl |= E1000_CTRL_VME;
6274                 wr32(E1000_CTRL, ctrl);
6275
6276                 /* Disable CFI check */
6277                 rctl = rd32(E1000_RCTL);
6278                 rctl &= ~E1000_RCTL_CFIEN;
6279                 wr32(E1000_RCTL, rctl);
6280         } else {
6281                 /* disable VLAN tag insert/strip */
6282                 ctrl = rd32(E1000_CTRL);
6283                 ctrl &= ~E1000_CTRL_VME;
6284                 wr32(E1000_CTRL, ctrl);
6285         }
6286
6287         igb_rlpml_set(adapter);
6288
6289         if (!test_bit(__IGB_DOWN, &adapter->state))
6290                 igb_irq_enable(adapter);
6291 }
6292
6293 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6294 {
6295         struct igb_adapter *adapter = netdev_priv(netdev);
6296         struct e1000_hw *hw = &adapter->hw;
6297         int pf_id = adapter->vfs_allocated_count;
6298
6299         /* attempt to add filter to vlvf array */
6300         igb_vlvf_set(adapter, vid, true, pf_id);
6301
6302         /* add the filter since PF can receive vlans w/o entry in vlvf */
6303         igb_vfta_set(hw, vid, true);
6304
6305         set_bit(vid, adapter->active_vlans);
6306 }
6307
6308 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6309 {
6310         struct igb_adapter *adapter = netdev_priv(netdev);
6311         struct e1000_hw *hw = &adapter->hw;
6312         int pf_id = adapter->vfs_allocated_count;
6313         s32 err;
6314
6315         igb_irq_disable(adapter);
6316
6317         if (!test_bit(__IGB_DOWN, &adapter->state))
6318                 igb_irq_enable(adapter);
6319
6320         /* remove vlan from VLVF table array */
6321         err = igb_vlvf_set(adapter, vid, false, pf_id);
6322
6323         /* if vid was not present in VLVF just remove it from table */
6324         if (err)
6325                 igb_vfta_set(hw, vid, false);
6326
6327         clear_bit(vid, adapter->active_vlans);
6328 }
6329
6330 static void igb_restore_vlan(struct igb_adapter *adapter)
6331 {
6332         u16 vid;
6333
6334         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6335                 igb_vlan_rx_add_vid(adapter->netdev, vid);
6336 }
6337
6338 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6339 {
6340         struct pci_dev *pdev = adapter->pdev;
6341         struct e1000_mac_info *mac = &adapter->hw.mac;
6342
6343         mac->autoneg = 0;
6344
6345         /* Make sure dplx is at most 1 bit and lsb of speed is not set
6346          * for the switch() below to work */
6347         if ((spd & 1) || (dplx & ~1))
6348                 goto err_inval;
6349
6350         /* Fiber NICs only allow 1000 Mbps full duplex */
6351         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6352             spd != SPEED_1000 &&
6353             dplx != DUPLEX_FULL)
6354                 goto err_inval;
6355
6356         switch (spd + dplx) {
6357         case SPEED_10 + DUPLEX_HALF:
6358                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6359                 break;
6360         case SPEED_10 + DUPLEX_FULL:
6361                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6362                 break;
6363         case SPEED_100 + DUPLEX_HALF:
6364                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6365                 break;
6366         case SPEED_100 + DUPLEX_FULL:
6367                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6368                 break;
6369         case SPEED_1000 + DUPLEX_FULL:
6370                 mac->autoneg = 1;
6371                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6372                 break;
6373         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6374         default:
6375                 goto err_inval;
6376         }
6377         return 0;
6378
6379 err_inval:
6380         dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6381         return -EINVAL;
6382 }
6383
6384 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6385 {
6386         struct net_device *netdev = pci_get_drvdata(pdev);
6387         struct igb_adapter *adapter = netdev_priv(netdev);
6388         struct e1000_hw *hw = &adapter->hw;
6389         u32 ctrl, rctl, status;
6390         u32 wufc = adapter->wol;
6391 #ifdef CONFIG_PM
6392         int retval = 0;
6393 #endif
6394
6395         netif_device_detach(netdev);
6396
6397         if (netif_running(netdev))
6398                 igb_close(netdev);
6399
6400         igb_clear_interrupt_scheme(adapter);
6401
6402 #ifdef CONFIG_PM
6403         retval = pci_save_state(pdev);
6404         if (retval)
6405                 return retval;
6406 #endif
6407
6408         status = rd32(E1000_STATUS);
6409         if (status & E1000_STATUS_LU)
6410                 wufc &= ~E1000_WUFC_LNKC;
6411
6412         if (wufc) {
6413                 igb_setup_rctl(adapter);
6414                 igb_set_rx_mode(netdev);
6415
6416                 /* turn on all-multi mode if wake on multicast is enabled */
6417                 if (wufc & E1000_WUFC_MC) {
6418                         rctl = rd32(E1000_RCTL);
6419                         rctl |= E1000_RCTL_MPE;
6420                         wr32(E1000_RCTL, rctl);
6421                 }
6422
6423                 ctrl = rd32(E1000_CTRL);
6424                 /* advertise wake from D3Cold */
6425                 #define E1000_CTRL_ADVD3WUC 0x00100000
6426                 /* phy power management enable */
6427                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6428                 ctrl |= E1000_CTRL_ADVD3WUC;
6429                 wr32(E1000_CTRL, ctrl);
6430
6431                 /* Allow time for pending master requests to run */
6432                 igb_disable_pcie_master(hw);
6433
6434                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6435                 wr32(E1000_WUFC, wufc);
6436         } else {
6437                 wr32(E1000_WUC, 0);
6438                 wr32(E1000_WUFC, 0);
6439         }
6440
6441         *enable_wake = wufc || adapter->en_mng_pt;
6442         if (!*enable_wake)
6443                 igb_power_down_link(adapter);
6444         else
6445                 igb_power_up_link(adapter);
6446
6447         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6448          * would have already happened in close and is redundant. */
6449         igb_release_hw_control(adapter);
6450
6451         pci_disable_device(pdev);
6452
6453         return 0;
6454 }
6455
6456 #ifdef CONFIG_PM
6457 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6458 {
6459         int retval;
6460         bool wake;
6461
6462         retval = __igb_shutdown(pdev, &wake);
6463         if (retval)
6464                 return retval;
6465
6466         if (wake) {
6467                 pci_prepare_to_sleep(pdev);
6468         } else {
6469                 pci_wake_from_d3(pdev, false);
6470                 pci_set_power_state(pdev, PCI_D3hot);
6471         }
6472
6473         return 0;
6474 }
6475
6476 static int igb_resume(struct pci_dev *pdev)
6477 {
6478         struct net_device *netdev = pci_get_drvdata(pdev);
6479         struct igb_adapter *adapter = netdev_priv(netdev);
6480         struct e1000_hw *hw = &adapter->hw;
6481         u32 err;
6482
6483         pci_set_power_state(pdev, PCI_D0);
6484         pci_restore_state(pdev);
6485         pci_save_state(pdev);
6486
6487         err = pci_enable_device_mem(pdev);
6488         if (err) {
6489                 dev_err(&pdev->dev,
6490                         "igb: Cannot enable PCI device from suspend\n");
6491                 return err;
6492         }
6493         pci_set_master(pdev);
6494
6495         pci_enable_wake(pdev, PCI_D3hot, 0);
6496         pci_enable_wake(pdev, PCI_D3cold, 0);
6497
6498         if (igb_init_interrupt_scheme(adapter)) {
6499                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6500                 return -ENOMEM;
6501         }
6502
6503         igb_reset(adapter);
6504
6505         /* let the f/w know that the h/w is now under the control of the
6506          * driver. */
6507         igb_get_hw_control(adapter);
6508
6509         wr32(E1000_WUS, ~0);
6510
6511         if (netif_running(netdev)) {
6512                 err = igb_open(netdev);
6513                 if (err)
6514                         return err;
6515         }
6516
6517         netif_device_attach(netdev);
6518
6519         return 0;
6520 }
6521 #endif
6522
6523 static void igb_shutdown(struct pci_dev *pdev)
6524 {
6525         bool wake;
6526
6527         __igb_shutdown(pdev, &wake);
6528
6529         if (system_state == SYSTEM_POWER_OFF) {
6530                 pci_wake_from_d3(pdev, wake);
6531                 pci_set_power_state(pdev, PCI_D3hot);
6532         }
6533 }
6534
6535 #ifdef CONFIG_NET_POLL_CONTROLLER
6536 /*
6537  * Polling 'interrupt' - used by things like netconsole to send skbs
6538  * without having to re-enable interrupts. It's not called while
6539  * the interrupt routine is executing.
6540  */
6541 static void igb_netpoll(struct net_device *netdev)
6542 {
6543         struct igb_adapter *adapter = netdev_priv(netdev);
6544         struct e1000_hw *hw = &adapter->hw;
6545         int i;
6546
6547         if (!adapter->msix_entries) {
6548                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6549                 igb_irq_disable(adapter);
6550                 napi_schedule(&q_vector->napi);
6551                 return;
6552         }
6553
6554         for (i = 0; i < adapter->num_q_vectors; i++) {
6555                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6556                 wr32(E1000_EIMC, q_vector->eims_value);
6557                 napi_schedule(&q_vector->napi);
6558         }
6559 }
6560 #endif /* CONFIG_NET_POLL_CONTROLLER */
6561
6562 /**
6563  * igb_io_error_detected - called when PCI error is detected
6564  * @pdev: Pointer to PCI device
6565  * @state: The current pci connection state
6566  *
6567  * This function is called after a PCI bus error affecting
6568  * this device has been detected.
6569  */
6570 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6571                                               pci_channel_state_t state)
6572 {
6573         struct net_device *netdev = pci_get_drvdata(pdev);
6574         struct igb_adapter *adapter = netdev_priv(netdev);
6575
6576         netif_device_detach(netdev);
6577
6578         if (state == pci_channel_io_perm_failure)
6579                 return PCI_ERS_RESULT_DISCONNECT;
6580
6581         if (netif_running(netdev))
6582                 igb_down(adapter);
6583         pci_disable_device(pdev);
6584
6585         /* Request a slot reset. */
6586         return PCI_ERS_RESULT_NEED_RESET;
6587 }
6588
6589 /**
6590  * igb_io_slot_reset - called after the pci bus has been reset.
6591  * @pdev: Pointer to PCI device
6592  *
6593  * Restart the card from scratch, as if from a cold-boot. Implementation
6594  * resembles the first-half of the igb_resume routine.
6595  */
6596 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6597 {
6598         struct net_device *netdev = pci_get_drvdata(pdev);
6599         struct igb_adapter *adapter = netdev_priv(netdev);
6600         struct e1000_hw *hw = &adapter->hw;
6601         pci_ers_result_t result;
6602         int err;
6603
6604         if (pci_enable_device_mem(pdev)) {
6605                 dev_err(&pdev->dev,
6606                         "Cannot re-enable PCI device after reset.\n");
6607                 result = PCI_ERS_RESULT_DISCONNECT;
6608         } else {
6609                 pci_set_master(pdev);
6610                 pci_restore_state(pdev);
6611                 pci_save_state(pdev);
6612
6613                 pci_enable_wake(pdev, PCI_D3hot, 0);
6614                 pci_enable_wake(pdev, PCI_D3cold, 0);
6615
6616                 igb_reset(adapter);
6617                 wr32(E1000_WUS, ~0);
6618                 result = PCI_ERS_RESULT_RECOVERED;
6619         }
6620
6621         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6622         if (err) {
6623                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6624                         "failed 0x%0x\n", err);
6625                 /* non-fatal, continue */
6626         }
6627
6628         return result;
6629 }
6630
6631 /**
6632  * igb_io_resume - called when traffic can start flowing again.
6633  * @pdev: Pointer to PCI device
6634  *
6635  * This callback is called when the error recovery driver tells us that
6636  * it's OK to resume normal operation. Implementation resembles the
6637  * second-half of the igb_resume routine.
6638  */
6639 static void igb_io_resume(struct pci_dev *pdev)
6640 {
6641         struct net_device *netdev = pci_get_drvdata(pdev);
6642         struct igb_adapter *adapter = netdev_priv(netdev);
6643
6644         if (netif_running(netdev)) {
6645                 if (igb_up(adapter)) {
6646                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6647                         return;
6648                 }
6649         }
6650
6651         netif_device_attach(netdev);
6652
6653         /* let the f/w know that the h/w is now under the control of the
6654          * driver. */
6655         igb_get_hw_control(adapter);
6656 }
6657
6658 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6659                              u8 qsel)
6660 {
6661         u32 rar_low, rar_high;
6662         struct e1000_hw *hw = &adapter->hw;
6663
6664         /* HW expects these in little endian so we reverse the byte order
6665          * from network order (big endian) to little endian
6666          */
6667         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6668                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6669         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6670
6671         /* Indicate to hardware the Address is Valid. */
6672         rar_high |= E1000_RAH_AV;
6673
6674         if (hw->mac.type == e1000_82575)
6675                 rar_high |= E1000_RAH_POOL_1 * qsel;
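             /*
              * The pool select bits steer frames matching this address to
              * the given queue pool (VF): 82575 stores the pool number as a
              * multiple of RAH_POOL_1, later parts set a per-pool bit.
              */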
6676         else
6677                 rar_high |= E1000_RAH_POOL_1 << qsel;
6678
6679         wr32(E1000_RAL(index), rar_low);
6680         wrfl();
6681         wr32(E1000_RAH(index), rar_high);
6682         wrfl();
6683 }
6684
6685 static int igb_set_vf_mac(struct igb_adapter *adapter,
6686                           int vf, unsigned char *mac_addr)
6687 {
6688         struct e1000_hw *hw = &adapter->hw;
6689         /* VF MAC addresses start at the end of the receive address registers
6690          * and move towards the first, so a collision should not be possible */
6691         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6692
6693         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6694
6695         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6696
6697         return 0;
6698 }
6699
6700 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6701 {
6702         struct igb_adapter *adapter = netdev_priv(netdev);
6703         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6704                 return -EINVAL;
6705         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6706         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6707         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6708                                       " change effective.");
6709         if (test_bit(__IGB_DOWN, &adapter->state)) {
6710                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6711                          " but the PF device is not up.\n");
6712                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6713                          " attempting to use the VF device.\n");
6714         }
6715         return igb_set_vf_mac(adapter, vf, mac);
6716 }
6717
6718 static int igb_link_mbps(int internal_link_speed)
6719 {
6720         switch (internal_link_speed) {
6721         case SPEED_100:
6722                 return 100;
6723         case SPEED_1000:
6724                 return 1000;
6725         default:
6726                 return 0;
6727         }
6728 }
6729
6730 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6731                                   int link_speed)
6732 {
6733         int rf_dec, rf_int;
6734         u32 bcnrc_val;
6735
6736         if (tx_rate != 0) {
6737                 /* Calculate the rate factor values to set */
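                     /*
                      * The limit is programmed as link_speed / tx_rate in
                      * fixed point: rf_int holds the integer part and rf_dec
                      * the remainder scaled by 2^E1000_RTTBCNRC_RF_INT_SHIFT.
                      * E.g. a 300 Mbps cap on a 1000 Mbps link gives
                      * rf_int = 3 and rf_dec ~= (1/3 << RF_INT_SHIFT).
                      */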
6738                 rf_int = link_speed / tx_rate;
6739                 rf_dec = (link_speed - (rf_int * tx_rate));
6740                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6741
6742                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6743                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6744                                E1000_RTTBCNRC_RF_INT_MASK);
6745                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6746         } else {
6747                 bcnrc_val = 0;
6748         }
6749
6750         wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6751         wr32(E1000_RTTBCNRC, bcnrc_val);
6752 }
6753
6754 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6755 {
6756         int actual_link_speed, i;
6757         bool reset_rate = false;
6758
6759         /* VF TX rate limit was not set or not supported */
6760         if ((adapter->vf_rate_link_speed == 0) ||
6761             (adapter->hw.mac.type != e1000_82576))
6762                 return;
6763
6764         actual_link_speed = igb_link_mbps(adapter->link_speed);
6765         if (actual_link_speed != adapter->vf_rate_link_speed) {
6766                 reset_rate = true;
6767                 adapter->vf_rate_link_speed = 0;
6768                 dev_info(&adapter->pdev->dev,
6769                          "Link speed has been changed. VF Transmit "
6770                          "rate is disabled\n");
6771         }
6772
6773         for (i = 0; i < adapter->vfs_allocated_count; i++) {
6774                 if (reset_rate)
6775                         adapter->vf_data[i].tx_rate = 0;
6776
6777                 igb_set_vf_rate_limit(&adapter->hw, i,
6778                                       adapter->vf_data[i].tx_rate,
6779                                       actual_link_speed);
6780         }
6781 }
6782
6783 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6784 {
6785         struct igb_adapter *adapter = netdev_priv(netdev);
6786         struct e1000_hw *hw = &adapter->hw;
6787         int actual_link_speed;
6788
6789         if (hw->mac.type != e1000_82576)
6790                 return -EOPNOTSUPP;
6791
6792         actual_link_speed = igb_link_mbps(adapter->link_speed);
6793         if ((vf >= adapter->vfs_allocated_count) ||
6794             (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6795             (tx_rate < 0) || (tx_rate > actual_link_speed))
6796                 return -EINVAL;
6797
6798         adapter->vf_rate_link_speed = actual_link_speed;
6799         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6800         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6801
6802         return 0;
6803 }
6804
6805 static int igb_ndo_get_vf_config(struct net_device *netdev,
6806                                  int vf, struct ifla_vf_info *ivi)
6807 {
6808         struct igb_adapter *adapter = netdev_priv(netdev);
6809         if (vf >= adapter->vfs_allocated_count)
6810                 return -EINVAL;
6811         ivi->vf = vf;
6812         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6813         ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6814         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6815         ivi->qos = adapter->vf_data[vf].pf_qos;
6816         return 0;
6817 }
6818
6819 static void igb_vmm_control(struct igb_adapter *adapter)
6820 {
6821         struct e1000_hw *hw = &adapter->hw;
6822         u32 reg;
6823
6824         switch (hw->mac.type) {
6825         case e1000_82575:
6826         default:
6827                 /* replication is not supported for 82575 */
6828                 return;
6829         case e1000_82576:
6830                 /* notify HW that the MAC is adding vlan tags */
6831                 reg = rd32(E1000_DTXCTL);
6832                 reg |= E1000_DTXCTL_VLAN_ADDED;
6833                 wr32(E1000_DTXCTL, reg);
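                     /* fall through - also apply the 82580 setting below */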
6834         case e1000_82580:
6835                 /* enable replication vlan tag stripping */
6836                 reg = rd32(E1000_RPLOLR);
6837                 reg |= E1000_RPLOLR_STRVLAN;
6838                 wr32(E1000_RPLOLR, reg);
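                     /* fall through */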
6839         case e1000_i350:
6840                 /* none of the above registers are supported by i350 */
6841                 break;
6842         }
6843
6844         if (adapter->vfs_allocated_count) {
6845                 igb_vmdq_set_loopback_pf(hw, true);
6846                 igb_vmdq_set_replication_pf(hw, true);
6847                 igb_vmdq_set_anti_spoofing_pf(hw, true,
6848                                                 adapter->vfs_allocated_count);
6849         } else {
6850                 igb_vmdq_set_loopback_pf(hw, false);
6851                 igb_vmdq_set_replication_pf(hw, false);
6852         }
6853 }
6854
6855 /* igb_main.c */