1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2012 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29
30 #include <linux/module.h>
31 #include <linux/types.h>
32 #include <linux/init.h>
33 #include <linux/bitops.h>
34 #include <linux/vmalloc.h>
35 #include <linux/pagemap.h>
36 #include <linux/netdevice.h>
37 #include <linux/ipv6.h>
38 #include <linux/slab.h>
39 #include <net/checksum.h>
40 #include <net/ip6_checksum.h>
41 #include <linux/net_tstamp.h>
42 #include <linux/mii.h>
43 #include <linux/ethtool.h>
44 #include <linux/if.h>
45 #include <linux/if_vlan.h>
46 #include <linux/pci.h>
47 #include <linux/pci-aspm.h>
48 #include <linux/delay.h>
49 #include <linux/interrupt.h>
50 #include <linux/ip.h>
51 #include <linux/tcp.h>
52 #include <linux/sctp.h>
53 #include <linux/if_ether.h>
54 #include <linux/aer.h>
55 #include <linux/prefetch.h>
56 #include <linux/pm_runtime.h>
57 #ifdef CONFIG_IGB_DCA
58 #include <linux/dca.h>
59 #endif
60 #include "igb.h"
61
62 #define MAJ 3
63 #define MIN 2
64 #define BUILD 10
65 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66 __stringify(BUILD) "-k"
67 char igb_driver_name[] = "igb";
68 char igb_driver_version[] = DRV_VERSION;
69 static const char igb_driver_string[] =
70                                 "Intel(R) Gigabit Ethernet Network Driver";
71 static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";
72
73 static const struct e1000_info *igb_info_tbl[] = {
74         [board_82575] = &e1000_82575_info,
75 };
76
77 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
87         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
88         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
89         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
90         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
91         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
92         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
93         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
94         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
95         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
96         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
97         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
98         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
99         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
100         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
101         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
102         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
103         /* required last entry */
104         {0, }
105 };
106
107 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
108
109 void igb_reset(struct igb_adapter *);
110 static int igb_setup_all_tx_resources(struct igb_adapter *);
111 static int igb_setup_all_rx_resources(struct igb_adapter *);
112 static void igb_free_all_tx_resources(struct igb_adapter *);
113 static void igb_free_all_rx_resources(struct igb_adapter *);
114 static void igb_setup_mrqc(struct igb_adapter *);
115 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
116 static void __devexit igb_remove(struct pci_dev *pdev);
117 static int igb_sw_init(struct igb_adapter *);
118 static int igb_open(struct net_device *);
119 static int igb_close(struct net_device *);
120 static void igb_configure_tx(struct igb_adapter *);
121 static void igb_configure_rx(struct igb_adapter *);
122 static void igb_clean_all_tx_rings(struct igb_adapter *);
123 static void igb_clean_all_rx_rings(struct igb_adapter *);
124 static void igb_clean_tx_ring(struct igb_ring *);
125 static void igb_clean_rx_ring(struct igb_ring *);
126 static void igb_set_rx_mode(struct net_device *);
127 static void igb_update_phy_info(unsigned long);
128 static void igb_watchdog(unsigned long);
129 static void igb_watchdog_task(struct work_struct *);
130 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
131 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
132                                                  struct rtnl_link_stats64 *stats);
133 static int igb_change_mtu(struct net_device *, int);
134 static int igb_set_mac(struct net_device *, void *);
135 static void igb_set_uta(struct igb_adapter *adapter);
136 static irqreturn_t igb_intr(int irq, void *);
137 static irqreturn_t igb_intr_msi(int irq, void *);
138 static irqreturn_t igb_msix_other(int irq, void *);
139 static irqreturn_t igb_msix_ring(int irq, void *);
140 #ifdef CONFIG_IGB_DCA
141 static void igb_update_dca(struct igb_q_vector *);
142 static void igb_setup_dca(struct igb_adapter *);
143 #endif /* CONFIG_IGB_DCA */
144 static int igb_poll(struct napi_struct *, int);
145 static bool igb_clean_tx_irq(struct igb_q_vector *);
146 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
147 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
148 static void igb_tx_timeout(struct net_device *);
149 static void igb_reset_task(struct work_struct *);
150 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
151 static int igb_vlan_rx_add_vid(struct net_device *, u16);
152 static int igb_vlan_rx_kill_vid(struct net_device *, u16);
153 static void igb_restore_vlan(struct igb_adapter *);
154 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
155 static void igb_ping_all_vfs(struct igb_adapter *);
156 static void igb_msg_task(struct igb_adapter *);
157 static void igb_vmm_control(struct igb_adapter *);
158 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
159 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
160 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
161 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
162                                int vf, u16 vlan, u8 qos);
163 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
164 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
165                                  struct ifla_vf_info *ivi);
166 static void igb_check_vf_rate_limit(struct igb_adapter *);
167
168 #ifdef CONFIG_PCI_IOV
169 static int igb_vf_configure(struct igb_adapter *adapter, int vf);
170 static int igb_find_enabled_vfs(struct igb_adapter *adapter);
171 static int igb_check_vf_assignment(struct igb_adapter *adapter);
172 #endif
173
174 #ifdef CONFIG_PM
175 #ifdef CONFIG_PM_SLEEP
176 static int igb_suspend(struct device *);
177 #endif
178 static int igb_resume(struct device *);
179 #ifdef CONFIG_PM_RUNTIME
180 static int igb_runtime_suspend(struct device *dev);
181 static int igb_runtime_resume(struct device *dev);
182 static int igb_runtime_idle(struct device *dev);
183 #endif
184 static const struct dev_pm_ops igb_pm_ops = {
185         SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
186         SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
187                         igb_runtime_idle)
188 };
189 #endif
190 static void igb_shutdown(struct pci_dev *);
191 #ifdef CONFIG_IGB_DCA
192 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
193 static struct notifier_block dca_notifier = {
194         .notifier_call  = igb_notify_dca,
195         .next           = NULL,
196         .priority       = 0
197 };
198 #endif
199 #ifdef CONFIG_NET_POLL_CONTROLLER
200 /* for netdump / net console */
201 static void igb_netpoll(struct net_device *);
202 #endif
203 #ifdef CONFIG_PCI_IOV
204 static unsigned int max_vfs = 0;
205 module_param(max_vfs, uint, 0);
206 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
207                  "per physical function");
208 #endif /* CONFIG_PCI_IOV */
209
210 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
211                      pci_channel_state_t);
212 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
213 static void igb_io_resume(struct pci_dev *);
214
215 static struct pci_error_handlers igb_err_handler = {
216         .error_detected = igb_io_error_detected,
217         .slot_reset = igb_io_slot_reset,
218         .resume = igb_io_resume,
219 };
220
221 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
222
223 static struct pci_driver igb_driver = {
224         .name     = igb_driver_name,
225         .id_table = igb_pci_tbl,
226         .probe    = igb_probe,
227         .remove   = __devexit_p(igb_remove),
228 #ifdef CONFIG_PM
229         .driver.pm = &igb_pm_ops,
230 #endif
231         .shutdown = igb_shutdown,
232         .err_handler = &igb_err_handler
233 };
234
235 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
236 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
237 MODULE_LICENSE("GPL");
238 MODULE_VERSION(DRV_VERSION);
239
240 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
241 static int debug = -1;
242 module_param(debug, int, 0);
243 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
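/*
 * Editorial note: the -1 default follows the conventional e1000/igb pattern;
 * when passed to netif_msg_init() at probe time it falls outside the valid
 * range, so the driver uses DEFAULT_MSG_ENABLE instead of a user-supplied
 * level.
 */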
244
245 struct igb_reg_info {
246         u32 ofs;
247         char *name;
248 };
249
250 static const struct igb_reg_info igb_reg_info_tbl[] = {
251
252         /* General Registers */
253         {E1000_CTRL, "CTRL"},
254         {E1000_STATUS, "STATUS"},
255         {E1000_CTRL_EXT, "CTRL_EXT"},
256
257         /* Interrupt Registers */
258         {E1000_ICR, "ICR"},
259
260         /* RX Registers */
261         {E1000_RCTL, "RCTL"},
262         {E1000_RDLEN(0), "RDLEN"},
263         {E1000_RDH(0), "RDH"},
264         {E1000_RDT(0), "RDT"},
265         {E1000_RXDCTL(0), "RXDCTL"},
266         {E1000_RDBAL(0), "RDBAL"},
267         {E1000_RDBAH(0), "RDBAH"},
268
269         /* TX Registers */
270         {E1000_TCTL, "TCTL"},
271         {E1000_TDBAL(0), "TDBAL"},
272         {E1000_TDBAH(0), "TDBAH"},
273         {E1000_TDLEN(0), "TDLEN"},
274         {E1000_TDH(0), "TDH"},
275         {E1000_TDT(0), "TDT"},
276         {E1000_TXDCTL(0), "TXDCTL"},
277         {E1000_TDFH, "TDFH"},
278         {E1000_TDFT, "TDFT"},
279         {E1000_TDFHS, "TDFHS"},
280         {E1000_TDFPC, "TDFPC"},
281
282         /* List Terminator */
283         {}
284 };
285
286 /*
287  * igb_regdump - register printout routine
288  */
289 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
290 {
291         int n = 0;
292         char rname[16];
293         u32 regs[8];
294
295         switch (reginfo->ofs) {
296         case E1000_RDLEN(0):
297                 for (n = 0; n < 4; n++)
298                         regs[n] = rd32(E1000_RDLEN(n));
299                 break;
300         case E1000_RDH(0):
301                 for (n = 0; n < 4; n++)
302                         regs[n] = rd32(E1000_RDH(n));
303                 break;
304         case E1000_RDT(0):
305                 for (n = 0; n < 4; n++)
306                         regs[n] = rd32(E1000_RDT(n));
307                 break;
308         case E1000_RXDCTL(0):
309                 for (n = 0; n < 4; n++)
310                         regs[n] = rd32(E1000_RXDCTL(n));
311                 break;
312         case E1000_RDBAL(0):
313                 for (n = 0; n < 4; n++)
314                         regs[n] = rd32(E1000_RDBAL(n));
315                 break;
316         case E1000_RDBAH(0):
317                 for (n = 0; n < 4; n++)
318                         regs[n] = rd32(E1000_RDBAH(n));
319                 break;
320         case E1000_TDBAL(0):
321                 for (n = 0; n < 4; n++)
322                         regs[n] = rd32(E1000_TDBAL(n));
323                 break;
324         case E1000_TDBAH(0):
325                 for (n = 0; n < 4; n++)
326                         regs[n] = rd32(E1000_TDBAH(n));
327                 break;
328         case E1000_TDLEN(0):
329                 for (n = 0; n < 4; n++)
330                         regs[n] = rd32(E1000_TDLEN(n));
331                 break;
332         case E1000_TDH(0):
333                 for (n = 0; n < 4; n++)
334                         regs[n] = rd32(E1000_TDH(n));
335                 break;
336         case E1000_TDT(0):
337                 for (n = 0; n < 4; n++)
338                         regs[n] = rd32(E1000_TDT(n));
339                 break;
340         case E1000_TXDCTL(0):
341                 for (n = 0; n < 4; n++)
342                         regs[n] = rd32(E1000_TXDCTL(n));
343                 break;
344         default:
345                 pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
346                 return;
347         }
348
349         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
350         pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
351                 regs[2], regs[3]);
352 }
353
354 /*
355  * igb_dump - Print registers, tx-rings and rx-rings
356  */
357 static void igb_dump(struct igb_adapter *adapter)
358 {
359         struct net_device *netdev = adapter->netdev;
360         struct e1000_hw *hw = &adapter->hw;
361         struct igb_reg_info *reginfo;
362         struct igb_ring *tx_ring;
363         union e1000_adv_tx_desc *tx_desc;
364         struct my_u0 { u64 a; u64 b; } *u0;
365         struct igb_ring *rx_ring;
366         union e1000_adv_rx_desc *rx_desc;
367         u32 staterr;
368         u16 i, n;
369
370         if (!netif_msg_hw(adapter))
371                 return;
372
373         /* Print netdevice Info */
374         if (netdev) {
375                 dev_info(&adapter->pdev->dev, "Net device Info\n");
376                 pr_info("Device Name     state            trans_start      "
377                         "last_rx\n");
378                 pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
379                         netdev->state, netdev->trans_start, netdev->last_rx);
380         }
381
382         /* Print Registers */
383         dev_info(&adapter->pdev->dev, "Register Dump\n");
384         pr_info(" Register Name   Value\n");
385         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
386              reginfo->name; reginfo++) {
387                 igb_regdump(hw, reginfo);
388         }
389
390         /* Print TX Ring Summary */
391         if (!netdev || !netif_running(netdev))
392                 goto exit;
393
394         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
395         pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
396         for (n = 0; n < adapter->num_tx_queues; n++) {
397                 struct igb_tx_buffer *buffer_info;
398                 tx_ring = adapter->tx_ring[n];
399                 buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
400                 pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
401                         n, tx_ring->next_to_use, tx_ring->next_to_clean,
402                         (u64)buffer_info->dma,
403                         buffer_info->length,
404                         buffer_info->next_to_watch,
405                         (u64)buffer_info->time_stamp);
406         }
407
408         /* Print TX Rings */
409         if (!netif_msg_tx_done(adapter))
410                 goto rx_ring_summary;
411
412         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
413
414         /* Transmit Descriptor Formats
415          *
416          * Advanced Transmit Descriptor
417          *   +--------------------------------------------------------------+
418          * 0 |         Buffer Address [63:0]                                |
419          *   +--------------------------------------------------------------+
420          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
421          *   +--------------------------------------------------------------+
422          *   63      46 45    40 39 38 36 35 32 31   24             15       0
423          */
424
425         for (n = 0; n < adapter->num_tx_queues; n++) {
426                 tx_ring = adapter->tx_ring[n];
427                 pr_info("------------------------------------\n");
428                 pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
429                 pr_info("------------------------------------\n");
430                 pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
431                         "[bi->dma       ] leng  ntw timestamp        "
432                         "bi->skb\n");
433
434                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
435                         const char *next_desc;
436                         struct igb_tx_buffer *buffer_info;
437                         tx_desc = IGB_TX_DESC(tx_ring, i);
438                         buffer_info = &tx_ring->tx_buffer_info[i];
439                         u0 = (struct my_u0 *)tx_desc;
440                         if (i == tx_ring->next_to_use &&
441                             i == tx_ring->next_to_clean)
442                                 next_desc = " NTC/U";
443                         else if (i == tx_ring->next_to_use)
444                                 next_desc = " NTU";
445                         else if (i == tx_ring->next_to_clean)
446                                 next_desc = " NTC";
447                         else
448                                 next_desc = "";
449
450                         pr_info("T [0x%03X]    %016llX %016llX %016llX"
451                                 " %04X  %p %016llX %p%s\n", i,
452                                 le64_to_cpu(u0->a),
453                                 le64_to_cpu(u0->b),
454                                 (u64)buffer_info->dma,
455                                 buffer_info->length,
456                                 buffer_info->next_to_watch,
457                                 (u64)buffer_info->time_stamp,
458                                 buffer_info->skb, next_desc);
459
460                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
461                                 print_hex_dump(KERN_INFO, "",
462                                         DUMP_PREFIX_ADDRESS,
463                                         16, 1, phys_to_virt(buffer_info->dma),
464                                         buffer_info->length, true);
465                 }
466         }
467
468         /* Print RX Rings Summary */
469 rx_ring_summary:
470         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
471         pr_info("Queue [NTU] [NTC]\n");
472         for (n = 0; n < adapter->num_rx_queues; n++) {
473                 rx_ring = adapter->rx_ring[n];
474                 pr_info(" %5d %5X %5X\n",
475                         n, rx_ring->next_to_use, rx_ring->next_to_clean);
476         }
477
478         /* Print RX Rings */
479         if (!netif_msg_rx_status(adapter))
480                 goto exit;
481
482         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
483
484         /* Advanced Receive Descriptor (Read) Format
485          *    63                                           1        0
486          *    +-----------------------------------------------------+
487          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
488          *    +----------------------------------------------+------+
489          *  8 |       Header Buffer Address [63:1]           |  DD  |
490          *    +-----------------------------------------------------+
491          *
492          *
493          * Advanced Receive Descriptor (Write-Back) Format
494          *
495          *   63       48 47    32 31  30      21 20 17 16   4 3     0
496          *   +------------------------------------------------------+
497          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
498          *   | Checksum   Ident  |   |           |    | Type | Type |
499          *   +------------------------------------------------------+
500          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
501          *   +------------------------------------------------------+
502          *   63       48 47    32 31            20 19               0
503          */
504
505         for (n = 0; n < adapter->num_rx_queues; n++) {
506                 rx_ring = adapter->rx_ring[n];
507                 pr_info("------------------------------------\n");
508                 pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
509                 pr_info("------------------------------------\n");
510                 pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
511                         "[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
512                 pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
513                         "----------- [bi->skb] <-- Adv Rx Write-Back format\n");
514
515                 for (i = 0; i < rx_ring->count; i++) {
516                         const char *next_desc;
517                         struct igb_rx_buffer *buffer_info;
518                         buffer_info = &rx_ring->rx_buffer_info[i];
519                         rx_desc = IGB_RX_DESC(rx_ring, i);
520                         u0 = (struct my_u0 *)rx_desc;
521                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
522
523                         if (i == rx_ring->next_to_use)
524                                 next_desc = " NTU";
525                         else if (i == rx_ring->next_to_clean)
526                                 next_desc = " NTC";
527                         else
528                                 next_desc = "";
529
530                         if (staterr & E1000_RXD_STAT_DD) {
531                                 /* Descriptor Done */
532                                 pr_info("%s[0x%03X]     %016llX %016llX -------"
533                                         "--------- %p%s\n", "RWB", i,
534                                         le64_to_cpu(u0->a),
535                                         le64_to_cpu(u0->b),
536                                         buffer_info->skb, next_desc);
537                         } else {
538                                 pr_info("%s[0x%03X]     %016llX %016llX %016llX"
539                                         " %p%s\n", "R  ", i,
540                                         le64_to_cpu(u0->a),
541                                         le64_to_cpu(u0->b),
542                                         (u64)buffer_info->dma,
543                                         buffer_info->skb, next_desc);
544
545                                 if (netif_msg_pktdata(adapter)) {
546                                         print_hex_dump(KERN_INFO, "",
547                                                 DUMP_PREFIX_ADDRESS,
548                                                 16, 1,
549                                                 phys_to_virt(buffer_info->dma),
550                                                 IGB_RX_HDR_LEN, true);
551                                         print_hex_dump(KERN_INFO, "",
552                                           DUMP_PREFIX_ADDRESS,
553                                           16, 1,
554                                           phys_to_virt(
555                                             buffer_info->page_dma +
556                                             buffer_info->page_offset),
557                                           PAGE_SIZE/2, true);
558                                 }
559                         }
560                 }
561         }
562
563 exit:
564         return;
565 }
566
567 /**
568  * igb_get_hw_dev - return device
569  * used by hardware layer to print debugging information
570  **/
571 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
572 {
573         struct igb_adapter *adapter = hw->back;
574         return adapter->netdev;
575 }
576
577 /**
578  * igb_init_module - Driver Registration Routine
579  *
580  * igb_init_module is the first routine called when the driver is
581  * loaded. All it does is register with the PCI subsystem.
582  **/
583 static int __init igb_init_module(void)
584 {
585         int ret;
586         pr_info("%s - version %s\n",
587                igb_driver_string, igb_driver_version);
588
589         pr_info("%s\n", igb_copyright);
590
591 #ifdef CONFIG_IGB_DCA
592         dca_register_notify(&dca_notifier);
593 #endif
594         ret = pci_register_driver(&igb_driver);
595         return ret;
596 }
597
598 module_init(igb_init_module);
599
600 /**
601  * igb_exit_module - Driver Exit Cleanup Routine
602  *
603  * igb_exit_module is called just before the driver is removed
604  * from memory.
605  **/
606 static void __exit igb_exit_module(void)
607 {
608 #ifdef CONFIG_IGB_DCA
609         dca_unregister_notify(&dca_notifier);
610 #endif
611         pci_unregister_driver(&igb_driver);
612 }
613
614 module_exit(igb_exit_module);
615
616 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
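/*
 * Editorial illustration of the Q_IDX_82576() mapping: the low bit of i
 * selects the upper or lower half of the queue space and the remaining bits
 * select the pair, so
 *
 *   i:               0  1  2  3  4  5  6  7
 *   Q_IDX_82576(i):  0  8  1  9  2 10  3 11
 *
 * which matches the "VF 0 gets queues 0 and 8, VF 1 gets queues 1 and 9"
 * layout described in igb_cache_ring_register() below.
 */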
617 /**
618  * igb_cache_ring_register - Descriptor ring to register mapping
619  * @adapter: board private structure to initialize
620  *
621  * Once we know the feature-set enabled for the device, we'll cache
622  * the register offset the descriptor ring is assigned to.
623  **/
624 static void igb_cache_ring_register(struct igb_adapter *adapter)
625 {
626         int i = 0, j = 0;
627         u32 rbase_offset = adapter->vfs_allocated_count;
628
629         switch (adapter->hw.mac.type) {
630         case e1000_82576:
631                 /* The queues are allocated for virtualization such that VF 0
632                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
633                  * In order to avoid collision we start at the first free queue
634                  * and continue consuming queues in the same sequence
635                  */
636                 if (adapter->vfs_allocated_count) {
637                         for (; i < adapter->rss_queues; i++)
638                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
639                                                                Q_IDX_82576(i);
640                 }
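                /* Fall through: remaining (PF) queues use the 1:1 mapping below */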
641         case e1000_82575:
642         case e1000_82580:
643         case e1000_i350:
644         default:
645                 for (; i < adapter->num_rx_queues; i++)
646                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
647                 for (; j < adapter->num_tx_queues; j++)
648                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
649                 break;
650         }
651 }
652
653 static void igb_free_queues(struct igb_adapter *adapter)
654 {
655         int i;
656
657         for (i = 0; i < adapter->num_tx_queues; i++) {
658                 kfree(adapter->tx_ring[i]);
659                 adapter->tx_ring[i] = NULL;
660         }
661         for (i = 0; i < adapter->num_rx_queues; i++) {
662                 kfree(adapter->rx_ring[i]);
663                 adapter->rx_ring[i] = NULL;
664         }
665         adapter->num_rx_queues = 0;
666         adapter->num_tx_queues = 0;
667 }
668
669 /**
670  * igb_alloc_queues - Allocate memory for all rings
671  * @adapter: board private structure to initialize
672  *
673  * We allocate one ring per queue at run-time since we don't know the
674  * number of queues at compile-time.
675  **/
676 static int igb_alloc_queues(struct igb_adapter *adapter)
677 {
678         struct igb_ring *ring;
679         int i;
680         int orig_node = adapter->node;
681
682         for (i = 0; i < adapter->num_tx_queues; i++) {
683                 if (orig_node == -1) {
684                         int cur_node = next_online_node(adapter->node);
685                         if (cur_node == MAX_NUMNODES)
686                                 cur_node = first_online_node;
687                         adapter->node = cur_node;
688                 }
689                 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
690                                     adapter->node);
691                 if (!ring)
692                         ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
693                 if (!ring)
694                         goto err;
695                 ring->count = adapter->tx_ring_count;
696                 ring->queue_index = i;
697                 ring->dev = &adapter->pdev->dev;
698                 ring->netdev = adapter->netdev;
699                 ring->numa_node = adapter->node;
700                 /* For 82575, context index must be unique per ring. */
701                 if (adapter->hw.mac.type == e1000_82575)
702                         set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
703                 adapter->tx_ring[i] = ring;
704         }
705         /* Restore the adapter's original node */
706         adapter->node = orig_node;
707
708         for (i = 0; i < adapter->num_rx_queues; i++) {
709                 if (orig_node == -1) {
710                         int cur_node = next_online_node(adapter->node);
711                         if (cur_node == MAX_NUMNODES)
712                                 cur_node = first_online_node;
713                         adapter->node = cur_node;
714                 }
715                 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
716                                     adapter->node);
717                 if (!ring)
718                         ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
719                 if (!ring)
720                         goto err;
721                 ring->count = adapter->rx_ring_count;
722                 ring->queue_index = i;
723                 ring->dev = &adapter->pdev->dev;
724                 ring->netdev = adapter->netdev;
725                 ring->numa_node = adapter->node;
726                 /* set flag indicating ring supports SCTP checksum offload */
727                 if (adapter->hw.mac.type >= e1000_82576)
728                         set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
729
730                 /* On i350, loopback VLAN packets have the tag byte-swapped. */
731                 if (adapter->hw.mac.type == e1000_i350)
732                         set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
733
734                 adapter->rx_ring[i] = ring;
735         }
736         /* Restore the adapter's original node */
737         adapter->node = orig_node;
738
739         igb_cache_ring_register(adapter);
740
741         return 0;
742
743 err:
744         /* Restore the adapter's original node */
745         adapter->node = orig_node;
746         igb_free_queues(adapter);
747
748         return -ENOMEM;
749 }
750
751 /**
752  *  igb_write_ivar - configure ivar for given MSI-X vector
753  *  @hw: pointer to the HW structure
754  *  @msix_vector: vector number we are allocating to a given ring
755  *  @index: row index of IVAR register to write within IVAR table
756  *  @offset: column offset in IVAR, should be a multiple of 8
757  *
758  *  This function is intended to handle the writing of the IVAR register
759  *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
760  *  each containing a cause allocation for an Rx and Tx ring, and a
761  *  variable number of rows depending on the number of queues supported.
762  **/
763 static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
764                            int index, int offset)
765 {
766         u32 ivar = array_rd32(E1000_IVAR0, index);
767
768         /* clear any bits that are currently set */
769         ivar &= ~((u32)0xFF << offset);
770
771         /* write vector and valid bit */
772         ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
773
774         array_wr32(E1000_IVAR0, index, ivar);
775 }
776
777 #define IGB_N0_QUEUE -1
778 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
779 {
780         struct igb_adapter *adapter = q_vector->adapter;
781         struct e1000_hw *hw = &adapter->hw;
782         int rx_queue = IGB_N0_QUEUE;
783         int tx_queue = IGB_N0_QUEUE;
784         u32 msixbm = 0;
785
786         if (q_vector->rx.ring)
787                 rx_queue = q_vector->rx.ring->reg_idx;
788         if (q_vector->tx.ring)
789                 tx_queue = q_vector->tx.ring->reg_idx;
790
791         switch (hw->mac.type) {
792         case e1000_82575:
793                 /* The 82575 assigns vectors using a bitmask, which matches the
794                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
795                    or more queues to a vector, we write the appropriate bits
796                    into the MSIXBM register for that vector. */
797                 if (rx_queue > IGB_N0_QUEUE)
798                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
799                 if (tx_queue > IGB_N0_QUEUE)
800                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
801                 if (!adapter->msix_entries && msix_vector == 0)
802                         msixbm |= E1000_EIMS_OTHER;
803                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
804                 q_vector->eims_value = msixbm;
805                 break;
806         case e1000_82576:
807                 /*
808                  * 82576 uses a table that essentially consists of 2 columns
809                  * with 8 rows.  The ordering is column-major so we use the
810                  * lower 3 bits as the row index, and the 4th bit as the
811                  * column offset.
812                  */
813                 if (rx_queue > IGB_N0_QUEUE)
814                         igb_write_ivar(hw, msix_vector,
815                                        rx_queue & 0x7,
816                                        (rx_queue & 0x8) << 1);
817                 if (tx_queue > IGB_N0_QUEUE)
818                         igb_write_ivar(hw, msix_vector,
819                                        tx_queue & 0x7,
820                                        ((tx_queue & 0x8) << 1) + 8);
821                 q_vector->eims_value = 1 << msix_vector;
822                 break;
823         case e1000_82580:
824         case e1000_i350:
825                 /*
826                  * On 82580 and newer adapters the scheme is similar to 82576
827                  * however instead of ordering column-major we have things
828                  * ordered row-major.  So we traverse the table by using
829                  * bit 0 as the column offset, and the remaining bits as the
830                  * row index.
831                  */
832                 if (rx_queue > IGB_N0_QUEUE)
833                         igb_write_ivar(hw, msix_vector,
834                                        rx_queue >> 1,
835                                        (rx_queue & 0x1) << 4);
836                 if (tx_queue > IGB_N0_QUEUE)
837                         igb_write_ivar(hw, msix_vector,
838                                        tx_queue >> 1,
839                                        ((tx_queue & 0x1) << 4) + 8);
840                 q_vector->eims_value = 1 << msix_vector;
841                 break;
842         default:
843                 BUG();
844                 break;
845         }
846
847         /* add q_vector eims value to global eims_enable_mask */
848         adapter->eims_enable_mask |= q_vector->eims_value;
849
850         /* configure q_vector to set itr on first interrupt */
851         q_vector->set_itr = 1;
852 }
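/*
 * Worked example of the IVAR addressing above (editorial illustration derived
 * from igb_write_ivar() and the per-MAC comments): on 82580/i350, mapping Rx
 * queue 5 to MSI-X vector 3 uses row index 5 >> 1 = 2 and column offset
 * (5 & 0x1) << 4 = 16, so bits 23:16 of IVAR(2) are written with
 * (3 | E1000_IVAR_VALID); the matching Tx queue 5 would land at offset
 * 16 + 8 = 24 in the same row.
 */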
853
854 /**
855  * igb_configure_msix - Configure MSI-X hardware
856  *
857  * igb_configure_msix sets up the hardware to properly
858  * generate MSI-X interrupts.
859  **/
860 static void igb_configure_msix(struct igb_adapter *adapter)
861 {
862         u32 tmp;
863         int i, vector = 0;
864         struct e1000_hw *hw = &adapter->hw;
865
866         adapter->eims_enable_mask = 0;
867
868         /* set vector for other causes, i.e. link changes */
869         switch (hw->mac.type) {
870         case e1000_82575:
871                 tmp = rd32(E1000_CTRL_EXT);
872                 /* enable MSI-X PBA support*/
873                 tmp |= E1000_CTRL_EXT_PBA_CLR;
874
875                 /* Auto-Mask interrupts upon ICR read. */
876                 tmp |= E1000_CTRL_EXT_EIAME;
877                 tmp |= E1000_CTRL_EXT_IRCA;
878
879                 wr32(E1000_CTRL_EXT, tmp);
880
881                 /* enable msix_other interrupt */
882                 array_wr32(E1000_MSIXBM(0), vector++,
883                                       E1000_EIMS_OTHER);
884                 adapter->eims_other = E1000_EIMS_OTHER;
885
886                 break;
887
888         case e1000_82576:
889         case e1000_82580:
890         case e1000_i350:
891                 /* Turn on MSI-X capability first, or our settings
892                  * won't stick.  And it will take days to debug. */
893                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
894                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
895                                 E1000_GPIE_NSICR);
896
897                 /* enable msix_other interrupt */
898                 adapter->eims_other = 1 << vector;
899                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
900
901                 wr32(E1000_IVAR_MISC, tmp);
902                 break;
903         default:
904                 /* do nothing, since nothing else supports MSI-X */
905                 break;
906         } /* switch (hw->mac.type) */
907
908         adapter->eims_enable_mask |= adapter->eims_other;
909
910         for (i = 0; i < adapter->num_q_vectors; i++)
911                 igb_assign_vector(adapter->q_vector[i], vector++);
912
913         wrfl();
914 }
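/*
 * Editorial note: in both branches above, MSI-X vector 0 carries the "other"
 * (link/mailbox) cause and the queue vectors are assigned starting at 1,
 * which matches the order in which igb_request_msix() requests the
 * interrupts below.
 */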
915
916 /**
917  * igb_request_msix - Initialize MSI-X interrupts
918  *
919  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
920  * kernel.
921  **/
922 static int igb_request_msix(struct igb_adapter *adapter)
923 {
924         struct net_device *netdev = adapter->netdev;
925         struct e1000_hw *hw = &adapter->hw;
926         int i, err = 0, vector = 0;
927
928         err = request_irq(adapter->msix_entries[vector].vector,
929                           igb_msix_other, 0, netdev->name, adapter);
930         if (err)
931                 goto out;
932         vector++;
933
934         for (i = 0; i < adapter->num_q_vectors; i++) {
935                 struct igb_q_vector *q_vector = adapter->q_vector[i];
936
937                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
938
939                 if (q_vector->rx.ring && q_vector->tx.ring)
940                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
941                                 q_vector->rx.ring->queue_index);
942                 else if (q_vector->tx.ring)
943                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
944                                 q_vector->tx.ring->queue_index);
945                 else if (q_vector->rx.ring)
946                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
947                                 q_vector->rx.ring->queue_index);
948                 else
949                         sprintf(q_vector->name, "%s-unused", netdev->name);
950
951                 err = request_irq(adapter->msix_entries[vector].vector,
952                                   igb_msix_ring, 0, q_vector->name,
953                                   q_vector);
954                 if (err)
955                         goto out;
956                 vector++;
957         }
958
959         igb_configure_msix(adapter);
960         return 0;
961 out:
962         return err;
963 }
964
965 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
966 {
967         if (adapter->msix_entries) {
968                 pci_disable_msix(adapter->pdev);
969                 kfree(adapter->msix_entries);
970                 adapter->msix_entries = NULL;
971         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
972                 pci_disable_msi(adapter->pdev);
973         }
974 }
975
976 /**
977  * igb_free_q_vectors - Free memory allocated for interrupt vectors
978  * @adapter: board private structure to initialize
979  *
980  * This function frees the memory allocated to the q_vectors.  In addition if
981  * NAPI is enabled it will delete any references to the NAPI struct prior
982  * to freeing the q_vector.
983  **/
984 static void igb_free_q_vectors(struct igb_adapter *adapter)
985 {
986         int v_idx;
987
988         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
989                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
990                 adapter->q_vector[v_idx] = NULL;
991                 if (!q_vector)
992                         continue;
993                 netif_napi_del(&q_vector->napi);
994                 kfree(q_vector);
995         }
996         adapter->num_q_vectors = 0;
997 }
998
999 /**
1000  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1001  *
1002  * This function resets the device so that it has no Rx queues, Tx queues,
1003  * or MSI-X interrupts allocated.
1004  */
1005 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1006 {
1007         igb_free_queues(adapter);
1008         igb_free_q_vectors(adapter);
1009         igb_reset_interrupt_capability(adapter);
1010 }
1011
1012 /**
1013  * igb_set_interrupt_capability - set MSI or MSI-X if supported
1014  *
1015  * Attempt to configure interrupts using the best available
1016  * capabilities of the hardware and kernel.
1017  **/
1018 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1019 {
1020         int err;
1021         int numvecs, i;
1022
1023         /* Number of supported queues. */
1024         adapter->num_rx_queues = adapter->rss_queues;
1025         if (adapter->vfs_allocated_count)
1026                 adapter->num_tx_queues = 1;
1027         else
1028                 adapter->num_tx_queues = adapter->rss_queues;
1029
1030         /* start with one vector for every rx queue */
1031         numvecs = adapter->num_rx_queues;
1032
1033         /* if tx handler is separate add 1 for every tx queue */
1034         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1035                 numvecs += adapter->num_tx_queues;
1036
1037         /* store the number of vectors reserved for queues */
1038         adapter->num_q_vectors = numvecs;
1039
1040         /* add 1 vector for link status interrupts */
1041         numvecs++;
1042         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1043                                         GFP_KERNEL);
1044         if (!adapter->msix_entries)
1045                 goto msi_only;
1046
1047         for (i = 0; i < numvecs; i++)
1048                 adapter->msix_entries[i].entry = i;
1049
1050         err = pci_enable_msix(adapter->pdev,
1051                               adapter->msix_entries,
1052                               numvecs);
1053         if (err == 0)
1054                 goto out;
1055
1056         igb_reset_interrupt_capability(adapter);
1057
1058         /* If we can't do MSI-X, try MSI */
1059 msi_only:
1060 #ifdef CONFIG_PCI_IOV
1061         /* disable SR-IOV for non MSI-X configurations */
1062         if (adapter->vf_data) {
1063                 struct e1000_hw *hw = &adapter->hw;
1064                 /* disable iov and allow time for transactions to clear */
1065                 pci_disable_sriov(adapter->pdev);
1066                 msleep(500);
1067
1068                 kfree(adapter->vf_data);
1069                 adapter->vf_data = NULL;
1070                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1071                 wrfl();
1072                 msleep(100);
1073                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1074         }
1075 #endif
1076         adapter->vfs_allocated_count = 0;
1077         adapter->rss_queues = 1;
1078         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1079         adapter->num_rx_queues = 1;
1080         adapter->num_tx_queues = 1;
1081         adapter->num_q_vectors = 1;
1082         if (!pci_enable_msi(adapter->pdev))
1083                 adapter->flags |= IGB_FLAG_HAS_MSI;
1084 out:
1085         /* Notify the stack of the (possibly) reduced queue counts. */
1086         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1087         return netif_set_real_num_rx_queues(adapter->netdev,
1088                                             adapter->num_rx_queues);
1089 }
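/*
 * Example of the vector accounting above (editorial illustration): with
 * rss_queues = 4, no VFs and IGB_FLAG_QUEUE_PAIRS clear, 4 Rx + 4 Tx queue
 * vectors plus 1 link/other vector are requested, i.e. num_q_vectors = 8 and
 * 9 MSI-X entries; with queue pairing the Tx rings share the Rx vectors,
 * giving num_q_vectors = 4 and 5 MSI-X entries.
 */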
1090
1091 /**
1092  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1093  * @adapter: board private structure to initialize
1094  *
1095  * We allocate one q_vector per queue interrupt.  If allocation fails we
1096  * return -ENOMEM.
1097  **/
1098 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1099 {
1100         struct igb_q_vector *q_vector;
1101         struct e1000_hw *hw = &adapter->hw;
1102         int v_idx;
1103         int orig_node = adapter->node;
1104
1105         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1106                 if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1107                                                 adapter->num_tx_queues)) &&
1108                     (adapter->num_rx_queues == v_idx))
1109                         adapter->node = orig_node;
1110                 if (orig_node == -1) {
1111                         int cur_node = next_online_node(adapter->node);
1112                         if (cur_node == MAX_NUMNODES)
1113                                 cur_node = first_online_node;
1114                         adapter->node = cur_node;
1115                 }
1116                 q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1117                                         adapter->node);
1118                 if (!q_vector)
1119                         q_vector = kzalloc(sizeof(struct igb_q_vector),
1120                                            GFP_KERNEL);
1121                 if (!q_vector)
1122                         goto err_out;
1123                 q_vector->adapter = adapter;
1124                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1125                 q_vector->itr_val = IGB_START_ITR;
1126                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1127                 adapter->q_vector[v_idx] = q_vector;
1128         }
1129         /* Restore the adapter's original node */
1130         adapter->node = orig_node;
1131
1132         return 0;
1133
1134 err_out:
1135         /* Restore the adapter's original node */
1136         adapter->node = orig_node;
1137         igb_free_q_vectors(adapter);
1138         return -ENOMEM;
1139 }
1140
1141 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1142                                       int ring_idx, int v_idx)
1143 {
1144         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1145
1146         q_vector->rx.ring = adapter->rx_ring[ring_idx];
1147         q_vector->rx.ring->q_vector = q_vector;
1148         q_vector->rx.count++;
1149         q_vector->itr_val = adapter->rx_itr_setting;
1150         if (q_vector->itr_val && q_vector->itr_val <= 3)
1151                 q_vector->itr_val = IGB_START_ITR;
1152 }
1153
1154 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1155                                       int ring_idx, int v_idx)
1156 {
1157         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1158
1159         q_vector->tx.ring = adapter->tx_ring[ring_idx];
1160         q_vector->tx.ring->q_vector = q_vector;
1161         q_vector->tx.count++;
1162         q_vector->itr_val = adapter->tx_itr_setting;
1163         q_vector->tx.work_limit = adapter->tx_work_limit;
1164         if (q_vector->itr_val && q_vector->itr_val <= 3)
1165                 q_vector->itr_val = IGB_START_ITR;
1166 }
1167
1168 /**
1169  * igb_map_ring_to_vector - maps allocated queues to vectors
1170  *
1171  * This function maps the recently allocated queues to vectors.
1172  **/
1173 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1174 {
1175         int i;
1176         int v_idx = 0;
1177
1178         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1179             (adapter->num_q_vectors < adapter->num_tx_queues))
1180                 return -ENOMEM;
1181
1182         if (adapter->num_q_vectors >=
1183             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1184                 for (i = 0; i < adapter->num_rx_queues; i++)
1185                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1186                 for (i = 0; i < adapter->num_tx_queues; i++)
1187                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1188         } else {
1189                 for (i = 0; i < adapter->num_rx_queues; i++) {
1190                         if (i < adapter->num_tx_queues)
1191                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1192                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1193                 }
1194                 for (; i < adapter->num_tx_queues; i++)
1195                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1196         }
1197         return 0;
1198 }
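/*
 * Mapping example (editorial illustration): with 4 Rx and 4 Tx queues and
 * only 4 q_vectors (queue pairing), the else branch above puts Tx ring i and
 * Rx ring i on vector i; with 8 q_vectors each ring gets its own vector,
 * Rx rings on vectors 0-3 and Tx rings on vectors 4-7.
 */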
1199
1200 /**
1201  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1202  *
1203  * This function initializes the interrupts and allocates all of the queues.
1204  **/
1205 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1206 {
1207         struct pci_dev *pdev = adapter->pdev;
1208         int err;
1209
1210         err = igb_set_interrupt_capability(adapter);
1211         if (err)
1212                 return err;
1213
1214         err = igb_alloc_q_vectors(adapter);
1215         if (err) {
1216                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1217                 goto err_alloc_q_vectors;
1218         }
1219
1220         err = igb_alloc_queues(adapter);
1221         if (err) {
1222                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1223                 goto err_alloc_queues;
1224         }
1225
1226         err = igb_map_ring_to_vector(adapter);
1227         if (err) {
1228                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1229                 goto err_map_queues;
1230         }
1231
1232
1233         return 0;
1234 err_map_queues:
1235         igb_free_queues(adapter);
1236 err_alloc_queues:
1237         igb_free_q_vectors(adapter);
1238 err_alloc_q_vectors:
1239         igb_reset_interrupt_capability(adapter);
1240         return err;
1241 }
1242
1243 /**
1244  * igb_request_irq - initialize interrupts
1245  *
1246  * Attempts to configure interrupts using the best available
1247  * capabilities of the hardware and kernel.
1248  **/
1249 static int igb_request_irq(struct igb_adapter *adapter)
1250 {
1251         struct net_device *netdev = adapter->netdev;
1252         struct pci_dev *pdev = adapter->pdev;
1253         int err = 0;
1254
1255         if (adapter->msix_entries) {
1256                 err = igb_request_msix(adapter);
1257                 if (!err)
1258                         goto request_done;
1259                 /* fall back to MSI */
1260                 igb_clear_interrupt_scheme(adapter);
1261                 if (!pci_enable_msi(pdev))
1262                         adapter->flags |= IGB_FLAG_HAS_MSI;
1263                 igb_free_all_tx_resources(adapter);
1264                 igb_free_all_rx_resources(adapter);
1265                 adapter->num_tx_queues = 1;
1266                 adapter->num_rx_queues = 1;
1267                 adapter->num_q_vectors = 1;
1268                 err = igb_alloc_q_vectors(adapter);
1269                 if (err) {
1270                         dev_err(&pdev->dev,
1271                                 "Unable to allocate memory for vectors\n");
1272                         goto request_done;
1273                 }
1274                 err = igb_alloc_queues(adapter);
1275                 if (err) {
1276                         dev_err(&pdev->dev,
1277                                 "Unable to allocate memory for queues\n");
1278                         igb_free_q_vectors(adapter);
1279                         goto request_done;
1280                 }
1281                 igb_setup_all_tx_resources(adapter);
1282                 igb_setup_all_rx_resources(adapter);
1283         }
1284
1285         igb_assign_vector(adapter->q_vector[0], 0);
1286
1287         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1288                 err = request_irq(pdev->irq, igb_intr_msi, 0,
1289                                   netdev->name, adapter);
1290                 if (!err)
1291                         goto request_done;
1292
1293                 /* fall back to legacy interrupts */
1294                 igb_reset_interrupt_capability(adapter);
1295                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1296         }
1297
1298         err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1299                           netdev->name, adapter);
1300
1301         if (err)
1302                 dev_err(&pdev->dev, "Error %d getting interrupt\n",
1303                         err);
1304
1305 request_done:
1306         return err;
1307 }
1308
1309 static void igb_free_irq(struct igb_adapter *adapter)
1310 {
1311         if (adapter->msix_entries) {
1312                 int vector = 0, i;
1313
1314                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1315
1316                 for (i = 0; i < adapter->num_q_vectors; i++)
1317                         free_irq(adapter->msix_entries[vector++].vector,
1318                                  adapter->q_vector[i]);
1319         } else {
1320                 free_irq(adapter->pdev->irq, adapter);
1321         }
1322 }
1323
1324 /**
1325  * igb_irq_disable - Mask off interrupt generation on the NIC
1326  * @adapter: board private structure
1327  **/
1328 static void igb_irq_disable(struct igb_adapter *adapter)
1329 {
1330         struct e1000_hw *hw = &adapter->hw;
1331
1332         /*
1333          * we need to be careful when disabling interrupts.  The VFs are also
1334          * mapped into these registers and so clearing the bits can cause
1335          * issues on the VF drivers so we only need to clear what we set
1336          */
1337         if (adapter->msix_entries) {
1338                 u32 regval = rd32(E1000_EIAM);
1339                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1340                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1341                 regval = rd32(E1000_EIAC);
1342                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1343         }
1344
1345         wr32(E1000_IAM, 0);
1346         wr32(E1000_IMC, ~0);
1347         wrfl();
1348         if (adapter->msix_entries) {
1349                 int i;
1350                 for (i = 0; i < adapter->num_q_vectors; i++)
1351                         synchronize_irq(adapter->msix_entries[i].vector);
1352         } else {
1353                 synchronize_irq(adapter->pdev->irq);
1354         }
1355 }
1356
1357 /**
1358  * igb_irq_enable - Enable default interrupt generation settings
1359  * @adapter: board private structure
1360  **/
1361 static void igb_irq_enable(struct igb_adapter *adapter)
1362 {
1363         struct e1000_hw *hw = &adapter->hw;
1364
1365         if (adapter->msix_entries) {
1366                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1367                 u32 regval = rd32(E1000_EIAC);
1368                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1369                 regval = rd32(E1000_EIAM);
1370                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1371                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1372                 if (adapter->vfs_allocated_count) {
1373                         wr32(E1000_MBVFIMR, 0xFF);
1374                         ims |= E1000_IMS_VMMB;
1375                 }
1376                 wr32(E1000_IMS, ims);
1377         } else {
1378                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1379                                 E1000_IMS_DRSTA);
1380                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1381                                 E1000_IMS_DRSTA);
1382         }
1383 }
1384
1385 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1386 {
1387         struct e1000_hw *hw = &adapter->hw;
1388         u16 vid = adapter->hw.mng_cookie.vlan_id;
1389         u16 old_vid = adapter->mng_vlan_id;
1390
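        /* the manageability firmware (BMC) may rely on a specific VLAN for
         * pass-through traffic; keep that VID in the filter table and only
         * remove the old one once neither the firmware nor the stack uses it
         */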
1391         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1392                 /* add VID to filter table */
1393                 igb_vfta_set(hw, vid, true);
1394                 adapter->mng_vlan_id = vid;
1395         } else {
1396                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1397         }
1398
1399         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1400             (vid != old_vid) &&
1401             !test_bit(old_vid, adapter->active_vlans)) {
1402                 /* remove VID from filter table */
1403                 igb_vfta_set(hw, old_vid, false);
1404         }
1405 }
1406
1407 /**
1408  * igb_release_hw_control - release control of the h/w to f/w
1409  * @adapter: address of board private structure
1410  *
1411  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1412  * For ASF and Pass Through versions of f/w this means that the
1413  * driver is no longer loaded.
1414  *
1415  **/
1416 static void igb_release_hw_control(struct igb_adapter *adapter)
1417 {
1418         struct e1000_hw *hw = &adapter->hw;
1419         u32 ctrl_ext;
1420
1421         /* Let firmware take over control of h/w */
1422         ctrl_ext = rd32(E1000_CTRL_EXT);
1423         wr32(E1000_CTRL_EXT,
1424                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1425 }
1426
1427 /**
1428  * igb_get_hw_control - get control of the h/w from f/w
1429  * @adapter: address of board private structure
1430  *
1431  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1432  * For ASF and Pass Through versions of f/w this means that
1433  * the driver is loaded.
1434  *
1435  **/
1436 static void igb_get_hw_control(struct igb_adapter *adapter)
1437 {
1438         struct e1000_hw *hw = &adapter->hw;
1439         u32 ctrl_ext;
1440
1441         /* Let firmware know the driver has taken over */
1442         ctrl_ext = rd32(E1000_CTRL_EXT);
1443         wr32(E1000_CTRL_EXT,
1444                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1445 }
1446
1447 /**
1448  * igb_configure - configure the hardware for RX and TX
1449  * @adapter: private board structure
1450  **/
1451 static void igb_configure(struct igb_adapter *adapter)
1452 {
1453         struct net_device *netdev = adapter->netdev;
1454         int i;
1455
1456         igb_get_hw_control(adapter);
1457         igb_set_rx_mode(netdev);
1458
1459         igb_restore_vlan(adapter);
1460
1461         igb_setup_tctl(adapter);
1462         igb_setup_mrqc(adapter);
1463         igb_setup_rctl(adapter);
1464
1465         igb_configure_tx(adapter);
1466         igb_configure_rx(adapter);
1467
1468         igb_rx_fifo_flush_82575(&adapter->hw);
1469
1470         /* call igb_desc_unused which always leaves
1471          * at least 1 descriptor unused to make sure
1472          * next_to_use != next_to_clean */
1473         for (i = 0; i < adapter->num_rx_queues; i++) {
1474                 struct igb_ring *ring = adapter->rx_ring[i];
1475                 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1476         }
1477 }
1478
1479 /**
1480  * igb_power_up_link - Power up the phy/serdes link
1481  * @adapter: address of board private structure
1482  **/
1483 void igb_power_up_link(struct igb_adapter *adapter)
1484 {
1485         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1486                 igb_power_up_phy_copper(&adapter->hw);
1487         else
1488                 igb_power_up_serdes_link_82575(&adapter->hw);
1489         igb_reset_phy(&adapter->hw);
1490 }
1491
1492 /**
1493  * igb_power_down_link - Power down the phy/serdes link
1494  * @adapter: address of board private structure
1495  */
1496 static void igb_power_down_link(struct igb_adapter *adapter)
1497 {
1498         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1499                 igb_power_down_phy_copper_82575(&adapter->hw);
1500         else
1501                 igb_shutdown_serdes_link_82575(&adapter->hw);
1502 }
1503
1504 /**
1505  * igb_up - Open the interface and prepare it to handle traffic
1506  * @adapter: board private structure
1507  **/
1508 int igb_up(struct igb_adapter *adapter)
1509 {
1510         struct e1000_hw *hw = &adapter->hw;
1511         int i;
1512
1513         /* hardware has been reset, we need to reload some things */
1514         igb_configure(adapter);
1515
1516         clear_bit(__IGB_DOWN, &adapter->state);
1517
1518         for (i = 0; i < adapter->num_q_vectors; i++)
1519                 napi_enable(&(adapter->q_vector[i]->napi));
1520
1521         if (adapter->msix_entries)
1522                 igb_configure_msix(adapter);
1523         else
1524                 igb_assign_vector(adapter->q_vector[0], 0);
1525
1526         /* Clear any pending interrupts. */
1527         rd32(E1000_ICR);
1528         igb_irq_enable(adapter);
1529
1530         /* notify VFs that reset has been completed */
1531         if (adapter->vfs_allocated_count) {
1532                 u32 reg_data = rd32(E1000_CTRL_EXT);
1533                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1534                 wr32(E1000_CTRL_EXT, reg_data);
1535         }
1536
1537         netif_tx_start_all_queues(adapter->netdev);
1538
1539         /* start the watchdog. */
1540         hw->mac.get_link_status = 1;
1541         schedule_work(&adapter->watchdog_task);
1542
1543         return 0;
1544 }
1545
1546 void igb_down(struct igb_adapter *adapter)
1547 {
1548         struct net_device *netdev = adapter->netdev;
1549         struct e1000_hw *hw = &adapter->hw;
1550         u32 tctl, rctl;
1551         int i;
1552
1553         /* signal that we're down so the interrupt handler does not
1554          * reschedule our watchdog timer */
1555         set_bit(__IGB_DOWN, &adapter->state);
1556
1557         /* disable receives in the hardware */
1558         rctl = rd32(E1000_RCTL);
1559         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1560         /* flush and sleep below */
1561
1562         netif_tx_stop_all_queues(netdev);
1563
1564         /* disable transmits in the hardware */
1565         tctl = rd32(E1000_TCTL);
1566         tctl &= ~E1000_TCTL_EN;
1567         wr32(E1000_TCTL, tctl);
1568         /* flush both disables and wait for them to finish */
1569         wrfl();
1570         msleep(10);
1571
1572         for (i = 0; i < adapter->num_q_vectors; i++)
1573                 napi_disable(&(adapter->q_vector[i]->napi));
1574
1575         igb_irq_disable(adapter);
1576
1577         del_timer_sync(&adapter->watchdog_timer);
1578         del_timer_sync(&adapter->phy_info_timer);
1579
1580         netif_carrier_off(netdev);
1581
1582         /* record the stats before reset*/
1583         spin_lock(&adapter->stats64_lock);
1584         igb_update_stats(adapter, &adapter->stats64);
1585         spin_unlock(&adapter->stats64_lock);
1586
1587         adapter->link_speed = 0;
1588         adapter->link_duplex = 0;
1589
1590         if (!pci_channel_offline(adapter->pdev))
1591                 igb_reset(adapter);
1592         igb_clean_all_tx_rings(adapter);
1593         igb_clean_all_rx_rings(adapter);
1594 #ifdef CONFIG_IGB_DCA
1595
1596         /* since we reset the hardware DCA settings were cleared */
1597         igb_setup_dca(adapter);
1598 #endif
1599 }
1600
1601 void igb_reinit_locked(struct igb_adapter *adapter)
1602 {
1603         WARN_ON(in_interrupt());
1604         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1605                 msleep(1);
1606         igb_down(adapter);
1607         igb_up(adapter);
1608         clear_bit(__IGB_RESETTING, &adapter->state);
1609 }
1610
1611 void igb_reset(struct igb_adapter *adapter)
1612 {
1613         struct pci_dev *pdev = adapter->pdev;
1614         struct e1000_hw *hw = &adapter->hw;
1615         struct e1000_mac_info *mac = &hw->mac;
1616         struct e1000_fc_info *fc = &hw->fc;
1617         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1618         u16 hwm;
1619
1620         /* Repartition the PBA for MTUs greater than 9K.
1621          * CTRL.RST is required for the change to take effect.
1622          */
1623         switch (mac->type) {
1624         case e1000_i350:
1625         case e1000_82580:
1626                 pba = rd32(E1000_RXPBS);
1627                 pba = igb_rxpbs_adjust_82580(pba);
1628                 break;
1629         case e1000_82576:
1630                 pba = rd32(E1000_RXPBS);
1631                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1632                 break;
1633         case e1000_82575:
1634         default:
1635                 pba = E1000_PBA_34K;
1636                 break;
1637         }
1638
1639         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1640             (mac->type < e1000_82576)) {
1641                 /* adjust PBA for jumbo frames */
1642                 wr32(E1000_PBA, pba);
1643
1644                 /* To maintain wire speed transmits, the Tx FIFO should be
1645                  * large enough to accommodate two full transmit packets,
1646                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1647                  * the Rx FIFO should be large enough to accommodate at least
1648                  * one full receive packet and is similarly rounded up and
1649                  * expressed in KB. */
1650                 pba = rd32(E1000_PBA);
1651                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1652                 tx_space = pba >> 16;
1653                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1654                 pba &= 0xffff;
1655                 /* the Tx FIFO also stores 16 bytes of information about the Tx
1656                  * packet, but don't include the Ethernet FCS because hardware appends it */
1657                 min_tx_space = (adapter->max_frame_size +
1658                                 sizeof(union e1000_adv_tx_desc) -
1659                                 ETH_FCS_LEN) * 2;
1660                 min_tx_space = ALIGN(min_tx_space, 1024);
1661                 min_tx_space >>= 10;
1662                 /* software strips receive CRC, so leave room for it */
1663                 min_rx_space = adapter->max_frame_size;
1664                 min_rx_space = ALIGN(min_rx_space, 1024);
1665                 min_rx_space >>= 10;
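                /* purely illustrative: with a 9018-byte max frame this works
                 * out to min_tx_space = ALIGN((9018 + 16 - 4) * 2, 1024) >> 10
                 * = 18 KB and min_rx_space = ALIGN(9018, 1024) >> 10 = 9 KB
                 */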
1666
1667                 /* If current Tx allocation is less than the min Tx FIFO size,
1668                  * and the min Tx FIFO size is less than the current Rx FIFO
1669                  * allocation, take space away from current Rx allocation */
1670                 if (tx_space < min_tx_space &&
1671                     ((min_tx_space - tx_space) < pba)) {
1672                         pba = pba - (min_tx_space - tx_space);
1673
1674                         /* if short on rx space, rx wins and must trump tx
1675                          * adjustment */
1676                         if (pba < min_rx_space)
1677                                 pba = min_rx_space;
1678                 }
1679                 wr32(E1000_PBA, pba);
1680         }
1681
1682         /* flow control settings */
1683         /* The high water mark must be low enough to fit one full frame
1684          * (or the size used for early receive) above it in the Rx FIFO.
1685          * Set it to the lower of:
1686          * - 90% of the Rx FIFO size, or
1687          * - the full Rx FIFO size minus one full frame */
1688         hwm = min(((pba << 10) * 9 / 10),
1689                         ((pba << 10) - 2 * adapter->max_frame_size));
1690
1691         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1692         fc->low_water = fc->high_water - 16;
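        /* e.g. with a 34 KB Rx PBA and a 1522-byte max frame this comes to
         * min(31334, 31772) = 31334 bytes, so high_water = 31328 and
         * low_water = 31312 after the 16-byte alignment
         */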
1693         fc->pause_time = 0xFFFF;
1694         fc->send_xon = 1;
1695         fc->current_mode = fc->requested_mode;
1696
1697         /* disable receive for all VFs and wait one second */
1698         if (adapter->vfs_allocated_count) {
1699                 int i;
1700                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1701                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1702
1703                 /* ping all the active vfs to let them know we are going down */
1704                 igb_ping_all_vfs(adapter);
1705
1706                 /* disable transmits and receives */
1707                 wr32(E1000_VFRE, 0);
1708                 wr32(E1000_VFTE, 0);
1709         }
1710
1711         /* Allow time for pending master requests to run */
1712         hw->mac.ops.reset_hw(hw);
1713         wr32(E1000_WUC, 0);
1714
1715         if (hw->mac.ops.init_hw(hw))
1716                 dev_err(&pdev->dev, "Hardware Error\n");
1717
1718         igb_init_dmac(adapter, pba);
1719         if (!netif_running(adapter->netdev))
1720                 igb_power_down_link(adapter);
1721
1722         igb_update_mng_vlan(adapter);
1723
1724         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1725         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1726
1727         igb_get_phy_info(hw);
1728 }
1729
1730 static netdev_features_t igb_fix_features(struct net_device *netdev,
1731         netdev_features_t features)
1732 {
1733         /*
1734          * Since there is no support for separate rx/tx vlan accel
1735          * enable/disable make sure tx flag is always in same state as rx.
1736          */
1737         if (features & NETIF_F_HW_VLAN_RX)
1738                 features |= NETIF_F_HW_VLAN_TX;
1739         else
1740                 features &= ~NETIF_F_HW_VLAN_TX;
1741
1742         return features;
1743 }
1744
1745 static int igb_set_features(struct net_device *netdev,
1746         netdev_features_t features)
1747 {
1748         netdev_features_t changed = netdev->features ^ features;
1749         struct igb_adapter *adapter = netdev_priv(netdev);
1750
1751         if (changed & NETIF_F_HW_VLAN_RX)
1752                 igb_vlan_mode(netdev, features);
1753
1754         if (!(changed & NETIF_F_RXALL))
1755                 return 0;
1756
1757         netdev->features = features;
1758
1759         if (netif_running(netdev))
1760                 igb_reinit_locked(adapter);
1761         else
1762                 igb_reset(adapter);
1763
1764         return 0;
1765 }
1766
1767 static const struct net_device_ops igb_netdev_ops = {
1768         .ndo_open               = igb_open,
1769         .ndo_stop               = igb_close,
1770         .ndo_start_xmit         = igb_xmit_frame,
1771         .ndo_get_stats64        = igb_get_stats64,
1772         .ndo_set_rx_mode        = igb_set_rx_mode,
1773         .ndo_set_mac_address    = igb_set_mac,
1774         .ndo_change_mtu         = igb_change_mtu,
1775         .ndo_do_ioctl           = igb_ioctl,
1776         .ndo_tx_timeout         = igb_tx_timeout,
1777         .ndo_validate_addr      = eth_validate_addr,
1778         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1779         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1780         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1781         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1782         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1783         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1784 #ifdef CONFIG_NET_POLL_CONTROLLER
1785         .ndo_poll_controller    = igb_netpoll,
1786 #endif
1787         .ndo_fix_features       = igb_fix_features,
1788         .ndo_set_features       = igb_set_features,
1789 };
1790
1791 /**
1792  * igb_probe - Device Initialization Routine
1793  * @pdev: PCI device information struct
1794  * @ent: entry in igb_pci_tbl
1795  *
1796  * Returns 0 on success, negative on failure
1797  *
1798  * igb_probe initializes an adapter identified by a pci_dev structure.
1799  * The OS initialization, configuring of the adapter private structure,
1800  * and a hardware reset occur.
1801  **/
1802 static int __devinit igb_probe(struct pci_dev *pdev,
1803                                const struct pci_device_id *ent)
1804 {
1805         struct net_device *netdev;
1806         struct igb_adapter *adapter;
1807         struct e1000_hw *hw;
1808         u16 eeprom_data = 0;
1809         s32 ret_val;
1810         static int global_quad_port_a; /* global quad port a indication */
1811         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1812         unsigned long mmio_start, mmio_len;
1813         int err, pci_using_dac;
1814         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1815         u8 part_str[E1000_PBANUM_LENGTH];
1816
1817         /* Catch broken hardware that put the wrong VF device ID in
1818          * the PCIe SR-IOV capability.
1819          */
1820         if (pdev->is_virtfn) {
1821                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1822                      pci_name(pdev), pdev->vendor, pdev->device);
1823                 return -EINVAL;
1824         }
1825
1826         err = pci_enable_device_mem(pdev);
1827         if (err)
1828                 return err;
1829
1830         pci_using_dac = 0;
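        /* prefer 64-bit DMA addressing and only fall back to a 32-bit mask
         * if the platform cannot support it; pci_using_dac records whether
         * high (64-bit) DMA may be advertised to the stack later on
         */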
1831         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1832         if (!err) {
1833                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1834                 if (!err)
1835                         pci_using_dac = 1;
1836         } else {
1837                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1838                 if (err) {
1839                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1840                         if (err) {
1841                                 dev_err(&pdev->dev, "No usable DMA "
1842                                         "configuration, aborting\n");
1843                                 goto err_dma;
1844                         }
1845                 }
1846         }
1847
1848         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1849                                            IORESOURCE_MEM),
1850                                            igb_driver_name);
1851         if (err)
1852                 goto err_pci_reg;
1853
1854         pci_enable_pcie_error_reporting(pdev);
1855
1856         pci_set_master(pdev);
1857         pci_save_state(pdev);
1858
1859         err = -ENOMEM;
1860         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1861                                    IGB_MAX_TX_QUEUES);
1862         if (!netdev)
1863                 goto err_alloc_etherdev;
1864
1865         SET_NETDEV_DEV(netdev, &pdev->dev);
1866
1867         pci_set_drvdata(pdev, netdev);
1868         adapter = netdev_priv(netdev);
1869         adapter->netdev = netdev;
1870         adapter->pdev = pdev;
1871         hw = &adapter->hw;
1872         hw->back = adapter;
1873         adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
1874
1875         mmio_start = pci_resource_start(pdev, 0);
1876         mmio_len = pci_resource_len(pdev, 0);
1877
1878         err = -EIO;
1879         hw->hw_addr = ioremap(mmio_start, mmio_len);
1880         if (!hw->hw_addr)
1881                 goto err_ioremap;
1882
1883         netdev->netdev_ops = &igb_netdev_ops;
1884         igb_set_ethtool_ops(netdev);
1885         netdev->watchdog_timeo = 5 * HZ;
1886
1887         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1888
1889         netdev->mem_start = mmio_start;
1890         netdev->mem_end = mmio_start + mmio_len;
1891
1892         /* PCI config space info */
1893         hw->vendor_id = pdev->vendor;
1894         hw->device_id = pdev->device;
1895         hw->revision_id = pdev->revision;
1896         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1897         hw->subsystem_device_id = pdev->subsystem_device;
1898
1899         /* Copy the default MAC, PHY and NVM function pointers */
1900         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1901         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1902         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1903         /* Initialize skew-specific constants */
1904         err = ei->get_invariants(hw);
1905         if (err)
1906                 goto err_sw_init;
1907
1908         /* setup the private structure */
1909         err = igb_sw_init(adapter);
1910         if (err)
1911                 goto err_sw_init;
1912
1913         igb_get_bus_info_pcie(hw);
1914
1915         hw->phy.autoneg_wait_to_complete = false;
1916
1917         /* Copper options */
1918         if (hw->phy.media_type == e1000_media_type_copper) {
1919                 hw->phy.mdix = AUTO_ALL_MODES;
1920                 hw->phy.disable_polarity_correction = false;
1921                 hw->phy.ms_type = e1000_ms_hw_default;
1922         }
1923
1924         if (igb_check_reset_block(hw))
1925                 dev_info(&pdev->dev,
1926                         "PHY reset is blocked due to SOL/IDER session.\n");
1927
1928         /*
1929          * features is initialized to 0 at allocation; it might have bits
1930          * set by igb_sw_init, so we should use an or instead of an
1931          * assignment.
1932          */
1933         netdev->features |= NETIF_F_SG |
1934                             NETIF_F_IP_CSUM |
1935                             NETIF_F_IPV6_CSUM |
1936                             NETIF_F_TSO |
1937                             NETIF_F_TSO6 |
1938                             NETIF_F_RXHASH |
1939                             NETIF_F_RXCSUM |
1940                             NETIF_F_HW_VLAN_RX |
1941                             NETIF_F_HW_VLAN_TX;
1942
1943         /* copy netdev features into list of user selectable features */
1944         netdev->hw_features |= netdev->features;
1945         netdev->hw_features |= NETIF_F_RXALL;
1946
1947         /* set this bit last since it cannot be part of hw_features */
1948         netdev->features |= NETIF_F_HW_VLAN_FILTER;
1949
1950         netdev->vlan_features |= NETIF_F_TSO |
1951                                  NETIF_F_TSO6 |
1952                                  NETIF_F_IP_CSUM |
1953                                  NETIF_F_IPV6_CSUM |
1954                                  NETIF_F_SG;
1955
1956         netdev->priv_flags |= IFF_SUPP_NOFCS;
1957
1958         if (pci_using_dac) {
1959                 netdev->features |= NETIF_F_HIGHDMA;
1960                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1961         }
1962
1963         if (hw->mac.type >= e1000_82576) {
1964                 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1965                 netdev->features |= NETIF_F_SCTP_CSUM;
1966         }
1967
1968         netdev->priv_flags |= IFF_UNICAST_FLT;
1969
1970         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1971
1972         /* before reading the NVM, reset the controller to put the device in a
1973          * known good starting state */
1974         hw->mac.ops.reset_hw(hw);
1975
1976         /* make sure the NVM is good */
1977         if (hw->nvm.ops.validate(hw) < 0) {
1978                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1979                 err = -EIO;
1980                 goto err_eeprom;
1981         }
1982
1983         /* copy the MAC address out of the NVM */
1984         if (hw->mac.ops.read_mac_addr(hw))
1985                 dev_err(&pdev->dev, "NVM Read Error\n");
1986
1987         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1988         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1989
1990         if (!is_valid_ether_addr(netdev->perm_addr)) {
1991                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1992                 err = -EIO;
1993                 goto err_eeprom;
1994         }
1995
1996         setup_timer(&adapter->watchdog_timer, igb_watchdog,
1997                     (unsigned long) adapter);
1998         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1999                     (unsigned long) adapter);
2000
2001         INIT_WORK(&adapter->reset_task, igb_reset_task);
2002         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2003
2004         /* Initialize link properties that are user-changeable */
2005         adapter->fc_autoneg = true;
2006         hw->mac.autoneg = true;
2007         hw->phy.autoneg_advertised = 0x2f;
2008
2009         hw->fc.requested_mode = e1000_fc_default;
2010         hw->fc.current_mode = e1000_fc_default;
2011
2012         igb_validate_mdi_setting(hw);
2013
2014         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2015          * enable the ACPI Magic Packet filter
2016          */
2017
2018         if (hw->bus.func == 0)
2019                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2020         else if (hw->mac.type >= e1000_82580)
2021                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2022                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2023                                  &eeprom_data);
2024         else if (hw->bus.func == 1)
2025                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2026
2027         if (eeprom_data & eeprom_apme_mask)
2028                 adapter->eeprom_wol |= E1000_WUFC_MAG;
2029
2030         /* now that we have the eeprom settings, apply the special cases where
2031          * the eeprom may be wrong or the board simply won't support wake on
2032          * lan on a particular port */
2033         switch (pdev->device) {
2034         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2035                 adapter->eeprom_wol = 0;
2036                 break;
2037         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2038         case E1000_DEV_ID_82576_FIBER:
2039         case E1000_DEV_ID_82576_SERDES:
2040                 /* Wake events are only supported on port A for dual fiber
2041                  * regardless of eeprom setting */
2042                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2043                         adapter->eeprom_wol = 0;
2044                 break;
2045         case E1000_DEV_ID_82576_QUAD_COPPER:
2046         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2047                 /* if quad port adapter, disable WoL on all but port A */
2048                 if (global_quad_port_a != 0)
2049                         adapter->eeprom_wol = 0;
2050                 else
2051                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2052                 /* Reset for multiple quad port adapters */
2053                 if (++global_quad_port_a == 4)
2054                         global_quad_port_a = 0;
2055                 break;
2056         }
2057
2058         /* initialize the wol settings based on the eeprom settings */
2059         adapter->wol = adapter->eeprom_wol;
2060         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2061
2062         /* reset the hardware with the new settings */
2063         igb_reset(adapter);
2064
2065         /* let the f/w know that the h/w is now under the control of the
2066          * driver. */
2067         igb_get_hw_control(adapter);
2068
2069         strcpy(netdev->name, "eth%d");
2070         err = register_netdev(netdev);
2071         if (err)
2072                 goto err_register;
2073
2074         /* carrier off reporting is important to ethtool even BEFORE open */
2075         netif_carrier_off(netdev);
2076
2077 #ifdef CONFIG_IGB_DCA
2078         if (dca_add_requester(&pdev->dev) == 0) {
2079                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2080                 dev_info(&pdev->dev, "DCA enabled\n");
2081                 igb_setup_dca(adapter);
2082         }
2083
2084 #endif
2085 #ifdef CONFIG_IGB_PTP
2086         /* do hw tstamp init after resetting */
2087         igb_ptp_init(adapter);
2088
2089 #endif
2090         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2091         /* print bus type/speed/width info */
2092         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2093                  netdev->name,
2094                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2095                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2096                                                             "unknown"),
2097                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2098                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2099                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2100                    "unknown"),
2101                  netdev->dev_addr);
2102
2103         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2104         if (ret_val)
2105                 strcpy(part_str, "Unknown");
2106         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2107         dev_info(&pdev->dev,
2108                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2109                 adapter->msix_entries ? "MSI-X" :
2110                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2111                 adapter->num_rx_queues, adapter->num_tx_queues);
2112         switch (hw->mac.type) {
2113         case e1000_i350:
2114                 igb_set_eee_i350(hw);
2115                 break;
2116         default:
2117                 break;
2118         }
2119
2120         pm_runtime_put_noidle(&pdev->dev);
2121         return 0;
2122
2123 err_register:
2124         igb_release_hw_control(adapter);
2125 err_eeprom:
2126         if (!igb_check_reset_block(hw))
2127                 igb_reset_phy(hw);
2128
2129         if (hw->flash_address)
2130                 iounmap(hw->flash_address);
2131 err_sw_init:
2132         igb_clear_interrupt_scheme(adapter);
2133         iounmap(hw->hw_addr);
2134 err_ioremap:
2135         free_netdev(netdev);
2136 err_alloc_etherdev:
2137         pci_release_selected_regions(pdev,
2138                                      pci_select_bars(pdev, IORESOURCE_MEM));
2139 err_pci_reg:
2140 err_dma:
2141         pci_disable_device(pdev);
2142         return err;
2143 }
2144
2145 /**
2146  * igb_remove - Device Removal Routine
2147  * @pdev: PCI device information struct
2148  *
2149  * igb_remove is called by the PCI subsystem to alert the driver
2150  * that it should release a PCI device.  This could be caused by a
2151  * Hot-Plug event, or because the driver is going to be removed from
2152  * memory.
2153  **/
2154 static void __devexit igb_remove(struct pci_dev *pdev)
2155 {
2156         struct net_device *netdev = pci_get_drvdata(pdev);
2157         struct igb_adapter *adapter = netdev_priv(netdev);
2158         struct e1000_hw *hw = &adapter->hw;
2159
2160         pm_runtime_get_noresume(&pdev->dev);
2161 #ifdef CONFIG_IGB_PTP
2162         igb_ptp_remove(adapter);
2163
2164 #endif
2165         /*
2166          * The watchdog timer may be rescheduled, so explicitly
2167          * prevent the watchdog from being rescheduled.
2168          */
2169         set_bit(__IGB_DOWN, &adapter->state);
2170         del_timer_sync(&adapter->watchdog_timer);
2171         del_timer_sync(&adapter->phy_info_timer);
2172
2173         cancel_work_sync(&adapter->reset_task);
2174         cancel_work_sync(&adapter->watchdog_task);
2175
2176 #ifdef CONFIG_IGB_DCA
2177         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2178                 dev_info(&pdev->dev, "DCA disabled\n");
2179                 dca_remove_requester(&pdev->dev);
2180                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2181                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2182         }
2183 #endif
2184
2185         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2186          * would have already happened in close and is redundant. */
2187         igb_release_hw_control(adapter);
2188
2189         unregister_netdev(netdev);
2190
2191         igb_clear_interrupt_scheme(adapter);
2192
2193 #ifdef CONFIG_PCI_IOV
2194         /* reclaim resources allocated to VFs */
2195         if (adapter->vf_data) {
2196                 /* disable iov and allow time for transactions to clear */
2197                 if (!igb_check_vf_assignment(adapter)) {
2198                         pci_disable_sriov(pdev);
2199                         msleep(500);
2200                 } else {
2201                         dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2202                 }
2203
2204                 kfree(adapter->vf_data);
2205                 adapter->vf_data = NULL;
2206                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2207                 wrfl();
2208                 msleep(100);
2209                 dev_info(&pdev->dev, "IOV Disabled\n");
2210         }
2211 #endif
2212
2213         iounmap(hw->hw_addr);
2214         if (hw->flash_address)
2215                 iounmap(hw->flash_address);
2216         pci_release_selected_regions(pdev,
2217                                      pci_select_bars(pdev, IORESOURCE_MEM));
2218
2219         kfree(adapter->shadow_vfta);
2220         free_netdev(netdev);
2221
2222         pci_disable_pcie_error_reporting(pdev);
2223
2224         pci_disable_device(pdev);
2225 }
2226
2227 /**
2228  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2229  * @adapter: board private structure to initialize
2230  *
2231  * This function initializes the vf specific data storage and then attempts to
2232  * allocate the VFs.  The reason for ordering it this way is that it is much
2233  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2234  * the memory for the VFs.
2235  **/
2236 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2237 {
2238 #ifdef CONFIG_PCI_IOV
2239         struct pci_dev *pdev = adapter->pdev;
2240         int old_vfs = igb_find_enabled_vfs(adapter);
2241         int i;
2242
2243         if (old_vfs) {
2244                 dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2245                          "max_vfs setting of %d\n", old_vfs, max_vfs);
2246                 adapter->vfs_allocated_count = old_vfs;
2247         }
2248
2249         if (!adapter->vfs_allocated_count)
2250                 return;
2251
2252         adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2253                                 sizeof(struct vf_data_storage), GFP_KERNEL);
2254         /* if allocation failed then we do not support SR-IOV */
2255         if (!adapter->vf_data) {
2256                 adapter->vfs_allocated_count = 0;
2257                 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2258                         "Data Storage\n");
2259                 goto out;
2260         }
2261
2262         if (!old_vfs) {
2263                 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2264                         goto err_out;
2265         }
2266         dev_info(&pdev->dev, "%d VFs allocated\n",
2267                  adapter->vfs_allocated_count);
2268         for (i = 0; i < adapter->vfs_allocated_count; i++)
2269                 igb_vf_configure(adapter, i);
2270
2271         /* DMA Coalescing is not supported in IOV mode. */
2272         adapter->flags &= ~IGB_FLAG_DMAC;
2273         goto out;
2274 err_out:
2275         kfree(adapter->vf_data);
2276         adapter->vf_data = NULL;
2277         adapter->vfs_allocated_count = 0;
2278 out:
2279         return;
2280 #endif /* CONFIG_PCI_IOV */
2281 }
2282
2283 /**
2284  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2285  * @adapter: board private structure to initialize
2286  *
2287  * igb_sw_init initializes the Adapter private data structure.
2288  * Fields are initialized based on PCI device information and
2289  * OS network device settings (MTU size).
2290  **/
2291 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2292 {
2293         struct e1000_hw *hw = &adapter->hw;
2294         struct net_device *netdev = adapter->netdev;
2295         struct pci_dev *pdev = adapter->pdev;
2296
2297         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2298
2299         /* set default ring sizes */
2300         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2301         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2302
2303         /* set default ITR values */
2304         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2305         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2306
2307         /* set default work limits */
2308         adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2309
2310         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2311                                   VLAN_HLEN;
2312         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
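        /* e.g. with the default 1500-byte MTU: 1500 + 14 (ETH_HLEN) +
         * 4 (ETH_FCS_LEN) + 4 (VLAN_HLEN) = 1522 bytes; the minimum frame is
         * 60 (ETH_ZLEN) + 4 = 64 bytes
         */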
2313
2314         adapter->node = -1;
2315
2316         spin_lock_init(&adapter->stats64_lock);
2317 #ifdef CONFIG_PCI_IOV
2318         switch (hw->mac.type) {
2319         case e1000_82576:
2320         case e1000_i350:
2321                 if (max_vfs > 7) {
2322                         dev_warn(&pdev->dev,
2323                                  "Maximum of 7 VFs per PF, using max\n");
2324                         adapter->vfs_allocated_count = 7;
2325                 } else
2326                         adapter->vfs_allocated_count = max_vfs;
2327                 break;
2328         default:
2329                 break;
2330         }
2331 #endif /* CONFIG_PCI_IOV */
2332         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2333         /* i350 cannot do RSS and SR-IOV at the same time */
2334         if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2335                 adapter->rss_queues = 1;
2336
2337         /*
2338          * if rss_queues > 4, or VFs are going to be allocated while rss_queues
2339          * is greater than 1, then we should combine the Tx and Rx queues into
2340          * queue pairs so that each pair shares an interrupt; otherwise the
2341          * limited supply of MSI-X vectors could be exhausted
2341          */
2342         if ((adapter->rss_queues > 4) ||
2343             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2344                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2345
2346         /* Setup and initialize a copy of the hw vlan table array */
2347         adapter->shadow_vfta = kzalloc(sizeof(u32) *
2348                                 E1000_VLAN_FILTER_TBL_SIZE,
2349                                 GFP_ATOMIC);
2350
2351         /* This call may decrease the number of queues */
2352         if (igb_init_interrupt_scheme(adapter)) {
2353                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2354                 return -ENOMEM;
2355         }
2356
2357         igb_probe_vfs(adapter);
2358
2359         /* Explicitly disable IRQ since the NIC can be in any state. */
2360         igb_irq_disable(adapter);
2361
2362         if (hw->mac.type == e1000_i350)
2363                 adapter->flags &= ~IGB_FLAG_DMAC;
2364
2365         set_bit(__IGB_DOWN, &adapter->state);
2366         return 0;
2367 }
2368
2369 /**
2370  * igb_open - Called when a network interface is made active
2371  * @netdev: network interface device structure
2372  *
2373  * Returns 0 on success, negative value on failure
2374  *
2375  * The open entry point is called when a network interface is made
2376  * active by the system (IFF_UP).  At this point all resources needed
2377  * for transmit and receive operations are allocated, the interrupt
2378  * handler is registered with the OS, the watchdog timer is started,
2379  * and the stack is notified that the interface is ready.
2380  **/
2381 static int __igb_open(struct net_device *netdev, bool resuming)
2382 {
2383         struct igb_adapter *adapter = netdev_priv(netdev);
2384         struct e1000_hw *hw = &adapter->hw;
2385         struct pci_dev *pdev = adapter->pdev;
2386         int err;
2387         int i;
2388
2389         /* disallow open during test */
2390         if (test_bit(__IGB_TESTING, &adapter->state)) {
2391                 WARN_ON(resuming);
2392                 return -EBUSY;
2393         }
2394
2395         if (!resuming)
2396                 pm_runtime_get_sync(&pdev->dev);
2397
2398         netif_carrier_off(netdev);
2399
2400         /* allocate transmit descriptors */
2401         err = igb_setup_all_tx_resources(adapter);
2402         if (err)
2403                 goto err_setup_tx;
2404
2405         /* allocate receive descriptors */
2406         err = igb_setup_all_rx_resources(adapter);
2407         if (err)
2408                 goto err_setup_rx;
2409
2410         igb_power_up_link(adapter);
2411
2412         /* before we allocate an interrupt, we must be ready to handle it.
2413          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2414          * as soon as we call pci_request_irq, so we have to set up our
2415          * clean_rx handler before we do so.  */
2416         igb_configure(adapter);
2417
2418         err = igb_request_irq(adapter);
2419         if (err)
2420                 goto err_req_irq;
2421
2422         /* From here on the code is the same as igb_up() */
2423         clear_bit(__IGB_DOWN, &adapter->state);
2424
2425         for (i = 0; i < adapter->num_q_vectors; i++)
2426                 napi_enable(&(adapter->q_vector[i]->napi));
2427
2428         /* Clear any pending interrupts. */
2429         rd32(E1000_ICR);
2430
2431         igb_irq_enable(adapter);
2432
2433         /* notify VFs that reset has been completed */
2434         if (adapter->vfs_allocated_count) {
2435                 u32 reg_data = rd32(E1000_CTRL_EXT);
2436                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2437                 wr32(E1000_CTRL_EXT, reg_data);
2438         }
2439
2440         netif_tx_start_all_queues(netdev);
2441
2442         if (!resuming)
2443                 pm_runtime_put(&pdev->dev);
2444
2445         /* start the watchdog. */
2446         hw->mac.get_link_status = 1;
2447         schedule_work(&adapter->watchdog_task);
2448
2449         return 0;
2450
2451 err_req_irq:
2452         igb_release_hw_control(adapter);
2453         igb_power_down_link(adapter);
2454         igb_free_all_rx_resources(adapter);
2455 err_setup_rx:
2456         igb_free_all_tx_resources(adapter);
2457 err_setup_tx:
2458         igb_reset(adapter);
2459         if (!resuming)
2460                 pm_runtime_put(&pdev->dev);
2461
2462         return err;
2463 }
2464
2465 static int igb_open(struct net_device *netdev)
2466 {
2467         return __igb_open(netdev, false);
2468 }
2469
2470 /**
2471  * igb_close - Disables a network interface
2472  * @netdev: network interface device structure
2473  *
2474  * Returns 0, this is not allowed to fail
2475  *
2476  * The close entry point is called when an interface is de-activated
2477  * by the OS.  The hardware is still under the driver's control, but
2478  * needs to be disabled.  A global MAC reset is issued to stop the
2479  * hardware, and all transmit and receive resources are freed.
2480  **/
2481 static int __igb_close(struct net_device *netdev, bool suspending)
2482 {
2483         struct igb_adapter *adapter = netdev_priv(netdev);
2484         struct pci_dev *pdev = adapter->pdev;
2485
2486         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2487
2488         if (!suspending)
2489                 pm_runtime_get_sync(&pdev->dev);
2490
2491         igb_down(adapter);
2492         igb_free_irq(adapter);
2493
2494         igb_free_all_tx_resources(adapter);
2495         igb_free_all_rx_resources(adapter);
2496
2497         if (!suspending)
2498                 pm_runtime_put_sync(&pdev->dev);
2499         return 0;
2500 }
2501
2502 static int igb_close(struct net_device *netdev)
2503 {
2504         return __igb_close(netdev, false);
2505 }
2506
2507 /**
2508  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2509  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2510  *
2511  * Return 0 on success, negative on failure
2512  **/
2513 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2514 {
2515         struct device *dev = tx_ring->dev;
2516         int orig_node = dev_to_node(dev);
2517         int size;
2518
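        /* allocate the buffer_info array and the descriptor ring on the
         * ring's preferred NUMA node first; if either allocation fails,
         * retry on any node rather than failing outright
         */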
2519         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2520         tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2521         if (!tx_ring->tx_buffer_info)
2522                 tx_ring->tx_buffer_info = vzalloc(size);
2523         if (!tx_ring->tx_buffer_info)
2524                 goto err;
2525
2526         /* round up to nearest 4K */
2527         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2528         tx_ring->size = ALIGN(tx_ring->size, 4096);
2529
2530         set_dev_node(dev, tx_ring->numa_node);
2531         tx_ring->desc = dma_alloc_coherent(dev,
2532                                            tx_ring->size,
2533                                            &tx_ring->dma,
2534                                            GFP_KERNEL);
2535         set_dev_node(dev, orig_node);
2536         if (!tx_ring->desc)
2537                 tx_ring->desc = dma_alloc_coherent(dev,
2538                                                    tx_ring->size,
2539                                                    &tx_ring->dma,
2540                                                    GFP_KERNEL);
2541
2542         if (!tx_ring->desc)
2543                 goto err;
2544
2545         tx_ring->next_to_use = 0;
2546         tx_ring->next_to_clean = 0;
2547
2548         return 0;
2549
2550 err:
2551         vfree(tx_ring->tx_buffer_info);
2552         dev_err(dev,
2553                 "Unable to allocate memory for the transmit descriptor ring\n");
2554         return -ENOMEM;
2555 }
2556
2557 /**
2558  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2559  *                                (Descriptors) for all queues
2560  * @adapter: board private structure
2561  *
2562  * Return 0 on success, negative on failure
2563  **/
2564 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2565 {
2566         struct pci_dev *pdev = adapter->pdev;
2567         int i, err = 0;
2568
2569         for (i = 0; i < adapter->num_tx_queues; i++) {
2570                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2571                 if (err) {
2572                         dev_err(&pdev->dev,
2573                                 "Allocation for Tx Queue %u failed\n", i);
2574                         for (i--; i >= 0; i--)
2575                                 igb_free_tx_resources(adapter->tx_ring[i]);
2576                         break;
2577                 }
2578         }
2579
2580         return err;
2581 }
2582
2583 /**
2584  * igb_setup_tctl - configure the transmit control registers
2585  * @adapter: Board private structure
2586  **/
2587 void igb_setup_tctl(struct igb_adapter *adapter)
2588 {
2589         struct e1000_hw *hw = &adapter->hw;
2590         u32 tctl;
2591
2592         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2593         wr32(E1000_TXDCTL(0), 0);
2594
2595         /* Program the Transmit Control Register */
2596         tctl = rd32(E1000_TCTL);
2597         tctl &= ~E1000_TCTL_CT;
2598         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2599                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2600
2601         igb_config_collision_dist(hw);
2602
2603         /* Enable transmits */
2604         tctl |= E1000_TCTL_EN;
2605
2606         wr32(E1000_TCTL, tctl);
2607 }
2608
2609 /**
2610  * igb_configure_tx_ring - Configure transmit ring after Reset
2611  * @adapter: board private structure
2612  * @ring: tx ring to configure
2613  *
2614  * Configure a transmit ring after a reset.
2615  **/
2616 void igb_configure_tx_ring(struct igb_adapter *adapter,
2617                            struct igb_ring *ring)
2618 {
2619         struct e1000_hw *hw = &adapter->hw;
2620         u32 txdctl = 0;
2621         u64 tdba = ring->dma;
2622         int reg_idx = ring->reg_idx;
2623
2624         /* disable the queue */
2625         wr32(E1000_TXDCTL(reg_idx), 0);
2626         wrfl();
2627         mdelay(10);
2628
2629         wr32(E1000_TDLEN(reg_idx),
2630                         ring->count * sizeof(union e1000_adv_tx_desc));
2631         wr32(E1000_TDBAL(reg_idx),
2632                         tdba & 0x00000000ffffffffULL);
2633         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2634
2635         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2636         wr32(E1000_TDH(reg_idx), 0);
2637         writel(0, ring->tail);
2638
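        /* the prefetch, host and write-back thresholds occupy separate byte
         * lanes of TXDCTL, hence the shifts by 0, 8 and 16 below
         */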
2639         txdctl |= IGB_TX_PTHRESH;
2640         txdctl |= IGB_TX_HTHRESH << 8;
2641         txdctl |= IGB_TX_WTHRESH << 16;
2642
2643         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2644         wr32(E1000_TXDCTL(reg_idx), txdctl);
2645
2646         netdev_tx_reset_queue(txring_txq(ring));
2647 }
2648
2649 /**
2650  * igb_configure_tx - Configure transmit Unit after Reset
2651  * @adapter: board private structure
2652  *
2653  * Configure the Tx unit of the MAC after a reset.
2654  **/
2655 static void igb_configure_tx(struct igb_adapter *adapter)
2656 {
2657         int i;
2658
2659         for (i = 0; i < adapter->num_tx_queues; i++)
2660                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2661 }
2662
2663 /**
2664  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2665  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2666  *
2667  * Returns 0 on success, negative on failure
2668  **/
2669 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2670 {
2671         struct device *dev = rx_ring->dev;
2672         int orig_node = dev_to_node(dev);
2673         int size, desc_len;
2674
2675         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2676         rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2677         if (!rx_ring->rx_buffer_info)
2678                 rx_ring->rx_buffer_info = vzalloc(size);
2679         if (!rx_ring->rx_buffer_info)
2680                 goto err;
2681
2682         desc_len = sizeof(union e1000_adv_rx_desc);
2683
2684         /* Round up to nearest 4K */
2685         rx_ring->size = rx_ring->count * desc_len;
2686         rx_ring->size = ALIGN(rx_ring->size, 4096);
2687
2688         set_dev_node(dev, rx_ring->numa_node);
2689         rx_ring->desc = dma_alloc_coherent(dev,
2690                                            rx_ring->size,
2691                                            &rx_ring->dma,
2692                                            GFP_KERNEL);
2693         set_dev_node(dev, orig_node);
2694         if (!rx_ring->desc)
2695                 rx_ring->desc = dma_alloc_coherent(dev,
2696                                                    rx_ring->size,
2697                                                    &rx_ring->dma,
2698                                                    GFP_KERNEL);
2699
2700         if (!rx_ring->desc)
2701                 goto err;
2702
2703         rx_ring->next_to_clean = 0;
2704         rx_ring->next_to_use = 0;
2705
2706         return 0;
2707
2708 err:
2709         vfree(rx_ring->rx_buffer_info);
2710         rx_ring->rx_buffer_info = NULL;
2711         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2712                 " ring\n");
2713         return -ENOMEM;
2714 }
2715
2716 /**
2717  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2718  *                                (Descriptors) for all queues
2719  * @adapter: board private structure
2720  *
2721  * Return 0 on success, negative on failure
2722  **/
2723 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2724 {
2725         struct pci_dev *pdev = adapter->pdev;
2726         int i, err = 0;
2727
2728         for (i = 0; i < adapter->num_rx_queues; i++) {
2729                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2730                 if (err) {
2731                         dev_err(&pdev->dev,
2732                                 "Allocation for Rx Queue %u failed\n", i);
2733                         for (i--; i >= 0; i--)
2734                                 igb_free_rx_resources(adapter->rx_ring[i]);
2735                         break;
2736                 }
2737         }
2738
2739         return err;
2740 }
2741
2742 /**
2743  * igb_setup_mrqc - configure the multiple receive queue control registers
2744  * @adapter: Board private structure
2745  **/
2746 static void igb_setup_mrqc(struct igb_adapter *adapter)
2747 {
2748         struct e1000_hw *hw = &adapter->hw;
2749         u32 mrqc, rxcsum;
2750         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2751         union e1000_reta {
2752                 u32 dword;
2753                 u8  bytes[4];
2754         } reta;
2755         static const u8 rsshash[40] = {
2756                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2757                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2758                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2759                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2760
2761         /* Fill out hash function seeds */
2762         for (j = 0; j < 10; j++) {
2763                 u32 rsskey = rsshash[(j * 4)];
2764                 rsskey |= rsshash[(j * 4) + 1] << 8;
2765                 rsskey |= rsshash[(j * 4) + 2] << 16;
2766                 rsskey |= rsshash[(j * 4) + 3] << 24;
2767                 array_wr32(E1000_RSSRK(0), j, rsskey);
2768         }
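        /* each RSSRK register takes four consecutive key bytes with byte 0
         * in the least significant position, e.g. the first write above
         * packs 6d 5a 56 da into 0xda565a6d
         */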
2769
2770         num_rx_queues = adapter->rss_queues;
2771
2772         if (adapter->vfs_allocated_count) {
2773                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2774                 switch (hw->mac.type) {
2775                 case e1000_i350:
2776                 case e1000_82580:
2777                         num_rx_queues = 1;
2778                         shift = 0;
2779                         break;
2780                 case e1000_82576:
2781                         shift = 3;
2782                         num_rx_queues = 2;
2783                         break;
2784                 case e1000_82575:
2785                         shift = 2;
2786                         shift2 = 6;
2787                 default:
2788                         break;
2789                 }
2790         } else {
2791                 if (hw->mac.type == e1000_82575)
2792                         shift = 6;
2793         }
2794
2795         for (j = 0; j < (32 * 4); j++) {
2796                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2797                 if (shift2)
2798                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2799                 if ((j & 3) == 3)
2800                         wr32(E1000_RETA(j >> 2), reta.dword);
2801         }
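        /* the loop above fills the 128-entry redirection table four entries
         * per 32-bit RETA register; with e.g. num_rx_queues = 4 and shift = 0
         * the entries simply cycle 0, 1, 2, 3, 0, 1, ...
         */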
2802
2803         /*
2804          * Disable raw packet checksumming so that RSS hash is placed in
2805          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2806          * offloads as they are enabled by default
2807          */
2808         rxcsum = rd32(E1000_RXCSUM);
2809         rxcsum |= E1000_RXCSUM_PCSD;
2810
2811         if (adapter->hw.mac.type >= e1000_82576)
2812                 /* Enable Receive Checksum Offload for SCTP */
2813                 rxcsum |= E1000_RXCSUM_CRCOFL;
2814
2815         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2816         wr32(E1000_RXCSUM, rxcsum);
2817
2818         /* If VMDq is enabled then we set the appropriate mode for that, else
2819          * we default to RSS so that an RSS hash is calculated per packet even
2820          * if we are only using one queue */
2821         if (adapter->vfs_allocated_count) {
2822                 if (hw->mac.type > e1000_82575) {
2823                         /* Set the default pool for the PF's first queue */
2824                         u32 vtctl = rd32(E1000_VT_CTL);
2825                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2826                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2827                         vtctl |= adapter->vfs_allocated_count <<
2828                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2829                         wr32(E1000_VT_CTL, vtctl);
2830                 }
2831                 if (adapter->rss_queues > 1)
2832                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2833                 else
2834                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2835         } else {
2836                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2837         }
2838         igb_vmm_control(adapter);
2839
2840         /*
2841          * Generate RSS hash based on TCP port numbers and/or
2842          * IPv4/v6 src and dst addresses since UDP cannot be
2843          * hashed reliably due to IP fragmentation
2844          */
2845         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2846                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2847                 E1000_MRQC_RSS_FIELD_IPV6 |
2848                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2849                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2850
2851         wr32(E1000_MRQC, mrqc);
2852 }
2853
2854 /**
2855  * igb_setup_rctl - configure the receive control registers
2856  * @adapter: Board private structure
2857  **/
2858 void igb_setup_rctl(struct igb_adapter *adapter)
2859 {
2860         struct e1000_hw *hw = &adapter->hw;
2861         u32 rctl;
2862
2863         rctl = rd32(E1000_RCTL);
2864
2865         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2866         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2867
2868         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2869                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2870
2871         /*
2872          * enable stripping of CRC. It's unlikely this will break BMC
2873          * redirection as it did with e1000. Newer features require
2874          * that the HW strips the CRC.
2875          */
2876         rctl |= E1000_RCTL_SECRC;
2877
2878         /* disable store bad packets and clear size bits. */
2879         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2880
2881         /* enable LPE to prevent packets larger than max_frame_size */
2882         rctl |= E1000_RCTL_LPE;
2883
2884         /* disable queue 0 to prevent tail write w/o re-config */
2885         wr32(E1000_RXDCTL(0), 0);
2886
2887         /* Attention!!!  For SR-IOV PF driver operations you must enable
2888          * queue drop for all VF and PF queues to prevent head of line blocking
2889          * if an un-trusted VF does not provide descriptors to hardware.
2890          */
2891         if (adapter->vfs_allocated_count) {
2892                 /* set all queue drop enable bits */
2893                 wr32(E1000_QDE, ALL_QUEUES);
2894         }
2895
2896         /* This is useful for sniffing bad packets. */
2897         if (adapter->netdev->features & NETIF_F_RXALL) {
2898                 /* UPE and MPE will be handled by the normal PROMISC logic
2899                  * in igb_set_rx_mode */
2900                 rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
2901                          E1000_RCTL_BAM | /* RX All Bcast Pkts */
2902                          E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
2903
2904                 rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
2905                           E1000_RCTL_DPF | /* Allow filtered pause */
2906                           E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
2907                 /* Do not mess with E1000_CTRL_VME, it affects transmit as well,
2908                  * and that breaks VLANs.
2909                  */
2910         }
2911
2912         wr32(E1000_RCTL, rctl);
2913 }
2914
2915 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2916                                    int vfn)
2917 {
2918         struct e1000_hw *hw = &adapter->hw;
2919         u32 vmolr;
2920
2921         /* if this is a VF (not the PF) and it has VLANs enabled,
2922          * increase the size to make room for the VLAN tag */
2923         if (vfn < adapter->vfs_allocated_count &&
2924             adapter->vf_data[vfn].vlans_enabled)
2925                 size += VLAN_TAG_SIZE;
2926
2927         vmolr = rd32(E1000_VMOLR(vfn));
2928         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2929         vmolr |= size | E1000_VMOLR_LPE;
2930         wr32(E1000_VMOLR(vfn), vmolr);
2931
2932         return 0;
2933 }
2934
2935 /**
2936  * igb_rlpml_set - set maximum receive packet size
2937  * @adapter: board private structure
2938  *
2939  * Configure maximum receivable packet size.
2940  **/
2941 static void igb_rlpml_set(struct igb_adapter *adapter)
2942 {
2943         u32 max_frame_size = adapter->max_frame_size;
2944         struct e1000_hw *hw = &adapter->hw;
2945         u16 pf_id = adapter->vfs_allocated_count;
2946
2947         if (pf_id) {
2948                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2949                 /*
2950                  * If we're in VMDQ or SR-IOV mode, then set global RLPML
2951                  * to our max jumbo frame size, in case we need to enable
2952                  * jumbo frames on one of the rings later.
2953                  * This will not pass over-length frames into the default
2954                  * queue because it's gated by the VMOLR.RLPML.
2955                  */
2956                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2957         }
2958
2959         wr32(E1000_RLPML, max_frame_size);
2960 }
2961
2962 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2963                                  int vfn, bool aupe)
2964 {
2965         struct e1000_hw *hw = &adapter->hw;
2966         u32 vmolr;
2967
2968         /*
2969          * This register exists only on 82576 and newer, so on older
2970          * hardware exit and do nothing
2971          */
2972         if (hw->mac.type < e1000_82576)
2973                 return;
2974
2975         vmolr = rd32(E1000_VMOLR(vfn));
2976         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2977         if (aupe)
2978                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2979         else
2980                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2981
2982         /* clear all bits that might not be set */
2983         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2984
2985         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2986                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2987         /*
2988          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2989          * multicast packets
2990          */
2991         if (vfn <= adapter->vfs_allocated_count)
2992                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2993
2994         wr32(E1000_VMOLR(vfn), vmolr);
2995 }
2996
2997 /**
2998  * igb_configure_rx_ring - Configure a receive ring after Reset
2999  * @adapter: board private structure
3000  * @ring: receive ring to be configured
3001  *
3002  * Configure the Rx unit of the MAC after a reset.
3003  **/
3004 void igb_configure_rx_ring(struct igb_adapter *adapter,
3005                            struct igb_ring *ring)
3006 {
3007         struct e1000_hw *hw = &adapter->hw;
3008         u64 rdba = ring->dma;
3009         int reg_idx = ring->reg_idx;
3010         u32 srrctl = 0, rxdctl = 0;
3011
3012         /* disable the queue */
3013         wr32(E1000_RXDCTL(reg_idx), 0);
3014
3015         /* Set DMA base address registers */
3016         wr32(E1000_RDBAL(reg_idx),
3017              rdba & 0x00000000ffffffffULL);
3018         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3019         wr32(E1000_RDLEN(reg_idx),
3020                        ring->count * sizeof(union e1000_adv_rx_desc));
3021
3022         /* initialize head and tail */
3023         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3024         wr32(E1000_RDH(reg_idx), 0);
3025         writel(0, ring->tail);
3026
3027         /* set descriptor configuration */
3028         srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3029 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3030         srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3031 #else
3032         srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3033 #endif
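        /*
         * With the header-split descriptor type selected below, received
         * headers land in a buffer of IGB_RX_HDR_LEN bytes while the
         * payload goes to a half-page buffer (capped at 16KB when
         * PAGE_SIZE / 2 would exceed that), matching the sizes unmapped
         * in igb_clean_rx_ring().
         */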
3034         srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3035         if (hw->mac.type >= e1000_82580)
3036                 srrctl |= E1000_SRRCTL_TIMESTAMP;
3037         /* Only set Drop Enable if we are supporting multiple queues */
3038         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3039                 srrctl |= E1000_SRRCTL_DROP_EN;
3040
3041         wr32(E1000_SRRCTL(reg_idx), srrctl);
3042
3043         /* set filtering for VMDQ pools */
3044         igb_set_vmolr(adapter, reg_idx & 0x7, true);
3045
3046         rxdctl |= IGB_RX_PTHRESH;
3047         rxdctl |= IGB_RX_HTHRESH << 8;
3048         rxdctl |= IGB_RX_WTHRESH << 16;
3049
3050         /* enable receive descriptor fetching */
3051         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3052         wr32(E1000_RXDCTL(reg_idx), rxdctl);
3053 }
3054
3055 /**
3056  * igb_configure_rx - Configure receive Unit after Reset
3057  * @adapter: board private structure
3058  *
3059  * Configure the Rx unit of the MAC after a reset.
3060  **/
3061 static void igb_configure_rx(struct igb_adapter *adapter)
3062 {
3063         int i;
3064
3065         /* set UTA to appropriate mode */
3066         igb_set_uta(adapter);
3067
3068         /* set the correct pool for the PF default MAC address in entry 0 */
3069         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3070                          adapter->vfs_allocated_count);
3071
3072         /* Setup the HW Rx Head and Tail Descriptor Pointers and
3073          * the Base and Length of the Rx Descriptor Ring */
3074         for (i = 0; i < adapter->num_rx_queues; i++)
3075                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3076 }
3077
3078 /**
3079  * igb_free_tx_resources - Free Tx Resources per Queue
3080  * @tx_ring: Tx descriptor ring for a specific queue
3081  *
3082  * Free all transmit software resources
3083  **/
3084 void igb_free_tx_resources(struct igb_ring *tx_ring)
3085 {
3086         igb_clean_tx_ring(tx_ring);
3087
3088         vfree(tx_ring->tx_buffer_info);
3089         tx_ring->tx_buffer_info = NULL;
3090
3091         /* if not set, then don't free */
3092         if (!tx_ring->desc)
3093                 return;
3094
3095         dma_free_coherent(tx_ring->dev, tx_ring->size,
3096                           tx_ring->desc, tx_ring->dma);
3097
3098         tx_ring->desc = NULL;
3099 }
3100
3101 /**
3102  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3103  * @adapter: board private structure
3104  *
3105  * Free all transmit software resources
3106  **/
3107 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3108 {
3109         int i;
3110
3111         for (i = 0; i < adapter->num_tx_queues; i++)
3112                 igb_free_tx_resources(adapter->tx_ring[i]);
3113 }
3114
3115 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3116                                     struct igb_tx_buffer *tx_buffer)
3117 {
3118         if (tx_buffer->skb) {
3119                 dev_kfree_skb_any(tx_buffer->skb);
3120                 if (tx_buffer->dma)
3121                         dma_unmap_single(ring->dev,
3122                                          tx_buffer->dma,
3123                                          tx_buffer->length,
3124                                          DMA_TO_DEVICE);
3125         } else if (tx_buffer->dma) {
3126                 dma_unmap_page(ring->dev,
3127                                tx_buffer->dma,
3128                                tx_buffer->length,
3129                                DMA_TO_DEVICE);
3130         }
3131         tx_buffer->next_to_watch = NULL;
3132         tx_buffer->skb = NULL;
3133         tx_buffer->dma = 0;
3134         /* buffer_info must be completely set up in the transmit path */
3135 }
3136
3137 /**
3138  * igb_clean_tx_ring - Free Tx Buffers
3139  * @tx_ring: ring to be cleaned
3140  **/
3141 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3142 {
3143         struct igb_tx_buffer *buffer_info;
3144         unsigned long size;
3145         u16 i;
3146
3147         if (!tx_ring->tx_buffer_info)
3148                 return;
3149         /* Free all the Tx ring sk_buffs */
3150
3151         for (i = 0; i < tx_ring->count; i++) {
3152                 buffer_info = &tx_ring->tx_buffer_info[i];
3153                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3154         }
3155
3156         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3157         memset(tx_ring->tx_buffer_info, 0, size);
3158
3159         /* Zero out the descriptor ring */
3160         memset(tx_ring->desc, 0, tx_ring->size);
3161
3162         tx_ring->next_to_use = 0;
3163         tx_ring->next_to_clean = 0;
3164 }
3165
3166 /**
3167  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3168  * @adapter: board private structure
3169  **/
3170 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3171 {
3172         int i;
3173
3174         for (i = 0; i < adapter->num_tx_queues; i++)
3175                 igb_clean_tx_ring(adapter->tx_ring[i]);
3176 }
3177
3178 /**
3179  * igb_free_rx_resources - Free Rx Resources
3180  * @rx_ring: ring to clean the resources from
3181  *
3182  * Free all receive software resources
3183  **/
3184 void igb_free_rx_resources(struct igb_ring *rx_ring)
3185 {
3186         igb_clean_rx_ring(rx_ring);
3187
3188         vfree(rx_ring->rx_buffer_info);
3189         rx_ring->rx_buffer_info = NULL;
3190
3191         /* if not set, then don't free */
3192         if (!rx_ring->desc)
3193                 return;
3194
3195         dma_free_coherent(rx_ring->dev, rx_ring->size,
3196                           rx_ring->desc, rx_ring->dma);
3197
3198         rx_ring->desc = NULL;
3199 }
3200
3201 /**
3202  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3203  * @adapter: board private structure
3204  *
3205  * Free all receive software resources
3206  **/
3207 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3208 {
3209         int i;
3210
3211         for (i = 0; i < adapter->num_rx_queues; i++)
3212                 igb_free_rx_resources(adapter->rx_ring[i]);
3213 }
3214
3215 /**
3216  * igb_clean_rx_ring - Free Rx Buffers per Queue
3217  * @rx_ring: ring to free buffers from
3218  **/
3219 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3220 {
3221         unsigned long size;
3222         u16 i;
3223
3224         if (!rx_ring->rx_buffer_info)
3225                 return;
3226
3227         /* Free all the Rx ring sk_buffs */
3228         for (i = 0; i < rx_ring->count; i++) {
3229                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3230                 if (buffer_info->dma) {
3231                         dma_unmap_single(rx_ring->dev,
3232                                          buffer_info->dma,
3233                                          IGB_RX_HDR_LEN,
3234                                          DMA_FROM_DEVICE);
3235                         buffer_info->dma = 0;
3236                 }
3237
3238                 if (buffer_info->skb) {
3239                         dev_kfree_skb(buffer_info->skb);
3240                         buffer_info->skb = NULL;
3241                 }
3242                 if (buffer_info->page_dma) {
3243                         dma_unmap_page(rx_ring->dev,
3244                                        buffer_info->page_dma,
3245                                        PAGE_SIZE / 2,
3246                                        DMA_FROM_DEVICE);
3247                         buffer_info->page_dma = 0;
3248                 }
3249                 if (buffer_info->page) {
3250                         put_page(buffer_info->page);
3251                         buffer_info->page = NULL;
3252                         buffer_info->page_offset = 0;
3253                 }
3254         }
3255
3256         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3257         memset(rx_ring->rx_buffer_info, 0, size);
3258
3259         /* Zero out the descriptor ring */
3260         memset(rx_ring->desc, 0, rx_ring->size);
3261
3262         rx_ring->next_to_clean = 0;
3263         rx_ring->next_to_use = 0;
3264 }
3265
3266 /**
3267  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3268  * @adapter: board private structure
3269  **/
3270 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3271 {
3272         int i;
3273
3274         for (i = 0; i < adapter->num_rx_queues; i++)
3275                 igb_clean_rx_ring(adapter->rx_ring[i]);
3276 }
3277
3278 /**
3279  * igb_set_mac - Change the Ethernet Address of the NIC
3280  * @netdev: network interface device structure
3281  * @p: pointer to an address structure
3282  *
3283  * Returns 0 on success, negative on failure
3284  **/
3285 static int igb_set_mac(struct net_device *netdev, void *p)
3286 {
3287         struct igb_adapter *adapter = netdev_priv(netdev);
3288         struct e1000_hw *hw = &adapter->hw;
3289         struct sockaddr *addr = p;
3290
3291         if (!is_valid_ether_addr(addr->sa_data))
3292                 return -EADDRNOTAVAIL;
3293
3294         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3295         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3296
3297         /* set the correct pool for the new PF MAC address in entry 0 */
3298         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3299                          adapter->vfs_allocated_count);
3300
3301         return 0;
3302 }
3303
3304 /**
3305  * igb_write_mc_addr_list - write multicast addresses to MTA
3306  * @netdev: network interface device structure
3307  *
3308  * Writes multicast address list to the MTA hash table.
3309  * Returns: -ENOMEM on failure
3310  *                0 on no addresses written
3311  *                X on writing X addresses to MTA
3312  **/
3313 static int igb_write_mc_addr_list(struct net_device *netdev)
3314 {
3315         struct igb_adapter *adapter = netdev_priv(netdev);
3316         struct e1000_hw *hw = &adapter->hw;
3317         struct netdev_hw_addr *ha;
3318         u8  *mta_list;
3319         int i;
3320
3321         if (netdev_mc_empty(netdev)) {
3322                 /* nothing to program, so clear mc list */
3323                 igb_update_mc_addr_list(hw, NULL, 0);
3324                 igb_restore_vf_multicasts(adapter);
3325                 return 0;
3326         }
3327
3328         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3329         if (!mta_list)
3330                 return -ENOMEM;
3331
3332         /* The shared function expects a packed array of only addresses. */
3333         i = 0;
3334         netdev_for_each_mc_addr(ha, netdev)
3335                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3336
3337         igb_update_mc_addr_list(hw, mta_list, i);
3338         kfree(mta_list);
3339
3340         return netdev_mc_count(netdev);
3341 }
3342
3343 /**
3344  * igb_write_uc_addr_list - write unicast addresses to RAR table
3345  * @netdev: network interface device structure
3346  *
3347  * Writes unicast address list to the RAR table.
3348  * Returns: -ENOMEM on failure/insufficient address space
3349  *                0 on no addresses written
3350  *                X on writing X addresses to the RAR table
3351  **/
3352 static int igb_write_uc_addr_list(struct net_device *netdev)
3353 {
3354         struct igb_adapter *adapter = netdev_priv(netdev);
3355         struct e1000_hw *hw = &adapter->hw;
3356         unsigned int vfn = adapter->vfs_allocated_count;
3357         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3358         int count = 0;
3359
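        /* rar_entries excludes entry 0, which holds the PF default MAC (see
         * igb_configure_rx()), and one slot per allocated VF, so only the
         * remaining entries can hold additional unicast filters.
         */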
3360         /* return -ENOMEM to indicate insufficient space for the addresses */
3361         if (netdev_uc_count(netdev) > rar_entries)
3362                 return -ENOMEM;
3363
3364         if (!netdev_uc_empty(netdev) && rar_entries) {
3365                 struct netdev_hw_addr *ha;
3366
3367                 netdev_for_each_uc_addr(ha, netdev) {
3368                         if (!rar_entries)
3369                                 break;
3370                         igb_rar_set_qsel(adapter, ha->addr,
3371                                          rar_entries--,
3372                                          vfn);
3373                         count++;
3374                 }
3375         }
3376         /* zero out the remaining RAR entries not used above */
3377         for (; rar_entries > 0 ; rar_entries--) {
3378                 wr32(E1000_RAH(rar_entries), 0);
3379                 wr32(E1000_RAL(rar_entries), 0);
3380         }
3381         wrfl();
3382
3383         return count;
3384 }
3385
3386 /**
3387  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3388  * @netdev: network interface device structure
3389  *
3390  * The set_rx_mode entry point is called whenever the unicast or multicast
3391  * address lists or the network interface flags are updated.  This routine is
3392  * responsible for configuring the hardware for proper unicast, multicast,
3393  * promiscuous mode, and all-multi behavior.
3394  **/
3395 static void igb_set_rx_mode(struct net_device *netdev)
3396 {
3397         struct igb_adapter *adapter = netdev_priv(netdev);
3398         struct e1000_hw *hw = &adapter->hw;
3399         unsigned int vfn = adapter->vfs_allocated_count;
3400         u32 rctl, vmolr = 0;
3401         int count;
3402
3403         /* Check for Promiscuous and All Multicast modes */
3404         rctl = rd32(E1000_RCTL);
3405
3406         /* clear the affected bits */
3407         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3408
3409         if (netdev->flags & IFF_PROMISC) {
3410                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3411                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3412         } else {
3413                 if (netdev->flags & IFF_ALLMULTI) {
3414                         rctl |= E1000_RCTL_MPE;
3415                         vmolr |= E1000_VMOLR_MPME;
3416                 } else {
3417                         /*
3418                          * Write addresses to the MTA; if the attempt fails,
3419                          * fall back to multicast promiscuous mode so that we
3420                          * can at least receive multicast traffic
3421                          */
3422                         count = igb_write_mc_addr_list(netdev);
3423                         if (count < 0) {
3424                                 rctl |= E1000_RCTL_MPE;
3425                                 vmolr |= E1000_VMOLR_MPME;
3426                         } else if (count) {
3427                                 vmolr |= E1000_VMOLR_ROMPE;
3428                         }
3429                 }
3430                 /*
3431                  * Write addresses to available RAR registers, if there is not
3432                  * sufficient space to store all the addresses then enable
3433                  * unicast promiscuous mode
3434                  */
3435                 count = igb_write_uc_addr_list(netdev);
3436                 if (count < 0) {
3437                         rctl |= E1000_RCTL_UPE;
3438                         vmolr |= E1000_VMOLR_ROPE;
3439                 }
3440                 rctl |= E1000_RCTL_VFE;
3441         }
3442         wr32(E1000_RCTL, rctl);
3443
3444         /*
3445          * In order to support SR-IOV and eventually VMDq it is necessary to set
3446          * the VMOLR to enable the appropriate modes.  Without this workaround
3447          * we will have issues with VLAN tag stripping not being done for frames
3448          * that are only arriving because we are the default pool
3449          */
3450         if (hw->mac.type < e1000_82576)
3451                 return;
3452
3453         vmolr |= rd32(E1000_VMOLR(vfn)) &
3454                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3455         wr32(E1000_VMOLR(vfn), vmolr);
3456         igb_restore_vf_multicasts(adapter);
3457 }
3458
3459 static void igb_check_wvbr(struct igb_adapter *adapter)
3460 {
3461         struct e1000_hw *hw = &adapter->hw;
3462         u32 wvbr = 0;
3463
3464         switch (hw->mac.type) {
3465         case e1000_82576:
3466         case e1000_i350:
3467                 if (!(wvbr = rd32(E1000_WVBR)))
3468                         return;
3469                 break;
3470         default:
3471                 break;
3472         }
3473
3474         adapter->wvbr |= wvbr;
3475 }
3476
3477 #define IGB_STAGGERED_QUEUE_OFFSET 8
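/* The WVBR register reports one spoof-event bit per Tx queue; a VF's two
 * queues map to bits j and j + IGB_STAGGERED_QUEUE_OFFSET, both of which
 * igb_spoof_check() tests and clears below.
 */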
3478
3479 static void igb_spoof_check(struct igb_adapter *adapter)
3480 {
3481         int j;
3482
3483         if (!adapter->wvbr)
3484                 return;
3485
3486         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3487                 if (adapter->wvbr & (1 << j) ||
3488                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3489                         dev_warn(&adapter->pdev->dev,
3490                                 "Spoof event(s) detected on VF %d\n", j);
3491                         adapter->wvbr &=
3492                                 ~((1 << j) |
3493                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3494                 }
3495         }
3496 }
3497
3498 /* Need to wait a few seconds after link up to get diagnostic information from
3499  * the phy */
3500 static void igb_update_phy_info(unsigned long data)
3501 {
3502         struct igb_adapter *adapter = (struct igb_adapter *) data;
3503         igb_get_phy_info(&adapter->hw);
3504 }
3505
3506 /**
3507  * igb_has_link - check shared code for link and determine up/down
3508  * @adapter: pointer to driver private info
3509  **/
3510 bool igb_has_link(struct igb_adapter *adapter)
3511 {
3512         struct e1000_hw *hw = &adapter->hw;
3513         bool link_active = false;
3514         s32 ret_val = 0;
3515
3516         /* get_link_status is set on LSC (link status) interrupt or
3517          * rx sequence error interrupt.  get_link_status stays set until
3518          * e1000_check_for_link establishes link; this check applies to
3519          * copper adapters ONLY
3520          */
3521         switch (hw->phy.media_type) {
3522         case e1000_media_type_copper:
3523                 if (hw->mac.get_link_status) {
3524                         ret_val = hw->mac.ops.check_for_link(hw);
3525                         link_active = !hw->mac.get_link_status;
3526                 } else {
3527                         link_active = true;
3528                 }
3529                 break;
3530         case e1000_media_type_internal_serdes:
3531                 ret_val = hw->mac.ops.check_for_link(hw);
3532                 link_active = hw->mac.serdes_has_link;
3533                 break;
3534         default:
3535         case e1000_media_type_unknown:
3536                 break;
3537         }
3538
3539         return link_active;
3540 }
3541
3542 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3543 {
3544         bool ret = false;
3545         u32 ctrl_ext, thstat;
3546
3547         /* check for thermal sensor event on i350, copper only */
3548         if (hw->mac.type == e1000_i350) {
3549                 thstat = rd32(E1000_THSTAT);
3550                 ctrl_ext = rd32(E1000_CTRL_EXT);
3551
3552                 if ((hw->phy.media_type == e1000_media_type_copper) &&
3553                     !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3554                         ret = !!(thstat & event);
3555                 }
3556         }
3557
3558         return ret;
3559 }
3560
3561 /**
3562  * igb_watchdog - Timer Call-back
3563  * @data: pointer to adapter cast into an unsigned long
3564  **/
3565 static void igb_watchdog(unsigned long data)
3566 {
3567         struct igb_adapter *adapter = (struct igb_adapter *)data;
3568         /* Do the rest outside of interrupt context */
3569         schedule_work(&adapter->watchdog_task);
3570 }
3571
3572 static void igb_watchdog_task(struct work_struct *work)
3573 {
3574         struct igb_adapter *adapter = container_of(work,
3575                                                    struct igb_adapter,
3576                                                    watchdog_task);
3577         struct e1000_hw *hw = &adapter->hw;
3578         struct net_device *netdev = adapter->netdev;
3579         u32 link;
3580         int i;
3581
3582         link = igb_has_link(adapter);
3583         if (link) {
3584                 /* Cancel scheduled suspend requests. */
3585                 pm_runtime_resume(netdev->dev.parent);
3586
3587                 if (!netif_carrier_ok(netdev)) {
3588                         u32 ctrl;
3589                         hw->mac.ops.get_speed_and_duplex(hw,
3590                                                          &adapter->link_speed,
3591                                                          &adapter->link_duplex);
3592
3593                         ctrl = rd32(E1000_CTRL);
3594                         /* Link status message must follow this format */
3595                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3596                                "Duplex, Flow Control: %s\n",
3597                                netdev->name,
3598                                adapter->link_speed,
3599                                adapter->link_duplex == FULL_DUPLEX ?
3600                                "Full" : "Half",
3601                                (ctrl & E1000_CTRL_TFCE) &&
3602                                (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3603                                (ctrl & E1000_CTRL_RFCE) ?  "RX" :
3604                                (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
3605
3606                         /* check for thermal sensor event */
3607                         if (igb_thermal_sensor_event(hw,
3608                             E1000_THSTAT_LINK_THROTTLE)) {
3609                                 netdev_info(netdev, "The network adapter link "
3610                                             "speed was downshifted because it "
3611                                             "overheated\n");
3612                         }
3613
3614                         /* adjust timeout factor according to speed/duplex */
3615                         adapter->tx_timeout_factor = 1;
3616                         switch (adapter->link_speed) {
3617                         case SPEED_10:
3618                                 adapter->tx_timeout_factor = 14;
3619                                 break;
3620                         case SPEED_100:
3621                                 /* maybe add some timeout factor ? */
3622                                 break;
3623                         }
3624
3625                         netif_carrier_on(netdev);
3626
3627                         igb_ping_all_vfs(adapter);
3628                         igb_check_vf_rate_limit(adapter);
3629
3630                         /* link state has changed, schedule phy info update */
3631                         if (!test_bit(__IGB_DOWN, &adapter->state))
3632                                 mod_timer(&adapter->phy_info_timer,
3633                                           round_jiffies(jiffies + 2 * HZ));
3634                 }
3635         } else {
3636                 if (netif_carrier_ok(netdev)) {
3637                         adapter->link_speed = 0;
3638                         adapter->link_duplex = 0;
3639
3640                         /* check for thermal sensor event */
3641                         if (igb_thermal_sensor_event(hw,
3642                             E1000_THSTAT_PWR_DOWN)) {
3643                                 netdev_err(netdev, "The network adapter was "
3644                                            "stopped because it overheated\n");
3645                         }
3646
3647                         /* Link status message must follow this format */
3648                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3649                                netdev->name);
3650                         netif_carrier_off(netdev);
3651
3652                         igb_ping_all_vfs(adapter);
3653
3654                         /* link state has changed, schedule phy info update */
3655                         if (!test_bit(__IGB_DOWN, &adapter->state))
3656                                 mod_timer(&adapter->phy_info_timer,
3657                                           round_jiffies(jiffies + 2 * HZ));
3658
3659                         pm_schedule_suspend(netdev->dev.parent,
3660                                             MSEC_PER_SEC * 5);
3661                 }
3662         }
3663
3664         spin_lock(&adapter->stats64_lock);
3665         igb_update_stats(adapter, &adapter->stats64);
3666         spin_unlock(&adapter->stats64_lock);
3667
3668         for (i = 0; i < adapter->num_tx_queues; i++) {
3669                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3670                 if (!netif_carrier_ok(netdev)) {
3671                         /* We've lost link, so the controller stops DMA,
3672                          * but we've got queued Tx work that's never going
3673                          * to get done, so reset controller to flush Tx.
3674                          * (Do the reset outside of interrupt context). */
3675                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3676                                 adapter->tx_timeout_count++;
3677                                 schedule_work(&adapter->reset_task);
3678                                 /* return immediately since reset is imminent */
3679                                 return;
3680                         }
3681                 }
3682
3683                 /* Force detection of hung controller every watchdog period */
3684                 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3685         }
3686
3687         /* Cause software interrupt to ensure rx ring is cleaned */
3688         if (adapter->msix_entries) {
3689                 u32 eics = 0;
3690                 for (i = 0; i < adapter->num_q_vectors; i++)
3691                         eics |= adapter->q_vector[i]->eims_value;
3692                 wr32(E1000_EICS, eics);
3693         } else {
3694                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3695         }
3696
3697         igb_spoof_check(adapter);
3698
3699         /* Reset the timer */
3700         if (!test_bit(__IGB_DOWN, &adapter->state))
3701                 mod_timer(&adapter->watchdog_timer,
3702                           round_jiffies(jiffies + 2 * HZ));
3703 }
3704
3705 enum latency_range {
3706         lowest_latency = 0,
3707         low_latency = 1,
3708         bulk_latency = 2,
3709         latency_invalid = 255
3710 };
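/*
 * These latency classes correspond to the interrupt rates programmed by
 * igb_set_itr() below: lowest_latency -> 70K ints/sec, low_latency -> 20K
 * ints/sec and bulk_latency -> 4K ints/sec.
 */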
3711
3712 /**
3713  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3714  *
3715  *      Stores a new ITR value based strictly on packet size.  This
3716  *      algorithm is less sophisticated than that used in igb_update_itr,
3717  *      due to the difficulty of synchronizing statistics across multiple
3718  *      receive rings.  The divisors and thresholds used by this function
3719  *      were determined based on theoretical maximum wire speed and testing
3720  *      data, in order to minimize response time while increasing bulk
3721  *      throughput.
3722  *      This functionality is controlled by the InterruptThrottleRate module
3723  *      parameter (see igb_param.c)
3724  *      NOTE:  This function is called only when operating in a multiqueue
3725  *             receive environment.
3726  * @q_vector: pointer to q_vector
3727  **/
3728 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3729 {
3730         int new_val = q_vector->itr_val;
3731         int avg_wire_size = 0;
3732         struct igb_adapter *adapter = q_vector->adapter;
3733         unsigned int packets;
3734
3735         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3736          * ints/sec - ITR timer value of 120 ticks.
3737          */
3738         if (adapter->link_speed != SPEED_1000) {
3739                 new_val = IGB_4K_ITR;
3740                 goto set_itr_val;
3741         }
3742
3743         packets = q_vector->rx.total_packets;
3744         if (packets)
3745                 avg_wire_size = q_vector->rx.total_bytes / packets;
3746
3747         packets = q_vector->tx.total_packets;
3748         if (packets)
3749                 avg_wire_size = max_t(u32, avg_wire_size,
3750                                       q_vector->tx.total_bytes / packets);
3751
3752         /* if avg_wire_size isn't set no work was done */
3753         if (!avg_wire_size)
3754                 goto clear_counts;
3755
3756         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3757         avg_wire_size += 24;
3758
3759         /* Don't starve jumbo frames */
3760         avg_wire_size = min(avg_wire_size, 3000);
3761
3762         /* Give a little boost to mid-size frames */
3763         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3764                 new_val = avg_wire_size / 3;
3765         else
3766                 new_val = avg_wire_size / 2;
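        /*
         * The result is used directly as the new ITR interval, so larger
         * average frames mean a longer interval and fewer interrupts per
         * second: e.g. ~1500-byte frames give (1500 + 24) / 2 = 762 while
         * ~100-byte frames give (100 + 24) / 2 = 62.
         */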
3767
3768         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3769         if (new_val < IGB_20K_ITR &&
3770             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3771              (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3772                 new_val = IGB_20K_ITR;
3773
3774 set_itr_val:
3775         if (new_val != q_vector->itr_val) {
3776                 q_vector->itr_val = new_val;
3777                 q_vector->set_itr = 1;
3778         }
3779 clear_counts:
3780         q_vector->rx.total_bytes = 0;
3781         q_vector->rx.total_packets = 0;
3782         q_vector->tx.total_bytes = 0;
3783         q_vector->tx.total_packets = 0;
3784 }
3785
3786 /**
3787  * igb_update_itr - update the dynamic ITR value based on statistics
3788  *      Stores a new ITR value based on packets and byte
3789  *      counts during the last interrupt.  The advantage of per interrupt
3790  *      computation is faster updates and more accurate ITR for the current
3791  *      traffic pattern.  Constants in this function were computed
3792  *      based on theoretical maximum wire speed and thresholds were set based
3793  *      on testing data as well as attempting to minimize response time
3794  *      while increasing bulk throughput.
3795  *      this functionality is controlled by the InterruptThrottleRate module
3796  *      parameter (see igb_param.c)
3797  *      NOTE:  These calculations are only valid when operating in a single-
3798  *             queue environment.
3799  * @q_vector: pointer to q_vector
3800  * @ring_container: ring info to update the itr for
3801  **/
3802 static void igb_update_itr(struct igb_q_vector *q_vector,
3803                            struct igb_ring_container *ring_container)
3804 {
3805         unsigned int packets = ring_container->total_packets;
3806         unsigned int bytes = ring_container->total_bytes;
3807         u8 itrval = ring_container->itr;
3808
3809         /* no packets, exit with status unchanged */
3810         if (packets == 0)
3811                 return;
3812
3813         switch (itrval) {
3814         case lowest_latency:
3815                 /* handle TSO and jumbo frames */
3816                 if (bytes/packets > 8000)
3817                         itrval = bulk_latency;
3818                 else if ((packets < 5) && (bytes > 512))
3819                         itrval = low_latency;
3820                 break;
3821         case low_latency:  /* 50 usec aka 20000 ints/s */
3822                 if (bytes > 10000) {
3823                         /* this if handles the TSO accounting */
3824                         if (bytes/packets > 8000) {
3825                                 itrval = bulk_latency;
3826                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3827                                 itrval = bulk_latency;
3828                         } else if ((packets > 35)) {
3829                                 itrval = lowest_latency;
3830                         }
3831                 } else if (bytes/packets > 2000) {
3832                         itrval = bulk_latency;
3833                 } else if (packets <= 2 && bytes < 512) {
3834                         itrval = lowest_latency;
3835                 }
3836                 break;
3837         case bulk_latency: /* 250 usec aka 4000 ints/s */
3838                 if (bytes > 25000) {
3839                         if (packets > 35)
3840                                 itrval = low_latency;
3841                 } else if (bytes < 1500) {
3842                         itrval = low_latency;
3843                 }
3844                 break;
3845         }
3846
3847         /* clear work counters since we have the values we need */
3848         ring_container->total_bytes = 0;
3849         ring_container->total_packets = 0;
3850
3851         /* write updated itr to ring container */
3852         ring_container->itr = itrval;
3853 }
3854
3855 static void igb_set_itr(struct igb_q_vector *q_vector)
3856 {
3857         struct igb_adapter *adapter = q_vector->adapter;
3858         u32 new_itr = q_vector->itr_val;
3859         u8 current_itr = 0;
3860
3861         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3862         if (adapter->link_speed != SPEED_1000) {
3863                 current_itr = 0;
3864                 new_itr = IGB_4K_ITR;
3865                 goto set_itr_now;
3866         }
3867
3868         igb_update_itr(q_vector, &q_vector->tx);
3869         igb_update_itr(q_vector, &q_vector->rx);
3870
3871         current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3872
3873         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3874         if (current_itr == lowest_latency &&
3875             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3876              (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3877                 current_itr = low_latency;
3878
3879         switch (current_itr) {
3880         /* counts and packets in update_itr are dependent on these numbers */
3881         case lowest_latency:
3882                 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3883                 break;
3884         case low_latency:
3885                 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3886                 break;
3887         case bulk_latency:
3888                 new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
3889                 break;
3890         default:
3891                 break;
3892         }
3893
3894 set_itr_now:
3895         if (new_itr != q_vector->itr_val) {
3896                 /* this attempts to bias the interrupt rate towards Bulk
3897                  * by adding intermediate steps when interrupt rate is
3898                  * increasing */
3899                 new_itr = new_itr > q_vector->itr_val ?
3900                              max((new_itr * q_vector->itr_val) /
3901                                  (new_itr + (q_vector->itr_val >> 2)),
3902                                  new_itr) :
3903                              new_itr;
3904                 /* Don't write the value here; it resets the adapter's
3905                  * internal timer, and causes us to delay far longer than
3906                  * we should between interrupts.  Instead, we write the ITR
3907                  * value at the beginning of the next interrupt so the timing
3908                  * ends up being correct.
3909                  */
3910                 q_vector->itr_val = new_itr;
3911                 q_vector->set_itr = 1;
3912         }
3913 }
3914
3915 static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
3916                             u32 type_tucmd, u32 mss_l4len_idx)
3917 {
3918         struct e1000_adv_tx_context_desc *context_desc;
3919         u16 i = tx_ring->next_to_use;
3920
3921         context_desc = IGB_TX_CTXTDESC(tx_ring, i);
3922
3923         i++;
3924         tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
3925
3926         /* set bits to identify this as an advanced context descriptor */
3927         type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3928
3929         /* For 82575, context index must be unique per ring. */
3930         if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
3931                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3932
3933         context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
3934         context_desc->seqnum_seed       = 0;
3935         context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
3936         context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
3937 }
3938
3939 static int igb_tso(struct igb_ring *tx_ring,
3940                    struct igb_tx_buffer *first,
3941                    u8 *hdr_len)
3942 {
3943         struct sk_buff *skb = first->skb;
3944         u32 vlan_macip_lens, type_tucmd;
3945         u32 mss_l4len_idx, l4len;
3946
3947         if (!skb_is_gso(skb))
3948                 return 0;
3949
3950         if (skb_header_cloned(skb)) {
3951                 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3952                 if (err)
3953                         return err;
3954         }
3955
3956         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3957         type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
3958
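        /*
         * For TSO the length-dependent header fields are zeroed and the TCP
         * checksum is seeded with the pseudo-header sum (excluding length)
         * so the hardware can insert correct IP and TCP checksums in every
         * segment it generates.
         */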
3959         if (first->protocol == __constant_htons(ETH_P_IP)) {
3960                 struct iphdr *iph = ip_hdr(skb);
3961                 iph->tot_len = 0;
3962                 iph->check = 0;
3963                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3964                                                          iph->daddr, 0,
3965                                                          IPPROTO_TCP,
3966                                                          0);
3967                 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
3968                 first->tx_flags |= IGB_TX_FLAGS_TSO |
3969                                    IGB_TX_FLAGS_CSUM |
3970                                    IGB_TX_FLAGS_IPV4;
3971         } else if (skb_is_gso_v6(skb)) {
3972                 ipv6_hdr(skb)->payload_len = 0;
3973                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3974                                                        &ipv6_hdr(skb)->daddr,
3975                                                        0, IPPROTO_TCP, 0);
3976                 first->tx_flags |= IGB_TX_FLAGS_TSO |
3977                                    IGB_TX_FLAGS_CSUM;
3978         }
3979
3980         /* compute header lengths */
3981         l4len = tcp_hdrlen(skb);
3982         *hdr_len = skb_transport_offset(skb) + l4len;
3983
3984         /* update gso size and bytecount with header size */
3985         first->gso_segs = skb_shinfo(skb)->gso_segs;
3986         first->bytecount += (first->gso_segs - 1) * *hdr_len;
3987
3988         /* MSS L4LEN IDX */
3989         mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
3990         mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
3991
3992         /* VLAN MACLEN IPLEN */
3993         vlan_macip_lens = skb_network_header_len(skb);
3994         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
3995         vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
3996
3997         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
3998
3999         return 1;
4000 }
4001
4002 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4003 {
4004         struct sk_buff *skb = first->skb;
4005         u32 vlan_macip_lens = 0;
4006         u32 mss_l4len_idx = 0;
4007         u32 type_tucmd = 0;
4008
4009         if (skb->ip_summed != CHECKSUM_PARTIAL) {
4010                 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4011                         return;
4012         } else {
4013                 u8 l4_hdr = 0;
4014                 switch (first->protocol) {
4015                 case __constant_htons(ETH_P_IP):
4016                         vlan_macip_lens |= skb_network_header_len(skb);
4017                         type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4018                         l4_hdr = ip_hdr(skb)->protocol;
4019                         break;
4020                 case __constant_htons(ETH_P_IPV6):
4021                         vlan_macip_lens |= skb_network_header_len(skb);
4022                         l4_hdr = ipv6_hdr(skb)->nexthdr;
4023                         break;
4024                 default:
4025                         if (unlikely(net_ratelimit())) {
4026                                 dev_warn(tx_ring->dev,
4027                                  "partial checksum but proto=%x!\n",
4028                                  first->protocol);
4029                         }
4030                         break;
4031                 }
4032
4033                 switch (l4_hdr) {
4034                 case IPPROTO_TCP:
4035                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4036                         mss_l4len_idx = tcp_hdrlen(skb) <<
4037                                         E1000_ADVTXD_L4LEN_SHIFT;
4038                         break;
4039                 case IPPROTO_SCTP:
4040                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4041                         mss_l4len_idx = sizeof(struct sctphdr) <<
4042                                         E1000_ADVTXD_L4LEN_SHIFT;
4043                         break;
4044                 case IPPROTO_UDP:
4045                         mss_l4len_idx = sizeof(struct udphdr) <<
4046                                         E1000_ADVTXD_L4LEN_SHIFT;
4047                         break;
4048                 default:
4049                         if (unlikely(net_ratelimit())) {
4050                                 dev_warn(tx_ring->dev,
4051                                  "partial checksum but l4 proto=%x!\n",
4052                                  l4_hdr);
4053                         }
4054                         break;
4055                 }
4056
4057                 /* update TX checksum flag */
4058                 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4059         }
4060
4061         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4062         vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4063
4064         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4065 }
4066
4067 static __le32 igb_tx_cmd_type(u32 tx_flags)
4068 {
4069         /* set type for advanced descriptor with frame checksum insertion */
4070         __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4071                                       E1000_ADVTXD_DCMD_IFCS |
4072                                       E1000_ADVTXD_DCMD_DEXT);
4073
4074         /* set HW vlan bit if vlan is present */
4075         if (tx_flags & IGB_TX_FLAGS_VLAN)
4076                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4077
4078         /* set timestamp bit if present */
4079         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4080                 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4081
4082         /* set segmentation bits for TSO */
4083         if (tx_flags & IGB_TX_FLAGS_TSO)
4084                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4085
4086         return cmd_type;
4087 }
4088
4089 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4090                                  union e1000_adv_tx_desc *tx_desc,
4091                                  u32 tx_flags, unsigned int paylen)
4092 {
4093         u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4094
4095         /* 82575 requires a unique index per ring if any offload is enabled */
4096         if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4097             test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4098                 olinfo_status |= tx_ring->reg_idx << 4;
4099
4100         /* insert L4 checksum */
4101         if (tx_flags & IGB_TX_FLAGS_CSUM) {
4102                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4103
4104                 /* insert IPv4 checksum */
4105                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4106                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4107         }
4108
4109         tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4110 }
4111
4112 /*
4113  * The largest size we can write to the descriptor is 65535.  In order to
4114  * maintain a power of two alignment we have to limit ourselves to 32K.
4115  */
4116 #define IGB_MAX_TXD_PWR 15
4117 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
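/*
 * igb_tx_map() below relies on this limit: any buffer longer than 32K is
 * split across multiple data descriptors, e.g. a 60K fragment becomes one
 * 32K descriptor followed by a 28K descriptor.
 */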
4118
4119 static void igb_tx_map(struct igb_ring *tx_ring,
4120                        struct igb_tx_buffer *first,
4121                        const u8 hdr_len)
4122 {
4123         struct sk_buff *skb = first->skb;
4124         struct igb_tx_buffer *tx_buffer_info;
4125         union e1000_adv_tx_desc *tx_desc;
4126         dma_addr_t dma;
4127         struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4128         unsigned int data_len = skb->data_len;
4129         unsigned int size = skb_headlen(skb);
4130         unsigned int paylen = skb->len - hdr_len;
4131         __le32 cmd_type;
4132         u32 tx_flags = first->tx_flags;
4133         u16 i = tx_ring->next_to_use;
4134
4135         tx_desc = IGB_TX_DESC(tx_ring, i);
4136
4137         igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4138         cmd_type = igb_tx_cmd_type(tx_flags);
4139
4140         dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4141         if (dma_mapping_error(tx_ring->dev, dma))
4142                 goto dma_error;
4143
4144         /* record length, and DMA address */
4145         first->length = size;
4146         first->dma = dma;
4147         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4148
4149         for (;;) {
4150                 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4151                         tx_desc->read.cmd_type_len =
4152                                 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4153
4154                         i++;
4155                         tx_desc++;
4156                         if (i == tx_ring->count) {
4157                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
4158                                 i = 0;
4159                         }
4160
4161                         dma += IGB_MAX_DATA_PER_TXD;
4162                         size -= IGB_MAX_DATA_PER_TXD;
4163
4164                         tx_desc->read.olinfo_status = 0;
4165                         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4166                 }
4167
4168                 if (likely(!data_len))
4169                         break;
4170
4171                 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4172
4173                 i++;
4174                 tx_desc++;
4175                 if (i == tx_ring->count) {
4176                         tx_desc = IGB_TX_DESC(tx_ring, 0);
4177                         i = 0;
4178                 }
4179
4180                 size = skb_frag_size(frag);
4181                 data_len -= size;
4182
4183                 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4184                                    size, DMA_TO_DEVICE);
4185                 if (dma_mapping_error(tx_ring->dev, dma))
4186                         goto dma_error;
4187
4188                 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4189                 tx_buffer_info->length = size;
4190                 tx_buffer_info->dma = dma;
4191
4192                 tx_desc->read.olinfo_status = 0;
4193                 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4194
4195                 frag++;
4196         }
4197
4198         netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4199
4200         /* write last descriptor with RS and EOP bits */
4201         cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4202         if (unlikely(skb->no_fcs))
4203                 cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS));
4204         tx_desc->read.cmd_type_len = cmd_type;
4205
4206         /* set the timestamp */
4207         first->time_stamp = jiffies;
4208
4209         /*
4210          * Force memory writes to complete before letting h/w know there
4211          * are new descriptors to fetch.  (Only applicable for weak-ordered
4212          * memory model archs, such as IA-64).
4213          *
4214          * We also need this memory barrier to make certain all of the
4215          * status bits have been updated before next_to_watch is written.
4216          */
4217         wmb();
4218
4219         /* set next_to_watch value indicating a packet is present */
4220         first->next_to_watch = tx_desc;
4221
4222         i++;
4223         if (i == tx_ring->count)
4224                 i = 0;
4225
4226         tx_ring->next_to_use = i;
4227
4228         writel(i, tx_ring->tail);
4229
4230         /* we need this if more than one processor can write to our tail
4231          * at a time; it synchronizes IO on IA64/Altix systems */
4232         mmiowb();
4233
4234         return;
4235
4236 dma_error:
4237         dev_err(tx_ring->dev, "TX DMA map failed\n");
4238
4239         /* clear dma mappings for failed tx_buffer_info map */
4240         for (;;) {
4241                 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4242                 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4243                 if (tx_buffer_info == first)
4244                         break;
4245                 if (i == 0)
4246                         i = tx_ring->count;
4247                 i--;
4248         }
4249
4250         tx_ring->next_to_use = i;
4251 }
4252
4253 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4254 {
4255         struct net_device *netdev = tx_ring->netdev;
4256
4257         netif_stop_subqueue(netdev, tx_ring->queue_index);
4258
4259         /* Herbert's original patch had:
4260          *  smp_mb__after_netif_stop_queue();
4261          * but since that doesn't exist yet, just open code it. */
4262         smp_mb();
4263
4264         /* We need to check again in case another CPU has just
4265          * made room available. */
4266         if (igb_desc_unused(tx_ring) < size)
4267                 return -EBUSY;
4268
4269         /* A reprieve! */
4270         netif_wake_subqueue(netdev, tx_ring->queue_index);
4271
4272         u64_stats_update_begin(&tx_ring->tx_syncp2);
4273         tx_ring->tx_stats.restart_queue2++;
4274         u64_stats_update_end(&tx_ring->tx_syncp2);
4275
4276         return 0;
4277 }
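/*
 * The smp_mb() above pairs with the descriptor cleanup path: the queue is
 * stopped first, the barrier makes that stop visible, and only then is the
 * free descriptor count re-read.  Either this CPU sees the space a
 * concurrent cleanup just freed, or the cleanup sees the stopped queue and
 * wakes it, so the queue cannot be left stopped with room available.
 */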
4278
4279 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4280 {
4281         if (igb_desc_unused(tx_ring) >= size)
4282                 return 0;
4283         return __igb_maybe_stop_tx(tx_ring, size);
4284 }
4285
4286 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4287                                 struct igb_ring *tx_ring)
4288 {
4289         struct igb_tx_buffer *first;
4290         int tso;
4291         u32 tx_flags = 0;
4292         __be16 protocol = vlan_get_protocol(skb);
4293         u8 hdr_len = 0;
4294
4295         /* need: 1 descriptor per page,
4296          *       + 2 desc gap to keep tail from touching head,
4297          *       + 1 desc for skb->data,
4298          *       + 1 desc for context descriptor,
4299          * otherwise try next time */
4300         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4301                 /* this is a hard error */
4302                 return NETDEV_TX_BUSY;
4303         }
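        /*
         * Worked example of the reservation above: an skb with three paged
         * frags asks for 3 + 4 = 7 free descriptors: one per frag, one for
         * skb->data, one for the context descriptor, plus the two
         * descriptor gap that keeps tail from touching head.
         */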
4304
4305         /* record the location of the first descriptor for this packet */
4306         first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4307         first->skb = skb;
4308         first->bytecount = skb->len;
4309         first->gso_segs = 1;
4310
4311         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4312                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4313                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4314         }
4315
4316         if (vlan_tx_tag_present(skb)) {
4317                 tx_flags |= IGB_TX_FLAGS_VLAN;
4318                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4319         }
4320
4321         /* record initial flags and protocol */
4322         first->tx_flags = tx_flags;
4323         first->protocol = protocol;
4324
4325         tso = igb_tso(tx_ring, first, &hdr_len);
4326         if (tso < 0)
4327                 goto out_drop;
4328         else if (!tso)
4329                 igb_tx_csum(tx_ring, first);
4330
4331         igb_tx_map(tx_ring, first, hdr_len);
4332
4333         /* Make sure there is space in the ring for the next send. */
4334         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4335
4336         return NETDEV_TX_OK;
4337
4338 out_drop:
4339         igb_unmap_and_free_tx_resource(tx_ring, first);
4340
4341         return NETDEV_TX_OK;
4342 }
4343
4344 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4345                                                     struct sk_buff *skb)
4346 {
4347         unsigned int r_idx = skb->queue_mapping;
4348
4349         if (r_idx >= adapter->num_tx_queues)
4350                 r_idx = r_idx % adapter->num_tx_queues;
4351
4352         return adapter->tx_ring[r_idx];
4353 }
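/*
 * For example, with num_tx_queues == 4 an skb whose queue_mapping is 5 is
 * folded onto tx_ring[1]; mappings already below the queue count are used
 * as-is.
 */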
4354
4355 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4356                                   struct net_device *netdev)
4357 {
4358         struct igb_adapter *adapter = netdev_priv(netdev);
4359
4360         if (test_bit(__IGB_DOWN, &adapter->state)) {
4361                 dev_kfree_skb_any(skb);
4362                 return NETDEV_TX_OK;
4363         }
4364
4365         if (skb->len <= 0) {
4366                 dev_kfree_skb_any(skb);
4367                 return NETDEV_TX_OK;
4368         }
4369
4370         /*
4371          * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4372          * in order to meet this minimum size requirement.
4373          */
4374         if (skb->len < 17) {
4375                 if (skb_padto(skb, 17))
4376                         return NETDEV_TX_OK;
4377                 skb->len = 17;
4378         }
4379
4380         return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4381 }
4382
4383 /**
4384  * igb_tx_timeout - Respond to a Tx Hang
4385  * @netdev: network interface device structure
4386  **/
4387 static void igb_tx_timeout(struct net_device *netdev)
4388 {
4389         struct igb_adapter *adapter = netdev_priv(netdev);
4390         struct e1000_hw *hw = &adapter->hw;
4391
4392         /* Do the reset outside of interrupt context */
4393         adapter->tx_timeout_count++;
4394
4395         if (hw->mac.type >= e1000_82580)
4396                 hw->dev_spec._82575.global_device_reset = true;
4397
4398         schedule_work(&adapter->reset_task);
4399         wr32(E1000_EICS,
4400              (adapter->eims_enable_mask & ~adapter->eims_other));
4401 }
4402
4403 static void igb_reset_task(struct work_struct *work)
4404 {
4405         struct igb_adapter *adapter;
4406         adapter = container_of(work, struct igb_adapter, reset_task);
4407
4408         igb_dump(adapter);
4409         netdev_err(adapter->netdev, "Reset adapter\n");
4410         igb_reinit_locked(adapter);
4411 }
4412
4413 /**
4414  * igb_get_stats64 - Get System Network Statistics
4415  * @netdev: network interface device structure
4416  * @stats: rtnl_link_stats64 pointer
4417  *
4418  **/
4419 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4420                                                  struct rtnl_link_stats64 *stats)
4421 {
4422         struct igb_adapter *adapter = netdev_priv(netdev);
4423
4424         spin_lock(&adapter->stats64_lock);
4425         igb_update_stats(adapter, &adapter->stats64);
4426         memcpy(stats, &adapter->stats64, sizeof(*stats));
4427         spin_unlock(&adapter->stats64_lock);
4428
4429         return stats;
4430 }
4431
4432 /**
4433  * igb_change_mtu - Change the Maximum Transfer Unit
4434  * @netdev: network interface device structure
4435  * @new_mtu: new value for maximum frame size
4436  *
4437  * Returns 0 on success, negative on failure
4438  **/
4439 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4440 {
4441         struct igb_adapter *adapter = netdev_priv(netdev);
4442         struct pci_dev *pdev = adapter->pdev;
4443         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4444
4445         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4446                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4447                 return -EINVAL;
4448         }
4449
4450 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4451         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4452                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4453                 return -EINVAL;
4454         }
4455
4456         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4457                 msleep(1);
4458
4459         /* igb_down has a dependency on max_frame_size */
4460         adapter->max_frame_size = max_frame;
4461
4462         if (netif_running(netdev))
4463                 igb_down(adapter);
4464
4465         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4466                  netdev->mtu, new_mtu);
4467         netdev->mtu = new_mtu;
4468
4469         if (netif_running(netdev))
4470                 igb_up(adapter);
4471         else
4472                 igb_reset(adapter);
4473
4474         clear_bit(__IGB_RESETTING, &adapter->state);
4475
4476         return 0;
4477 }
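/*
 * Frame size arithmetic used above: max_frame = new_mtu + ETH_HLEN (14) +
 * ETH_FCS_LEN (4) + VLAN_HLEN (4).  A requested MTU of 9000 therefore maps
 * to a 9022 byte frame, and the 9216 MTU ceiling in the error message
 * corresponds exactly to the 9238 byte MAX_STD_JUMBO_FRAME_SIZE limit.
 */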
4478
4479 /**
4480  * igb_update_stats - Update the board statistics counters
4481  * @adapter: board private structure
4482  **/
4483
4484 void igb_update_stats(struct igb_adapter *adapter,
4485                       struct rtnl_link_stats64 *net_stats)
4486 {
4487         struct e1000_hw *hw = &adapter->hw;
4488         struct pci_dev *pdev = adapter->pdev;
4489         u32 reg, mpc;
4490         u16 phy_tmp;
4491         int i;
4492         u64 bytes, packets;
4493         unsigned int start;
4494         u64 _bytes, _packets;
4495
4496 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4497
4498         /*
4499          * Prevent stats update while adapter is being reset, or if the pci
4500          * connection is down.
4501          */
4502         if (adapter->link_speed == 0)
4503                 return;
4504         if (pci_channel_offline(pdev))
4505                 return;
4506
4507         bytes = 0;
4508         packets = 0;
4509         for (i = 0; i < adapter->num_rx_queues; i++) {
4510                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4511                 struct igb_ring *ring = adapter->rx_ring[i];
4512
4513                 ring->rx_stats.drops += rqdpc_tmp;
4514                 net_stats->rx_fifo_errors += rqdpc_tmp;
4515
4516                 do {
4517                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4518                         _bytes = ring->rx_stats.bytes;
4519                         _packets = ring->rx_stats.packets;
4520                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4521                 bytes += _bytes;
4522                 packets += _packets;
4523         }
4524
4525         net_stats->rx_bytes = bytes;
4526         net_stats->rx_packets = packets;
4527
4528         bytes = 0;
4529         packets = 0;
4530         for (i = 0; i < adapter->num_tx_queues; i++) {
4531                 struct igb_ring *ring = adapter->tx_ring[i];
4532                 do {
4533                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4534                         _bytes = ring->tx_stats.bytes;
4535                         _packets = ring->tx_stats.packets;
4536                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4537                 bytes += _bytes;
4538                 packets += _packets;
4539         }
4540         net_stats->tx_bytes = bytes;
4541         net_stats->tx_packets = packets;
4542
4543         /* read stats registers */
4544         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4545         adapter->stats.gprc += rd32(E1000_GPRC);
4546         adapter->stats.gorc += rd32(E1000_GORCL);
4547         rd32(E1000_GORCH); /* clear GORCL */
4548         adapter->stats.bprc += rd32(E1000_BPRC);
4549         adapter->stats.mprc += rd32(E1000_MPRC);
4550         adapter->stats.roc += rd32(E1000_ROC);
4551
4552         adapter->stats.prc64 += rd32(E1000_PRC64);
4553         adapter->stats.prc127 += rd32(E1000_PRC127);
4554         adapter->stats.prc255 += rd32(E1000_PRC255);
4555         adapter->stats.prc511 += rd32(E1000_PRC511);
4556         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4557         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4558         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4559         adapter->stats.sec += rd32(E1000_SEC);
4560
4561         mpc = rd32(E1000_MPC);
4562         adapter->stats.mpc += mpc;
4563         net_stats->rx_fifo_errors += mpc;
4564         adapter->stats.scc += rd32(E1000_SCC);
4565         adapter->stats.ecol += rd32(E1000_ECOL);
4566         adapter->stats.mcc += rd32(E1000_MCC);
4567         adapter->stats.latecol += rd32(E1000_LATECOL);
4568         adapter->stats.dc += rd32(E1000_DC);
4569         adapter->stats.rlec += rd32(E1000_RLEC);
4570         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4571         adapter->stats.xontxc += rd32(E1000_XONTXC);
4572         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4573         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4574         adapter->stats.fcruc += rd32(E1000_FCRUC);
4575         adapter->stats.gptc += rd32(E1000_GPTC);
4576         adapter->stats.gotc += rd32(E1000_GOTCL);
4577         rd32(E1000_GOTCH); /* clear GOTCL */
4578         adapter->stats.rnbc += rd32(E1000_RNBC);
4579         adapter->stats.ruc += rd32(E1000_RUC);
4580         adapter->stats.rfc += rd32(E1000_RFC);
4581         adapter->stats.rjc += rd32(E1000_RJC);
4582         adapter->stats.tor += rd32(E1000_TORH);
4583         adapter->stats.tot += rd32(E1000_TOTH);
4584         adapter->stats.tpr += rd32(E1000_TPR);
4585
4586         adapter->stats.ptc64 += rd32(E1000_PTC64);
4587         adapter->stats.ptc127 += rd32(E1000_PTC127);
4588         adapter->stats.ptc255 += rd32(E1000_PTC255);
4589         adapter->stats.ptc511 += rd32(E1000_PTC511);
4590         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4591         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4592
4593         adapter->stats.mptc += rd32(E1000_MPTC);
4594         adapter->stats.bptc += rd32(E1000_BPTC);
4595
4596         adapter->stats.tpt += rd32(E1000_TPT);
4597         adapter->stats.colc += rd32(E1000_COLC);
4598
4599         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4600         /* read internal phy specific stats */
4601         reg = rd32(E1000_CTRL_EXT);
4602         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4603                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4604                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4605         }
4606
4607         adapter->stats.tsctc += rd32(E1000_TSCTC);
4608         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4609
4610         adapter->stats.iac += rd32(E1000_IAC);
4611         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4612         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4613         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4614         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4615         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4616         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4617         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4618         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4619
4620         /* Fill out the OS statistics structure */
4621         net_stats->multicast = adapter->stats.mprc;
4622         net_stats->collisions = adapter->stats.colc;
4623
4624         /* Rx Errors */
4625
4626         /* RLEC on some newer hardware can be incorrect so build
4627          * our own version based on RUC and ROC */
4628         net_stats->rx_errors = adapter->stats.rxerrc +
4629                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4630                 adapter->stats.ruc + adapter->stats.roc +
4631                 adapter->stats.cexterr;
4632         net_stats->rx_length_errors = adapter->stats.ruc +
4633                                       adapter->stats.roc;
4634         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4635         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4636         net_stats->rx_missed_errors = adapter->stats.mpc;
4637
4638         /* Tx Errors */
4639         net_stats->tx_errors = adapter->stats.ecol +
4640                                adapter->stats.latecol;
4641         net_stats->tx_aborted_errors = adapter->stats.ecol;
4642         net_stats->tx_window_errors = adapter->stats.latecol;
4643         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4644
4645         /* Tx Dropped needs to be maintained elsewhere */
4646
4647         /* Phy Stats */
4648         if (hw->phy.media_type == e1000_media_type_copper) {
4649                 if ((adapter->link_speed == SPEED_1000) &&
4650                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4651                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4652                         adapter->phy_stats.idle_errors += phy_tmp;
4653                 }
4654         }
4655
4656         /* Management Stats */
4657         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4658         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4659         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4660
4661         /* OS2BMC Stats */
4662         reg = rd32(E1000_MANC);
4663         if (reg & E1000_MANC_EN_BMC2OS) {
4664                 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4665                 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4666                 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4667                 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4668         }
4669 }
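/*
 * The per-ring byte and packet totals above are snapshotted with the
 * u64_stats_fetch_begin_bh()/u64_stats_fetch_retry_bh() loop so that the
 * 64-bit counters are never read torn on 32-bit machines; the writer side
 * is the u64_stats_update_begin()/u64_stats_update_end() pairing used
 * around tx_stats.restart_queue2 in __igb_maybe_stop_tx() above.
 */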
4670
4671 static irqreturn_t igb_msix_other(int irq, void *data)
4672 {
4673         struct igb_adapter *adapter = data;
4674         struct e1000_hw *hw = &adapter->hw;
4675         u32 icr = rd32(E1000_ICR);
4676         /* reading ICR causes bit 31 of EICR to be cleared */
4677
4678         if (icr & E1000_ICR_DRSTA)
4679                 schedule_work(&adapter->reset_task);
4680
4681         if (icr & E1000_ICR_DOUTSYNC) {
4682                 /* HW is reporting DMA is out of sync */
4683                 adapter->stats.doosync++;
4684                 /* The DMA Out of Sync is also an indication of a spoof event
4685                  * in IOV mode. Check the Wrong VM Behavior register to
4686                  * see if it is really a spoof event. */
4687                 igb_check_wvbr(adapter);
4688         }
4689
4690         /* Check for a mailbox event */
4691         if (icr & E1000_ICR_VMMB)
4692                 igb_msg_task(adapter);
4693
4694         if (icr & E1000_ICR_LSC) {
4695                 hw->mac.get_link_status = 1;
4696                 /* guard against interrupt when we're going down */
4697                 if (!test_bit(__IGB_DOWN, &adapter->state))
4698                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4699         }
4700
4701         wr32(E1000_EIMS, adapter->eims_other);
4702
4703         return IRQ_HANDLED;
4704 }
4705
4706 static void igb_write_itr(struct igb_q_vector *q_vector)
4707 {
4708         struct igb_adapter *adapter = q_vector->adapter;
4709         u32 itr_val = q_vector->itr_val & 0x7FFC;
4710
4711         if (!q_vector->set_itr)
4712                 return;
4713
4714         if (!itr_val)
4715                 itr_val = 0x4;
4716
4717         if (adapter->hw.mac.type == e1000_82575)
4718                 itr_val |= itr_val << 16;
4719         else
4720                 itr_val |= E1000_EITR_CNT_IGNR;
4721
4722         writel(itr_val, q_vector->itr_register);
4723         q_vector->set_itr = 0;
4724 }
4725
4726 static irqreturn_t igb_msix_ring(int irq, void *data)
4727 {
4728         struct igb_q_vector *q_vector = data;
4729
4730         /* Write the ITR value calculated from the previous interrupt. */
4731         igb_write_itr(q_vector);
4732
4733         napi_schedule(&q_vector->napi);
4734
4735         return IRQ_HANDLED;
4736 }
4737
4738 #ifdef CONFIG_IGB_DCA
4739 static void igb_update_dca(struct igb_q_vector *q_vector)
4740 {
4741         struct igb_adapter *adapter = q_vector->adapter;
4742         struct e1000_hw *hw = &adapter->hw;
4743         int cpu = get_cpu();
4744
4745         if (q_vector->cpu == cpu)
4746                 goto out_no_update;
4747
4748         if (q_vector->tx.ring) {
4749                 int q = q_vector->tx.ring->reg_idx;
4750                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4751                 if (hw->mac.type == e1000_82575) {
4752                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4753                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4754                 } else {
4755                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4756                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4757                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4758                 }
4759                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4760                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4761         }
4762         if (q_vector->rx.ring) {
4763                 int q = q_vector->rx.ring->reg_idx;
4764                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4765                 if (hw->mac.type == e1000_82575) {
4766                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4767                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4768                 } else {
4769                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4770                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4771                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4772                 }
4773                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4774                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4775                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4776                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4777         }
4778         q_vector->cpu = cpu;
4779 out_no_update:
4780         put_cpu();
4781 }
4782
4783 static void igb_setup_dca(struct igb_adapter *adapter)
4784 {
4785         struct e1000_hw *hw = &adapter->hw;
4786         int i;
4787
4788         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4789                 return;
4790
4791         /* Always use CB2 mode, difference is masked in the CB driver. */
4792         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4793
4794         for (i = 0; i < adapter->num_q_vectors; i++) {
4795                 adapter->q_vector[i]->cpu = -1;
4796                 igb_update_dca(adapter->q_vector[i]);
4797         }
4798 }
4799
4800 static int __igb_notify_dca(struct device *dev, void *data)
4801 {
4802         struct net_device *netdev = dev_get_drvdata(dev);
4803         struct igb_adapter *adapter = netdev_priv(netdev);
4804         struct pci_dev *pdev = adapter->pdev;
4805         struct e1000_hw *hw = &adapter->hw;
4806         unsigned long event = *(unsigned long *)data;
4807
4808         switch (event) {
4809         case DCA_PROVIDER_ADD:
4810                 /* if already enabled, don't do it again */
4811                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4812                         break;
4813                 if (dca_add_requester(dev) == 0) {
4814                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4815                         dev_info(&pdev->dev, "DCA enabled\n");
4816                         igb_setup_dca(adapter);
4817                         break;
4818                 }
4819                 /* Fall Through since DCA is disabled. */
4820         case DCA_PROVIDER_REMOVE:
4821                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4822                         /* without this a class_device is left
4823                          * hanging around in the sysfs model */
4824                         dca_remove_requester(dev);
4825                         dev_info(&pdev->dev, "DCA disabled\n");
4826                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4827                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4828                 }
4829                 break;
4830         }
4831
4832         return 0;
4833 }
4834
4835 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4836                           void *p)
4837 {
4838         int ret_val;
4839
4840         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4841                                          __igb_notify_dca);
4842
4843         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4844 }
4845 #endif /* CONFIG_IGB_DCA */
4846
4847 #ifdef CONFIG_PCI_IOV
4848 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4849 {
4850         unsigned char mac_addr[ETH_ALEN];
4851         struct pci_dev *pdev = adapter->pdev;
4852         struct e1000_hw *hw = &adapter->hw;
4853         struct pci_dev *pvfdev;
4854         unsigned int device_id;
4855         u16 thisvf_devfn;
4856
4857         random_ether_addr(mac_addr);
4858         igb_set_vf_mac(adapter, vf, mac_addr);
4859
4860         switch (adapter->hw.mac.type) {
4861         case e1000_82576:
4862                 device_id = IGB_82576_VF_DEV_ID;
4863                 /* VF Stride for 82576 is 2 */
4864                 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4865                         (pdev->devfn & 1);
4866                 break;
4867         case e1000_i350:
4868                 device_id = IGB_I350_VF_DEV_ID;
4869                 /* VF Stride for I350 is 4 */
4870                 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
4871                                 (pdev->devfn & 3);
4872                 break;
4873         default:
4874                 device_id = 0;
4875                 thisvf_devfn = 0;
4876                 break;
4877         }
4878
4879         pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4880         while (pvfdev) {
4881                 if (pvfdev->devfn == thisvf_devfn)
4882                         break;
4883                 pvfdev = pci_get_device(hw->vendor_id,
4884                                         device_id, pvfdev);
4885         }
4886
4887         if (pvfdev)
4888                 adapter->vf_data[vf].vfdev = pvfdev;
4889         else
4890                 dev_err(&pdev->dev,
4891                         "Couldn't find pci dev ptr for VF %4.4x\n",
4892                         thisvf_devfn);
4893         return pvfdev != NULL;
4894 }
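/*
 * Example of the devfn arithmetic above, assuming a PF at devfn 0x00: with
 * the 82576 stride of 2, VF 3 is looked up at devfn 0x80 + (3 << 1) = 0x86,
 * while an i350 with its stride of 4 places the same VF at
 * 0x80 + (3 << 2) = 0x8c.
 */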
4895
4896 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
4897 {
4898         struct e1000_hw *hw = &adapter->hw;
4899         struct pci_dev *pdev = adapter->pdev;
4900         struct pci_dev *pvfdev;
4901         u16 vf_devfn = 0;
4902         u16 vf_stride;
4903         unsigned int device_id;
4904         int vfs_found = 0;
4905
4906         switch (adapter->hw.mac.type) {
4907         case e1000_82576:
4908                 device_id = IGB_82576_VF_DEV_ID;
4909                 /* VF Stride for 82576 is 2 */
4910                 vf_stride = 2;
4911                 break;
4912         case e1000_i350:
4913                 device_id = IGB_I350_VF_DEV_ID;
4914                 /* VF Stride for I350 is 4 */
4915                 vf_stride = 4;
4916                 break;
4917         default:
4918                 device_id = 0;
4919                 vf_stride = 0;
4920                 break;
4921         }
4922
4923         vf_devfn = pdev->devfn + 0x80;
4924         pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4925         while (pvfdev) {
4926                 if (pvfdev->devfn == vf_devfn &&
4927                     (pvfdev->bus->number >= pdev->bus->number))
4928                         vfs_found++;
4929                 vf_devfn += vf_stride;
4930                 pvfdev = pci_get_device(hw->vendor_id,
4931                                         device_id, pvfdev);
4932         }
4933
4934         return vfs_found;
4935 }
4936
4937 static int igb_check_vf_assignment(struct igb_adapter *adapter)
4938 {
4939         int i;
4940         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4941                 if (adapter->vf_data[i].vfdev) {
4942                         if (adapter->vf_data[i].vfdev->dev_flags &
4943                             PCI_DEV_FLAGS_ASSIGNED)
4944                                 return true;
4945                 }
4946         }
4947         return false;
4948 }
4949
4950 #endif
4951 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4952 {
4953         struct e1000_hw *hw = &adapter->hw;
4954         u32 ping;
4955         int i;
4956
4957         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4958                 ping = E1000_PF_CONTROL_MSG;
4959                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4960                         ping |= E1000_VT_MSGTYPE_CTS;
4961                 igb_write_mbx(hw, &ping, 1, i);
4962         }
4963 }
4964
4965 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4966 {
4967         struct e1000_hw *hw = &adapter->hw;
4968         u32 vmolr = rd32(E1000_VMOLR(vf));
4969         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4970
4971         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4972                             IGB_VF_FLAG_MULTI_PROMISC);
4973         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4974
4975         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4976                 vmolr |= E1000_VMOLR_MPME;
4977                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4978                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4979         } else {
4980                 /*
4981                  * if we have hashes and we are clearing a multicast promisc
4982                  * flag we need to write the hashes to the MTA as this step
4983                  * was previously skipped
4984                  */
4985                 if (vf_data->num_vf_mc_hashes > 30) {
4986                         vmolr |= E1000_VMOLR_MPME;
4987                 } else if (vf_data->num_vf_mc_hashes) {
4988                         int j;
4989                         vmolr |= E1000_VMOLR_ROMPE;
4990                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4991                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4992                 }
4993         }
4994
4995         wr32(E1000_VMOLR(vf), vmolr);
4996
4997         /* there are flags left unprocessed, likely not supported */
4998         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4999                 return -EINVAL;
5000
5001         return 0;
5002
5003 }
5004
5005 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5006                                   u32 *msgbuf, u32 vf)
5007 {
5008         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5009         u16 *hash_list = (u16 *)&msgbuf[1];
5010         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5011         int i;
5012
5013         /* salt away the number of multicast addresses assigned
5014          * to this VF for later use to restore when the PF multicast
5015          * list changes
5016          */
5017         vf_data->num_vf_mc_hashes = n;
5018
5019         /* only up to 30 hash values supported */
5020         if (n > 30)
5021                 n = 30;
5022
5023         /* store the hashes for later use */
5024         for (i = 0; i < n; i++)
5025                 vf_data->vf_mc_hashes[i] = hash_list[i];
5026
5027         /* Flush and reset the mta with the new values */
5028         igb_set_rx_mode(adapter->netdev);
5029
5030         return 0;
5031 }
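/*
 * Mailbox layout consumed above: the entry count travels in the
 * E1000_VT_MSGINFO field of msgbuf[0] and the multicast hash values follow
 * as packed u16s starting at msgbuf[1]; capping the stored list at 30
 * entries keeps the whole request within a single mailbox message.
 */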
5032
5033 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5034 {
5035         struct e1000_hw *hw = &adapter->hw;
5036         struct vf_data_storage *vf_data;
5037         int i, j;
5038
5039         for (i = 0; i < adapter->vfs_allocated_count; i++) {
5040                 u32 vmolr = rd32(E1000_VMOLR(i));
5041                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5042
5043                 vf_data = &adapter->vf_data[i];
5044
5045                 if ((vf_data->num_vf_mc_hashes > 30) ||
5046                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5047                         vmolr |= E1000_VMOLR_MPME;
5048                 } else if (vf_data->num_vf_mc_hashes) {
5049                         vmolr |= E1000_VMOLR_ROMPE;
5050                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5051                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5052                 }
5053                 wr32(E1000_VMOLR(i), vmolr);
5054         }
5055 }
5056
5057 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5058 {
5059         struct e1000_hw *hw = &adapter->hw;
5060         u32 pool_mask, reg, vid;
5061         int i;
5062
5063         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5064
5065         /* Find the vlan filter for this id */
5066         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5067                 reg = rd32(E1000_VLVF(i));
5068
5069                 /* remove the vf from the pool */
5070                 reg &= ~pool_mask;
5071
5072                 /* if pool is empty then remove entry from vfta */
5073                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5074                     (reg & E1000_VLVF_VLANID_ENABLE)) {
5075                         vid = reg & E1000_VLVF_VLANID_MASK;
5076                         igb_vfta_set(hw, vid, false);
5077                         reg = 0;
5078                 }
5079
5080                 wr32(E1000_VLVF(i), reg);
5081         }
5082
5083         adapter->vf_data[vf].vlans_enabled = 0;
5084 }
5085
5086 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5087 {
5088         struct e1000_hw *hw = &adapter->hw;
5089         u32 reg, i;
5090
5091         /* The vlvf table only exists on 82576 hardware and newer */
5092         if (hw->mac.type < e1000_82576)
5093                 return -1;
5094
5095         /* we only need to do this if VMDq is enabled */
5096         if (!adapter->vfs_allocated_count)
5097                 return -1;
5098
5099         /* Find the vlan filter for this id */
5100         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5101                 reg = rd32(E1000_VLVF(i));
5102                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5103                     vid == (reg & E1000_VLVF_VLANID_MASK))
5104                         break;
5105         }
5106
5107         if (add) {
5108                 if (i == E1000_VLVF_ARRAY_SIZE) {
5109                         /* Did not find a matching VLAN ID entry that was
5110                          * enabled.  Search for a free filter entry, i.e.
5111                          * one without the enable bit set
5112                          */
5113                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5114                                 reg = rd32(E1000_VLVF(i));
5115                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5116                                         break;
5117                         }
5118                 }
5119                 if (i < E1000_VLVF_ARRAY_SIZE) {
5120                         /* Found an enabled/available entry */
5121                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5122
5123                         /* if !enabled we need to set this up in vfta */
5124                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5125                                 /* add VID to filter table */
5126                                 igb_vfta_set(hw, vid, true);
5127                                 reg |= E1000_VLVF_VLANID_ENABLE;
5128                         }
5129                         reg &= ~E1000_VLVF_VLANID_MASK;
5130                         reg |= vid;
5131                         wr32(E1000_VLVF(i), reg);
5132
5133                         /* do not modify RLPML for PF devices */
5134                         if (vf >= adapter->vfs_allocated_count)
5135                                 return 0;
5136
5137                         if (!adapter->vf_data[vf].vlans_enabled) {
5138                                 u32 size;
5139                                 reg = rd32(E1000_VMOLR(vf));
5140                                 size = reg & E1000_VMOLR_RLPML_MASK;
5141                                 size += 4;
5142                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5143                                 reg |= size;
5144                                 wr32(E1000_VMOLR(vf), reg);
5145                         }
5146
5147                         adapter->vf_data[vf].vlans_enabled++;
5148                 }
5149         } else {
5150                 if (i < E1000_VLVF_ARRAY_SIZE) {
5151                         /* remove vf from the pool */
5152                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5153                         /* if pool is empty then remove entry from vfta */
5154                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5155                                 reg = 0;
5156                                 igb_vfta_set(hw, vid, false);
5157                         }
5158                         wr32(E1000_VLVF(i), reg);
5159
5160                         /* do not modify RLPML for PF devices */
5161                         if (vf >= adapter->vfs_allocated_count)
5162                                 return 0;
5163
5164                         adapter->vf_data[vf].vlans_enabled--;
5165                         if (!adapter->vf_data[vf].vlans_enabled) {
5166                                 u32 size;
5167                                 reg = rd32(E1000_VMOLR(vf));
5168                                 size = reg & E1000_VMOLR_RLPML_MASK;
5169                                 size -= 4;
5170                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5171                                 reg |= size;
5172                                 wr32(E1000_VMOLR(vf), reg);
5173                         }
5174                 }
5175         }
5176         return 0;
5177 }
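/*
 * Each VLVF entry combines the VLAN id in its low bits, an enable flag and
 * a per-pool bitmap above E1000_VLVF_POOLSEL_SHIFT, so adding a VF is just
 * an OR of its pool bit, and the VFTA bit for the id is only touched when
 * the first pool joins or the last one leaves.  The +4/-4 adjustments to
 * VMOLR.RLPML account for the 4 byte VLAN tag in the VF's maximum accepted
 * frame size.
 */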
5178
5179 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5180 {
5181         struct e1000_hw *hw = &adapter->hw;
5182
5183         if (vid)
5184                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5185         else
5186                 wr32(E1000_VMVIR(vf), 0);
5187 }
5188
5189 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5190                                int vf, u16 vlan, u8 qos)
5191 {
5192         int err = 0;
5193         struct igb_adapter *adapter = netdev_priv(netdev);
5194
5195         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5196                 return -EINVAL;
5197         if (vlan || qos) {
5198                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5199                 if (err)
5200                         goto out;
5201                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5202                 igb_set_vmolr(adapter, vf, !vlan);
5203                 adapter->vf_data[vf].pf_vlan = vlan;
5204                 adapter->vf_data[vf].pf_qos = qos;
5205                 dev_info(&adapter->pdev->dev,
5206                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5207                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5208                         dev_warn(&adapter->pdev->dev,
5209                                  "The VF VLAN has been set,"
5210                                  " but the PF device is not up.\n");
5211                         dev_warn(&adapter->pdev->dev,
5212                                  "Bring the PF device up before"
5213                                  " attempting to use the VF device.\n");
5214                 }
5215         } else {
5216                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5217                                    false, vf);
5218                 igb_set_vmvir(adapter, vlan, vf);
5219                 igb_set_vmolr(adapter, vf, true);
5220                 adapter->vf_data[vf].pf_vlan = 0;
5221                 adapter->vf_data[vf].pf_qos = 0;
5222         }
5223 out:
5224         return err;
5225 }
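/*
 * This is the ndo hook behind PF-side administrative VLAN configuration,
 * typically reached through iproute2 with something like
 * "ip link set <pf> vf 0 vlan 100 qos 3"; passing vlan 0 and qos 0 takes
 * the else branch above and clears the administrative tag again.
 */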
5226
5227 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5228 {
5229         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5230         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5231
5232         return igb_vlvf_set(adapter, vid, add, vf);
5233 }
5234
5235 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5236 {
5237         /* clear flags - except flag that indicates PF has set the MAC */
5238         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5239         adapter->vf_data[vf].last_nack = jiffies;
5240
5241         /* reset offloads to defaults */
5242         igb_set_vmolr(adapter, vf, true);
5243
5244         /* reset vlans for device */
5245         igb_clear_vf_vfta(adapter, vf);
5246         if (adapter->vf_data[vf].pf_vlan)
5247                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5248                                     adapter->vf_data[vf].pf_vlan,
5249                                     adapter->vf_data[vf].pf_qos);
5250         else
5251                 igb_clear_vf_vfta(adapter, vf);
5252
5253         /* reset multicast table array for vf */
5254         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5255
5256         /* Flush and reset the mta with the new values */
5257         igb_set_rx_mode(adapter->netdev);
5258 }
5259
5260 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5261 {
5262         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5263
5264         /* generate a new mac address as we were hotplug removed/added */
5265         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5266                 random_ether_addr(vf_mac);
5267
5268         /* process remaining reset events */
5269         igb_vf_reset(adapter, vf);
5270 }
5271
5272 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5273 {
5274         struct e1000_hw *hw = &adapter->hw;
5275         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5276         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5277         u32 reg, msgbuf[3];
5278         u8 *addr = (u8 *)(&msgbuf[1]);
5279
5280         /* process all the same items cleared in a function level reset */
5281         igb_vf_reset(adapter, vf);
5282
5283         /* set vf mac address */
5284         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5285
5286         /* enable transmit and receive for vf */
5287         reg = rd32(E1000_VFTE);
5288         wr32(E1000_VFTE, reg | (1 << vf));
5289         reg = rd32(E1000_VFRE);
5290         wr32(E1000_VFRE, reg | (1 << vf));
5291
5292         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5293
5294         /* reply to reset with ack and vf mac address */
5295         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5296         memcpy(addr, vf_mac, 6);
5297         igb_write_mbx(hw, msgbuf, 3, vf);
5298 }
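/*
 * The reset reply assembled above is three dwords: msgbuf[0] carries
 * E1000_VF_RESET | E1000_VT_MSGTYPE_ACK and the six MAC address bytes are
 * packed into msgbuf[1] and msgbuf[2], the same layout that
 * igb_set_vf_mac_addr() parses when a VF later requests a MAC change.
 */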
5299
5300 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5301 {
5302         /*
5303          * The VF MAC Address is stored in a packed array of bytes
5304          * starting at the second 32 bit word of the msg array
5305          */
5306         unsigned char *addr = (unsigned char *)&msg[1];
5307         int err = -1;
5308
5309         if (is_valid_ether_addr(addr))
5310                 err = igb_set_vf_mac(adapter, vf, addr);
5311
5312         return err;
5313 }
5314
5315 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5316 {
5317         struct e1000_hw *hw = &adapter->hw;
5318         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5319         u32 msg = E1000_VT_MSGTYPE_NACK;
5320
5321         /* if device isn't clear to send it shouldn't be reading either */
5322         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5323             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5324                 igb_write_mbx(hw, &msg, 1, vf);
5325                 vf_data->last_nack = jiffies;
5326         }
5327 }
5328
5329 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5330 {
5331         struct pci_dev *pdev = adapter->pdev;
5332         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5333         struct e1000_hw *hw = &adapter->hw;
5334         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5335         s32 retval;
5336
5337         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5338
5339         if (retval) {
5340                 /* if receive failed revoke VF CTS status and restart init */
5341                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5342                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5343                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5344                         return;
5345                 goto out;
5346         }
5347
5348         /* this is a message we already processed, do nothing */
5349         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5350                 return;
5351
5352         /*
5353          * until the vf completes a reset it should not be
5354          * allowed to start any configuration.
5355          */
5356
5357         if (msgbuf[0] == E1000_VF_RESET) {
5358                 igb_vf_reset_msg(adapter, vf);
5359                 return;
5360         }
5361
5362         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5363                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5364                         return;
5365                 retval = -1;
5366                 goto out;
5367         }
5368
5369         switch ((msgbuf[0] & 0xFFFF)) {
5370         case E1000_VF_SET_MAC_ADDR:
5371                 retval = -EINVAL;
5372                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5373                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5374                 else
5375                         dev_warn(&pdev->dev,
5376                                  "VF %d attempted to override administratively "
5377                                  "set MAC address\nReload the VF driver to "
5378                                  "resume operations\n", vf);
5379                 break;
5380         case E1000_VF_SET_PROMISC:
5381                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5382                 break;
5383         case E1000_VF_SET_MULTICAST:
5384                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5385                 break;
5386         case E1000_VF_SET_LPE:
5387                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5388                 break;
5389         case E1000_VF_SET_VLAN:
5390                 retval = -1;
5391                 if (vf_data->pf_vlan)
5392                         dev_warn(&pdev->dev,
5393                                  "VF %d attempted to override administratively "
5394                                  "set VLAN tag\nReload the VF driver to "
5395                                  "resume operations\n", vf);
5396                 else
5397                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5398                 break;
5399         default:
5400                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5401                 retval = -1;
5402                 break;
5403         }
5404
5405         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5406 out:
5407         /* notify the VF of the results of what it sent us */
5408         if (retval)
5409                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5410         else
5411                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5412
5413         igb_write_mbx(hw, msgbuf, 1, vf);
5414 }
5415
5416 static void igb_msg_task(struct igb_adapter *adapter)
5417 {
5418         struct e1000_hw *hw = &adapter->hw;
5419         u32 vf;
5420
5421         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5422                 /* process any reset requests */
5423                 if (!igb_check_for_rst(hw, vf))
5424                         igb_vf_reset_event(adapter, vf);
5425
5426                 /* process any messages pending */
5427                 if (!igb_check_for_msg(hw, vf))
5428                         igb_rcv_msg_from_vf(adapter, vf);
5429
5430                 /* process any acks */
5431                 if (!igb_check_for_ack(hw, vf))
5432                         igb_rcv_ack_from_vf(adapter, vf);
5433         }
5434 }
5435
5436 /**
5437  *  igb_set_uta - Set unicast filter table address
5438  *  @adapter: board private structure
5439  *
5440  *  The unicast table address is a register array of 32-bit registers.
5441  *  The table is meant to be used in a way similar to how the MTA is used
5442  *  The table is meant to be used in a way similar to how the MTA is used;
5443  *  however, due to certain limitations in the hardware it is necessary to
5444  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5445  **/
5446 static void igb_set_uta(struct igb_adapter *adapter)
5447 {
5448         struct e1000_hw *hw = &adapter->hw;
5449         int i;
5450
5451         /* The UTA table only exists on 82576 hardware and newer */
5452         if (hw->mac.type < e1000_82576)
5453                 return;
5454
5455         /* we only need to do this if VMDq is enabled */
5456         if (!adapter->vfs_allocated_count)
5457                 return;
5458
5459         for (i = 0; i < hw->mac.uta_reg_count; i++)
5460                 array_wr32(E1000_UTA, i, ~0);
5461 }
5462
5463 /**
5464  * igb_intr_msi - Interrupt Handler
5465  * @irq: interrupt number
5466  * @data: pointer to a network interface device structure
5467  **/
5468 static irqreturn_t igb_intr_msi(int irq, void *data)
5469 {
5470         struct igb_adapter *adapter = data;
5471         struct igb_q_vector *q_vector = adapter->q_vector[0];
5472         struct e1000_hw *hw = &adapter->hw;
5473         /* reading ICR disables interrupts using IAM */
5474         u32 icr = rd32(E1000_ICR);
5475
5476         igb_write_itr(q_vector);
5477
5478         if (icr & E1000_ICR_DRSTA)
5479                 schedule_work(&adapter->reset_task);
5480
5481         if (icr & E1000_ICR_DOUTSYNC) {
5482                 /* HW is reporting DMA is out of sync */
5483                 adapter->stats.doosync++;
5484         }
5485
5486         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5487                 hw->mac.get_link_status = 1;
5488                 if (!test_bit(__IGB_DOWN, &adapter->state))
5489                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5490         }
5491
5492         napi_schedule(&q_vector->napi);
5493
5494         return IRQ_HANDLED;
5495 }
5496
5497 /**
5498  * igb_intr - Legacy Interrupt Handler
5499  * @irq: interrupt number
5500  * @data: pointer to a network interface device structure
5501  **/
5502 static irqreturn_t igb_intr(int irq, void *data)
5503 {
5504         struct igb_adapter *adapter = data;
5505         struct igb_q_vector *q_vector = adapter->q_vector[0];
5506         struct e1000_hw *hw = &adapter->hw;
5507         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5508          * need for the IMC write */
5509         u32 icr = rd32(E1000_ICR);
5510
5511         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5512          * not set, then the adapter didn't send an interrupt */
5513         if (!(icr & E1000_ICR_INT_ASSERTED))
5514                 return IRQ_NONE;
5515
5516         igb_write_itr(q_vector);
5517
5518         if (icr & E1000_ICR_DRSTA)
5519                 schedule_work(&adapter->reset_task);
5520
5521         if (icr & E1000_ICR_DOUTSYNC) {
5522                 /* HW is reporting DMA is out of sync */
5523                 adapter->stats.doosync++;
5524         }
5525
5526         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5527                 hw->mac.get_link_status = 1;
5528                 /* guard against interrupt when we're going down */
5529                 if (!test_bit(__IGB_DOWN, &adapter->state))
5530                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5531         }
5532
5533         napi_schedule(&q_vector->napi);
5534
5535         return IRQ_HANDLED;
5536 }
5537
5538 static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5539 {
5540         struct igb_adapter *adapter = q_vector->adapter;
5541         struct e1000_hw *hw = &adapter->hw;
5542
5543         if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5544             (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5545                 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5546                         igb_set_itr(q_vector);
5547                 else
5548                         igb_update_ring_itr(q_vector);
5549         }
5550
5551         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5552                 if (adapter->msix_entries)
5553                         wr32(E1000_EIMS, q_vector->eims_value);
5554                 else
5555                         igb_irq_enable(adapter);
5556         }
5557 }
5558
5559 /**
5560  * igb_poll - NAPI Rx polling callback
5561  * @napi: napi polling structure
5562  * @budget: count of how many packets we should handle
5563  **/
5564 static int igb_poll(struct napi_struct *napi, int budget)
5565 {
5566         struct igb_q_vector *q_vector = container_of(napi,
5567                                                      struct igb_q_vector,
5568                                                      napi);
5569         bool clean_complete = true;
5570
5571 #ifdef CONFIG_IGB_DCA
5572         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5573                 igb_update_dca(q_vector);
5574 #endif
5575         if (q_vector->tx.ring)
5576                 clean_complete = igb_clean_tx_irq(q_vector);
5577
5578         if (q_vector->rx.ring)
5579                 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5580
5581         /* If all work not completed, return budget and keep polling */
5582         if (!clean_complete)
5583                 return budget;
5584
5585         /* If not enough Rx work done, exit the polling mode */
5586         napi_complete(napi);
5587         igb_ring_irq_enable(q_vector);
5588
5589         return 0;
5590 }
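/*
 * Standard NAPI contract: returning the full budget keeps this vector in
 * polled mode, while returning 0 after napi_complete() defers interrupt
 * re-enabling to igb_ring_irq_enable(), which writes EIMS in the MSI-X
 * case or re-enables the legacy/MSI interrupt otherwise.
 */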
5591
5592 #ifdef CONFIG_IGB_PTP
5593 /**
5594  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5595  * @q_vector: pointer to q_vector containing needed info
5596  * @buffer: pointer to igb_tx_buffer structure
5597  *
5598  * If we were asked to do hardware stamping and such a time stamp is
5599  * available, then it must have been for this skb here because we only
5600  * allow one such packet into the queue.
5601  */
5602 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5603                             struct igb_tx_buffer *buffer_info)
5604 {
5605         struct igb_adapter *adapter = q_vector->adapter;
5606         struct e1000_hw *hw = &adapter->hw;
5607         struct skb_shared_hwtstamps shhwtstamps;
5608         u64 regval;
5609
5610         /* if skb does not support hw timestamp or TX stamp not valid exit */
5611         if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5612             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5613                 return;
5614
5615         regval = rd32(E1000_TXSTMPL);
5616         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5617
5618         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5619         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5620 }
5621
5622 #endif
5623 /**
5624  * igb_clean_tx_irq - Reclaim resources after transmit completes
5625  * @q_vector: pointer to q_vector containing needed info
5626  * returns true if ring is completely cleaned
5627  **/
5628 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5629 {
5630         struct igb_adapter *adapter = q_vector->adapter;
5631         struct igb_ring *tx_ring = q_vector->tx.ring;
5632         struct igb_tx_buffer *tx_buffer;
5633         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5634         unsigned int total_bytes = 0, total_packets = 0;
5635         unsigned int budget = q_vector->tx.work_limit;
5636         unsigned int i = tx_ring->next_to_clean;
5637
5638         if (test_bit(__IGB_DOWN, &adapter->state))
5639                 return true;
5640
5641         tx_buffer = &tx_ring->tx_buffer_info[i];
5642         tx_desc = IGB_TX_DESC(tx_ring, i);
5643         i -= tx_ring->count;
5644
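             /*
              * i was biased by -tx_ring->count above so that the wrap check in
              * this loop reduces to a test for zero (if (unlikely(!i))) instead
              * of a compare against the ring size on every descriptor.
              */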
5645         for (; budget; budget--) {
5646                 eop_desc = tx_buffer->next_to_watch;
5647
5648                 /* prevent any other reads prior to eop_desc */
5649                 rmb();
5650
5651                 /* if next_to_watch is not set then there is no work pending */
5652                 if (!eop_desc)
5653                         break;
5654
5655                 /* if DD is not set pending work has not been completed */
5656                 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5657                         break;
5658
5659                 /* clear next_to_watch to prevent false hangs */
5660                 tx_buffer->next_to_watch = NULL;
5661
5662                 /* update the statistics for this packet */
5663                 total_bytes += tx_buffer->bytecount;
5664                 total_packets += tx_buffer->gso_segs;
5665
5666 #ifdef CONFIG_IGB_PTP
5667                 /* retrieve hardware timestamp */
5668                 igb_tx_hwtstamp(q_vector, tx_buffer);
5669
5670 #endif
5671                 /* free the skb */
5672                 dev_kfree_skb_any(tx_buffer->skb);
5673                 tx_buffer->skb = NULL;
5674
5675                 /* unmap skb header data */
5676                 dma_unmap_single(tx_ring->dev,
5677                                  tx_buffer->dma,
5678                                  tx_buffer->length,
5679                                  DMA_TO_DEVICE);
5680
5681                 /* clear last DMA location and unmap remaining buffers */
5682                 while (tx_desc != eop_desc) {
5683                         tx_buffer->dma = 0;
5684
5685                         tx_buffer++;
5686                         tx_desc++;
5687                         i++;
5688                         if (unlikely(!i)) {
5689                                 i -= tx_ring->count;
5690                                 tx_buffer = tx_ring->tx_buffer_info;
5691                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
5692                         }
5693
5694                         /* unmap any remaining paged data */
5695                         if (tx_buffer->dma) {
5696                                 dma_unmap_page(tx_ring->dev,
5697                                                tx_buffer->dma,
5698                                                tx_buffer->length,
5699                                                DMA_TO_DEVICE);
5700                         }
5701                 }
5702
5703                 /* clear last DMA location */
5704                 tx_buffer->dma = 0;
5705
5706                 /* move us one more past the eop_desc for start of next pkt */
5707                 tx_buffer++;
5708                 tx_desc++;
5709                 i++;
5710                 if (unlikely(!i)) {
5711                         i -= tx_ring->count;
5712                         tx_buffer = tx_ring->tx_buffer_info;
5713                         tx_desc = IGB_TX_DESC(tx_ring, 0);
5714                 }
5715         }
5716
5717         netdev_tx_completed_queue(txring_txq(tx_ring),
5718                                   total_packets, total_bytes);
5719         i += tx_ring->count;
5720         tx_ring->next_to_clean = i;
5721         u64_stats_update_begin(&tx_ring->tx_syncp);
5722         tx_ring->tx_stats.bytes += total_bytes;
5723         tx_ring->tx_stats.packets += total_packets;
5724         u64_stats_update_end(&tx_ring->tx_syncp);
5725         q_vector->tx.total_bytes += total_bytes;
5726         q_vector->tx.total_packets += total_packets;
5727
5728         if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5729                 struct e1000_hw *hw = &adapter->hw;
5730
5731                 eop_desc = tx_buffer->next_to_watch;
5732
5733                 /* Detect a transmit hang in hardware; this serializes the
5734                  * check with the clearing of time_stamp and movement of i */
5735                 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5736                 if (eop_desc &&
5737                     time_after(jiffies, tx_buffer->time_stamp +
5738                                (adapter->tx_timeout_factor * HZ)) &&
5739                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5740
5741                         /* detected Tx unit hang */
5742                         dev_err(tx_ring->dev,
5743                                 "Detected Tx Unit Hang\n"
5744                                 "  Tx Queue             <%d>\n"
5745                                 "  TDH                  <%x>\n"
5746                                 "  TDT                  <%x>\n"
5747                                 "  next_to_use          <%x>\n"
5748                                 "  next_to_clean        <%x>\n"
5749                                 "buffer_info[next_to_clean]\n"
5750                                 "  time_stamp           <%lx>\n"
5751                                 "  next_to_watch        <%p>\n"
5752                                 "  jiffies              <%lx>\n"
5753                                 "  desc.status          <%x>\n",
5754                                 tx_ring->queue_index,
5755                                 rd32(E1000_TDH(tx_ring->reg_idx)),
5756                                 readl(tx_ring->tail),
5757                                 tx_ring->next_to_use,
5758                                 tx_ring->next_to_clean,
5759                                 tx_buffer->time_stamp,
5760                                 eop_desc,
5761                                 jiffies,
5762                                 eop_desc->wb.status);
5763                         netif_stop_subqueue(tx_ring->netdev,
5764                                             tx_ring->queue_index);
5765
5766                         /* we are about to reset, no point in enabling stuff */
5767                         return true;
5768                 }
5769         }
5770
5771         if (unlikely(total_packets &&
5772                      netif_carrier_ok(tx_ring->netdev) &&
5773                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5774                 /* Make sure that anybody stopping the queue after this
5775                  * sees the new next_to_clean.
5776                  */
5777                 smp_mb();
5778                 if (__netif_subqueue_stopped(tx_ring->netdev,
5779                                              tx_ring->queue_index) &&
5780                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5781                         netif_wake_subqueue(tx_ring->netdev,
5782                                             tx_ring->queue_index);
5783
5784                         u64_stats_update_begin(&tx_ring->tx_syncp);
5785                         tx_ring->tx_stats.restart_queue++;
5786                         u64_stats_update_end(&tx_ring->tx_syncp);
5787                 }
5788         }
5789
5790         return !!budget;
5791 }
5792
5793 static inline void igb_rx_checksum(struct igb_ring *ring,
5794                                    union e1000_adv_rx_desc *rx_desc,
5795                                    struct sk_buff *skb)
5796 {
5797         skb_checksum_none_assert(skb);
5798
5799         /* Ignore Checksum bit is set */
5800         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5801                 return;
5802
5803         /* Rx checksum disabled via ethtool */
5804         if (!(ring->netdev->features & NETIF_F_RXCSUM))
5805                 return;
5806
5807         /* TCP/UDP checksum error bit is set */
5808         if (igb_test_staterr(rx_desc,
5809                              E1000_RXDEXT_STATERR_TCPE |
5810                              E1000_RXDEXT_STATERR_IPE)) {
5811                 /*
5812                  * work around errata with sctp packets where the TCPE aka
5813                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5814                  * packets, (aka let the stack check the crc32c)
5815                  */
5816                 if (!((skb->len == 60) &&
5817                       test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5818                         u64_stats_update_begin(&ring->rx_syncp);
5819                         ring->rx_stats.csum_err++;
5820                         u64_stats_update_end(&ring->rx_syncp);
5821                 }
5822                 /* let the stack verify checksum errors */
5823                 return;
5824         }
5825         /* It must be a TCP or UDP packet with a valid checksum */
5826         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5827                                       E1000_RXD_STAT_UDPCS))
5828                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5829
5830         dev_dbg(ring->dev, "cksum success: bits %08X\n",
5831                 le32_to_cpu(rx_desc->wb.upper.status_error));
5832 }
5833
5834 static inline void igb_rx_hash(struct igb_ring *ring,
5835                                union e1000_adv_rx_desc *rx_desc,
5836                                struct sk_buff *skb)
5837 {
5838         if (ring->netdev->features & NETIF_F_RXHASH)
5839                 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5840 }
5841
5842 #ifdef CONFIG_IGB_PTP
5843 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5844                             union e1000_adv_rx_desc *rx_desc,
5845                             struct sk_buff *skb)
5846 {
5847         struct igb_adapter *adapter = q_vector->adapter;
5848         struct e1000_hw *hw = &adapter->hw;
5849         u64 regval;
5850
5851         if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5852                                        E1000_RXDADV_STAT_TS))
5853                 return;
5854
5855         /*
5856          * If this bit is set, then the RX registers contain the time stamp. No
5857          * other packet will be time stamped until we read these registers, so
5858          * read the registers to make them available again. Because only one
5859          * packet can be time stamped at a time, we know that the register
5860          * values must belong to this one here and therefore we don't need to
5861          * compare any of the additional attributes stored for it.
5862          *
5863          * If nothing went wrong, then it should have a shared tx_flags that we
5864          * can turn into a skb_shared_hwtstamps.
5865          */
5866         if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5867                 u32 *stamp = (u32 *)skb->data;
5868                 regval = le32_to_cpu(*(stamp + 2));
5869                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5870                 skb_pull(skb, IGB_TS_HDR_LEN);
5871         } else {
5872                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5873                         return;
5874
5875                 regval = rd32(E1000_RXSTMPL);
5876                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5877         }
5878
5879         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5880 }
5881
5882 #endif
5883 static void igb_rx_vlan(struct igb_ring *ring,
5884                         union e1000_adv_rx_desc *rx_desc,
5885                         struct sk_buff *skb)
5886 {
5887         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
5888                 u16 vid;
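                     /*
                      * On rings flagged RX_LB_VLAN_BSWAP, locally looped-back
                      * packets carry the VLAN tag in network byte order, so it
                      * is swapped differently from normal received traffic.
                      */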
5889                 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
5890                     test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
5891                         vid = be16_to_cpu(rx_desc->wb.upper.vlan);
5892                 else
5893                         vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5894
5895                 __vlan_hwaccel_put_tag(skb, vid);
5896         }
5897 }
5898
5899 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5900 {
5901         /* HW will not DMA in data larger than the given buffer, even if it
5902          * parses the (NFS, of course) header to be larger.  In that case, it
5903          * fills the header buffer and spills the rest into the page.
5904          */
5905         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5906                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5907         if (hlen > IGB_RX_HDR_LEN)
5908                 hlen = IGB_RX_HDR_LEN;
5909         return hlen;
5910 }
5911
5912 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5913 {
5914         struct igb_ring *rx_ring = q_vector->rx.ring;
5915         union e1000_adv_rx_desc *rx_desc;
5916         const int current_node = numa_node_id();
5917         unsigned int total_bytes = 0, total_packets = 0;
5918         u16 cleaned_count = igb_desc_unused(rx_ring);
5919         u16 i = rx_ring->next_to_clean;
5920
5921         rx_desc = IGB_RX_DESC(rx_ring, i);
5922
5923         while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
5924                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
5925                 struct sk_buff *skb = buffer_info->skb;
5926                 union e1000_adv_rx_desc *next_rxd;
5927
5928                 buffer_info->skb = NULL;
5929                 prefetch(skb->data);
5930
5931                 i++;
5932                 if (i == rx_ring->count)
5933                         i = 0;
5934
5935                 next_rxd = IGB_RX_DESC(rx_ring, i);
5936                 prefetch(next_rxd);
5937
5938                 /*
5939                  * This memory barrier is needed to keep us from reading
5940                  * any other fields out of the rx_desc until we know the
5941                  * RXD_STAT_DD bit is set
5942                  */
5943                 rmb();
5944
5945                 if (!skb_is_nonlinear(skb)) {
5946                         __skb_put(skb, igb_get_hlen(rx_desc));
5947                         dma_unmap_single(rx_ring->dev, buffer_info->dma,
5948                                          IGB_RX_HDR_LEN,
5949                                          DMA_FROM_DEVICE);
5950                         buffer_info->dma = 0;
5951                 }
5952
5953                 if (rx_desc->wb.upper.length) {
5954                         u16 length = le16_to_cpu(rx_desc->wb.upper.length);
5955
5956                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5957                                                 buffer_info->page,
5958                                                 buffer_info->page_offset,
5959                                                 length);
5960
5961                         skb->len += length;
5962                         skb->data_len += length;
5963                         skb->truesize += PAGE_SIZE / 2;
5964
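                             /*
                              * Recycle the half page only when we hold the sole
                              * reference and it sits on the local NUMA node;
                              * otherwise drop it so the next refill allocates a
                              * fresh page.
                              */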
5965                         if ((page_count(buffer_info->page) != 1) ||
5966                             (page_to_nid(buffer_info->page) != current_node))
5967                                 buffer_info->page = NULL;
5968                         else
5969                                 get_page(buffer_info->page);
5970
5971                         dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
5972                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5973                         buffer_info->page_dma = 0;
5974                 }
5975
5976                 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
5977                         struct igb_rx_buffer *next_buffer;
5978                         next_buffer = &rx_ring->rx_buffer_info[i];
5979                         buffer_info->skb = next_buffer->skb;
5980                         buffer_info->dma = next_buffer->dma;
5981                         next_buffer->skb = skb;
5982                         next_buffer->dma = 0;
5983                         goto next_desc;
5984                 }
5985
5986                 if (unlikely((igb_test_staterr(rx_desc,
5987                                                E1000_RXDEXT_ERR_FRAME_ERR_MASK))
5988                              && !(rx_ring->netdev->features & NETIF_F_RXALL))) {
5989                         dev_kfree_skb_any(skb);
5990                         goto next_desc;
5991                 }
5992
5993 #ifdef CONFIG_IGB_PTP
5994                 igb_rx_hwtstamp(q_vector, rx_desc, skb);
5995 #endif
5996                 igb_rx_hash(rx_ring, rx_desc, skb);
5997                 igb_rx_checksum(rx_ring, rx_desc, skb);
5998                 igb_rx_vlan(rx_ring, rx_desc, skb);
5999
6000                 total_bytes += skb->len;
6001                 total_packets++;
6002
6003                 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6004
6005                 napi_gro_receive(&q_vector->napi, skb);
6006
6007                 budget--;
6008 next_desc:
6009                 if (!budget)
6010                         break;
6011
6012                 cleaned_count++;
6013                 /* return some buffers to hardware, one at a time is too slow */
6014                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6015                         igb_alloc_rx_buffers(rx_ring, cleaned_count);
6016                         cleaned_count = 0;
6017                 }
6018
6019                 /* use prefetched values */
6020                 rx_desc = next_rxd;
6021         }
6022
6023         rx_ring->next_to_clean = i;
6024         u64_stats_update_begin(&rx_ring->rx_syncp);
6025         rx_ring->rx_stats.packets += total_packets;
6026         rx_ring->rx_stats.bytes += total_bytes;
6027         u64_stats_update_end(&rx_ring->rx_syncp);
6028         q_vector->rx.total_packets += total_packets;
6029         q_vector->rx.total_bytes += total_bytes;
6030
6031         if (cleaned_count)
6032                 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6033
6034         return !!budget;
6035 }
6036
6037 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6038                                  struct igb_rx_buffer *bi)
6039 {
6040         struct sk_buff *skb = bi->skb;
6041         dma_addr_t dma = bi->dma;
6042
6043         if (dma)
6044                 return true;
6045
6046         if (likely(!skb)) {
6047                 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6048                                                 IGB_RX_HDR_LEN);
6049                 bi->skb = skb;
6050                 if (!skb) {
6051                         rx_ring->rx_stats.alloc_failed++;
6052                         return false;
6053                 }
6054
6055                 /* initialize skb for ring */
6056                 skb_record_rx_queue(skb, rx_ring->queue_index);
6057         }
6058
6059         dma = dma_map_single(rx_ring->dev, skb->data,
6060                              IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6061
6062         if (dma_mapping_error(rx_ring->dev, dma)) {
6063                 rx_ring->rx_stats.alloc_failed++;
6064                 return false;
6065         }
6066
6067         bi->dma = dma;
6068         return true;
6069 }
6070
6071 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6072                                   struct igb_rx_buffer *bi)
6073 {
6074         struct page *page = bi->page;
6075         dma_addr_t page_dma = bi->page_dma;
6076         unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6077
6078         if (page_dma)
6079                 return true;
6080
6081         if (!page) {
6082                 page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6083                 bi->page = page;
6084                 if (unlikely(!page)) {
6085                         rx_ring->rx_stats.alloc_failed++;
6086                         return false;
6087                 }
6088         }
6089
6090         page_dma = dma_map_page(rx_ring->dev, page,
6091                                 page_offset, PAGE_SIZE / 2,
6092                                 DMA_FROM_DEVICE);
6093
6094         if (dma_mapping_error(rx_ring->dev, page_dma)) {
6095                 rx_ring->rx_stats.alloc_failed++;
6096                 return false;
6097         }
6098
6099         bi->page_dma = page_dma;
6100         bi->page_offset = page_offset;
6101         return true;
6102 }
6103
6104 /**
6105  * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6106  * @rx_ring: rx descriptor ring to refill
 * @cleaned_count: number of buffers to allocate
6107  **/
6108 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6109 {
6110         union e1000_adv_rx_desc *rx_desc;
6111         struct igb_rx_buffer *bi;
6112         u16 i = rx_ring->next_to_use;
6113
6114         rx_desc = IGB_RX_DESC(rx_ring, i);
6115         bi = &rx_ring->rx_buffer_info[i];
6116         i -= rx_ring->count;
6117
6118         while (cleaned_count--) {
6119                 if (!igb_alloc_mapped_skb(rx_ring, bi))
6120                         break;
6121
6122                 /* Refresh the desc even if buffer_addrs didn't change
6123                  * because each write-back erases this info. */
6124                 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6125
6126                 if (!igb_alloc_mapped_page(rx_ring, bi))
6127                         break;
6128
6129                 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6130
6131                 rx_desc++;
6132                 bi++;
6133                 i++;
6134                 if (unlikely(!i)) {
6135                         rx_desc = IGB_RX_DESC(rx_ring, 0);
6136                         bi = rx_ring->rx_buffer_info;
6137                         i -= rx_ring->count;
6138                 }
6139
6140                 /* clear the hdr_addr for the next_to_use descriptor */
6141                 rx_desc->read.hdr_addr = 0;
6142         }
6143
6144         i += rx_ring->count;
6145
6146         if (rx_ring->next_to_use != i) {
6147                 rx_ring->next_to_use = i;
6148
6149                 /* Force memory writes to complete before letting h/w
6150                  * know there are new descriptors to fetch.  (Only
6151                  * applicable for weak-ordered memory model archs,
6152                  * such as IA-64). */
6153                 wmb();
6154                 writel(i, rx_ring->tail);
6155         }
6156 }
6157
6158 /**
6159  * igb_mii_ioctl - read or write PHY registers via the MII ioctls
6160  * @netdev: network interface device structure
6161  * @ifr: pointer to the interface request structure
6162  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6163  **/
6164 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6165 {
6166         struct igb_adapter *adapter = netdev_priv(netdev);
6167         struct mii_ioctl_data *data = if_mii(ifr);
6168
6169         if (adapter->hw.phy.media_type != e1000_media_type_copper)
6170                 return -EOPNOTSUPP;
6171
6172         switch (cmd) {
6173         case SIOCGMIIPHY:
6174                 data->phy_id = adapter->hw.phy.addr;
6175                 break;
6176         case SIOCGMIIREG:
6177                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6178                                      &data->val_out))
6179                         return -EIO;
6180                 break;
6181         case SIOCSMIIREG:
6182         default:
6183                 return -EOPNOTSUPP;
6184         }
6185         return 0;
6186 }
6187
6188 /**
6189  * igb_hwtstamp_ioctl - control hardware time stamping
6190  * @netdev: network interface device structure
6191  * @ifr: pointer to the interface request structure
6192  * @cmd: ioctl command (SIOCSHWTSTAMP)
6193  *
6194  * Outgoing time stamping can be enabled and disabled. Play nice and
6195  * disable it when requested, although it shouldn't cause any overhead
6196  * when no packet needs it. At most one packet in the queue may be
6197  * marked for time stamping, otherwise it would be impossible to tell
6198  * for sure to which packet the hardware time stamp belongs.
6199  *
6200  * Incoming time stamping has to be configured via the hardware
6201  * filters. Not all combinations are supported, in particular event
6202  * type has to be specified. Matching the kind of event packet is
6203  * not supported, with the exception of "all V2 events regardless of
6204  * level 2 or 4".
6205  *
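 * As a rough illustration (not part of this driver), userspace typically
 * enables hardware timestamping through the standard SIOCSHWTSTAMP flow
 * from <linux/net_tstamp.h>; the interface name and socket fd below are
 * placeholders:
 *
 *	struct hwtstamp_config cfg = { 0 };
 *	struct ifreq ifr = { 0 };
 *
 *	cfg.tx_type = HWTSTAMP_TX_ON;
 *	cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (void *)&cfg;
 *	if (ioctl(sock_fd, SIOCSHWTSTAMP, &ifr) < 0)
 *		perror("SIOCSHWTSTAMP");
 *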
6206  **/
6207 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6208                               struct ifreq *ifr, int cmd)
6209 {
6210         struct igb_adapter *adapter = netdev_priv(netdev);
6211         struct e1000_hw *hw = &adapter->hw;
6212         struct hwtstamp_config config;
6213         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6214         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6215         u32 tsync_rx_cfg = 0;
6216         bool is_l4 = false;
6217         bool is_l2 = false;
6218         u32 regval;
6219
6220         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6221                 return -EFAULT;
6222
6223         /* reserved for future extensions */
6224         if (config.flags)
6225                 return -EINVAL;
6226
6227         switch (config.tx_type) {
6228         case HWTSTAMP_TX_OFF:
6229                 tsync_tx_ctl = 0;
6230         case HWTSTAMP_TX_ON:
6231                 break;
6232         default:
6233                 return -ERANGE;
6234         }
6235
6236         switch (config.rx_filter) {
6237         case HWTSTAMP_FILTER_NONE:
6238                 tsync_rx_ctl = 0;
6239                 break;
6240         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6241         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6242         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6243         case HWTSTAMP_FILTER_ALL:
6244                 /*
6245                  * register TSYNCRXCFG must be set, therefore it is not
6246                  * possible to time stamp both Sync and Delay_Req messages
6247                  * => fall back to time stamping all packets
6248                  */
6249                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6250                 config.rx_filter = HWTSTAMP_FILTER_ALL;
6251                 break;
6252         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6253                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6254                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6255                 is_l4 = true;
6256                 break;
6257         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6258                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6259                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6260                 is_l4 = true;
6261                 break;
6262         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6263         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6264                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6265                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6266                 is_l2 = true;
6267                 is_l4 = true;
6268                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6269                 break;
6270         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6271         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6272                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6273                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6274                 is_l2 = true;
6275                 is_l4 = true;
6276                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6277                 break;
6278         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6279         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6280         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6281                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6282                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6283                 is_l2 = true;
6284                 is_l4 = true;
6285                 break;
6286         default:
6287                 return -ERANGE;
6288         }
6289
6290         if (hw->mac.type == e1000_82575) {
6291                 if (tsync_rx_ctl | tsync_tx_ctl)
6292                         return -EINVAL;
6293                 return 0;
6294         }
6295
6296         /*
6297          * Per-packet timestamping only works if all packets are
6298          * timestamped, so enable timestamping in all packets as
6299          * long as one rx filter was configured.
6300          */
6301         if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6302                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6303                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6304         }
6305
6306         /* enable/disable TX */
6307         regval = rd32(E1000_TSYNCTXCTL);
6308         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6309         regval |= tsync_tx_ctl;
6310         wr32(E1000_TSYNCTXCTL, regval);
6311
6312         /* enable/disable RX */
6313         regval = rd32(E1000_TSYNCRXCTL);
6314         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6315         regval |= tsync_rx_ctl;
6316         wr32(E1000_TSYNCRXCTL, regval);
6317
6318         /* define which PTP packets are time stamped */
6319         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6320
6321         /* define ethertype filter for timestamped packets */
6322         if (is_l2)
6323                 wr32(E1000_ETQF(3),
6324                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6325                                  E1000_ETQF_1588 | /* enable timestamping */
6326                                  ETH_P_1588));     /* 1588 eth protocol type */
6327         else
6328                 wr32(E1000_ETQF(3), 0);
6329
6330 #define PTP_PORT 319
6331         /* L4 Queue Filter[3]: filter by destination port and protocol */
6332         if (is_l4) {
6333                 u32 ftqf = (IPPROTO_UDP /* UDP */
6334                         | E1000_FTQF_VF_BP /* VF not compared */
6335                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6336                         | E1000_FTQF_MASK); /* mask all inputs */
6337                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6338
6339                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6340                 wr32(E1000_IMIREXT(3),
6341                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6342                 if (hw->mac.type == e1000_82576) {
6343                         /* enable source port check */
6344                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6345                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6346                 }
6347                 wr32(E1000_FTQF(3), ftqf);
6348         } else {
6349                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6350         }
6351         wrfl();
6352
6353         adapter->hwtstamp_config = config;
6354
6355         /* clear TX/RX time stamp registers, just to be sure */
6356         regval = rd32(E1000_TXSTMPH);
6357         regval = rd32(E1000_RXSTMPH);
6358
6359         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6360                 -EFAULT : 0;
6361 }
6362
6363 /**
6364  * igb_ioctl - handle device-specific ioctls
6365  * @netdev: network interface device structure
6366  * @ifr: pointer to the interface request structure
6367  * @cmd: ioctl command
6368  **/
6369 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6370 {
6371         switch (cmd) {
6372         case SIOCGMIIPHY:
6373         case SIOCGMIIREG:
6374         case SIOCSMIIREG:
6375                 return igb_mii_ioctl(netdev, ifr, cmd);
6376         case SIOCSHWTSTAMP:
6377                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6378         default:
6379                 return -EOPNOTSUPP;
6380         }
6381 }
6382
6383 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6384 {
6385         struct igb_adapter *adapter = hw->back;
6386         u16 cap_offset;
6387
6388         cap_offset = adapter->pdev->pcie_cap;
6389         if (!cap_offset)
6390                 return -E1000_ERR_CONFIG;
6391
6392         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6393
6394         return 0;
6395 }
6396
6397 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6398 {
6399         struct igb_adapter *adapter = hw->back;
6400         u16 cap_offset;
6401
6402         cap_offset = adapter->pdev->pcie_cap;
6403         if (!cap_offset)
6404                 return -E1000_ERR_CONFIG;
6405
6406         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6407
6408         return 0;
6409 }
6410
6411 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6412 {
6413         struct igb_adapter *adapter = netdev_priv(netdev);
6414         struct e1000_hw *hw = &adapter->hw;
6415         u32 ctrl, rctl;
6416         bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6417
6418         if (enable) {
6419                 /* enable VLAN tag insert/strip */
6420                 ctrl = rd32(E1000_CTRL);
6421                 ctrl |= E1000_CTRL_VME;
6422                 wr32(E1000_CTRL, ctrl);
6423
6424                 /* Disable CFI check */
6425                 rctl = rd32(E1000_RCTL);
6426                 rctl &= ~E1000_RCTL_CFIEN;
6427                 wr32(E1000_RCTL, rctl);
6428         } else {
6429                 /* disable VLAN tag insert/strip */
6430                 ctrl = rd32(E1000_CTRL);
6431                 ctrl &= ~E1000_CTRL_VME;
6432                 wr32(E1000_CTRL, ctrl);
6433         }
6434
6435         igb_rlpml_set(adapter);
6436 }
6437
6438 static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6439 {
6440         struct igb_adapter *adapter = netdev_priv(netdev);
6441         struct e1000_hw *hw = &adapter->hw;
6442         int pf_id = adapter->vfs_allocated_count;
6443
6444         /* attempt to add filter to vlvf array */
6445         igb_vlvf_set(adapter, vid, true, pf_id);
6446
6447         /* add the filter since PF can receive vlans w/o entry in vlvf */
6448         igb_vfta_set(hw, vid, true);
6449
6450         set_bit(vid, adapter->active_vlans);
6451
6452         return 0;
6453 }
6454
6455 static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6456 {
6457         struct igb_adapter *adapter = netdev_priv(netdev);
6458         struct e1000_hw *hw = &adapter->hw;
6459         int pf_id = adapter->vfs_allocated_count;
6460         s32 err;
6461
6462         /* remove vlan from VLVF table array */
6463         err = igb_vlvf_set(adapter, vid, false, pf_id);
6464
6465         /* if vid was not present in VLVF just remove it from table */
6466         if (err)
6467                 igb_vfta_set(hw, vid, false);
6468
6469         clear_bit(vid, adapter->active_vlans);
6470
6471         return 0;
6472 }
6473
6474 static void igb_restore_vlan(struct igb_adapter *adapter)
6475 {
6476         u16 vid;
6477
6478         igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6479
6480         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6481                 igb_vlan_rx_add_vid(adapter->netdev, vid);
6482 }
6483
6484 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6485 {
6486         struct pci_dev *pdev = adapter->pdev;
6487         struct e1000_mac_info *mac = &adapter->hw.mac;
6488
6489         mac->autoneg = 0;
6490
6491         /* Make sure dplx is at most 1 bit and lsb of speed is not set
6492          * for the switch() below to work */
6493         if ((spd & 1) || (dplx & ~1))
6494                 goto err_inval;
6495
6496         /* Fiber NICs only allow 1000 Mbps full duplex */
6497         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6498             (spd != SPEED_1000 ||
6499              dplx != DUPLEX_FULL))
6500                 goto err_inval;
6501
6502         switch (spd + dplx) {
6503         case SPEED_10 + DUPLEX_HALF:
6504                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6505                 break;
6506         case SPEED_10 + DUPLEX_FULL:
6507                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6508                 break;
6509         case SPEED_100 + DUPLEX_HALF:
6510                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6511                 break;
6512         case SPEED_100 + DUPLEX_FULL:
6513                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6514                 break;
6515         case SPEED_1000 + DUPLEX_FULL:
6516                 mac->autoneg = 1;
6517                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6518                 break;
6519         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6520         default:
6521                 goto err_inval;
6522         }
6523         return 0;
6524
6525 err_inval:
6526         dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6527         return -EINVAL;
6528 }
6529
6530 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6531                           bool runtime)
6532 {
6533         struct net_device *netdev = pci_get_drvdata(pdev);
6534         struct igb_adapter *adapter = netdev_priv(netdev);
6535         struct e1000_hw *hw = &adapter->hw;
6536         u32 ctrl, rctl, status;
6537         u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6538 #ifdef CONFIG_PM
6539         int retval = 0;
6540 #endif
6541
6542         netif_device_detach(netdev);
6543
6544         if (netif_running(netdev))
6545                 __igb_close(netdev, true);
6546
6547         igb_clear_interrupt_scheme(adapter);
6548
6549 #ifdef CONFIG_PM
6550         retval = pci_save_state(pdev);
6551         if (retval)
6552                 return retval;
6553 #endif
6554
6555         status = rd32(E1000_STATUS);
6556         if (status & E1000_STATUS_LU)
6557                 wufc &= ~E1000_WUFC_LNKC;
6558
6559         if (wufc) {
6560                 igb_setup_rctl(adapter);
6561                 igb_set_rx_mode(netdev);
6562
6563                 /* turn on all-multi mode if wake on multicast is enabled */
6564                 if (wufc & E1000_WUFC_MC) {
6565                         rctl = rd32(E1000_RCTL);
6566                         rctl |= E1000_RCTL_MPE;
6567                         wr32(E1000_RCTL, rctl);
6568                 }
6569
6570                 ctrl = rd32(E1000_CTRL);
6571                 /* advertise wake from D3Cold */
6572                 #define E1000_CTRL_ADVD3WUC 0x00100000
6573                 /* phy power management enable */
6574                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6575                 ctrl |= E1000_CTRL_ADVD3WUC;
6576                 wr32(E1000_CTRL, ctrl);
6577
6578                 /* Allow time for pending master requests to run */
6579                 igb_disable_pcie_master(hw);
6580
6581                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6582                 wr32(E1000_WUFC, wufc);
6583         } else {
6584                 wr32(E1000_WUC, 0);
6585                 wr32(E1000_WUFC, 0);
6586         }
6587
6588         *enable_wake = wufc || adapter->en_mng_pt;
6589         if (!*enable_wake)
6590                 igb_power_down_link(adapter);
6591         else
6592                 igb_power_up_link(adapter);
6593
6594         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6595          * would have already happened in close and is redundant. */
6596         igb_release_hw_control(adapter);
6597
6598         pci_disable_device(pdev);
6599
6600         return 0;
6601 }
6602
6603 #ifdef CONFIG_PM
6604 #ifdef CONFIG_PM_SLEEP
6605 static int igb_suspend(struct device *dev)
6606 {
6607         int retval;
6608         bool wake;
6609         struct pci_dev *pdev = to_pci_dev(dev);
6610
6611         retval = __igb_shutdown(pdev, &wake, 0);
6612         if (retval)
6613                 return retval;
6614
6615         if (wake) {
6616                 pci_prepare_to_sleep(pdev);
6617         } else {
6618                 pci_wake_from_d3(pdev, false);
6619                 pci_set_power_state(pdev, PCI_D3hot);
6620         }
6621
6622         return 0;
6623 }
6624 #endif /* CONFIG_PM_SLEEP */
6625
6626 static int igb_resume(struct device *dev)
6627 {
6628         struct pci_dev *pdev = to_pci_dev(dev);
6629         struct net_device *netdev = pci_get_drvdata(pdev);
6630         struct igb_adapter *adapter = netdev_priv(netdev);
6631         struct e1000_hw *hw = &adapter->hw;
6632         u32 err;
6633
6634         pci_set_power_state(pdev, PCI_D0);
6635         pci_restore_state(pdev);
6636         pci_save_state(pdev);
6637
6638         err = pci_enable_device_mem(pdev);
6639         if (err) {
6640                 dev_err(&pdev->dev,
6641                         "igb: Cannot enable PCI device from suspend\n");
6642                 return err;
6643         }
6644         pci_set_master(pdev);
6645
6646         pci_enable_wake(pdev, PCI_D3hot, 0);
6647         pci_enable_wake(pdev, PCI_D3cold, 0);
6648
6649         if (!rtnl_is_locked()) {
6650                 /*
6651                  * shut up ASSERT_RTNL() warning in
6652                  * netif_set_real_num_tx/rx_queues.
6653                  */
6654                 rtnl_lock();
6655                 err = igb_init_interrupt_scheme(adapter);
6656                 rtnl_unlock();
6657         } else {
6658                 err = igb_init_interrupt_scheme(adapter);
6659         }
6660         if (err) {
6661                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6662                 return -ENOMEM;
6663         }
6664
6665         igb_reset(adapter);
6666
6667         /* let the f/w know that the h/w is now under the control of the
6668          * driver. */
6669         igb_get_hw_control(adapter);
6670
6671         wr32(E1000_WUS, ~0);
6672
6673         if (netdev->flags & IFF_UP) {
6674                 err = __igb_open(netdev, true);
6675                 if (err)
6676                         return err;
6677         }
6678
6679         netif_device_attach(netdev);
6680         return 0;
6681 }
6682
6683 #ifdef CONFIG_PM_RUNTIME
6684 static int igb_runtime_idle(struct device *dev)
6685 {
6686         struct pci_dev *pdev = to_pci_dev(dev);
6687         struct net_device *netdev = pci_get_drvdata(pdev);
6688         struct igb_adapter *adapter = netdev_priv(netdev);
6689
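             /*
              * With no link there is nothing to do, so schedule a delayed
              * runtime suspend; returning -EBUSY keeps the PM core from
              * suspending the device immediately.
              */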
6690         if (!igb_has_link(adapter))
6691                 pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6692
6693         return -EBUSY;
6694 }
6695
6696 static int igb_runtime_suspend(struct device *dev)
6697 {
6698         struct pci_dev *pdev = to_pci_dev(dev);
6699         int retval;
6700         bool wake;
6701
6702         retval = __igb_shutdown(pdev, &wake, 1);
6703         if (retval)
6704                 return retval;
6705
6706         if (wake) {
6707                 pci_prepare_to_sleep(pdev);
6708         } else {
6709                 pci_wake_from_d3(pdev, false);
6710                 pci_set_power_state(pdev, PCI_D3hot);
6711         }
6712
6713         return 0;
6714 }
6715
6716 static int igb_runtime_resume(struct device *dev)
6717 {
6718         return igb_resume(dev);
6719 }
6720 #endif /* CONFIG_PM_RUNTIME */
6721 #endif
6722
6723 static void igb_shutdown(struct pci_dev *pdev)
6724 {
6725         bool wake;
6726
6727         __igb_shutdown(pdev, &wake, 0);
6728
6729         if (system_state == SYSTEM_POWER_OFF) {
6730                 pci_wake_from_d3(pdev, wake);
6731                 pci_set_power_state(pdev, PCI_D3hot);
6732         }
6733 }
6734
6735 #ifdef CONFIG_NET_POLL_CONTROLLER
6736 /*
6737  * Polling 'interrupt' - used by things like netconsole to send skbs
6738  * without having to re-enable interrupts. It's not called while
6739  * the interrupt routine is executing.
6740  */
6741 static void igb_netpoll(struct net_device *netdev)
6742 {
6743         struct igb_adapter *adapter = netdev_priv(netdev);
6744         struct e1000_hw *hw = &adapter->hw;
6745         struct igb_q_vector *q_vector;
6746         int i;
6747
6748         for (i = 0; i < adapter->num_q_vectors; i++) {
6749                 q_vector = adapter->q_vector[i];
6750                 if (adapter->msix_entries)
6751                         wr32(E1000_EIMC, q_vector->eims_value);
6752                 else
6753                         igb_irq_disable(adapter);
6754                 napi_schedule(&q_vector->napi);
6755         }
6756 }
6757 #endif /* CONFIG_NET_POLL_CONTROLLER */
6758
6759 /**
6760  * igb_io_error_detected - called when PCI error is detected
6761  * @pdev: Pointer to PCI device
6762  * @state: The current pci connection state
6763  *
6764  * This function is called after a PCI bus error affecting
6765  * this device has been detected.
6766  */
6767 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6768                                               pci_channel_state_t state)
6769 {
6770         struct net_device *netdev = pci_get_drvdata(pdev);
6771         struct igb_adapter *adapter = netdev_priv(netdev);
6772
6773         netif_device_detach(netdev);
6774
6775         if (state == pci_channel_io_perm_failure)
6776                 return PCI_ERS_RESULT_DISCONNECT;
6777
6778         if (netif_running(netdev))
6779                 igb_down(adapter);
6780         pci_disable_device(pdev);
6781
6782         /* Request a slot reset. */
6783         return PCI_ERS_RESULT_NEED_RESET;
6784 }
6785
6786 /**
6787  * igb_io_slot_reset - called after the pci bus has been reset.
6788  * @pdev: Pointer to PCI device
6789  *
6790  * Restart the card from scratch, as if from a cold-boot. Implementation
6791  * resembles the first-half of the igb_resume routine.
6792  */
6793 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6794 {
6795         struct net_device *netdev = pci_get_drvdata(pdev);
6796         struct igb_adapter *adapter = netdev_priv(netdev);
6797         struct e1000_hw *hw = &adapter->hw;
6798         pci_ers_result_t result;
6799         int err;
6800
6801         if (pci_enable_device_mem(pdev)) {
6802                 dev_err(&pdev->dev,
6803                         "Cannot re-enable PCI device after reset.\n");
6804                 result = PCI_ERS_RESULT_DISCONNECT;
6805         } else {
6806                 pci_set_master(pdev);
6807                 pci_restore_state(pdev);
6808                 pci_save_state(pdev);
6809
6810                 pci_enable_wake(pdev, PCI_D3hot, 0);
6811                 pci_enable_wake(pdev, PCI_D3cold, 0);
6812
6813                 igb_reset(adapter);
6814                 wr32(E1000_WUS, ~0);
6815                 result = PCI_ERS_RESULT_RECOVERED;
6816         }
6817
6818         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6819         if (err) {
6820                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6821                         "failed 0x%0x\n", err);
6822                 /* non-fatal, continue */
6823         }
6824
6825         return result;
6826 }
6827
6828 /**
6829  * igb_io_resume - called when traffic can start flowing again.
6830  * @pdev: Pointer to PCI device
6831  *
6832  * This callback is called when the error recovery driver tells us that
6833  * its OK to resume normal operation. Implementation resembles the
6834  * second-half of the igb_resume routine.
6835  */
6836 static void igb_io_resume(struct pci_dev *pdev)
6837 {
6838         struct net_device *netdev = pci_get_drvdata(pdev);
6839         struct igb_adapter *adapter = netdev_priv(netdev);
6840
6841         if (netif_running(netdev)) {
6842                 if (igb_up(adapter)) {
6843                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6844                         return;
6845                 }
6846         }
6847
6848         netif_device_attach(netdev);
6849
6850         /* let the f/w know that the h/w is now under the control of the
6851          * driver. */
6852         igb_get_hw_control(adapter);
6853 }
6854
6855 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6856                              u8 qsel)
6857 {
6858         u32 rar_low, rar_high;
6859         struct e1000_hw *hw = &adapter->hw;
6860
6861         /* HW expects these in little endian so we reverse the byte order
6862          * from network order (big endian) to little endian
6863          */
6864         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6865                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6866         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6867
6868         /* Indicate to hardware the Address is Valid. */
6869         rar_high |= E1000_RAH_AV;
6870
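             /*
              * Encode the pool/queue selection (qsel): the 82575 stores it as
              * a multiple of the POOL_1 bit, later parts as that bit shifted
              * left by qsel.
              */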
6871         if (hw->mac.type == e1000_82575)
6872                 rar_high |= E1000_RAH_POOL_1 * qsel;
6873         else
6874                 rar_high |= E1000_RAH_POOL_1 << qsel;
6875
6876         wr32(E1000_RAL(index), rar_low);
6877         wrfl();
6878         wr32(E1000_RAH(index), rar_high);
6879         wrfl();
6880 }
6881
6882 static int igb_set_vf_mac(struct igb_adapter *adapter,
6883                           int vf, unsigned char *mac_addr)
6884 {
6885         struct e1000_hw *hw = &adapter->hw;
6886         /* VF MAC addresses start at the end of the receive addresses and move
6887          * towards the first; as a result a collision should not be possible */
6888         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6889
6890         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6891
6892         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6893
6894         return 0;
6895 }
6896
6897 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6898 {
6899         struct igb_adapter *adapter = netdev_priv(netdev);
6900         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6901                 return -EINVAL;
6902         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6903         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6904         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6905                                       " change effective.");
6906         if (test_bit(__IGB_DOWN, &adapter->state)) {
6907                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6908                          " but the PF device is not up.\n");
6909                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6910                          " attempting to use the VF device.\n");
6911         }
6912         return igb_set_vf_mac(adapter, vf, mac);
6913 }
6914
6915 static int igb_link_mbps(int internal_link_speed)
6916 {
6917         switch (internal_link_speed) {
6918         case SPEED_100:
6919                 return 100;
6920         case SPEED_1000:
6921                 return 1000;
6922         default:
6923                 return 0;
6924         }
6925 }
6926
6927 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6928                                   int link_speed)
6929 {
6930         int rf_dec, rf_int;
6931         u32 bcnrc_val;
6932
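             /*
              * The rate factor programmed below is link_speed / tx_rate as a
              * fixed-point value with E1000_RTTBCNRC_RF_INT_SHIFT fractional
              * bits.  For example, assuming a shift of 14, a 1000 Mbps link
              * limited to 300 Mbps gives rf_int = 3 and
              * rf_dec = (100 * 16384) / 300 = 5461, i.e. roughly 3.33.
              */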
6933         if (tx_rate != 0) {
6934                 /* Calculate the rate factor values to set */
6935                 rf_int = link_speed / tx_rate;
6936                 rf_dec = (link_speed - (rf_int * tx_rate));
6937                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6938
6939                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6940                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6941                                E1000_RTTBCNRC_RF_INT_MASK);
6942                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6943         } else {
6944                 bcnrc_val = 0;
6945         }
6946
6947         wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6948         wr32(E1000_RTTBCNRC, bcnrc_val);
6949 }
6950
6951 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6952 {
6953         int actual_link_speed, i;
6954         bool reset_rate = false;
6955
6956         /* VF TX rate limit was not set or not supported */
6957         if ((adapter->vf_rate_link_speed == 0) ||
6958             (adapter->hw.mac.type != e1000_82576))
6959                 return;
6960
6961         actual_link_speed = igb_link_mbps(adapter->link_speed);
6962         if (actual_link_speed != adapter->vf_rate_link_speed) {
6963                 reset_rate = true;
6964                 adapter->vf_rate_link_speed = 0;
6965                 dev_info(&adapter->pdev->dev,
6966                          "Link speed has been changed. VF Transmit "
6967                          "rate is disabled\n");
6968         }
6969
6970         for (i = 0; i < adapter->vfs_allocated_count; i++) {
6971                 if (reset_rate)
6972                         adapter->vf_data[i].tx_rate = 0;
6973
6974                 igb_set_vf_rate_limit(&adapter->hw, i,
6975                                       adapter->vf_data[i].tx_rate,
6976                                       actual_link_speed);
6977         }
6978 }
6979
6980 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6981 {
6982         struct igb_adapter *adapter = netdev_priv(netdev);
6983         struct e1000_hw *hw = &adapter->hw;
6984         int actual_link_speed;
6985
6986         if (hw->mac.type != e1000_82576)
6987                 return -EOPNOTSUPP;
6988
6989         actual_link_speed = igb_link_mbps(adapter->link_speed);
6990         if ((vf >= adapter->vfs_allocated_count) ||
6991             (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6992             (tx_rate < 0) || (tx_rate > actual_link_speed))
6993                 return -EINVAL;
6994
6995         adapter->vf_rate_link_speed = actual_link_speed;
6996         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6997         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6998
6999         return 0;
7000 }
7001
7002 static int igb_ndo_get_vf_config(struct net_device *netdev,
7003                                  int vf, struct ifla_vf_info *ivi)
7004 {
7005         struct igb_adapter *adapter = netdev_priv(netdev);
7006         if (vf >= adapter->vfs_allocated_count)
7007                 return -EINVAL;
7008         ivi->vf = vf;
7009         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7010         ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7011         ivi->vlan = adapter->vf_data[vf].pf_vlan;
7012         ivi->qos = adapter->vf_data[vf].pf_qos;
7013         return 0;
7014 }
7015
7016 static void igb_vmm_control(struct igb_adapter *adapter)
7017 {
7018         struct e1000_hw *hw = &adapter->hw;
7019         u32 reg;
7020
7021         switch (hw->mac.type) {
7022         case e1000_82575:
7023         default:
7024                 /* replication is not supported for 82575 */
7025                 return;
7026         case e1000_82576:
7027                 /* notify HW that the MAC is adding vlan tags */
7028                 reg = rd32(E1000_DTXCTL);
7029                 reg |= E1000_DTXCTL_VLAN_ADDED;
7030                 wr32(E1000_DTXCTL, reg);
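                     /* fall through - the 82576 also needs the 82580 setting below */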
7031         case e1000_82580:
7032                 /* enable replication vlan tag stripping */
7033                 reg = rd32(E1000_RPLOLR);
7034                 reg |= E1000_RPLOLR_STRVLAN;
7035                 wr32(E1000_RPLOLR, reg);
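                     /* fall through */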
7036         case e1000_i350:
7037                 /* none of the above registers are supported by i350 */
7038                 break;
7039         }
7040
7041         if (adapter->vfs_allocated_count) {
7042                 igb_vmdq_set_loopback_pf(hw, true);
7043                 igb_vmdq_set_replication_pf(hw, true);
7044                 igb_vmdq_set_anti_spoofing_pf(hw, true,
7045                                                 adapter->vfs_allocated_count);
7046         } else {
7047                 igb_vmdq_set_loopback_pf(hw, false);
7048                 igb_vmdq_set_replication_pf(hw, false);
7049         }
7050 }
7051
7052 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7053 {
7054         struct e1000_hw *hw = &adapter->hw;
7055         u32 dmac_thr;
7056         u16 hwm;
7057
7058         if (hw->mac.type > e1000_82580) {
7059                 if (adapter->flags & IGB_FLAG_DMAC) {
7060                         u32 reg;
7061
7062                         /* force threshold to 0. */
7063                         wr32(E1000_DMCTXTH, 0);
7064
7065                         /*
7066                          * DMA Coalescing high water mark needs to be greater
7067                          * than the Rx threshold. Set hwm to PBA - max frame
7068                          * size in 16B units, capping it at PBA - 6KB.
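                              * (pba is given in KB, so 64 * pba converts it to
                              * 16-byte units: 1 KB = 64 * 16 B.)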
7069                          */
7070                         hwm = 64 * pba - adapter->max_frame_size / 16;
7071                         if (hwm < 64 * (pba - 6))
7072                                 hwm = 64 * (pba - 6);
7073                         reg = rd32(E1000_FCRTC);
7074                         reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7075                         reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7076                                 & E1000_FCRTC_RTH_COAL_MASK);
7077                         wr32(E1000_FCRTC, reg);
7078
7079                         /*
7080                          * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7081                          * frame size, capping it at PBA - 10KB.
7082                          */
7083                         dmac_thr = pba - adapter->max_frame_size / 512;
7084                         if (dmac_thr < pba - 10)
7085                                 dmac_thr = pba - 10;
7086                         reg = rd32(E1000_DMACR);
7087                         reg &= ~E1000_DMACR_DMACTHR_MASK;
7088                         reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7089                                 & E1000_DMACR_DMACTHR_MASK);
7090
7091                         /* transition to L0s or L1 if available */
7092                         reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7093
7094                         /* watchdog timer = ~1000 usec, in 32 usec intervals */
7095                         reg |= (1000 >> 5);
7096                         wr32(E1000_DMACR, reg);
7097
7098                         /*
7099                          * no lower threshold to disable
7100                          * coalescing (smart fifo); UTRESH = 0
7101                          */
7102                         wr32(E1000_DMCRTRH, 0);
7103
7104                         reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7105
7106                         wr32(E1000_DMCTLX, reg);
7107
7108                         /*
7109                          * free space in tx packet buffer to wake from
7110                          * DMA coal
7111                          */
7112                         wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7113                              (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7114
7115                         /*
7116                          * make low power state decision controlled
7117                          * by DMA coal
7118                          */
7119                         reg = rd32(E1000_PCIEMISC);
7120                         reg &= ~E1000_PCIEMISC_LX_DECISION;
7121                         wr32(E1000_PCIEMISC, reg);
7122                 } /* endif adapter->dmac is not disabled */
7123         } else if (hw->mac.type == e1000_82580) {
7124                 u32 reg = rd32(E1000_PCIEMISC);
7125                 wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7126                 wr32(E1000_DMACR, 0);
7127         }
7128 }
7129
7130 /* igb_main.c */