drivers/net/ethernet/intel/igb/igb_main.c
/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2011 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/

#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#include <linux/prefetch.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"

#define MAJ 3
#define MIN 2
#define BUILD 10
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
__stringify(BUILD) "-k"
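/* For reference: with the MAJ/MIN/BUILD values defined above, DRV_VERSION
 * expands to the string "3.2.10-k".
 */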
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
                                "Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";

static const struct e1000_info *igb_info_tbl[] = {
        [board_82575] = &e1000_82575_info,
};

static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
        /* required last entry */
        {0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);

void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static void igb_init_hw_timer(struct igb_adapter *adapter);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
                                                 struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_tx_irq(struct igb_q_vector *);
static bool igb_clean_rx_irq(struct igb_q_vector *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_mode(struct net_device *netdev, u32 features);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
                               int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
                                 struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);

#ifdef CONFIG_PCI_IOV
static int igb_vf_configure(struct igb_adapter *adapter, int vf);
static int igb_find_enabled_vfs(struct igb_adapter *adapter);
static int igb_check_vf_assignment(struct igb_adapter *adapter);
#endif

#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
        .notifier_call  = igb_notify_dca,
        .next           = NULL,
        .priority       = 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */

static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
                     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
        .error_detected = igb_io_error_detected,
        .slot_reset = igb_io_slot_reset,
        .resume = igb_io_resume,
};

static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);

static struct pci_driver igb_driver = {
        .name     = igb_driver_name,
        .id_table = igb_pci_tbl,
        .probe    = igb_probe,
        .remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
        /* Power Management Hooks */
        .suspend  = igb_suspend,
        .resume   = igb_resume,
#endif
        .shutdown = igb_shutdown,
        .err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);

struct igb_reg_info {
        u32 ofs;
        char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {

        /* General Registers */
        {E1000_CTRL, "CTRL"},
        {E1000_STATUS, "STATUS"},
        {E1000_CTRL_EXT, "CTRL_EXT"},

        /* Interrupt Registers */
        {E1000_ICR, "ICR"},

        /* RX Registers */
        {E1000_RCTL, "RCTL"},
        {E1000_RDLEN(0), "RDLEN"},
        {E1000_RDH(0), "RDH"},
        {E1000_RDT(0), "RDT"},
        {E1000_RXDCTL(0), "RXDCTL"},
        {E1000_RDBAL(0), "RDBAL"},
        {E1000_RDBAH(0), "RDBAH"},

        /* TX Registers */
        {E1000_TCTL, "TCTL"},
        {E1000_TDBAL(0), "TDBAL"},
        {E1000_TDBAH(0), "TDBAH"},
        {E1000_TDLEN(0), "TDLEN"},
        {E1000_TDH(0), "TDH"},
        {E1000_TDT(0), "TDT"},
        {E1000_TXDCTL(0), "TXDCTL"},
        {E1000_TDFH, "TDFH"},
        {E1000_TDFT, "TDFT"},
        {E1000_TDFHS, "TDFHS"},
        {E1000_TDFPC, "TDFPC"},

        /* List Terminator */
        {}
};

/*
 * igb_regdump - register printout routine
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
        int n = 0;
        char rname[16];
        u32 regs[8];

        switch (reginfo->ofs) {
        case E1000_RDLEN(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDLEN(n));
                break;
        case E1000_RDH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDH(n));
                break;
        case E1000_RDT(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDT(n));
                break;
        case E1000_RXDCTL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RXDCTL(n));
                break;
        case E1000_RDBAL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDBAL(n));
                break;
        case E1000_RDBAH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDBAH(n));
                break;
        case E1000_TDBAL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDBAL(n));
                break;
        case E1000_TDBAH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDBAH(n));
                break;
        case E1000_TDLEN(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDLEN(n));
                break;
        case E1000_TDH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDH(n));
                break;
        case E1000_TDT(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDT(n));
                break;
        case E1000_TXDCTL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TXDCTL(n));
                break;
        default:
                printk(KERN_INFO "%-15s %08x\n",
                        reginfo->name, rd32(reginfo->ofs));
                return;
        }

        snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
        printk(KERN_INFO "%-15s ", rname);
        for (n = 0; n < 4; n++)
                printk(KERN_CONT "%08x ", regs[n]);
        printk(KERN_CONT "\n");
}
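/* Illustrative output (hypothetical register values): for an array register
 * such as RDLEN the routine prints one line covering all four queues, e.g.
 *
 *   RDLEN[0-3]      00000400 00000400 00000400 00000400
 *
 * while scalar registers fall through to the default case and print a
 * single name/value pair.
 */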

/*
 * igb_dump - Print registers, tx-rings and rx-rings
 */
static void igb_dump(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        struct igb_reg_info *reginfo;
        struct igb_ring *tx_ring;
        union e1000_adv_tx_desc *tx_desc;
        struct my_u0 { u64 a; u64 b; } *u0;
        struct igb_ring *rx_ring;
        union e1000_adv_rx_desc *rx_desc;
        u32 staterr;
        u16 i, n;

        if (!netif_msg_hw(adapter))
                return;

        /* Print netdevice Info */
        if (netdev) {
                dev_info(&adapter->pdev->dev, "Net device Info\n");
                printk(KERN_INFO "Device Name     state            "
                        "trans_start      last_rx\n");
                printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
                netdev->name,
                netdev->state,
                netdev->trans_start,
                netdev->last_rx);
        }

        /* Print Registers */
        dev_info(&adapter->pdev->dev, "Register Dump\n");
        printk(KERN_INFO " Register Name   Value\n");
        for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
             reginfo->name; reginfo++) {
                igb_regdump(hw, reginfo);
        }

        /* Print TX Ring Summary */
        if (!netdev || !netif_running(netdev))
                goto exit;

        dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
        printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
                " leng ntw timestamp\n");
        for (n = 0; n < adapter->num_tx_queues; n++) {
                struct igb_tx_buffer *buffer_info;
                tx_ring = adapter->tx_ring[n];
                buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
                printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n",
                           n, tx_ring->next_to_use, tx_ring->next_to_clean,
                           (u64)buffer_info->dma,
                           buffer_info->length,
                           buffer_info->next_to_watch,
                           (u64)buffer_info->time_stamp);
        }

        /* Print TX Rings */
        if (!netif_msg_tx_done(adapter))
                goto rx_ring_summary;

        dev_info(&adapter->pdev->dev, "TX Rings Dump\n");

        /* Transmit Descriptor Formats
         *
         * Advanced Transmit Descriptor
         *   +--------------------------------------------------------------+
         * 0 |         Buffer Address [63:0]                                |
         *   +--------------------------------------------------------------+
         * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
         *   +--------------------------------------------------------------+
         *   63      46 45    40 39 38 36 35 32 31   24             15       0
         */

        for (n = 0; n < adapter->num_tx_queues; n++) {
                tx_ring = adapter->tx_ring[n];
                printk(KERN_INFO "------------------------------------\n");
                printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
                printk(KERN_INFO "------------------------------------\n");
                printk(KERN_INFO "T [desc]     [address 63:0  ] "
                        "[PlPOCIStDDM Ln] [bi->dma       ] "
                        "leng  ntw timestamp        bi->skb\n");

                for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
                        struct igb_tx_buffer *buffer_info;
                        tx_desc = IGB_TX_DESC(tx_ring, i);
                        buffer_info = &tx_ring->tx_buffer_info[i];
                        u0 = (struct my_u0 *)tx_desc;
                        printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
                                " %04X  %p %016llX %p", i,
                                le64_to_cpu(u0->a),
                                le64_to_cpu(u0->b),
                                (u64)buffer_info->dma,
                                buffer_info->length,
                                buffer_info->next_to_watch,
                                (u64)buffer_info->time_stamp,
                                buffer_info->skb);
                        if (i == tx_ring->next_to_use &&
                                i == tx_ring->next_to_clean)
                                printk(KERN_CONT " NTC/U\n");
                        else if (i == tx_ring->next_to_use)
                                printk(KERN_CONT " NTU\n");
                        else if (i == tx_ring->next_to_clean)
                                printk(KERN_CONT " NTC\n");
                        else
                                printk(KERN_CONT "\n");

                        if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
                                print_hex_dump(KERN_INFO, "",
                                        DUMP_PREFIX_ADDRESS,
                                        16, 1, phys_to_virt(buffer_info->dma),
                                        buffer_info->length, true);
                }
        }

        /* Print RX Rings Summary */
rx_ring_summary:
        dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
        printk(KERN_INFO "Queue [NTU] [NTC]\n");
        for (n = 0; n < adapter->num_rx_queues; n++) {
                rx_ring = adapter->rx_ring[n];
                printk(KERN_INFO " %5d %5X %5X\n", n,
                           rx_ring->next_to_use, rx_ring->next_to_clean);
        }

        /* Print RX Rings */
        if (!netif_msg_rx_status(adapter))
                goto exit;

        dev_info(&adapter->pdev->dev, "RX Rings Dump\n");

        /* Advanced Receive Descriptor (Read) Format
         *    63                                           1        0
         *    +-----------------------------------------------------+
         *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
         *    +----------------------------------------------+------+
         *  8 |       Header Buffer Address [63:1]           |  DD  |
         *    +-----------------------------------------------------+
         *
         *
         * Advanced Receive Descriptor (Write-Back) Format
         *
         *   63       48 47    32 31  30      21 20 17 16   4 3     0
         *   +------------------------------------------------------+
         * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
         *   | Checksum   Ident  |   |           |    | Type | Type |
         *   +------------------------------------------------------+
         * 8 | VLAN Tag | Length | Extended Error | Extended Status |
         *   +------------------------------------------------------+
         *   63       48 47    32 31            20 19               0
         */

        for (n = 0; n < adapter->num_rx_queues; n++) {
                rx_ring = adapter->rx_ring[n];
                printk(KERN_INFO "------------------------------------\n");
                printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
                printk(KERN_INFO "------------------------------------\n");
                printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
                        "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
                        "<-- Adv Rx Read format\n");
                printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
                        "[vl er S cks ln] ---------------- [bi->skb] "
                        "<-- Adv Rx Write-Back format\n");

                for (i = 0; i < rx_ring->count; i++) {
                        struct igb_rx_buffer *buffer_info;
                        buffer_info = &rx_ring->rx_buffer_info[i];
                        rx_desc = IGB_RX_DESC(rx_ring, i);
                        u0 = (struct my_u0 *)rx_desc;
                        staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
                        if (staterr & E1000_RXD_STAT_DD) {
                                /* Descriptor Done */
                                printk(KERN_INFO "RWB[0x%03X]     %016llX "
                                        "%016llX ---------------- %p", i,
                                        le64_to_cpu(u0->a),
                                        le64_to_cpu(u0->b),
                                        buffer_info->skb);
                        } else {
                                printk(KERN_INFO "R  [0x%03X]     %016llX "
                                        "%016llX %016llX %p", i,
                                        le64_to_cpu(u0->a),
                                        le64_to_cpu(u0->b),
                                        (u64)buffer_info->dma,
                                        buffer_info->skb);

                                if (netif_msg_pktdata(adapter)) {
                                        print_hex_dump(KERN_INFO, "",
                                                DUMP_PREFIX_ADDRESS,
                                                16, 1,
                                                phys_to_virt(buffer_info->dma),
                                                IGB_RX_HDR_LEN, true);
                                        print_hex_dump(KERN_INFO, "",
                                          DUMP_PREFIX_ADDRESS,
                                          16, 1,
                                          phys_to_virt(
                                            buffer_info->page_dma +
                                            buffer_info->page_offset),
                                          PAGE_SIZE/2, true);
                                }
                        }

                        if (i == rx_ring->next_to_use)
                                printk(KERN_CONT " NTU\n");
                        else if (i == rx_ring->next_to_clean)
                                printk(KERN_CONT " NTC\n");
                        else
                                printk(KERN_CONT "\n");

                }
        }

exit:
        return;
}


/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 */
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
        struct igb_adapter *adapter =
                container_of(tc, struct igb_adapter, cycles);
        struct e1000_hw *hw = &adapter->hw;
        u64 stamp = 0;
        int shift = 0;

        /*
         * The timestamp latches on the lowest register read. For the 82580,
         * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
         * adjusted TIMINCA, so SYSTIMR always reads as all 0s and can be ignored.
         */
        if (hw->mac.type >= e1000_82580) {
                stamp = rd32(E1000_SYSTIMR) >> 8;
                shift = IGB_82580_TSYNC_SHIFT;
        }

        stamp |= (u64)rd32(E1000_SYSTIML) << shift;
        stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
        return stamp;
}
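/* Sketch of the value assembled above (for illustration only): on 82580+
 *
 *   stamp = ((u64)SYSTIMH << (shift + 32)) | ((u64)SYSTIML << shift)
 *           | (SYSTIMR >> 8);
 *
 * i.e. SYSTIML/SYSTIMH are shifted up to leave room for the sub-cycle
 * bits, while on older parts shift == 0 and the result is the plain
 * SYSTIMH:SYSTIML pair.
 */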

/**
 * igb_get_hw_dev - return device
 * used by hardware layer to print debugging information
 **/
struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
{
        struct igb_adapter *adapter = hw->back;
        return adapter->netdev;
}

/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
        int ret;
        printk(KERN_INFO "%s - version %s\n",
               igb_driver_string, igb_driver_version);

        printk(KERN_INFO "%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
        dca_register_notify(&dca_notifier);
#endif
        ret = pci_register_driver(&igb_driver);
        return ret;
}

module_init(igb_init_module);

/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
        dca_unregister_notify(&dca_notifier);
#endif
        pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);

#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
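/* For reference: Q_IDX_82576() interleaves queue indices so that
 * i = 0, 1, 2, 3, ... maps to 0, 8, 1, 9, ...; even i land in queues 0-7
 * and odd i in queues 8-15, matching the VF queue pairing described in
 * igb_cache_ring_register() below.
 */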
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
        int i = 0, j = 0;
        u32 rbase_offset = adapter->vfs_allocated_count;

        switch (adapter->hw.mac.type) {
        case e1000_82576:
                /* The queues are allocated for virtualization such that VF 0
                 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
                 * In order to avoid collision we start at the first free queue
                 * and continue consuming queues in the same sequence
                 */
                if (adapter->vfs_allocated_count) {
                        for (; i < adapter->rss_queues; i++)
                                adapter->rx_ring[i]->reg_idx = rbase_offset +
                                                               Q_IDX_82576(i);
                }
        case e1000_82575:
        case e1000_82580:
        case e1000_i350:
        default:
                for (; i < adapter->num_rx_queues; i++)
                        adapter->rx_ring[i]->reg_idx = rbase_offset + i;
                for (; j < adapter->num_tx_queues; j++)
                        adapter->tx_ring[j]->reg_idx = rbase_offset + j;
                break;
        }
}

static void igb_free_queues(struct igb_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++) {
                kfree(adapter->tx_ring[i]);
                adapter->tx_ring[i] = NULL;
        }
        for (i = 0; i < adapter->num_rx_queues; i++) {
                kfree(adapter->rx_ring[i]);
                adapter->rx_ring[i] = NULL;
        }
        adapter->num_rx_queues = 0;
        adapter->num_tx_queues = 0;
}

/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
        struct igb_ring *ring;
        int i;
        int orig_node = adapter->node;

        for (i = 0; i < adapter->num_tx_queues; i++) {
                if (orig_node == -1) {
                        int cur_node = next_online_node(adapter->node);
                        if (cur_node == MAX_NUMNODES)
                                cur_node = first_online_node;
                        adapter->node = cur_node;
                }
                ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
                                    adapter->node);
                if (!ring)
                        ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
                if (!ring)
                        goto err;
                ring->count = adapter->tx_ring_count;
                ring->queue_index = i;
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;
                ring->numa_node = adapter->node;
                /* For 82575, context index must be unique per ring. */
                if (adapter->hw.mac.type == e1000_82575)
                        set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
                adapter->tx_ring[i] = ring;
        }
        /* Restore the adapter's original node */
        adapter->node = orig_node;

        for (i = 0; i < adapter->num_rx_queues; i++) {
                if (orig_node == -1) {
                        int cur_node = next_online_node(adapter->node);
                        if (cur_node == MAX_NUMNODES)
                                cur_node = first_online_node;
                        adapter->node = cur_node;
                }
                ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
                                    adapter->node);
                if (!ring)
                        ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
                if (!ring)
                        goto err;
                ring->count = adapter->rx_ring_count;
                ring->queue_index = i;
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;
                ring->numa_node = adapter->node;
                /* set flag indicating ring supports SCTP checksum offload */
                if (adapter->hw.mac.type >= e1000_82576)
                        set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);

                /* On i350, loopback VLAN packets have the tag byte-swapped. */
                if (adapter->hw.mac.type == e1000_i350)
                        set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);

                adapter->rx_ring[i] = ring;
        }
        /* Restore the adapter's original node */
        adapter->node = orig_node;

        igb_cache_ring_register(adapter);

        return 0;

err:
        /* Restore the adapter's original node */
        adapter->node = orig_node;
        igb_free_queues(adapter);

        return -ENOMEM;
}

/**
 *  igb_write_ivar - configure ivar for given MSI-X vector
 *  @hw: pointer to the HW structure
 *  @msix_vector: vector number we are allocating to a given ring
 *  @index: row index of IVAR register to write within IVAR table
 *  @offset: column offset within IVAR; should be a multiple of 8
 *
 *  This function is intended to handle the writing of the IVAR register
 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
 *  each containing a cause allocation for an Rx and a Tx ring, and a
 *  variable number of rows depending on the number of queues supported.
 **/
static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
                           int index, int offset)
{
        u32 ivar = array_rd32(E1000_IVAR0, index);

        /* clear any bits that are currently set */
        ivar &= ~((u32)0xFF << offset);

        /* write vector and valid bit */
        ivar |= (msix_vector | E1000_IVAR_VALID) << offset;

        array_wr32(E1000_IVAR0, index, ivar);
}
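/* Illustrative example (hypothetical values): igb_write_ivar(hw, 5, 1, 8)
 * read-modify-writes IVAR0[1], replacing byte 1 (bits 15:8) with
 * (5 | E1000_IVAR_VALID) while leaving the other three cause bytes in
 * that register untouched.
 */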

#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
        struct igb_adapter *adapter = q_vector->adapter;
        struct e1000_hw *hw = &adapter->hw;
        int rx_queue = IGB_N0_QUEUE;
        int tx_queue = IGB_N0_QUEUE;
        u32 msixbm = 0;

        if (q_vector->rx.ring)
                rx_queue = q_vector->rx.ring->reg_idx;
        if (q_vector->tx.ring)
                tx_queue = q_vector->tx.ring->reg_idx;

        switch (hw->mac.type) {
        case e1000_82575:
                /* The 82575 assigns vectors using a bitmask, which matches the
                   bitmask for the EICR/EIMS/EIMC registers.  To assign one
                   or more queues to a vector, we write the appropriate bits
                   into the MSIXBM register for that vector. */
                if (rx_queue > IGB_N0_QUEUE)
                        msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
                if (tx_queue > IGB_N0_QUEUE)
                        msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
                if (!adapter->msix_entries && msix_vector == 0)
                        msixbm |= E1000_EIMS_OTHER;
                array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
                q_vector->eims_value = msixbm;
                break;
        case e1000_82576:
                /*
                 * 82576 uses a table that essentially consists of 2 columns
                 * with 8 rows.  The ordering is column-major so we use the
                 * lower 3 bits as the row index, and the 4th bit as the
                 * column offset.
                 */
                if (rx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       rx_queue & 0x7,
                                       (rx_queue & 0x8) << 1);
                if (tx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       tx_queue & 0x7,
                                       ((tx_queue & 0x8) << 1) + 8);
                q_vector->eims_value = 1 << msix_vector;
                break;
        case e1000_82580:
        case e1000_i350:
                /*
                 * On 82580 and newer adapters the scheme is similar to 82576
                 * however instead of ordering column-major we have things
                 * ordered row-major.  So we traverse the table by using
                 * bit 0 as the column offset, and the remaining bits as the
                 * row index.
                 */
                if (rx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       rx_queue >> 1,
                                       (rx_queue & 0x1) << 4);
                if (tx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       tx_queue >> 1,
                                       ((tx_queue & 0x1) << 4) + 8);
                q_vector->eims_value = 1 << msix_vector;
                break;
        default:
                BUG();
                break;
        }

        /* add q_vector eims value to global eims_enable_mask */
        adapter->eims_enable_mask |= q_vector->eims_value;

        /* configure q_vector to set itr on first interrupt */
        q_vector->set_itr = 1;
}
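/* Worked examples of the mappings above (queue values for illustration):
 * - 82576, rx_queue = 9: index = 9 & 0x7 = 1, offset = (9 & 0x8) << 1 = 16,
 *   so the cause lands in byte 2 of IVAR0[1] (column-major layout).
 * - 82580, rx_queue = 9: index = 9 >> 1 = 4, offset = (9 & 0x1) << 4 = 16,
 *   so the cause lands in byte 2 of IVAR0[4] (row-major layout).
 * Tx causes use the same math with an extra +8 added to the offset.
 */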

/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
        u32 tmp;
        int i, vector = 0;
        struct e1000_hw *hw = &adapter->hw;

        adapter->eims_enable_mask = 0;

        /* set vector for other causes, i.e. link changes */
        switch (hw->mac.type) {
        case e1000_82575:
                tmp = rd32(E1000_CTRL_EXT);
                /* enable MSI-X PBA support*/
                tmp |= E1000_CTRL_EXT_PBA_CLR;

                /* Auto-Mask interrupts upon ICR read. */
                tmp |= E1000_CTRL_EXT_EIAME;
                tmp |= E1000_CTRL_EXT_IRCA;

                wr32(E1000_CTRL_EXT, tmp);

                /* enable msix_other interrupt */
                array_wr32(E1000_MSIXBM(0), vector++,
                                      E1000_EIMS_OTHER);
                adapter->eims_other = E1000_EIMS_OTHER;

                break;

        case e1000_82576:
        case e1000_82580:
        case e1000_i350:
                /* Turn on MSI-X capability first, or our settings
                 * won't stick.  And it will take days to debug. */
                wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
                                E1000_GPIE_PBA | E1000_GPIE_EIAME |
                                E1000_GPIE_NSICR);

                /* enable msix_other interrupt */
                adapter->eims_other = 1 << vector;
                tmp = (vector++ | E1000_IVAR_VALID) << 8;

                wr32(E1000_IVAR_MISC, tmp);
                break;
        default:
                /* do nothing, since nothing else supports MSI-X */
                break;
        } /* switch (hw->mac.type) */

        adapter->eims_enable_mask |= adapter->eims_other;

        for (i = 0; i < adapter->num_q_vectors; i++)
                igb_assign_vector(adapter->q_vector[i], vector++);

        wrfl();
}

/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        int i, err = 0, vector = 0;

        err = request_irq(adapter->msix_entries[vector].vector,
                          igb_msix_other, 0, netdev->name, adapter);
        if (err)
                goto out;
        vector++;

        for (i = 0; i < adapter->num_q_vectors; i++) {
                struct igb_q_vector *q_vector = adapter->q_vector[i];

                q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

                if (q_vector->rx.ring && q_vector->tx.ring)
                        sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
                                q_vector->rx.ring->queue_index);
                else if (q_vector->tx.ring)
                        sprintf(q_vector->name, "%s-tx-%u", netdev->name,
                                q_vector->tx.ring->queue_index);
                else if (q_vector->rx.ring)
                        sprintf(q_vector->name, "%s-rx-%u", netdev->name,
                                q_vector->rx.ring->queue_index);
                else
                        sprintf(q_vector->name, "%s-unused", netdev->name);

                err = request_irq(adapter->msix_entries[vector].vector,
                                  igb_msix_ring, 0, q_vector->name,
                                  q_vector);
                if (err)
                        goto out;
                vector++;
        }

        igb_configure_msix(adapter);
        return 0;
out:
        return err;
}

static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
        if (adapter->msix_entries) {
                pci_disable_msix(adapter->pdev);
                kfree(adapter->msix_entries);
                adapter->msix_entries = NULL;
        } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
                pci_disable_msi(adapter->pdev);
        }
}

/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
        int v_idx;

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
                struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
                adapter->q_vector[v_idx] = NULL;
                if (!q_vector)
                        continue;
                netif_napi_del(&q_vector->napi);
                kfree(q_vector);
        }
        adapter->num_q_vectors = 0;
}

/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
        igb_free_queues(adapter);
        igb_free_q_vectors(adapter);
        igb_reset_interrupt_capability(adapter);
}

/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
        int err;
        int numvecs, i;

        /* Number of supported queues. */
        adapter->num_rx_queues = adapter->rss_queues;
        if (adapter->vfs_allocated_count)
                adapter->num_tx_queues = 1;
        else
                adapter->num_tx_queues = adapter->rss_queues;

        /* start with one vector for every rx queue */
        numvecs = adapter->num_rx_queues;

        /* if tx handler is separate add 1 for every tx queue */
        if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
                numvecs += adapter->num_tx_queues;

        /* store the number of vectors reserved for queues */
        adapter->num_q_vectors = numvecs;

        /* add 1 vector for link status interrupts */
        numvecs++;
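        /* Illustration (hypothetical configuration): with rss_queues = 4 and
         * IGB_FLAG_QUEUE_PAIRS set, this requests 4 paired Tx/Rx queue
         * vectors + 1 link vector = 5 MSI-X entries below.
         */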
        adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
                                        GFP_KERNEL);
        if (!adapter->msix_entries)
                goto msi_only;

        for (i = 0; i < numvecs; i++)
                adapter->msix_entries[i].entry = i;

        err = pci_enable_msix(adapter->pdev,
                              adapter->msix_entries,
                              numvecs);
        if (err == 0)
                goto out;

        igb_reset_interrupt_capability(adapter);

        /* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
        /* disable SR-IOV for non MSI-X configurations */
        if (adapter->vf_data) {
                struct e1000_hw *hw = &adapter->hw;
                /* disable iov and allow time for transactions to clear */
                pci_disable_sriov(adapter->pdev);
                msleep(500);

                kfree(adapter->vf_data);
                adapter->vf_data = NULL;
                wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
                wrfl();
                msleep(100);
                dev_info(&adapter->pdev->dev, "IOV Disabled\n");
        }
#endif
        adapter->vfs_allocated_count = 0;
        adapter->rss_queues = 1;
        adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
        adapter->num_rx_queues = 1;
        adapter->num_tx_queues = 1;
        adapter->num_q_vectors = 1;
        if (!pci_enable_msi(adapter->pdev))
                adapter->flags |= IGB_FLAG_HAS_MSI;
out:
        /* Notify the stack of the (possibly) reduced queue counts. */
        netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
        return netif_set_real_num_rx_queues(adapter->netdev,
                                            adapter->num_rx_queues);
}

/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
        struct igb_q_vector *q_vector;
        struct e1000_hw *hw = &adapter->hw;
        int v_idx;
        int orig_node = adapter->node;

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
                if ((adapter->num_q_vectors == (adapter->num_rx_queues +
                                                adapter->num_tx_queues)) &&
                    (adapter->num_rx_queues == v_idx))
                        adapter->node = orig_node;
                if (orig_node == -1) {
                        int cur_node = next_online_node(adapter->node);
                        if (cur_node == MAX_NUMNODES)
                                cur_node = first_online_node;
                        adapter->node = cur_node;
                }
                q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
                                        adapter->node);
                if (!q_vector)
                        q_vector = kzalloc(sizeof(struct igb_q_vector),
                                           GFP_KERNEL);
                if (!q_vector)
                        goto err_out;
                q_vector->adapter = adapter;
                q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
                q_vector->itr_val = IGB_START_ITR;
                netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
                adapter->q_vector[v_idx] = q_vector;
        }
        /* Restore the adapter's original node */
        adapter->node = orig_node;

        return 0;

err_out:
        /* Restore the adapter's original node */
        adapter->node = orig_node;
        igb_free_q_vectors(adapter);
        return -ENOMEM;
}

static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

        q_vector->rx.ring = adapter->rx_ring[ring_idx];
        q_vector->rx.ring->q_vector = q_vector;
        q_vector->rx.count++;
        q_vector->itr_val = adapter->rx_itr_setting;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

        q_vector->tx.ring = adapter->tx_ring[ring_idx];
        q_vector->tx.ring->q_vector = q_vector;
        q_vector->tx.count++;
        q_vector->itr_val = adapter->tx_itr_setting;
        q_vector->tx.work_limit = adapter->tx_work_limit;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}

/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
        int i;
        int v_idx = 0;

        if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
            (adapter->num_q_vectors < adapter->num_tx_queues))
                return -ENOMEM;

        if (adapter->num_q_vectors >=
            (adapter->num_rx_queues + adapter->num_tx_queues)) {
                for (i = 0; i < adapter->num_rx_queues; i++)
                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
                for (i = 0; i < adapter->num_tx_queues; i++)
                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
        } else {
                for (i = 0; i < adapter->num_rx_queues; i++) {
                        if (i < adapter->num_tx_queues)
                                igb_map_tx_ring_to_vector(adapter, i, v_idx);
                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
                }
                for (; i < adapter->num_tx_queues; i++)
                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
        }
        return 0;
}
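/* Example of the two branches above (illustrative queue counts): with 4 Rx
 * and 4 Tx queues and 8 q_vectors, Rx rings take vectors 0-3 and Tx rings
 * take vectors 4-7; with only 4 q_vectors (queue pairs), each vector i is
 * assigned both tx_ring[i] and rx_ring[i].
 */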

/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
        struct pci_dev *pdev = adapter->pdev;
        int err;

        err = igb_set_interrupt_capability(adapter);
        if (err)
                return err;

        err = igb_alloc_q_vectors(adapter);
        if (err) {
                dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
                goto err_alloc_q_vectors;
        }

        err = igb_alloc_queues(adapter);
        if (err) {
                dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
                goto err_alloc_queues;
        }

        err = igb_map_ring_to_vector(adapter);
        if (err) {
                dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
                goto err_map_queues;
        }


        return 0;
err_map_queues:
        igb_free_queues(adapter);
err_alloc_queues:
        igb_free_q_vectors(adapter);
err_alloc_q_vectors:
        igb_reset_interrupt_capability(adapter);
        return err;
}

/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct pci_dev *pdev = adapter->pdev;
        int err = 0;

        if (adapter->msix_entries) {
                err = igb_request_msix(adapter);
                if (!err)
                        goto request_done;
                /* fall back to MSI */
                igb_clear_interrupt_scheme(adapter);
                if (!pci_enable_msi(pdev))
                        adapter->flags |= IGB_FLAG_HAS_MSI;
                igb_free_all_tx_resources(adapter);
                igb_free_all_rx_resources(adapter);
                adapter->num_tx_queues = 1;
                adapter->num_rx_queues = 1;
                adapter->num_q_vectors = 1;
                err = igb_alloc_q_vectors(adapter);
                if (err) {
                        dev_err(&pdev->dev,
                                "Unable to allocate memory for vectors\n");
                        goto request_done;
                }
                err = igb_alloc_queues(adapter);
                if (err) {
                        dev_err(&pdev->dev,
                                "Unable to allocate memory for queues\n");
                        igb_free_q_vectors(adapter);
                        goto request_done;
                }
                igb_setup_all_tx_resources(adapter);
                igb_setup_all_rx_resources(adapter);
        }

        igb_assign_vector(adapter->q_vector[0], 0);

        if (adapter->flags & IGB_FLAG_HAS_MSI) {
                err = request_irq(pdev->irq, igb_intr_msi, 0,
                                  netdev->name, adapter);
                if (!err)
                        goto request_done;

                /* fall back to legacy interrupts */
                igb_reset_interrupt_capability(adapter);
                adapter->flags &= ~IGB_FLAG_HAS_MSI;
        }

        err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
                          netdev->name, adapter);

        if (err)
                dev_err(&pdev->dev, "Error %d getting interrupt\n",
                        err);

request_done:
        return err;
}

static void igb_free_irq(struct igb_adapter *adapter)
{
        if (adapter->msix_entries) {
                int vector = 0, i;

                free_irq(adapter->msix_entries[vector++].vector, adapter);

                for (i = 0; i < adapter->num_q_vectors; i++)
                        free_irq(adapter->msix_entries[vector++].vector,
                                 adapter->q_vector[i]);
        } else {
                free_irq(adapter->pdev->irq, adapter);
        }
}

/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;

        /*
1349          * We need to be careful when disabling interrupts.  The VFs are
1350          * also mapped into these registers, so clearing the bits could
1351          * disrupt the VF drivers; only clear the bits we ourselves set.
1352          */
1353         if (adapter->msix_entries) {
1354                 u32 regval = rd32(E1000_EIAM);
1355                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1356                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1357                 regval = rd32(E1000_EIAC);
1358                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1359         }
1360
1361         wr32(E1000_IAM, 0);
1362         wr32(E1000_IMC, ~0);
1363         wrfl();
1364         if (adapter->msix_entries) {
1365                 int i;
1366                 for (i = 0; i < adapter->num_q_vectors; i++)
1367                         synchronize_irq(adapter->msix_entries[i].vector);
1368         } else {
1369                 synchronize_irq(adapter->pdev->irq);
1370         }
1371 }
1372
1373 /**
1374  * igb_irq_enable - Enable default interrupt generation settings
1375  * @adapter: board private structure
1376  **/
1377 static void igb_irq_enable(struct igb_adapter *adapter)
1378 {
1379         struct e1000_hw *hw = &adapter->hw;
1380
1381         if (adapter->msix_entries) {
1382                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1383                 u32 regval = rd32(E1000_EIAC);
1384                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1385                 regval = rd32(E1000_EIAM);
1386                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1387                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1388                 if (adapter->vfs_allocated_count) {
1389                         wr32(E1000_MBVFIMR, 0xFF);
1390                         ims |= E1000_IMS_VMMB;
1391                 }
1392                 wr32(E1000_IMS, ims);
1393         } else {
1394                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1395                                 E1000_IMS_DRSTA);
1396                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1397                                 E1000_IMS_DRSTA);
1398         }
1399 }
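
/* igb_irq_disable()/igb_irq_enable() bracket hardware reconfiguration; a
 * condensed sketch of how igb_down()/igb_up() below pair them (assumes an
 * adapter in scope, illustrative only):
 */
#if 0
	igb_irq_disable(adapter);	/* mask EIMS/IMS, then synchronize_irq() */
	/* ... reprogram rings, RSS, flow control ... */
	rd32(E1000_ICR);		/* discard any latched causes */
	igb_irq_enable(adapter);	/* unmask only the bits this PF owns */
#endif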
1400
1401 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1402 {
1403         struct e1000_hw *hw = &adapter->hw;
1404         u16 vid = adapter->hw.mng_cookie.vlan_id;
1405         u16 old_vid = adapter->mng_vlan_id;
1406
1407         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1408                 /* add VID to filter table */
1409                 igb_vfta_set(hw, vid, true);
1410                 adapter->mng_vlan_id = vid;
1411         } else {
1412                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1413         }
1414
1415         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1416             (vid != old_vid) &&
1417             !test_bit(old_vid, adapter->active_vlans)) {
1418                 /* remove VID from filter table */
1419                 igb_vfta_set(hw, old_vid, false);
1420         }
1421 }
1422
1423 /**
1424  * igb_release_hw_control - release control of the h/w to f/w
1425  * @adapter: address of board private structure
1426  *
1427  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1428  * For ASF and Pass Through versions of f/w this means that the
1429  * driver is no longer loaded.
1430  *
1431  **/
1432 static void igb_release_hw_control(struct igb_adapter *adapter)
1433 {
1434         struct e1000_hw *hw = &adapter->hw;
1435         u32 ctrl_ext;
1436
1437         /* Let firmware take over control of h/w */
1438         ctrl_ext = rd32(E1000_CTRL_EXT);
1439         wr32(E1000_CTRL_EXT,
1440                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1441 }
1442
1443 /**
1444  * igb_get_hw_control - get control of the h/w from f/w
1445  * @adapter: address of board private structure
1446  *
1447  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1448  * For ASF and Pass Through versions of f/w this means that
1449  * the driver is loaded.
1450  *
1451  **/
1452 static void igb_get_hw_control(struct igb_adapter *adapter)
1453 {
1454         struct e1000_hw *hw = &adapter->hw;
1455         u32 ctrl_ext;
1456
1457         /* Let firmware know the driver has taken over */
1458         ctrl_ext = rd32(E1000_CTRL_EXT);
1459         wr32(E1000_CTRL_EXT,
1460                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1461 }
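
/* Both helpers above are plain read-modify-write updates of one handshake
 * bit, leaving every other firmware-owned CTRL_EXT bit intact.  Condensed
 * sketch (the 'take' flag is illustrative, not driver code):
 */
#if 0
	u32 ctrl_ext = rd32(E1000_CTRL_EXT);

	wr32(E1000_CTRL_EXT, take ? (ctrl_ext | E1000_CTRL_EXT_DRV_LOAD)
				  : (ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD));
#endif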
1462
1463 /**
1464  * igb_configure - configure the hardware for RX and TX
1465  * @adapter: private board structure
1466  **/
1467 static void igb_configure(struct igb_adapter *adapter)
1468 {
1469         struct net_device *netdev = adapter->netdev;
1470         int i;
1471
1472         igb_get_hw_control(adapter);
1473         igb_set_rx_mode(netdev);
1474
1475         igb_restore_vlan(adapter);
1476
1477         igb_setup_tctl(adapter);
1478         igb_setup_mrqc(adapter);
1479         igb_setup_rctl(adapter);
1480
1481         igb_configure_tx(adapter);
1482         igb_configure_rx(adapter);
1483
1484         igb_rx_fifo_flush_82575(&adapter->hw);
1485
1486         /* call igb_desc_unused which always leaves
1487          * at least 1 descriptor unused to make sure
1488          * next_to_use != next_to_clean */
1489         for (i = 0; i < adapter->num_rx_queues; i++) {
1490                 struct igb_ring *ring = adapter->rx_ring[i];
1491                 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1492         }
1493 }
1494
1495 /**
1496  * igb_power_up_link - Power up the phy/serdes link
1497  * @adapter: address of board private structure
1498  **/
1499 void igb_power_up_link(struct igb_adapter *adapter)
1500 {
1501         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1502                 igb_power_up_phy_copper(&adapter->hw);
1503         else
1504                 igb_power_up_serdes_link_82575(&adapter->hw);
1505 }
1506
1507 /**
1508  * igb_power_down_link - Power down the phy/serdes link
1509  * @adapter: address of board private structure
1510  */
1511 static void igb_power_down_link(struct igb_adapter *adapter)
1512 {
1513         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1514                 igb_power_down_phy_copper_82575(&adapter->hw);
1515         else
1516                 igb_shutdown_serdes_link_82575(&adapter->hw);
1517 }
1518
1519 /**
1520  * igb_up - Open the interface and prepare it to handle traffic
1521  * @adapter: board private structure
1522  **/
1523 int igb_up(struct igb_adapter *adapter)
1524 {
1525         struct e1000_hw *hw = &adapter->hw;
1526         int i;
1527
1528         /* hardware has been reset, we need to reload some things */
1529         igb_configure(adapter);
1530
1531         clear_bit(__IGB_DOWN, &adapter->state);
1532
1533         for (i = 0; i < adapter->num_q_vectors; i++)
1534                 napi_enable(&(adapter->q_vector[i]->napi));
1535
1536         if (adapter->msix_entries)
1537                 igb_configure_msix(adapter);
1538         else
1539                 igb_assign_vector(adapter->q_vector[0], 0);
1540
1541         /* Clear any pending interrupts. */
1542         rd32(E1000_ICR);
1543         igb_irq_enable(adapter);
1544
1545         /* notify VFs that reset has been completed */
1546         if (adapter->vfs_allocated_count) {
1547                 u32 reg_data = rd32(E1000_CTRL_EXT);
1548                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1549                 wr32(E1000_CTRL_EXT, reg_data);
1550         }
1551
1552         netif_tx_start_all_queues(adapter->netdev);
1553
1554         /* start the watchdog. */
1555         hw->mac.get_link_status = 1;
1556         schedule_work(&adapter->watchdog_task);
1557
1558         return 0;
1559 }
1560
1561 void igb_down(struct igb_adapter *adapter)
1562 {
1563         struct net_device *netdev = adapter->netdev;
1564         struct e1000_hw *hw = &adapter->hw;
1565         u32 tctl, rctl;
1566         int i;
1567
1568         /* signal that we're down so the interrupt handler does not
1569          * reschedule our watchdog timer */
1570         set_bit(__IGB_DOWN, &adapter->state);
1571
1572         /* disable receives in the hardware */
1573         rctl = rd32(E1000_RCTL);
1574         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1575         /* flush and sleep below */
1576
1577         netif_tx_stop_all_queues(netdev);
1578
1579         /* disable transmits in the hardware */
1580         tctl = rd32(E1000_TCTL);
1581         tctl &= ~E1000_TCTL_EN;
1582         wr32(E1000_TCTL, tctl);
1583         /* flush both disables and wait for them to finish */
1584         wrfl();
1585         msleep(10);
1586
1587         for (i = 0; i < adapter->num_q_vectors; i++)
1588                 napi_disable(&(adapter->q_vector[i]->napi));
1589
1590         igb_irq_disable(adapter);
1591
1592         del_timer_sync(&adapter->watchdog_timer);
1593         del_timer_sync(&adapter->phy_info_timer);
1594
1595         netif_carrier_off(netdev);
1596
1597         /* record the stats before reset */
1598         spin_lock(&adapter->stats64_lock);
1599         igb_update_stats(adapter, &adapter->stats64);
1600         spin_unlock(&adapter->stats64_lock);
1601
1602         adapter->link_speed = 0;
1603         adapter->link_duplex = 0;
1604
1605         if (!pci_channel_offline(adapter->pdev))
1606                 igb_reset(adapter);
1607         igb_clean_all_tx_rings(adapter);
1608         igb_clean_all_rx_rings(adapter);
1609 #ifdef CONFIG_IGB_DCA
1610
1611         /* since we reset the hardware DCA settings were cleared */
1612         igb_setup_dca(adapter);
1613 #endif
1614 }
1615
1616 void igb_reinit_locked(struct igb_adapter *adapter)
1617 {
1618         WARN_ON(in_interrupt());
1619         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1620                 msleep(1);
1621         igb_down(adapter);
1622         igb_up(adapter);
1623         clear_bit(__IGB_RESETTING, &adapter->state);
1624 }
1625
1626 void igb_reset(struct igb_adapter *adapter)
1627 {
1628         struct pci_dev *pdev = adapter->pdev;
1629         struct e1000_hw *hw = &adapter->hw;
1630         struct e1000_mac_info *mac = &hw->mac;
1631         struct e1000_fc_info *fc = &hw->fc;
1632         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1633         u16 hwm;
1634
1635         /* Repartition the PBA for MTUs greater than 9K.
1636          * CTRL.RST is required for the change to take effect.
1637          */
1638         switch (mac->type) {
1639         case e1000_i350:
1640         case e1000_82580:
1641                 pba = rd32(E1000_RXPBS);
1642                 pba = igb_rxpbs_adjust_82580(pba);
1643                 break;
1644         case e1000_82576:
1645                 pba = rd32(E1000_RXPBS);
1646                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1647                 break;
1648         case e1000_82575:
1649         default:
1650                 pba = E1000_PBA_34K;
1651                 break;
1652         }
1653
1654         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1655             (mac->type < e1000_82576)) {
1656                 /* adjust PBA for jumbo frames */
1657                 wr32(E1000_PBA, pba);
1658
1659                 /* To maintain wire speed transmits, the Tx FIFO should be
1660                  * large enough to accommodate two full transmit packets,
1661                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1662                  * the Rx FIFO should be large enough to accommodate at least
1663                  * one full receive packet and is similarly rounded up and
1664                  * expressed in KB. */
1665                 pba = rd32(E1000_PBA);
1666                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1667                 tx_space = pba >> 16;
1668                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1669                 pba &= 0xffff;
1670                 /* the Tx FIFO also stores 16 bytes of info about the Tx packet,
1671                  * but don't include the Ethernet FCS; hardware appends it */
1672                 min_tx_space = (adapter->max_frame_size +
1673                                 sizeof(union e1000_adv_tx_desc) -
1674                                 ETH_FCS_LEN) * 2;
1675                 min_tx_space = ALIGN(min_tx_space, 1024);
1676                 min_tx_space >>= 10;
1677                 /* software strips receive CRC, so leave room for it */
1678                 min_rx_space = adapter->max_frame_size;
1679                 min_rx_space = ALIGN(min_rx_space, 1024);
1680                 min_rx_space >>= 10;
1681
1682                 /* If current Tx allocation is less than the min Tx FIFO size,
1683                  * and the min Tx FIFO size is less than the current Rx FIFO
1684                  * allocation, take space away from current Rx allocation */
1685                 if (tx_space < min_tx_space &&
1686                     ((min_tx_space - tx_space) < pba)) {
1687                         pba = pba - (min_tx_space - tx_space);
1688
1689                         /* if short on rx space, rx wins and must trump tx
1690                          * adjustment */
1691                         if (pba < min_rx_space)
1692                                 pba = min_rx_space;
1693                 }
1694                 wr32(E1000_PBA, pba);
1695         }
1696
1697         /* flow control settings */
1698         /* The high water mark must be low enough to fit one full frame
1699          * (or the size used for early receive) above it in the Rx FIFO.
1700          * Set it to the lower of:
1701          * - 90% of the Rx FIFO size, or
1702          * - the full Rx FIFO size minus two full frames */
1703         hwm = min(((pba << 10) * 9 / 10),
1704                         ((pba << 10) - 2 * adapter->max_frame_size));
1705
1706         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1707         fc->low_water = fc->high_water - 16;
1708         fc->pause_time = 0xFFFF;
1709         fc->send_xon = 1;
1710         fc->current_mode = fc->requested_mode;
1711
1712         /* reset per-VF state and quiesce VF traffic before the hw reset */
1713         if (adapter->vfs_allocated_count) {
1714                 int i;
1715                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1716                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1717
1718                 /* ping all the active vfs to let them know we are going down */
1719                 igb_ping_all_vfs(adapter);
1720
1721                 /* disable transmits and receives */
1722                 wr32(E1000_VFRE, 0);
1723                 wr32(E1000_VFTE, 0);
1724         }
1725
1726         /* Allow time for pending master requests to run */
1727         hw->mac.ops.reset_hw(hw);
1728         wr32(E1000_WUC, 0);
1729
1730         if (hw->mac.ops.init_hw(hw))
1731                 dev_err(&pdev->dev, "Hardware Error\n");
1732
1733         igb_init_dmac(adapter, pba);
1734         if (!netif_running(adapter->netdev))
1735                 igb_power_down_link(adapter);
1736
1737         igb_update_mng_vlan(adapter);
1738
1739         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1740         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1741
1742         igb_get_phy_info(hw);
1743 }
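
/* A worked example of the flow control arithmetic above, assuming the
 * 82575 default PBA of 34KB and a 1522-byte max frame:
 *
 *	pba << 10               = 34816 bytes of Rx FIFO
 *	90% of the FIFO         = 34816 * 9 / 10   = 31334
 *	FIFO minus two frames   = 34816 - 2 * 1522 = 31772
 *	hwm                     = min(31334, 31772) = 31334
 *	fc->high_water          = 31334 & 0xFFF0    = 31328
 *	fc->low_water           = 31328 - 16        = 31312
 */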
1744
1745 static u32 igb_fix_features(struct net_device *netdev, u32 features)
1746 {
1747         /*
1748          * Since there is no support for separate Rx/Tx VLAN accel
1749          * enable/disable, make sure the Tx flag always matches the Rx flag.
1750          */
1751         if (features & NETIF_F_HW_VLAN_RX)
1752                 features |= NETIF_F_HW_VLAN_TX;
1753         else
1754                 features &= ~NETIF_F_HW_VLAN_TX;
1755
1756         return features;
1757 }
1758
1759 static int igb_set_features(struct net_device *netdev, u32 features)
1760 {
1761         u32 changed = netdev->features ^ features;
1762
1763         if (changed & NETIF_F_HW_VLAN_RX)
1764                 igb_vlan_mode(netdev, features);
1765
1766         return 0;
1767 }
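
/* The two callbacks above implement the netdev feature-negotiation
 * contract: the stack calls ndo_fix_features() to let the driver sanitize
 * a requested feature set, then commits the result via ndo_set_features().
 * Roughly (a simplified sketch of the core's logic, not verbatim kernel
 * code):
 */
#if 0
	u32 features = netdev->wanted_features;

	features = igb_fix_features(netdev, features);
	if (netdev->features != features) {
		igb_set_features(netdev, features);
		netdev->features = features;
	}
#endif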
1768
1769 static const struct net_device_ops igb_netdev_ops = {
1770         .ndo_open               = igb_open,
1771         .ndo_stop               = igb_close,
1772         .ndo_start_xmit         = igb_xmit_frame,
1773         .ndo_get_stats64        = igb_get_stats64,
1774         .ndo_set_rx_mode        = igb_set_rx_mode,
1775         .ndo_set_mac_address    = igb_set_mac,
1776         .ndo_change_mtu         = igb_change_mtu,
1777         .ndo_do_ioctl           = igb_ioctl,
1778         .ndo_tx_timeout         = igb_tx_timeout,
1779         .ndo_validate_addr      = eth_validate_addr,
1780         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1781         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1782         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1783         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1784         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1785         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1786 #ifdef CONFIG_NET_POLL_CONTROLLER
1787         .ndo_poll_controller    = igb_netpoll,
1788 #endif
1789         .ndo_fix_features       = igb_fix_features,
1790         .ndo_set_features       = igb_set_features,
1791 };
1792
1793 /**
1794  * igb_probe - Device Initialization Routine
1795  * @pdev: PCI device information struct
1796  * @ent: entry in igb_pci_tbl
1797  *
1798  * Returns 0 on success, negative on failure
1799  *
1800  * igb_probe initializes an adapter identified by a pci_dev structure.
1801  * The OS initialization, configuring of the adapter private structure,
1802  * and a hardware reset occur.
1803  **/
1804 static int __devinit igb_probe(struct pci_dev *pdev,
1805                                const struct pci_device_id *ent)
1806 {
1807         struct net_device *netdev;
1808         struct igb_adapter *adapter;
1809         struct e1000_hw *hw;
1810         u16 eeprom_data = 0;
1811         s32 ret_val;
1812         static int global_quad_port_a; /* global quad port a indication */
1813         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1814         unsigned long mmio_start, mmio_len;
1815         int err, pci_using_dac;
1816         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1817         u8 part_str[E1000_PBANUM_LENGTH];
1818
1819         /* Catch broken hardware that put the wrong VF device ID in
1820          * the PCIe SR-IOV capability.
1821          */
1822         if (pdev->is_virtfn) {
1823                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1824                      pci_name(pdev), pdev->vendor, pdev->device);
1825                 return -EINVAL;
1826         }
1827
1828         err = pci_enable_device_mem(pdev);
1829         if (err)
1830                 return err;
1831
1832         pci_using_dac = 0;
1833         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1834         if (!err) {
1835                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1836                 if (!err)
1837                         pci_using_dac = 1;
1838         } else {
1839                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1840                 if (err) {
1841                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1842                         if (err) {
1843                                 dev_err(&pdev->dev, "No usable DMA "
1844                                         "configuration, aborting\n");
1845                                 goto err_dma;
1846                         }
1847                 }
1848         }
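        /* Classic DMA-addressing fallback: prefer a 64-bit mask (which also
         * enables NETIF_F_HIGHDMA below) and degrade to 32-bit; only when
         * both fail does the probe abort. */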
1849
1850         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1851                                            IORESOURCE_MEM),
1852                                            igb_driver_name);
1853         if (err)
1854                 goto err_pci_reg;
1855
1856         pci_enable_pcie_error_reporting(pdev);
1857
1858         pci_set_master(pdev);
1859         pci_save_state(pdev);
1860
1861         err = -ENOMEM;
1862         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1863                                    IGB_MAX_TX_QUEUES);
1864         if (!netdev)
1865                 goto err_alloc_etherdev;
1866
1867         SET_NETDEV_DEV(netdev, &pdev->dev);
1868
1869         pci_set_drvdata(pdev, netdev);
1870         adapter = netdev_priv(netdev);
1871         adapter->netdev = netdev;
1872         adapter->pdev = pdev;
1873         hw = &adapter->hw;
1874         hw->back = adapter;
1875         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1876
1877         mmio_start = pci_resource_start(pdev, 0);
1878         mmio_len = pci_resource_len(pdev, 0);
1879
1880         err = -EIO;
1881         hw->hw_addr = ioremap(mmio_start, mmio_len);
1882         if (!hw->hw_addr)
1883                 goto err_ioremap;
1884
1885         netdev->netdev_ops = &igb_netdev_ops;
1886         igb_set_ethtool_ops(netdev);
1887         netdev->watchdog_timeo = 5 * HZ;
1888
1889         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1890
1891         netdev->mem_start = mmio_start;
1892         netdev->mem_end = mmio_start + mmio_len;
1893
1894         /* PCI config space info */
1895         hw->vendor_id = pdev->vendor;
1896         hw->device_id = pdev->device;
1897         hw->revision_id = pdev->revision;
1898         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1899         hw->subsystem_device_id = pdev->subsystem_device;
1900
1901         /* Copy the default MAC, PHY and NVM function pointers */
1902         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1903         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1904         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1905         /* Initialize skew-specific constants */
1906         err = ei->get_invariants(hw);
1907         if (err)
1908                 goto err_sw_init;
1909
1910         /* setup the private structure */
1911         err = igb_sw_init(adapter);
1912         if (err)
1913                 goto err_sw_init;
1914
1915         igb_get_bus_info_pcie(hw);
1916
1917         hw->phy.autoneg_wait_to_complete = false;
1918
1919         /* Copper options */
1920         if (hw->phy.media_type == e1000_media_type_copper) {
1921                 hw->phy.mdix = AUTO_ALL_MODES;
1922                 hw->phy.disable_polarity_correction = false;
1923                 hw->phy.ms_type = e1000_ms_hw_default;
1924         }
1925
1926         if (igb_check_reset_block(hw))
1927                 dev_info(&pdev->dev,
1928                         "PHY reset is blocked due to SOL/IDER session.\n");
1929
1930         /*
1931          * features is initialized to 0 at allocation; it may already
1932          * have bits set by igb_sw_init, so OR into it rather than
1933          * assigning.
1934          */
1935         netdev->features |= NETIF_F_SG |
1936                             NETIF_F_IP_CSUM |
1937                             NETIF_F_IPV6_CSUM |
1938                             NETIF_F_TSO |
1939                             NETIF_F_TSO6 |
1940                             NETIF_F_RXHASH |
1941                             NETIF_F_RXCSUM |
1942                             NETIF_F_HW_VLAN_RX |
1943                             NETIF_F_HW_VLAN_TX;
1944
1945         /* copy netdev features into list of user selectable features */
1946         netdev->hw_features |= netdev->features;
1947
1948         /* set this bit last since it cannot be part of hw_features */
1949         netdev->features |= NETIF_F_HW_VLAN_FILTER;
1950
1951         netdev->vlan_features |= NETIF_F_TSO |
1952                                  NETIF_F_TSO6 |
1953                                  NETIF_F_IP_CSUM |
1954                                  NETIF_F_IPV6_CSUM |
1955                                  NETIF_F_SG;
1956
1957         if (pci_using_dac) {
1958                 netdev->features |= NETIF_F_HIGHDMA;
1959                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1960         }
1961
1962         if (hw->mac.type >= e1000_82576) {
1963                 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1964                 netdev->features |= NETIF_F_SCTP_CSUM;
1965         }
1966
1967         netdev->priv_flags |= IFF_UNICAST_FLT;
1968
1969         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1970
1971         /* before reading the NVM, reset the controller to put the device in a
1972          * known good starting state */
1973         hw->mac.ops.reset_hw(hw);
1974
1975         /* make sure the NVM is good */
1976         if (hw->nvm.ops.validate(hw) < 0) {
1977                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1978                 err = -EIO;
1979                 goto err_eeprom;
1980         }
1981
1982         /* copy the MAC address out of the NVM */
1983         if (hw->mac.ops.read_mac_addr(hw))
1984                 dev_err(&pdev->dev, "NVM Read Error\n");
1985
1986         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1987         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1988
1989         if (!is_valid_ether_addr(netdev->perm_addr)) {
1990                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1991                 err = -EIO;
1992                 goto err_eeprom;
1993         }
1994
1995         setup_timer(&adapter->watchdog_timer, igb_watchdog,
1996                     (unsigned long) adapter);
1997         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1998                     (unsigned long) adapter);
1999
2000         INIT_WORK(&adapter->reset_task, igb_reset_task);
2001         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2002
2003         /* Initialize link properties that are user-changeable */
2004         adapter->fc_autoneg = true;
2005         hw->mac.autoneg = true;
2006         hw->phy.autoneg_advertised = 0x2f;
2007
2008         hw->fc.requested_mode = e1000_fc_default;
2009         hw->fc.current_mode = e1000_fc_default;
2010
2011         igb_validate_mdi_setting(hw);
2012
2013         /* Initial Wake on LAN setting.  If APM wake is enabled in the
2014          * EEPROM, enable the ACPI Magic Packet filter.
2015          */
2016
2017         if (hw->bus.func == 0)
2018                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2019         else if (hw->mac.type >= e1000_82580)
2020                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2021                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2022                                  &eeprom_data);
2023         else if (hw->bus.func == 1)
2024                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2025
2026         if (eeprom_data & eeprom_apme_mask)
2027                 adapter->eeprom_wol |= E1000_WUFC_MAG;
2028
2029         /* now that we have the eeprom settings, apply the special cases where
2030          * the eeprom may be wrong or the board simply won't support wake on
2031          * lan on a particular port */
2032         switch (pdev->device) {
2033         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2034                 adapter->eeprom_wol = 0;
2035                 break;
2036         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2037         case E1000_DEV_ID_82576_FIBER:
2038         case E1000_DEV_ID_82576_SERDES:
2039                 /* Wake events only supported on port A for dual fiber
2040                  * regardless of eeprom setting */
2041                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2042                         adapter->eeprom_wol = 0;
2043                 break;
2044         case E1000_DEV_ID_82576_QUAD_COPPER:
2045         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2046                 /* if quad port adapter, disable WoL on all but port A */
2047                 if (global_quad_port_a != 0)
2048                         adapter->eeprom_wol = 0;
2049                 else
2050                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2051                 /* Reset for multiple quad port adapters */
2052                 if (++global_quad_port_a == 4)
2053                         global_quad_port_a = 0;
2054                 break;
2055         }
2056
2057         /* initialize the wol settings based on the eeprom settings */
2058         adapter->wol = adapter->eeprom_wol;
2059         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2060
2061         /* reset the hardware with the new settings */
2062         igb_reset(adapter);
2063
2064         /* let the f/w know that the h/w is now under the control of the
2065          * driver. */
2066         igb_get_hw_control(adapter);
2067
2068         strcpy(netdev->name, "eth%d");
2069         err = register_netdev(netdev);
2070         if (err)
2071                 goto err_register;
2072
2073         /* carrier off reporting is important to ethtool even BEFORE open */
2074         netif_carrier_off(netdev);
2075
2076 #ifdef CONFIG_IGB_DCA
2077         if (dca_add_requester(&pdev->dev) == 0) {
2078                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2079                 dev_info(&pdev->dev, "DCA enabled\n");
2080                 igb_setup_dca(adapter);
2081         }
2082
2083 #endif
2084         /* do hw tstamp init after resetting */
2085         igb_init_hw_timer(adapter);
2086
2087         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2088         /* print bus type/speed/width info */
2089         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2090                  netdev->name,
2091                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2092                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2093                                                             "unknown"),
2094                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2095                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2096                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2097                    "unknown"),
2098                  netdev->dev_addr);
2099
2100         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2101         if (ret_val)
2102                 strcpy(part_str, "Unknown");
2103         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2104         dev_info(&pdev->dev,
2105                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2106                 adapter->msix_entries ? "MSI-X" :
2107                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2108                 adapter->num_rx_queues, adapter->num_tx_queues);
2109         switch (hw->mac.type) {
2110         case e1000_i350:
2111                 igb_set_eee_i350(hw);
2112                 break;
2113         default:
2114                 break;
2115         }
2116         return 0;
2117
2118 err_register:
2119         igb_release_hw_control(adapter);
2120 err_eeprom:
2121         if (!igb_check_reset_block(hw))
2122                 igb_reset_phy(hw);
2123
2124         if (hw->flash_address)
2125                 iounmap(hw->flash_address);
2126 err_sw_init:
2127         igb_clear_interrupt_scheme(adapter);
2128         iounmap(hw->hw_addr);
2129 err_ioremap:
2130         free_netdev(netdev);
2131 err_alloc_etherdev:
2132         pci_release_selected_regions(pdev,
2133                                      pci_select_bars(pdev, IORESOURCE_MEM));
2134 err_pci_reg:
2135 err_dma:
2136         pci_disable_device(pdev);
2137         return err;
2138 }
2139
2140 /**
2141  * igb_remove - Device Removal Routine
2142  * @pdev: PCI device information struct
2143  *
2144  * igb_remove is called by the PCI subsystem to alert the driver
2145  * that it should release a PCI device.  This could be caused by a
2146  * Hot-Plug event, or because the driver is going to be removed from
2147  * memory.
2148  **/
2149 static void __devexit igb_remove(struct pci_dev *pdev)
2150 {
2151         struct net_device *netdev = pci_get_drvdata(pdev);
2152         struct igb_adapter *adapter = netdev_priv(netdev);
2153         struct e1000_hw *hw = &adapter->hw;
2154
2155         /*
2156          * The watchdog timer may be rescheduled, so explicitly
2157          * disable watchdog from being rescheduled.
2158          */
2159         set_bit(__IGB_DOWN, &adapter->state);
2160         del_timer_sync(&adapter->watchdog_timer);
2161         del_timer_sync(&adapter->phy_info_timer);
2162
2163         cancel_work_sync(&adapter->reset_task);
2164         cancel_work_sync(&adapter->watchdog_task);
2165
2166 #ifdef CONFIG_IGB_DCA
2167         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2168                 dev_info(&pdev->dev, "DCA disabled\n");
2169                 dca_remove_requester(&pdev->dev);
2170                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2171                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2172         }
2173 #endif
2174
2175         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2176          * would have already happened in close and is redundant. */
2177         igb_release_hw_control(adapter);
2178
2179         unregister_netdev(netdev);
2180
2181         igb_clear_interrupt_scheme(adapter);
2182
2183 #ifdef CONFIG_PCI_IOV
2184         /* reclaim resources allocated to VFs */
2185         if (adapter->vf_data) {
2186                 /* disable iov and allow time for transactions to clear */
2187                 if (!igb_check_vf_assignment(adapter)) {
2188                         pci_disable_sriov(pdev);
2189                         msleep(500);
2190                 } else {
2191                         dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2192                 }
2193
2194                 kfree(adapter->vf_data);
2195                 adapter->vf_data = NULL;
2196                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2197                 wrfl();
2198                 msleep(100);
2199                 dev_info(&pdev->dev, "IOV Disabled\n");
2200         }
2201 #endif
2202
2203         iounmap(hw->hw_addr);
2204         if (hw->flash_address)
2205                 iounmap(hw->flash_address);
2206         pci_release_selected_regions(pdev,
2207                                      pci_select_bars(pdev, IORESOURCE_MEM));
2208
2209         kfree(adapter->shadow_vfta);
2210         free_netdev(netdev);
2211
2212         pci_disable_pcie_error_reporting(pdev);
2213
2214         pci_disable_device(pdev);
2215 }
2216
2217 /**
2218  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2219  * @adapter: board private structure to initialize
2220  *
2221  * This function initializes the vf specific data storage and then attempts to
2222  * allocate the VFs.  The reason for ordering it this way is because it is much
2223  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2224  * the memory for the VFs.
2225  **/
2226 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2227 {
2228 #ifdef CONFIG_PCI_IOV
2229         struct pci_dev *pdev = adapter->pdev;
2230         int old_vfs = igb_find_enabled_vfs(adapter);
2231         int i;
2232
2233         if (old_vfs) {
2234                 dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2235                          "max_vfs setting of %d\n", old_vfs, max_vfs);
2236                 adapter->vfs_allocated_count = old_vfs;
2237         }
2238
2239         if (!adapter->vfs_allocated_count)
2240                 return;
2241
2242         adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2243                                 sizeof(struct vf_data_storage), GFP_KERNEL);
2244         /* if allocation failed then we do not support SR-IOV */
2245         if (!adapter->vf_data) {
2246                 adapter->vfs_allocated_count = 0;
2247                 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2248                         "Data Storage\n");
2249                 goto out;
2250         }
2251
2252         if (!old_vfs) {
2253                 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2254                         goto err_out;
2255         }
2256         dev_info(&pdev->dev, "%d VFs allocated\n",
2257                  adapter->vfs_allocated_count);
2258         for (i = 0; i < adapter->vfs_allocated_count; i++)
2259                 igb_vf_configure(adapter, i);
2260
2261         /* DMA Coalescing is not supported in IOV mode. */
2262         adapter->flags &= ~IGB_FLAG_DMAC;
2263         goto out;
2264 err_out:
2265         kfree(adapter->vf_data);
2266         adapter->vf_data = NULL;
2267         adapter->vfs_allocated_count = 0;
2268 out:
2269         return;
2270 #endif /* CONFIG_PCI_IOV */
2271 }
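
/* The allocate-then-enable ordering above is deliberate: a failed
 * pci_enable_sriov() only costs a kfree() on the error path, whereas
 * enabling SR-IOV first would require a far more expensive
 * pci_disable_sriov() on failure.  The matching teardown is in
 * igb_remove() above.
 */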
2272
2273 /**
2274  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2275  * @adapter: board private structure to initialize
2276  *
2277  * igb_init_hw_timer initializes the function pointer and values for the hw
2278  * timer found in hardware.
2279  **/
2280 static void igb_init_hw_timer(struct igb_adapter *adapter)
2281 {
2282         struct e1000_hw *hw = &adapter->hw;
2283
2284         switch (hw->mac.type) {
2285         case e1000_i350:
2286         case e1000_82580:
2287                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2288                 adapter->cycles.read = igb_read_clock;
2289                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2290                 adapter->cycles.mult = 1;
2291                 /*
2292                  * The 82580 timesync updates the system timer every 8ns by 8ns
2293                  * and the value cannot be shifted.  Instead we need to shift
2294                  * the registers to generate a 64bit timer value.  As a result
2295                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2296                  * 24 in order to generate a larger value for synchronization.
2297                  */
2298                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2299                 /* disable system timer temporarily by setting bit 31 */
2300                 wr32(E1000_TSAUXC, 0x80000000);
2301                 wrfl();
2302
2303                 /* Set registers so that rollover occurs soon to test this. */
2304                 wr32(E1000_SYSTIMR, 0x00000000);
2305                 wr32(E1000_SYSTIML, 0x80000000);
2306                 wr32(E1000_SYSTIMH, 0x000000FF);
2307                 wrfl();
2308
2309                 /* enable system timer by clearing bit 31 */
2310                 wr32(E1000_TSAUXC, 0x0);
2311                 wrfl();
2312
2313                 timecounter_init(&adapter->clock,
2314                                  &adapter->cycles,
2315                                  ktime_to_ns(ktime_get_real()));
2316                 /*
2317                  * Synchronize our NIC clock against system wall clock. NIC
2318                  * time stamp reading requires ~3us per sample, each sample
2319                  * was pretty stable even under load => only require 10
2320                  * samples for each offset comparison.
2321                  */
2322                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2323                 adapter->compare.source = &adapter->clock;
2324                 adapter->compare.target = ktime_get_real;
2325                 adapter->compare.num_samples = 10;
2326                 timecompare_update(&adapter->compare, 0);
2327                 break;
2328         case e1000_82576:
2329                 /*
2330                  * Initialize hardware timer: we keep it running just in case
2331                  * that some program needs it later on.
2332                  */
2333                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2334                 adapter->cycles.read = igb_read_clock;
2335                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2336                 adapter->cycles.mult = 1;
2337                 /*
2338                  * Scale the NIC clock cycle by a large factor so that
2339                  * relatively small clock corrections can be added or
2340                  * subtracted at each clock tick. The drawbacks of a large
2341                  * factor are a) that the clock register overflows more quickly
2342                  * (not such a big deal) and b) that the increment per tick has
2343                  * to fit into 24 bits.  As a result we need to use a shift of
2344                  * 19 so we can fit a value of 16 into the TIMINCA register.
2345                  */
2346                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2347                 wr32(E1000_TIMINCA,
2348                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2349                                 (16 << IGB_82576_TSYNC_SHIFT));
2350
2351                 /* Set registers so that rollover occurs soon to test this. */
2352                 wr32(E1000_SYSTIML, 0x00000000);
2353                 wr32(E1000_SYSTIMH, 0xFF800000);
2354                 wrfl();
2355
2356                 timecounter_init(&adapter->clock,
2357                                  &adapter->cycles,
2358                                  ktime_to_ns(ktime_get_real()));
2359                 /*
2360                  * Synchronize our NIC clock against system wall clock. NIC
2361                  * time stamp reading requires ~3us per sample, each sample
2362                  * was pretty stable even under load => only require 10
2363                  * samples for each offset comparison.
2364                  */
2365                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2366                 adapter->compare.source = &adapter->clock;
2367                 adapter->compare.target = ktime_get_real;
2368                 adapter->compare.num_samples = 10;
2369                 timecompare_update(&adapter->compare, 0);
2370                 break;
2371         case e1000_82575:
2372                 /* 82575 does not support timesync */
2373         default:
2374                 break;
2375         }
2376
2377 }
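
/* Once the timecounter is initialized above, a monotonic 64-bit nanosecond
 * value can be read from it; the timestamp paths do essentially this
 * (a sketch using the kernel's timecounter API, illustrative only):
 */
#if 0
	u64 ns = timecounter_read(&adapter->clock);
	/* timecompare_transform(&adapter->compare, ns) then maps NIC time
	 * onto system time using the offset samples configured above. */
#endif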
2378
2379 /**
2380  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2381  * @adapter: board private structure to initialize
2382  *
2383  * igb_sw_init initializes the Adapter private data structure.
2384  * Fields are initialized based on PCI device information and
2385  * OS network device settings (MTU size).
2386  **/
2387 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2388 {
2389         struct e1000_hw *hw = &adapter->hw;
2390         struct net_device *netdev = adapter->netdev;
2391         struct pci_dev *pdev = adapter->pdev;
2392
2393         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2394
2395         /* set default ring sizes */
2396         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2397         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2398
2399         /* set default ITR values */
2400         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2401         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2402
2403         /* set default work limits */
2404         adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2405
2406         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2407                                   VLAN_HLEN;
2408         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2409
2410         adapter->node = -1;
2411
2412         spin_lock_init(&adapter->stats64_lock);
2413 #ifdef CONFIG_PCI_IOV
2414         switch (hw->mac.type) {
2415         case e1000_82576:
2416         case e1000_i350:
2417                 if (max_vfs > 7) {
2418                         dev_warn(&pdev->dev,
2419                                  "Maximum of 7 VFs per PF, using max\n");
2420                         adapter->vfs_allocated_count = 7;
2421                 } else
2422                         adapter->vfs_allocated_count = max_vfs;
2423                 break;
2424         default:
2425                 break;
2426         }
2427 #endif /* CONFIG_PCI_IOV */
2428         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2429         /* i350 cannot do RSS and SR-IOV at the same time */
2430         if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2431                 adapter->rss_queues = 1;
2432
2433         /*
2434          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2435          * then we should combine the queues into a queue pair in order to
2436          * conserve interrupts due to limited supply
2437          */
2438         if ((adapter->rss_queues > 4) ||
2439             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2440                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2441
2442         /* Setup and initialize a copy of the hw vlan table array */
2443         adapter->shadow_vfta = kzalloc(sizeof(u32) *
2444                                 E1000_VLAN_FILTER_TBL_SIZE,
2445                                 GFP_ATOMIC);
2446
2447         /* This call may decrease the number of queues */
2448         if (igb_init_interrupt_scheme(adapter)) {
2449                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2450                 return -ENOMEM;
2451         }
2452
2453         igb_probe_vfs(adapter);
2454
2455         /* Explicitly disable IRQ since the NIC can be in any state. */
2456         igb_irq_disable(adapter);
2457
2458         if (hw->mac.type == e1000_i350)
2459                 adapter->flags &= ~IGB_FLAG_DMAC;
2460
2461         set_bit(__IGB_DOWN, &adapter->state);
2462         return 0;
2463 }
2464
2465 /**
2466  * igb_open - Called when a network interface is made active
2467  * @netdev: network interface device structure
2468  *
2469  * Returns 0 on success, negative value on failure
2470  *
2471  * The open entry point is called when a network interface is made
2472  * active by the system (IFF_UP).  At this point all resources needed
2473  * for transmit and receive operations are allocated, the interrupt
2474  * handler is registered with the OS, the watchdog timer is started,
2475  * and the stack is notified that the interface is ready.
2476  **/
2477 static int igb_open(struct net_device *netdev)
2478 {
2479         struct igb_adapter *adapter = netdev_priv(netdev);
2480         struct e1000_hw *hw = &adapter->hw;
2481         int err;
2482         int i;
2483
2484         /* disallow open during test */
2485         if (test_bit(__IGB_TESTING, &adapter->state))
2486                 return -EBUSY;
2487
2488         netif_carrier_off(netdev);
2489
2490         /* allocate transmit descriptors */
2491         err = igb_setup_all_tx_resources(adapter);
2492         if (err)
2493                 goto err_setup_tx;
2494
2495         /* allocate receive descriptors */
2496         err = igb_setup_all_rx_resources(adapter);
2497         if (err)
2498                 goto err_setup_rx;
2499
2500         igb_power_up_link(adapter);
2501
2502         /* before we allocate an interrupt, we must be ready to handle it.
2503          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2504          * as soon as we call request_irq, so we have to set up our
2505          * clean_rx handler before we do so.  */
2506         igb_configure(adapter);
2507
2508         err = igb_request_irq(adapter);
2509         if (err)
2510                 goto err_req_irq;
2511
2512         /* From here on the code is the same as igb_up() */
2513         clear_bit(__IGB_DOWN, &adapter->state);
2514
2515         for (i = 0; i < adapter->num_q_vectors; i++)
2516                 napi_enable(&(adapter->q_vector[i]->napi));
2517
2518         /* Clear any pending interrupts. */
2519         rd32(E1000_ICR);
2520
2521         igb_irq_enable(adapter);
2522
2523         /* notify VFs that reset has been completed */
2524         if (adapter->vfs_allocated_count) {
2525                 u32 reg_data = rd32(E1000_CTRL_EXT);
2526                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2527                 wr32(E1000_CTRL_EXT, reg_data);
2528         }
2529
2530         netif_tx_start_all_queues(netdev);
2531
2532         /* start the watchdog. */
2533         hw->mac.get_link_status = 1;
2534         schedule_work(&adapter->watchdog_task);
2535
2536         return 0;
2537
2538 err_req_irq:
2539         igb_release_hw_control(adapter);
2540         igb_power_down_link(adapter);
2541         igb_free_all_rx_resources(adapter);
2542 err_setup_rx:
2543         igb_free_all_tx_resources(adapter);
2544 err_setup_tx:
2545         igb_reset(adapter);
2546
2547         return err;
2548 }
2549
2550 /**
2551  * igb_close - Disables a network interface
2552  * @netdev: network interface device structure
2553  *
2554  * Returns 0, this is not allowed to fail
2555  *
2556  * The close entry point is called when an interface is de-activated
2557  * by the OS.  The hardware is still under the driver's control, but
2558  * needs to be disabled.  A global MAC reset is issued to stop the
2559  * hardware, and all transmit and receive resources are freed.
2560  **/
2561 static int igb_close(struct net_device *netdev)
2562 {
2563         struct igb_adapter *adapter = netdev_priv(netdev);
2564
2565         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2566         igb_down(adapter);
2567
2568         igb_free_irq(adapter);
2569
2570         igb_free_all_tx_resources(adapter);
2571         igb_free_all_rx_resources(adapter);
2572
2573         return 0;
2574 }
2575
2576 /**
2577  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2578  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2579  *
2580  * Return 0 on success, negative on failure
2581  **/
2582 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2583 {
2584         struct device *dev = tx_ring->dev;
2585         int orig_node = dev_to_node(dev);
2586         int size;
2587
2588         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2589         tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2590         if (!tx_ring->tx_buffer_info)
2591                 tx_ring->tx_buffer_info = vzalloc(size);
2592         if (!tx_ring->tx_buffer_info)
2593                 goto err;
2594
2595         /* round up to nearest 4K */
2596         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2597         tx_ring->size = ALIGN(tx_ring->size, 4096);
2598
2599         set_dev_node(dev, tx_ring->numa_node);
2600         tx_ring->desc = dma_alloc_coherent(dev,
2601                                            tx_ring->size,
2602                                            &tx_ring->dma,
2603                                            GFP_KERNEL);
2604         set_dev_node(dev, orig_node);
2605         if (!tx_ring->desc)
2606                 tx_ring->desc = dma_alloc_coherent(dev,
2607                                                    tx_ring->size,
2608                                                    &tx_ring->dma,
2609                                                    GFP_KERNEL);
2610
2611         if (!tx_ring->desc)
2612                 goto err;
2613
2614         tx_ring->next_to_use = 0;
2615         tx_ring->next_to_clean = 0;
2616
2617         return 0;
2618
2619 err:
2620         vfree(tx_ring->tx_buffer_info);
2621         dev_err(dev,
2622                 "Unable to allocate memory for the transmit descriptor ring\n");
2623         return -ENOMEM;
2624 }
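
/* Each allocation above is tried on the ring's NUMA node first and retried
 * without a node preference on failure -- locality is a hint, not a hard
 * requirement.  The generic shape of the pattern (names illustrative):
 */
#if 0
	buf = vzalloc_node(size, node);		/* preferred node */
	if (!buf)
		buf = vzalloc(size);		/* any node beats failing */

	set_dev_node(dev, node);		/* bias the coherent DMA alloc */
	desc = dma_alloc_coherent(dev, size, &dma, GFP_KERNEL);
	set_dev_node(dev, orig_node);		/* always restore the dev node */
	if (!desc)
		desc = dma_alloc_coherent(dev, size, &dma, GFP_KERNEL);
#endif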
2625
2626 /**
2627  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2628  *                                (Descriptors) for all queues
2629  * @adapter: board private structure
2630  *
2631  * Return 0 on success, negative on failure
2632  **/
2633 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2634 {
2635         struct pci_dev *pdev = adapter->pdev;
2636         int i, err = 0;
2637
2638         for (i = 0; i < adapter->num_tx_queues; i++) {
2639                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2640                 if (err) {
2641                         dev_err(&pdev->dev,
2642                                 "Allocation for Tx Queue %u failed\n", i);
2643                         for (i--; i >= 0; i--)
2644                                 igb_free_tx_resources(adapter->tx_ring[i]);
2645                         break;
2646                 }
2647         }
2648
2649         return err;
2650 }
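
/* The "for (i--; i >= 0; i--)" loop above is the standard partial-unwind
 * idiom: on the first failure, free only the queues that were set up
 * successfully, then return the error.  igb_setup_all_rx_resources()
 * below uses the same shape.
 */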
2651
2652 /**
2653  * igb_setup_tctl - configure the transmit control registers
2654  * @adapter: Board private structure
2655  **/
2656 void igb_setup_tctl(struct igb_adapter *adapter)
2657 {
2658         struct e1000_hw *hw = &adapter->hw;
2659         u32 tctl;
2660
2661         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2662         wr32(E1000_TXDCTL(0), 0);
2663
2664         /* Program the Transmit Control Register */
2665         tctl = rd32(E1000_TCTL);
2666         tctl &= ~E1000_TCTL_CT;
2667         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2668                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2669
2670         igb_config_collision_dist(hw);
2671
2672         /* Enable transmits */
2673         tctl |= E1000_TCTL_EN;
2674
2675         wr32(E1000_TCTL, tctl);
2676 }
2677
2678 /**
2679  * igb_configure_tx_ring - Configure transmit ring after Reset
2680  * @adapter: board private structure
2681  * @ring: tx ring to configure
2682  *
2683  * Configure a transmit ring after a reset.
2684  **/
2685 void igb_configure_tx_ring(struct igb_adapter *adapter,
2686                            struct igb_ring *ring)
2687 {
2688         struct e1000_hw *hw = &adapter->hw;
2689         u32 txdctl = 0;
2690         u64 tdba = ring->dma;
2691         int reg_idx = ring->reg_idx;
2692
2693         /* disable the queue */
2694         wr32(E1000_TXDCTL(reg_idx), 0);
2695         wrfl();
2696         mdelay(10);
2697
2698         wr32(E1000_TDLEN(reg_idx),
2699                         ring->count * sizeof(union e1000_adv_tx_desc));
2700         wr32(E1000_TDBAL(reg_idx),
2701                         tdba & 0x00000000ffffffffULL);
2702         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2703
2704         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2705         wr32(E1000_TDH(reg_idx), 0);
2706         writel(0, ring->tail);
2707
2708         txdctl |= IGB_TX_PTHRESH;
2709         txdctl |= IGB_TX_HTHRESH << 8;
2710         txdctl |= IGB_TX_WTHRESH << 16;
2711
2712         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2713         wr32(E1000_TXDCTL(reg_idx), txdctl);
2714 }
2715
2716 /**
2717  * igb_configure_tx - Configure transmit Unit after Reset
2718  * @adapter: board private structure
2719  *
2720  * Configure the Tx unit of the MAC after a reset.
2721  **/
2722 static void igb_configure_tx(struct igb_adapter *adapter)
2723 {
2724         int i;
2725
2726         for (i = 0; i < adapter->num_tx_queues; i++)
2727                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2728 }
2729
2730 /**
2731  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2732  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2733  *
2734  * Returns 0 on success, negative on failure
2735  **/
2736 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2737 {
2738         struct device *dev = rx_ring->dev;
2739         int orig_node = dev_to_node(dev);
2740         int size, desc_len;
2741
2742         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2743         rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2744         if (!rx_ring->rx_buffer_info)
2745                 rx_ring->rx_buffer_info = vzalloc(size);
2746         if (!rx_ring->rx_buffer_info)
2747                 goto err;
2748
2749         desc_len = sizeof(union e1000_adv_rx_desc);
2750
2751         /* Round up to nearest 4K */
2752         rx_ring->size = rx_ring->count * desc_len;
2753         rx_ring->size = ALIGN(rx_ring->size, 4096);
2754
2755         set_dev_node(dev, rx_ring->numa_node);
2756         rx_ring->desc = dma_alloc_coherent(dev,
2757                                            rx_ring->size,
2758                                            &rx_ring->dma,
2759                                            GFP_KERNEL);
2760         set_dev_node(dev, orig_node);
2761         if (!rx_ring->desc)
2762                 rx_ring->desc = dma_alloc_coherent(dev,
2763                                                    rx_ring->size,
2764                                                    &rx_ring->dma,
2765                                                    GFP_KERNEL);
2766
2767         if (!rx_ring->desc)
2768                 goto err;
2769
2770         rx_ring->next_to_clean = 0;
2771         rx_ring->next_to_use = 0;
2772
2773         return 0;
2774
2775 err:
2776         vfree(rx_ring->rx_buffer_info);
2777         rx_ring->rx_buffer_info = NULL;
2778         dev_err(dev,
2779                 "Unable to allocate memory for the receive descriptor ring\n");
2780         return -ENOMEM;
2781 }
2782
2783 /**
2784  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2785  *                                (Descriptors) for all queues
2786  * @adapter: board private structure
2787  *
2788  * Return 0 on success, negative on failure
2789  **/
2790 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2791 {
2792         struct pci_dev *pdev = adapter->pdev;
2793         int i, err = 0;
2794
2795         for (i = 0; i < adapter->num_rx_queues; i++) {
2796                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2797                 if (err) {
2798                         dev_err(&pdev->dev,
2799                                 "Allocation for Rx Queue %u failed\n", i);
2800                         for (i--; i >= 0; i--)
2801                                 igb_free_rx_resources(adapter->rx_ring[i]);
2802                         break;
2803                 }
2804         }
2805
2806         return err;
2807 }
2808
2809 /**
2810  * igb_setup_mrqc - configure the multiple receive queue control registers
2811  * @adapter: Board private structure
2812  **/
2813 static void igb_setup_mrqc(struct igb_adapter *adapter)
2814 {
2815         struct e1000_hw *hw = &adapter->hw;
2816         u32 mrqc, rxcsum;
2817         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2818         union e1000_reta {
2819                 u32 dword;
2820                 u8  bytes[4];
2821         } reta;
2822         static const u8 rsshash[40] = {
2823                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2824                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2825                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2826                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2827
2828         /* Fill out hash function seeds */
2829         for (j = 0; j < 10; j++) {
2830                 u32 rsskey = rsshash[(j * 4)];
2831                 rsskey |= rsshash[(j * 4) + 1] << 8;
2832                 rsskey |= rsshash[(j * 4) + 2] << 16;
2833                 rsskey |= rsshash[(j * 4) + 3] << 24;
2834                 array_wr32(E1000_RSSRK(0), j, rsskey);
2835         }
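        /*
         * Each RSSRK write packs four key bytes little-endian, so the
         * first word programmed above is 0xda565a6d.
         */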
2836
2837         num_rx_queues = adapter->rss_queues;
2838
2839         if (adapter->vfs_allocated_count) {
2840                 /* with VMDq enabled, 82576 supports 2 RSS queues; 82580/i350 only 1 */
2841                 switch (hw->mac.type) {
2842                 case e1000_i350:
2843                 case e1000_82580:
2844                         num_rx_queues = 1;
2845                         shift = 0;
2846                         break;
2847                 case e1000_82576:
2848                         shift = 3;
2849                         num_rx_queues = 2;
2850                         break;
2851                 case e1000_82575:
2852                         shift = 2;
2853                         shift2 = 6;
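                        /* fall through */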
2854                 default:
2855                         break;
2856                 }
2857         } else {
2858                 if (hw->mac.type == e1000_82575)
2859                         shift = 6;
2860         }
2861
2862         for (j = 0; j < (32 * 4); j++) {
2863                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2864                 if (shift2)
2865                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2866                 if ((j & 3) == 3)
2867                         wr32(E1000_RETA(j >> 2), reta.dword);
2868         }
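        /*
         * For example, with four RSS queues and no shift the 128 redirection
         * entries cycle 0,1,2,3, so every RETA dword written is 0x03020100.
         */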
2869
2870         /*
2871          * Disable raw packet checksumming so that RSS hash is placed in
2872          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2873          * offloads as they are enabled by default
2874          */
2875         rxcsum = rd32(E1000_RXCSUM);
2876         rxcsum |= E1000_RXCSUM_PCSD;
2877
2878         if (adapter->hw.mac.type >= e1000_82576)
2879                 /* Enable Receive Checksum Offload for SCTP */
2880                 rxcsum |= E1000_RXCSUM_CRCOFL;
2881
2882         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2883         wr32(E1000_RXCSUM, rxcsum);
2884
2885         /* If VMDq is enabled then we set the appropriate mode for that, else
2886          * we default to RSS so that an RSS hash is calculated per packet even
2887          * if we are only using one queue */
2888         if (adapter->vfs_allocated_count) {
2889                 if (hw->mac.type > e1000_82575) {
2890                         /* Set the default pool for the PF's first queue */
2891                         u32 vtctl = rd32(E1000_VT_CTL);
2892                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2893                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2894                         vtctl |= adapter->vfs_allocated_count <<
2895                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2896                         wr32(E1000_VT_CTL, vtctl);
2897                 }
2898                 if (adapter->rss_queues > 1)
2899                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2900                 else
2901                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2902         } else {
2903                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2904         }
2905         igb_vmm_control(adapter);
2906
2907         /*
2908          * Generate RSS hash based on TCP port numbers and/or
2909          * IPv4/v6 src and dst addresses since UDP cannot be
2910          * hashed reliably due to IP fragmentation
2911          */
2912         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2913                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2914                 E1000_MRQC_RSS_FIELD_IPV6 |
2915                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2916                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2917
2918         wr32(E1000_MRQC, mrqc);
2919 }
2920
2921 /**
2922  * igb_setup_rctl - configure the receive control registers
2923  * @adapter: Board private structure
2924  **/
2925 void igb_setup_rctl(struct igb_adapter *adapter)
2926 {
2927         struct e1000_hw *hw = &adapter->hw;
2928         u32 rctl;
2929
2930         rctl = rd32(E1000_RCTL);
2931
2932         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2933         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2934
2935         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2936                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2937
2938         /*
2939          * enable stripping of CRC. It's unlikely this will break BMC
2940          * redirection as it did with e1000. Newer features require
2941          * that the HW strips the CRC.
2942          */
2943         rctl |= E1000_RCTL_SECRC;
2944
2945         /* disable store bad packets and clear size bits. */
2946         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2947
2948         /* enable LPE to prevent packets larger than max_frame_size */
2949         rctl |= E1000_RCTL_LPE;
2950
2951         /* disable queue 0 to prevent tail write w/o re-config */
2952         wr32(E1000_RXDCTL(0), 0);
2953
2954         /* Attention!!!  For SR-IOV PF driver operations you must enable
2955          * queue drop for all VF and PF queues to prevent head of line blocking
2956          * if an un-trusted VF does not provide descriptors to hardware.
2957          */
2958         if (adapter->vfs_allocated_count) {
2959                 /* set all queue drop enable bits */
2960                 wr32(E1000_QDE, ALL_QUEUES);
2961         }
2962
2963         wr32(E1000_RCTL, rctl);
2964 }
2965
2966 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2967                                    int vfn)
2968 {
2969         struct e1000_hw *hw = &adapter->hw;
2970         u32 vmolr;
2971
2972         /* if this is a VF rather than the PF, and the VF has VLANs
2973          * enabled, increase the size to allow for the VLAN tag */
2974         if (vfn < adapter->vfs_allocated_count &&
2975             adapter->vf_data[vfn].vlans_enabled)
2976                 size += VLAN_TAG_SIZE;
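        /* e.g. a 1522 byte limit becomes 1526 to make room for the VLAN tag */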
2977
2978         vmolr = rd32(E1000_VMOLR(vfn));
2979         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2980         vmolr |= size | E1000_VMOLR_LPE;
2981         wr32(E1000_VMOLR(vfn), vmolr);
2982
2983         return 0;
2984 }
2985
2986 /**
2987  * igb_rlpml_set - set maximum receive packet size
2988  * @adapter: board private structure
2989  *
2990  * Configure maximum receivable packet size.
2991  **/
2992 static void igb_rlpml_set(struct igb_adapter *adapter)
2993 {
2994         u32 max_frame_size = adapter->max_frame_size;
2995         struct e1000_hw *hw = &adapter->hw;
2996         u16 pf_id = adapter->vfs_allocated_count;
2997
2998         if (pf_id) {
2999                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3000                 /*
3001                  * If we're in VMDQ or SR-IOV mode, then set global RLPML
3002                  * to our max jumbo frame size, in case we need to enable
3003                  * jumbo frames on one of the rings later.
3004                  * This will not pass over-length frames into the default
3005                  * queue because it's gated by the VMOLR.RLPML.
3006                  */
3007                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3008         }
3009
3010         wr32(E1000_RLPML, max_frame_size);
3011 }
3012
3013 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3014                                  int vfn, bool aupe)
3015 {
3016         struct e1000_hw *hw = &adapter->hw;
3017         u32 vmolr;
3018
3019         /*
3020          * The VMOLR register exists only on 82576 and newer, so on
3021          * older hardware there is nothing to do
3022          */
3023         if (hw->mac.type < e1000_82576)
3024                 return;
3025
3026         vmolr = rd32(E1000_VMOLR(vfn));
3027         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3028         if (aupe)
3029                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3030         else
3031                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3032
3033         /* clear all bits that might not be set */
3034         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3035
3036         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3037                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3038         /*
3039          * for VMDq only allow the VFs and pool 0 to accept broadcast and
3040          * multicast packets
3041          */
3042         if (vfn <= adapter->vfs_allocated_count)
3043                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
3044
3045         wr32(E1000_VMOLR(vfn), vmolr);
3046 }
3047
3048 /**
3049  * igb_configure_rx_ring - Configure a receive ring after Reset
3050  * @adapter: board private structure
3051  * @ring: receive ring to be configured
3052  *
3053  * Configure the Rx unit of the MAC after a reset.
3054  **/
3055 void igb_configure_rx_ring(struct igb_adapter *adapter,
3056                            struct igb_ring *ring)
3057 {
3058         struct e1000_hw *hw = &adapter->hw;
3059         u64 rdba = ring->dma;
3060         int reg_idx = ring->reg_idx;
3061         u32 srrctl = 0, rxdctl = 0;
3062
3063         /* disable the queue */
3064         wr32(E1000_RXDCTL(reg_idx), 0);
3065
3066         /* Set DMA base address registers */
3067         wr32(E1000_RDBAL(reg_idx),
3068              rdba & 0x00000000ffffffffULL);
3069         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3070         wr32(E1000_RDLEN(reg_idx),
3071                        ring->count * sizeof(union e1000_adv_rx_desc));
3072
3073         /* initialize head and tail */
3074         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3075         wr32(E1000_RDH(reg_idx), 0);
3076         writel(0, ring->tail);
3077
3078         /* set descriptor configuration */
3079         srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3080 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3081         srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3082 #else
3083         srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3084 #endif
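        /*
         * With 4K pages the #else branch selects a 2KB packet buffer:
         * BSIZEPKT is specified in 1KB units, so (PAGE_SIZE / 2) >> 10 == 2.
         */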
3085         srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3086         if (hw->mac.type >= e1000_82580)
3087                 srrctl |= E1000_SRRCTL_TIMESTAMP;
3088         /* Only set Drop Enable if we are supporting multiple queues */
3089         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3090                 srrctl |= E1000_SRRCTL_DROP_EN;
3091
3092         wr32(E1000_SRRCTL(reg_idx), srrctl);
3093
3094         /* set filtering for VMDQ pools */
3095         igb_set_vmolr(adapter, reg_idx & 0x7, true);
3096
3097         rxdctl |= IGB_RX_PTHRESH;
3098         rxdctl |= IGB_RX_HTHRESH << 8;
3099         rxdctl |= IGB_RX_WTHRESH << 16;
3100
3101         /* enable receive descriptor fetching */
3102         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3103         wr32(E1000_RXDCTL(reg_idx), rxdctl);
3104 }
3105
3106 /**
3107  * igb_configure_rx - Configure receive Unit after Reset
3108  * @adapter: board private structure
3109  *
3110  * Configure the Rx unit of the MAC after a reset.
3111  **/
3112 static void igb_configure_rx(struct igb_adapter *adapter)
3113 {
3114         int i;
3115
3116         /* set UTA to appropriate mode */
3117         igb_set_uta(adapter);
3118
3119         /* set the correct pool for the PF default MAC address in entry 0 */
3120         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3121                          adapter->vfs_allocated_count);
3122
3123         /* Setup the HW Rx Head and Tail Descriptor Pointers and
3124          * the Base and Length of the Rx Descriptor Ring */
3125         for (i = 0; i < adapter->num_rx_queues; i++)
3126                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3127 }
3128
3129 /**
3130  * igb_free_tx_resources - Free Tx Resources per Queue
3131  * @tx_ring: Tx descriptor ring for a specific queue
3132  *
3133  * Free all transmit software resources
3134  **/
3135 void igb_free_tx_resources(struct igb_ring *tx_ring)
3136 {
3137         igb_clean_tx_ring(tx_ring);
3138
3139         vfree(tx_ring->tx_buffer_info);
3140         tx_ring->tx_buffer_info = NULL;
3141
3142         /* if not set, then don't free */
3143         if (!tx_ring->desc)
3144                 return;
3145
3146         dma_free_coherent(tx_ring->dev, tx_ring->size,
3147                           tx_ring->desc, tx_ring->dma);
3148
3149         tx_ring->desc = NULL;
3150 }
3151
3152 /**
3153  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3154  * @adapter: board private structure
3155  *
3156  * Free all transmit software resources
3157  **/
3158 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3159 {
3160         int i;
3161
3162         for (i = 0; i < adapter->num_tx_queues; i++)
3163                 igb_free_tx_resources(adapter->tx_ring[i]);
3164 }
3165
3166 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3167                                     struct igb_tx_buffer *tx_buffer)
3168 {
3169         if (tx_buffer->skb) {
3170                 dev_kfree_skb_any(tx_buffer->skb);
3171                 if (tx_buffer->dma)
3172                         dma_unmap_single(ring->dev,
3173                                          tx_buffer->dma,
3174                                          tx_buffer->length,
3175                                          DMA_TO_DEVICE);
3176         } else if (tx_buffer->dma) {
3177                 dma_unmap_page(ring->dev,
3178                                tx_buffer->dma,
3179                                tx_buffer->length,
3180                                DMA_TO_DEVICE);
3181         }
3182         tx_buffer->next_to_watch = NULL;
3183         tx_buffer->skb = NULL;
3184         tx_buffer->dma = 0;
3185         /* buffer_info must be completely set up in the transmit path */
3186 }
3187
3188 /**
3189  * igb_clean_tx_ring - Free Tx Buffers
3190  * @tx_ring: ring to be cleaned
3191  **/
3192 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3193 {
3194         struct igb_tx_buffer *buffer_info;
3195         unsigned long size;
3196         u16 i;
3197
3198         if (!tx_ring->tx_buffer_info)
3199                 return;
3200         /* Free all the Tx ring sk_buffs */
3201
3202         for (i = 0; i < tx_ring->count; i++) {
3203                 buffer_info = &tx_ring->tx_buffer_info[i];
3204                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3205         }
3206
3207         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3208         memset(tx_ring->tx_buffer_info, 0, size);
3209
3210         /* Zero out the descriptor ring */
3211         memset(tx_ring->desc, 0, tx_ring->size);
3212
3213         tx_ring->next_to_use = 0;
3214         tx_ring->next_to_clean = 0;
3215 }
3216
3217 /**
3218  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3219  * @adapter: board private structure
3220  **/
3221 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3222 {
3223         int i;
3224
3225         for (i = 0; i < adapter->num_tx_queues; i++)
3226                 igb_clean_tx_ring(adapter->tx_ring[i]);
3227 }
3228
3229 /**
3230  * igb_free_rx_resources - Free Rx Resources
3231  * @rx_ring: ring to clean the resources from
3232  *
3233  * Free all receive software resources
3234  **/
3235 void igb_free_rx_resources(struct igb_ring *rx_ring)
3236 {
3237         igb_clean_rx_ring(rx_ring);
3238
3239         vfree(rx_ring->rx_buffer_info);
3240         rx_ring->rx_buffer_info = NULL;
3241
3242         /* if not set, then don't free */
3243         if (!rx_ring->desc)
3244                 return;
3245
3246         dma_free_coherent(rx_ring->dev, rx_ring->size,
3247                           rx_ring->desc, rx_ring->dma);
3248
3249         rx_ring->desc = NULL;
3250 }
3251
3252 /**
3253  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3254  * @adapter: board private structure
3255  *
3256  * Free all receive software resources
3257  **/
3258 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3259 {
3260         int i;
3261
3262         for (i = 0; i < adapter->num_rx_queues; i++)
3263                 igb_free_rx_resources(adapter->rx_ring[i]);
3264 }
3265
3266 /**
3267  * igb_clean_rx_ring - Free Rx Buffers per Queue
3268  * @rx_ring: ring to free buffers from
3269  **/
3270 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3271 {
3272         unsigned long size;
3273         u16 i;
3274
3275         if (!rx_ring->rx_buffer_info)
3276                 return;
3277
3278         /* Free all the Rx ring sk_buffs */
3279         for (i = 0; i < rx_ring->count; i++) {
3280                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3281                 if (buffer_info->dma) {
3282                         dma_unmap_single(rx_ring->dev,
3283                                          buffer_info->dma,
3284                                          IGB_RX_HDR_LEN,
3285                                          DMA_FROM_DEVICE);
3286                         buffer_info->dma = 0;
3287                 }
3288
3289                 if (buffer_info->skb) {
3290                         dev_kfree_skb(buffer_info->skb);
3291                         buffer_info->skb = NULL;
3292                 }
3293                 if (buffer_info->page_dma) {
3294                         dma_unmap_page(rx_ring->dev,
3295                                        buffer_info->page_dma,
3296                                        PAGE_SIZE / 2,
3297                                        DMA_FROM_DEVICE);
3298                         buffer_info->page_dma = 0;
3299                 }
3300                 if (buffer_info->page) {
3301                         put_page(buffer_info->page);
3302                         buffer_info->page = NULL;
3303                         buffer_info->page_offset = 0;
3304                 }
3305         }
3306
3307         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3308         memset(rx_ring->rx_buffer_info, 0, size);
3309
3310         /* Zero out the descriptor ring */
3311         memset(rx_ring->desc, 0, rx_ring->size);
3312
3313         rx_ring->next_to_clean = 0;
3314         rx_ring->next_to_use = 0;
3315 }
3316
3317 /**
3318  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3319  * @adapter: board private structure
3320  **/
3321 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3322 {
3323         int i;
3324
3325         for (i = 0; i < adapter->num_rx_queues; i++)
3326                 igb_clean_rx_ring(adapter->rx_ring[i]);
3327 }
3328
3329 /**
3330  * igb_set_mac - Change the Ethernet Address of the NIC
3331  * @netdev: network interface device structure
3332  * @p: pointer to an address structure
3333  *
3334  * Returns 0 on success, negative on failure
3335  **/
3336 static int igb_set_mac(struct net_device *netdev, void *p)
3337 {
3338         struct igb_adapter *adapter = netdev_priv(netdev);
3339         struct e1000_hw *hw = &adapter->hw;
3340         struct sockaddr *addr = p;
3341
3342         if (!is_valid_ether_addr(addr->sa_data))
3343                 return -EADDRNOTAVAIL;
3344
3345         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3346         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3347
3348         /* set the correct pool for the new PF MAC address in entry 0 */
3349         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3350                          adapter->vfs_allocated_count);
3351
3352         return 0;
3353 }
3354
3355 /**
3356  * igb_write_mc_addr_list - write multicast addresses to MTA
3357  * @netdev: network interface device structure
3358  *
3359  * Writes multicast address list to the MTA hash table.
3360  * Returns: -ENOMEM on failure
3361  *          0 on no addresses written
3362  *          X on writing X addresses to MTA
3363  **/
3364 static int igb_write_mc_addr_list(struct net_device *netdev)
3365 {
3366         struct igb_adapter *adapter = netdev_priv(netdev);
3367         struct e1000_hw *hw = &adapter->hw;
3368         struct netdev_hw_addr *ha;
3369         u8  *mta_list;
3370         int i;
3371
3372         if (netdev_mc_empty(netdev)) {
3373                 /* nothing to program, so clear mc list */
3374                 igb_update_mc_addr_list(hw, NULL, 0);
3375                 igb_restore_vf_multicasts(adapter);
3376                 return 0;
3377         }
3378
3379         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3380         if (!mta_list)
3381                 return -ENOMEM;
3382
3383         /* The shared function expects a packed array of only addresses. */
3384         i = 0;
3385         netdev_for_each_mc_addr(ha, netdev)
3386                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3387
3388         igb_update_mc_addr_list(hw, mta_list, i);
3389         kfree(mta_list);
3390
3391         return netdev_mc_count(netdev);
3392 }
3393
3394 /**
3395  * igb_write_uc_addr_list - write unicast addresses to RAR table
3396  * @netdev: network interface device structure
3397  *
3398  * Writes unicast address list to the RAR table.
3399  * Returns: -ENOMEM on failure/insufficient address space
3400  *          0 on no addresses written
3401  *          X on writing X addresses to the RAR table
3402  **/
3403 static int igb_write_uc_addr_list(struct net_device *netdev)
3404 {
3405         struct igb_adapter *adapter = netdev_priv(netdev);
3406         struct e1000_hw *hw = &adapter->hw;
3407         unsigned int vfn = adapter->vfs_allocated_count;
3408         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3409         int count = 0;
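        /*
         * RAR entry 0 holds the PF MAC and entries 1..vfn are reserved for
         * the VFs; e.g. with 24 RAR entries and 7 VFs, 16 entries remain
         * for additional unicast addresses.
         */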
3410
3411         /* return -ENOMEM to indicate insufficient space for the addresses */
3412         if (netdev_uc_count(netdev) > rar_entries)
3413                 return -ENOMEM;
3414
3415         if (!netdev_uc_empty(netdev) && rar_entries) {
3416                 struct netdev_hw_addr *ha;
3417
3418                 netdev_for_each_uc_addr(ha, netdev) {
3419                         if (!rar_entries)
3420                                 break;
3421                         igb_rar_set_qsel(adapter, ha->addr,
3422                                          rar_entries--,
3423                                          vfn);
3424                         count++;
3425                 }
3426         }
3427         /* clear the unused RAR entries in reverse order to avoid write combining */
3428         for (; rar_entries > 0 ; rar_entries--) {
3429                 wr32(E1000_RAH(rar_entries), 0);
3430                 wr32(E1000_RAL(rar_entries), 0);
3431         }
3432         wrfl();
3433
3434         return count;
3435 }
3436
3437 /**
3438  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3439  * @netdev: network interface device structure
3440  *
3441  * The set_rx_mode entry point is called whenever the unicast or multicast
3442  * address lists or the network interface flags are updated.  This routine is
3443  * responsible for configuring the hardware for proper unicast, multicast,
3444  * promiscuous mode, and all-multi behavior.
3445  **/
3446 static void igb_set_rx_mode(struct net_device *netdev)
3447 {
3448         struct igb_adapter *adapter = netdev_priv(netdev);
3449         struct e1000_hw *hw = &adapter->hw;
3450         unsigned int vfn = adapter->vfs_allocated_count;
3451         u32 rctl, vmolr = 0;
3452         int count;
3453
3454         /* Check for Promiscuous and All Multicast modes */
3455         rctl = rd32(E1000_RCTL);
3456
3457         /* clear the affected bits */
3458         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3459
3460         if (netdev->flags & IFF_PROMISC) {
3461                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3462                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3463         } else {
3464                 if (netdev->flags & IFF_ALLMULTI) {
3465                         rctl |= E1000_RCTL_MPE;
3466                         vmolr |= E1000_VMOLR_MPME;
3467                 } else {
3468                         /*
3469                          * Write addresses to the MTA, if the attempt fails
3470                          * then we should just turn on promiscuous mode so
3471                          * that we can at least receive multicast traffic
3472                          */
3473                         count = igb_write_mc_addr_list(netdev);
3474                         if (count < 0) {
3475                                 rctl |= E1000_RCTL_MPE;
3476                                 vmolr |= E1000_VMOLR_MPME;
3477                         } else if (count) {
3478                                 vmolr |= E1000_VMOLR_ROMPE;
3479                         }
3480                 }
3481                 /*
3482                  * Write addresses to available RAR registers, if there is not
3483                  * sufficient space to store all the addresses then enable
3484                  * unicast promiscuous mode
3485                  */
3486                 count = igb_write_uc_addr_list(netdev);
3487                 if (count < 0) {
3488                         rctl |= E1000_RCTL_UPE;
3489                         vmolr |= E1000_VMOLR_ROPE;
3490                 }
3491                 rctl |= E1000_RCTL_VFE;
3492         }
3493         wr32(E1000_RCTL, rctl);
3494
3495         /*
3496          * In order to support SR-IOV and eventually VMDq it is necessary to set
3497          * the VMOLR to enable the appropriate modes.  Without this workaround
3498          * we will have issues with VLAN tag stripping not being done for frames
3499          * that are only arriving because we are the default pool
3500          */
3501         if (hw->mac.type < e1000_82576)
3502                 return;
3503
3504         vmolr |= rd32(E1000_VMOLR(vfn)) &
3505                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3506         wr32(E1000_VMOLR(vfn), vmolr);
3507         igb_restore_vf_multicasts(adapter);
3508 }
3509
3510 static void igb_check_wvbr(struct igb_adapter *adapter)
3511 {
3512         struct e1000_hw *hw = &adapter->hw;
3513         u32 wvbr = 0;
3514
3515         switch (hw->mac.type) {
3516         case e1000_82576:
3517         case e1000_i350:
3518                 wvbr = rd32(E1000_WVBR);
                if (!wvbr)
3519                         return;
3520                 break;
3521         default:
3522                 break;
3523         }
3524
3525         adapter->wvbr |= wvbr;
3526 }
3527
3528 #define IGB_STAGGERED_QUEUE_OFFSET 8
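/*
 * WVBR reports spoof events per queue and VF queues come in staggered
 * pairs, so VF j must be checked at both bit j and bit (j + 8) below.
 */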
3529
3530 static void igb_spoof_check(struct igb_adapter *adapter)
3531 {
3532         int j;
3533
3534         if (!adapter->wvbr)
3535                 return;
3536
3537         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3538                 if (adapter->wvbr & (1 << j) ||
3539                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3540                         dev_warn(&adapter->pdev->dev,
3541                                 "Spoof event(s) detected on VF %d\n", j);
3542                         adapter->wvbr &=
3543                                 ~((1 << j) |
3544                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3545                 }
3546         }
3547 }
3548
3549 /* Need to wait a few seconds after link up to get diagnostic information from
3550  * the phy */
3551 static void igb_update_phy_info(unsigned long data)
3552 {
3553         struct igb_adapter *adapter = (struct igb_adapter *) data;
3554         igb_get_phy_info(&adapter->hw);
3555 }
3556
3557 /**
3558  * igb_has_link - check shared code for link and determine up/down
3559  * @adapter: pointer to driver private info
3560  **/
3561 bool igb_has_link(struct igb_adapter *adapter)
3562 {
3563         struct e1000_hw *hw = &adapter->hw;
3564         bool link_active = false;
3565         s32 ret_val = 0;
3566
3567         /* get_link_status is set on LSC (link status) interrupt or
3568          * rx sequence error interrupt.  get_link_status will stay
3569          * true until e1000_check_for_link establishes link
3570          * for copper adapters ONLY
3571          */
3572         switch (hw->phy.media_type) {
3573         case e1000_media_type_copper:
3574                 if (hw->mac.get_link_status) {
3575                         ret_val = hw->mac.ops.check_for_link(hw);
3576                         link_active = !hw->mac.get_link_status;
3577                 } else {
3578                         link_active = true;
3579                 }
3580                 break;
3581         case e1000_media_type_internal_serdes:
3582                 ret_val = hw->mac.ops.check_for_link(hw);
3583                 link_active = hw->mac.serdes_has_link;
3584                 break;
3585         default:
3586         case e1000_media_type_unknown:
3587                 break;
3588         }
3589
3590         return link_active;
3591 }
3592
3593 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3594 {
3595         bool ret = false;
3596         u32 ctrl_ext, thstat;
3597
3598         /* check for thermal sensor event on i350, copper only */
3599         if (hw->mac.type == e1000_i350) {
3600                 thstat = rd32(E1000_THSTAT);
3601                 ctrl_ext = rd32(E1000_CTRL_EXT);
3602
3603                 if ((hw->phy.media_type == e1000_media_type_copper) &&
3604                     !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3605                         ret = !!(thstat & event);
3606                 }
3607         }
3608
3609         return ret;
3610 }
3611
3612 /**
3613  * igb_watchdog - Timer Call-back
3614  * @data: pointer to adapter cast into an unsigned long
3615  **/
3616 static void igb_watchdog(unsigned long data)
3617 {
3618         struct igb_adapter *adapter = (struct igb_adapter *)data;
3619         /* Do the rest outside of interrupt context */
3620         schedule_work(&adapter->watchdog_task);
3621 }
3622
3623 static void igb_watchdog_task(struct work_struct *work)
3624 {
3625         struct igb_adapter *adapter = container_of(work,
3626                                                    struct igb_adapter,
3627                                                    watchdog_task);
3628         struct e1000_hw *hw = &adapter->hw;
3629         struct net_device *netdev = adapter->netdev;
3630         u32 link;
3631         int i;
3632
3633         link = igb_has_link(adapter);
3634         if (link) {
3635                 if (!netif_carrier_ok(netdev)) {
3636                         u32 ctrl;
3637                         hw->mac.ops.get_speed_and_duplex(hw,
3638                                                          &adapter->link_speed,
3639                                                          &adapter->link_duplex);
3640
3641                         ctrl = rd32(E1000_CTRL);
3642                         /* Link status message must follow this format */
3643                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3644                                  "Flow Control: %s\n",
3645                                netdev->name,
3646                                adapter->link_speed,
3647                                adapter->link_duplex == FULL_DUPLEX ?
3648                                  "Full Duplex" : "Half Duplex",
3649                                ((ctrl & E1000_CTRL_TFCE) &&
3650                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3651                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3652                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3653
3654                         /* check for thermal sensor event */
3655                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3656                                 printk(KERN_INFO "igb: %s The network adapter "
3657                                                  "link speed was downshifted "
3658                                                  "because it overheated.\n",
3659                                                  netdev->name);
3660                         }
3661
3662                         /* adjust timeout factor according to speed/duplex */
3663                         adapter->tx_timeout_factor = 1;
3664                         switch (adapter->link_speed) {
3665                         case SPEED_10:
3666                                 adapter->tx_timeout_factor = 14;
3667                                 break;
3668                         case SPEED_100:
3669                                 /* maybe add some timeout factor ? */
3670                                 break;
3671                         }
3672
3673                         netif_carrier_on(netdev);
3674
3675                         igb_ping_all_vfs(adapter);
3676                         igb_check_vf_rate_limit(adapter);
3677
3678                         /* link state has changed, schedule phy info update */
3679                         if (!test_bit(__IGB_DOWN, &adapter->state))
3680                                 mod_timer(&adapter->phy_info_timer,
3681                                           round_jiffies(jiffies + 2 * HZ));
3682                 }
3683         } else {
3684                 if (netif_carrier_ok(netdev)) {
3685                         adapter->link_speed = 0;
3686                         adapter->link_duplex = 0;
3687
3688                         /* check for thermal sensor event */
3689                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3690                                 printk(KERN_ERR "igb: %s The network adapter "
3691                                                 "was stopped because it "
3692                                                 "overheated.\n",
3693                                                 netdev->name);
3694                         }
3695
3696                         /* Link status message must follow this format */
3697                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3698                                netdev->name);
3699                         netif_carrier_off(netdev);
3700
3701                         igb_ping_all_vfs(adapter);
3702
3703                         /* link state has changed, schedule phy info update */
3704                         if (!test_bit(__IGB_DOWN, &adapter->state))
3705                                 mod_timer(&adapter->phy_info_timer,
3706                                           round_jiffies(jiffies + 2 * HZ));
3707                 }
3708         }
3709
3710         spin_lock(&adapter->stats64_lock);
3711         igb_update_stats(adapter, &adapter->stats64);
3712         spin_unlock(&adapter->stats64_lock);
3713
3714         for (i = 0; i < adapter->num_tx_queues; i++) {
3715                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3716                 if (!netif_carrier_ok(netdev)) {
3717                         /* We've lost link, so the controller stops DMA,
3718                          * but we've got queued Tx work that's never going
3719                          * to get done, so reset controller to flush Tx.
3720                          * (Do the reset outside of interrupt context). */
3721                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3722                                 adapter->tx_timeout_count++;
3723                                 schedule_work(&adapter->reset_task);
3724                                 /* return immediately since reset is imminent */
3725                                 return;
3726                         }
3727                 }
3728
3729                 /* Force detection of hung controller every watchdog period */
3730                 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3731         }
3732
3733         /* Cause software interrupt to ensure rx ring is cleaned */
3734         if (adapter->msix_entries) {
3735                 u32 eics = 0;
3736                 for (i = 0; i < adapter->num_q_vectors; i++)
3737                         eics |= adapter->q_vector[i]->eims_value;
3738                 wr32(E1000_EICS, eics);
3739         } else {
3740                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3741         }
3742
3743         igb_spoof_check(adapter);
3744
3745         /* Reset the timer */
3746         if (!test_bit(__IGB_DOWN, &adapter->state))
3747                 mod_timer(&adapter->watchdog_timer,
3748                           round_jiffies(jiffies + 2 * HZ));
3749 }
3750
3751 enum latency_range {
3752         lowest_latency = 0,
3753         low_latency = 1,
3754         bulk_latency = 2,
3755         latency_invalid = 255
3756 };
3757
3758 /**
3759  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3760  *
3761  *      Stores a new ITR value based strictly on packet size.  This
3762  *      algorithm is less sophisticated than that used in igb_update_itr,
3763  *      due to the difficulty of synchronizing statistics across multiple
3764  *      receive rings.  The divisors and thresholds used by this function
3765  *      were determined based on theoretical maximum wire speed and testing
3766  *      data, in order to minimize response time while increasing bulk
3767  *      throughput.
3768  *      This functionality is controlled by the InterruptThrottleRate module
3769  *      parameter (see igb_param.c)
3770  *      NOTE:  This function is called only when operating in a multiqueue
3771  *             receive environment.
3772  * @q_vector: pointer to q_vector
3773  **/
3774 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3775 {
3776         int new_val = q_vector->itr_val;
3777         int avg_wire_size = 0;
3778         struct igb_adapter *adapter = q_vector->adapter;
3779         unsigned int packets;
3780
3781         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3782          * ints/sec (IGB_4K_ITR).
3783          */
3784         if (adapter->link_speed != SPEED_1000) {
3785                 new_val = IGB_4K_ITR;
3786                 goto set_itr_val;
3787         }
3788
3789         packets = q_vector->rx.total_packets;
3790         if (packets)
3791                 avg_wire_size = q_vector->rx.total_bytes / packets;
3792
3793         packets = q_vector->tx.total_packets;
3794         if (packets)
3795                 avg_wire_size = max_t(u32, avg_wire_size,
3796                                       q_vector->tx.total_bytes / packets);
3797
3798         /* if avg_wire_size isn't set no work was done */
3799         if (!avg_wire_size)
3800                 goto clear_counts;
3801
3802         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3803         avg_wire_size += 24;
3804
3805         /* Don't starve jumbo frames */
3806         avg_wire_size = min(avg_wire_size, 3000);
3807
3808         /* Give a little boost to mid-size frames */
3809         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3810                 new_val = avg_wire_size / 3;
3811         else
3812                 new_val = avg_wire_size / 2;
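        /*
         * e.g. full-size 1500 byte frames average 1524 bytes after the
         * 24 byte overhead; that is above 1200, so new_val = 1524 / 2 = 762.
         */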
3813
3814         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3815         if (new_val < IGB_20K_ITR &&
3816             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3817              (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3818                 new_val = IGB_20K_ITR;
3819
3820 set_itr_val:
3821         if (new_val != q_vector->itr_val) {
3822                 q_vector->itr_val = new_val;
3823                 q_vector->set_itr = 1;
3824         }
3825 clear_counts:
3826         q_vector->rx.total_bytes = 0;
3827         q_vector->rx.total_packets = 0;
3828         q_vector->tx.total_bytes = 0;
3829         q_vector->tx.total_packets = 0;
3830 }
3831
3832 /**
3833  * igb_update_itr - update the dynamic ITR value based on statistics
3834  *      Stores a new ITR value based on packets and byte
3835  *      counts during the last interrupt.  The advantage of per interrupt
3836  *      computation is faster updates and more accurate ITR for the current
3837  *      traffic pattern.  Constants in this function were computed
3838  *      based on theoretical maximum wire speed and thresholds were set based
3839  *      on testing data as well as attempting to minimize response time
3840  *      while increasing bulk throughput.
3841  *      This functionality is controlled by the InterruptThrottleRate module
3842  *      parameter (see igb_param.c)
3843  *      NOTE:  These calculations are only valid when operating in a single-
3844  *             queue environment.
3845  * @q_vector: pointer to q_vector
3846  * @ring_container: ring info to update the itr for
3847  **/
3848 static void igb_update_itr(struct igb_q_vector *q_vector,
3849                            struct igb_ring_container *ring_container)
3850 {
3851         unsigned int packets = ring_container->total_packets;
3852         unsigned int bytes = ring_container->total_bytes;
3853         u8 itrval = ring_container->itr;
3854
3855         /* no packets, exit with status unchanged */
3856         if (packets == 0)
3857                 return;
3858
3859         switch (itrval) {
3860         case lowest_latency:
3861                 /* handle TSO and jumbo frames */
3862                 if (bytes/packets > 8000)
3863                         itrval = bulk_latency;
3864                 else if ((packets < 5) && (bytes > 512))
3865                         itrval = low_latency;
3866                 break;
3867         case low_latency:  /* 50 usec aka 20000 ints/s */
3868                 if (bytes > 10000) {
3869                         /* this if handles the TSO accounting */
3870                         if (bytes/packets > 8000) {
3871                                 itrval = bulk_latency;
3872                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3873                                 itrval = bulk_latency;
3874                         } else if (packets > 35) {
3875                                 itrval = lowest_latency;
3876                         }
3877                 } else if (bytes/packets > 2000) {
3878                         itrval = bulk_latency;
3879                 } else if (packets <= 2 && bytes < 512) {
3880                         itrval = lowest_latency;
3881                 }
3882                 break;
3883         case bulk_latency: /* 250 usec aka 4000 ints/s */
3884                 if (bytes > 25000) {
3885                         if (packets > 35)
3886                                 itrval = low_latency;
3887                 } else if (bytes < 1500) {
3888                         itrval = low_latency;
3889                 }
3890                 break;
3891         }
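        /*
         * For example, 8 packets totalling 12000 bytes while in low_latency:
         * bytes > 10000 and packets < 10, so the ring drops to bulk_latency.
         */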
3892
3893         /* clear work counters since we have the values we need */
3894         ring_container->total_bytes = 0;
3895         ring_container->total_packets = 0;
3896
3897         /* write updated itr to ring container */
3898         ring_container->itr = itrval;
3899 }
3900
3901 static void igb_set_itr(struct igb_q_vector *q_vector)
3902 {
3903         struct igb_adapter *adapter = q_vector->adapter;
3904         u32 new_itr = q_vector->itr_val;
3905         u8 current_itr = 0;
3906
3907         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3908         if (adapter->link_speed != SPEED_1000) {
3909                 current_itr = 0;
3910                 new_itr = IGB_4K_ITR;
3911                 goto set_itr_now;
3912         }
3913
3914         igb_update_itr(q_vector, &q_vector->tx);
3915         igb_update_itr(q_vector, &q_vector->rx);
3916
3917         current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3918
3919         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3920         if (current_itr == lowest_latency &&
3921             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3922              (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3923                 current_itr = low_latency;
3924
3925         switch (current_itr) {
3926         /* counts and packets in update_itr are dependent on these numbers */
3927         case lowest_latency:
3928                 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3929                 break;
3930         case low_latency:
3931                 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3932                 break;
3933         case bulk_latency:
3934                 new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
3935                 break;
3936         default:
3937                 break;
3938         }
3939
3940 set_itr_now:
3941         if (new_itr != q_vector->itr_val) {
3942                 /* this attempts to bias the interrupt rate towards Bulk
3943                  * by adding intermediate steps when interrupt rate is
3944                  * increasing */
3945                 new_itr = new_itr > q_vector->itr_val ?
3946                              max((new_itr * q_vector->itr_val) /
3947                                  (new_itr + (q_vector->itr_val >> 2)),
3948                                  new_itr) :
3949                              new_itr;
3950                 /* Don't write the value here; it resets the adapter's
3951                  * internal timer, and causes us to delay far longer than
3952                  * we should between interrupts.  Instead, we write the ITR
3953                  * value at the beginning of the next interrupt so the timing
3954                  * ends up being correct.
3955                  */
3956                 q_vector->itr_val = new_itr;
3957                 q_vector->set_itr = 1;
3958         }
3959 }
3960
3961 void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
3962                      u32 type_tucmd, u32 mss_l4len_idx)
3963 {
3964         struct e1000_adv_tx_context_desc *context_desc;
3965         u16 i = tx_ring->next_to_use;
3966
3967         context_desc = IGB_TX_CTXTDESC(tx_ring, i);
3968
3969         i++;
3970         tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
3971
3972         /* set bits to identify this as an advanced context descriptor */
3973         type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3974
3975         /* For 82575, context index must be unique per ring. */
3976         if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
3977                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3978
3979         context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
3980         context_desc->seqnum_seed       = 0;
3981         context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
3982         context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
3983 }
3984
3985 static int igb_tso(struct igb_ring *tx_ring,
3986                    struct igb_tx_buffer *first,
3987                    u8 *hdr_len)
3988 {
3989         struct sk_buff *skb = first->skb;
3990         u32 vlan_macip_lens, type_tucmd;
3991         u32 mss_l4len_idx, l4len;
3992
3993         if (!skb_is_gso(skb))
3994                 return 0;
3995
3996         if (skb_header_cloned(skb)) {
3997                 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3998                 if (err)
3999                         return err;
4000         }
4001
4002         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4003         type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4004
4005         if (first->protocol == __constant_htons(ETH_P_IP)) {
4006                 struct iphdr *iph = ip_hdr(skb);
4007                 iph->tot_len = 0;
4008                 iph->check = 0;
4009                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4010                                                          iph->daddr, 0,
4011                                                          IPPROTO_TCP,
4012                                                          0);
4013                 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4014                 first->tx_flags |= IGB_TX_FLAGS_TSO |
4015                                    IGB_TX_FLAGS_CSUM |
4016                                    IGB_TX_FLAGS_IPV4;
4017         } else if (skb_is_gso_v6(skb)) {
4018                 ipv6_hdr(skb)->payload_len = 0;
4019                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4020                                                        &ipv6_hdr(skb)->daddr,
4021                                                        0, IPPROTO_TCP, 0);
4022                 first->tx_flags |= IGB_TX_FLAGS_TSO |
4023                                    IGB_TX_FLAGS_CSUM;
4024         }
4025
4026         /* compute header lengths */
4027         l4len = tcp_hdrlen(skb);
4028         *hdr_len = skb_transport_offset(skb) + l4len;
4029
4030         /* update gso size and bytecount with header size */
4031         first->gso_segs = skb_shinfo(skb)->gso_segs;
4032         first->bytecount += (first->gso_segs - 1) * *hdr_len;
4033
4034         /* MSS L4LEN IDX */
4035         mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4036         mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
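        /*
         * e.g. assuming the usual field layout (L4LEN at bit 8, MSS at
         * bit 16), a 20 byte TCP header and a 1448 byte MSS would yield
         * (20 << 8) | (1448 << 16) = 0x05a81400.
         */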
4037
4038         /* VLAN MACLEN IPLEN */
4039         vlan_macip_lens = skb_network_header_len(skb);
4040         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4041         vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4042
4043         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4044
4045         return 1;
4046 }
4047
4048 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4049 {
4050         struct sk_buff *skb = first->skb;
4051         u32 vlan_macip_lens = 0;
4052         u32 mss_l4len_idx = 0;
4053         u32 type_tucmd = 0;
4054
4055         if (skb->ip_summed != CHECKSUM_PARTIAL) {
4056                 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4057                         return;
4058         } else {
4059                 u8 l4_hdr = 0;
4060                 switch (first->protocol) {
4061                 case __constant_htons(ETH_P_IP):
4062                         vlan_macip_lens |= skb_network_header_len(skb);
4063                         type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4064                         l4_hdr = ip_hdr(skb)->protocol;
4065                         break;
4066                 case __constant_htons(ETH_P_IPV6):
4067                         vlan_macip_lens |= skb_network_header_len(skb);
4068                         l4_hdr = ipv6_hdr(skb)->nexthdr;
4069                         break;
4070                 default:
4071                         if (unlikely(net_ratelimit())) {
4072                                 dev_warn(tx_ring->dev,
4073                                  "partial checksum but proto=%x!\n",
4074                                  first->protocol);
4075                         }
4076                         break;
4077                 }
4078
4079                 switch (l4_hdr) {
4080                 case IPPROTO_TCP:
4081                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4082                         mss_l4len_idx = tcp_hdrlen(skb) <<
4083                                         E1000_ADVTXD_L4LEN_SHIFT;
4084                         break;
4085                 case IPPROTO_SCTP:
4086                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4087                         mss_l4len_idx = sizeof(struct sctphdr) <<
4088                                         E1000_ADVTXD_L4LEN_SHIFT;
4089                         break;
4090                 case IPPROTO_UDP:
4091                         mss_l4len_idx = sizeof(struct udphdr) <<
4092                                         E1000_ADVTXD_L4LEN_SHIFT;
4093                         break;
4094                 default:
4095                         if (unlikely(net_ratelimit())) {
4096                                 dev_warn(tx_ring->dev,
4097                                  "partial checksum but l4 proto=%x!\n",
4098                                  l4_hdr);
4099                         }
4100                         break;
4101                 }
4102
4103                 /* update TX checksum flag */
4104                 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4105         }
4106
4107         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4108         vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4109
4110         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4111 }
4112
4113 static __le32 igb_tx_cmd_type(u32 tx_flags)
4114 {
4115         /* set type for advanced descriptor with frame checksum insertion */
4116         __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4117                                       E1000_ADVTXD_DCMD_IFCS |
4118                                       E1000_ADVTXD_DCMD_DEXT);
4119
4120         /* set HW vlan bit if vlan is present */
4121         if (tx_flags & IGB_TX_FLAGS_VLAN)
4122                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4123
4124         /* set timestamp bit if present */
4125         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4126                 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4127
4128         /* set segmentation bits for TSO */
4129         if (tx_flags & IGB_TX_FLAGS_TSO)
4130                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4131
4132         return cmd_type;
4133 }
4134
4135 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4136                                  union e1000_adv_tx_desc *tx_desc,
4137                                  u32 tx_flags, unsigned int paylen)
4138 {
4139         u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4140
4141         /* 82575 requires a unique index per ring if any offload is enabled */
4142         if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4143             test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4144                 olinfo_status |= tx_ring->reg_idx << 4;
4145
4146         /* insert L4 checksum */
4147         if (tx_flags & IGB_TX_FLAGS_CSUM) {
4148                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4149
4150                 /* insert IPv4 checksum */
4151                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4152                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4153         }
4154
4155         tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4156 }
4157
4158 /*
4159  * The largest size we can write to the descriptor is 65535.  In order to
4160  * maintain a power of two alignment we have to limit ourselves to 32K.
4161  */
4162 #define IGB_MAX_TXD_PWR 15
4163 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
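
/*
 * Worked example of the limit above: a hypothetical 45000-byte
 * contiguous buffer exceeds IGB_MAX_DATA_PER_TXD (32768) and is split
 * by igb_tx_map() below into two descriptors:
 *
 *   desc 0: 32768 bytes
 *   desc 1: 45000 - 32768 = 12232 bytes
 */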
4164
4165 static void igb_tx_map(struct igb_ring *tx_ring,
4166                        struct igb_tx_buffer *first,
4167                        const u8 hdr_len)
4168 {
4169         struct sk_buff *skb = first->skb;
4170         struct igb_tx_buffer *tx_buffer_info;
4171         union e1000_adv_tx_desc *tx_desc;
4172         dma_addr_t dma;
4173         struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4174         unsigned int data_len = skb->data_len;
4175         unsigned int size = skb_headlen(skb);
4176         unsigned int paylen = skb->len - hdr_len;
4177         __le32 cmd_type;
4178         u32 tx_flags = first->tx_flags;
4179         u16 i = tx_ring->next_to_use;
4180
4181         tx_desc = IGB_TX_DESC(tx_ring, i);
4182
4183         igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4184         cmd_type = igb_tx_cmd_type(tx_flags);
4185
4186         dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4187         if (dma_mapping_error(tx_ring->dev, dma))
4188                 goto dma_error;
4189
4190         /* record length and DMA address */
4191         first->length = size;
4192         first->dma = dma;
4193         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4194
4195         for (;;) {
4196                 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4197                         tx_desc->read.cmd_type_len =
4198                                 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4199
4200                         i++;
4201                         tx_desc++;
4202                         if (i == tx_ring->count) {
4203                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
4204                                 i = 0;
4205                         }
4206
4207                         dma += IGB_MAX_DATA_PER_TXD;
4208                         size -= IGB_MAX_DATA_PER_TXD;
4209
4210                         tx_desc->read.olinfo_status = 0;
4211                         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4212                 }
4213
4214                 if (likely(!data_len))
4215                         break;
4216
4217                 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4218
4219                 i++;
4220                 tx_desc++;
4221                 if (i == tx_ring->count) {
4222                         tx_desc = IGB_TX_DESC(tx_ring, 0);
4223                         i = 0;
4224                 }
4225
4226                 size = skb_frag_size(frag);
4227                 data_len -= size;
4228
4229                 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4230                                    size, DMA_TO_DEVICE);
4231                 if (dma_mapping_error(tx_ring->dev, dma))
4232                         goto dma_error;
4233
4234                 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4235                 tx_buffer_info->length = size;
4236                 tx_buffer_info->dma = dma;
4237
4238                 tx_desc->read.olinfo_status = 0;
4239                 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4240
4241                 frag++;
4242         }
4243
4244         /* write last descriptor with RS and EOP bits */
4245         cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4246         tx_desc->read.cmd_type_len = cmd_type;
4247
4248         /* set the timestamp */
4249         first->time_stamp = jiffies;
4250
4251         /*
4252          * Force memory writes to complete before letting h/w know there
4253          * are new descriptors to fetch.  (Only applicable for weak-ordered
4254          * memory model archs, such as IA-64).
4255          *
4256          * We also need this memory barrier to make certain all of the
4257          * status bits have been updated before next_to_watch is written.
4258          */
4259         wmb();
4260
4261         /* set next_to_watch value indicating a packet is present */
4262         first->next_to_watch = tx_desc;
4263
4264         i++;
4265         if (i == tx_ring->count)
4266                 i = 0;
4267
4268         tx_ring->next_to_use = i;
4269
4270         writel(i, tx_ring->tail);
4271
4272         /* we need this if more than one processor can write to our tail
4273          * at a time; it synchronizes IO on IA64/Altix systems */
4274         mmiowb();
4275
4276         return;
4277
4278 dma_error:
4279         dev_err(tx_ring->dev, "TX DMA map failed\n");
4280
4281         /* clear dma mappings for failed tx_buffer_info map */
4282         for (;;) {
4283                 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4284                 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4285                 if (tx_buffer_info == first)
4286                         break;
4287                 if (i == 0)
4288                         i = tx_ring->count;
4289                 i--;
4290         }
4291
4292         tx_ring->next_to_use = i;
4293 }
4294
4295 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4296 {
4297         struct net_device *netdev = tx_ring->netdev;
4298
4299         netif_stop_subqueue(netdev, tx_ring->queue_index);
4300
4301         /* Herbert's original patch had:
4302          *  smp_mb__after_netif_stop_queue();
4303          * but since that doesn't exist yet, just open code it. */
4304         smp_mb();
4305
4306         /* We need to check again in case another CPU has just
4307          * made room available. */
4308         if (igb_desc_unused(tx_ring) < size)
4309                 return -EBUSY;
4310
4311         /* A reprieve! */
4312         netif_wake_subqueue(netdev, tx_ring->queue_index);
4313
4314         u64_stats_update_begin(&tx_ring->tx_syncp2);
4315         tx_ring->tx_stats.restart_queue2++;
4316         u64_stats_update_end(&tx_ring->tx_syncp2);
4317
4318         return 0;
4319 }
4320
4321 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4322 {
4323         if (igb_desc_unused(tx_ring) >= size)
4324                 return 0;
4325         return __igb_maybe_stop_tx(tx_ring, size);
4326 }
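
/*
 * Sketch of the race the smp_mb() in __igb_maybe_stop_tx() closes,
 * assuming the usual stop/wake protocol between the xmit and clean
 * paths:
 *
 *   CPU0 (xmit)                      CPU1 (igb_clean_tx_irq)
 *   -----------                      -----------------------
 *   ring looks full
 *                                    frees descriptors
 *                                    queue stopped? -> no, no wake
 *   netif_stop_subqueue()
 *   smp_mb()
 *   re-check igb_desc_unused() -> room freed, wake the queue ourselves
 *
 * Without the barrier and the re-check, the stop could race with the
 * clean path's wake test and the queue would stall until the next
 * interrupt.
 */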
4327
4328 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4329                                 struct igb_ring *tx_ring)
4330 {
4331         struct igb_tx_buffer *first;
4332         int tso;
4333         u32 tx_flags = 0;
4334         __be16 protocol = vlan_get_protocol(skb);
4335         u8 hdr_len = 0;
4336
4337         /* need: 1 descriptor per page,
4338          *       + 2 desc gap to keep tail from touching head,
4339          *       + 1 desc for skb->data,
4340          *       + 1 desc for context descriptor,
4341          * otherwise try next time */
4342         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4343                 /* this is a hard error */
4344                 return NETDEV_TX_BUSY;
4345         }
4346
4347         /* record the location of the first descriptor for this packet */
4348         first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4349         first->skb = skb;
4350         first->bytecount = skb->len;
4351         first->gso_segs = 1;
4352
4353         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4354                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4355                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4356         }
4357
4358         if (vlan_tx_tag_present(skb)) {
4359                 tx_flags |= IGB_TX_FLAGS_VLAN;
4360                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4361         }
4362
4363         /* record initial flags and protocol */
4364         first->tx_flags = tx_flags;
4365         first->protocol = protocol;
4366
4367         tso = igb_tso(tx_ring, first, &hdr_len);
4368         if (tso < 0)
4369                 goto out_drop;
4370         else if (!tso)
4371                 igb_tx_csum(tx_ring, first);
4372
4373         igb_tx_map(tx_ring, first, hdr_len);
4374
4375         /* Make sure there is space in the ring for the next send. */
4376         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4377
4378         return NETDEV_TX_OK;
4379
4380 out_drop:
4381         igb_unmap_and_free_tx_resource(tx_ring, first);
4382
4383         return NETDEV_TX_OK;
4384 }
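
/*
 * Example of the descriptor budget used above: an skb with
 * skb_shinfo(skb)->nr_frags == 3 needs at least 3 + 4 = 7 free
 * descriptors (3 frags + 1 for skb->data + 1 context descriptor +
 * 2 gap entries), otherwise the queue is stopped and NETDEV_TX_BUSY
 * is returned.
 */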
4385
4386 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4387                                                     struct sk_buff *skb)
4388 {
4389         unsigned int r_idx = skb->queue_mapping;
4390
4391         if (r_idx >= adapter->num_tx_queues)
4392                 r_idx = r_idx % adapter->num_tx_queues;
4393
4394         return adapter->tx_ring[r_idx];
4395 }
4396
4397 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4398                                   struct net_device *netdev)
4399 {
4400         struct igb_adapter *adapter = netdev_priv(netdev);
4401
4402         if (test_bit(__IGB_DOWN, &adapter->state)) {
4403                 dev_kfree_skb_any(skb);
4404                 return NETDEV_TX_OK;
4405         }
4406
4407         if (skb->len <= 0) {
4408                 dev_kfree_skb_any(skb);
4409                 return NETDEV_TX_OK;
4410         }
4411
4412         /*
4413          * The minimum packet size with TCTL.PSP set is 17 bytes, so pad
4414          * the skb to meet this minimum size requirement.
4415          */
4416         if (skb->len < 17) {
4417                 if (skb_padto(skb, 17))
4418                         return NETDEV_TX_OK;
4419                 skb->len = 17;
4420         }
4421
4422         return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4423 }
4424
4425 /**
4426  * igb_tx_timeout - Respond to a Tx Hang
4427  * @netdev: network interface device structure
4428  **/
4429 static void igb_tx_timeout(struct net_device *netdev)
4430 {
4431         struct igb_adapter *adapter = netdev_priv(netdev);
4432         struct e1000_hw *hw = &adapter->hw;
4433
4434         /* Do the reset outside of interrupt context */
4435         adapter->tx_timeout_count++;
4436
4437         if (hw->mac.type >= e1000_82580)
4438                 hw->dev_spec._82575.global_device_reset = true;
4439
4440         schedule_work(&adapter->reset_task);
4441         wr32(E1000_EICS,
4442              (adapter->eims_enable_mask & ~adapter->eims_other));
4443 }
4444
4445 static void igb_reset_task(struct work_struct *work)
4446 {
4447         struct igb_adapter *adapter;
4448         adapter = container_of(work, struct igb_adapter, reset_task);
4449
4450         igb_dump(adapter);
4451         netdev_err(adapter->netdev, "Reset adapter\n");
4452         igb_reinit_locked(adapter);
4453 }
4454
4455 /**
4456  * igb_get_stats64 - Get System Network Statistics
4457  * @netdev: network interface device structure
4458  * @stats: rtnl_link_stats64 pointer
4459  *
4460  **/
4461 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4462                                                  struct rtnl_link_stats64 *stats)
4463 {
4464         struct igb_adapter *adapter = netdev_priv(netdev);
4465
4466         spin_lock(&adapter->stats64_lock);
4467         igb_update_stats(adapter, &adapter->stats64);
4468         memcpy(stats, &adapter->stats64, sizeof(*stats));
4469         spin_unlock(&adapter->stats64_lock);
4470
4471         return stats;
4472 }
4473
4474 /**
4475  * igb_change_mtu - Change the Maximum Transfer Unit
4476  * @netdev: network interface device structure
4477  * @new_mtu: new value for maximum frame size
4478  *
4479  * Returns 0 on success, negative on failure
4480  **/
4481 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4482 {
4483         struct igb_adapter *adapter = netdev_priv(netdev);
4484         struct pci_dev *pdev = adapter->pdev;
4485         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4486
4487         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4488                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4489                 return -EINVAL;
4490         }
4491
4492 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4493         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4494                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4495                 return -EINVAL;
4496         }
4497
4498         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4499                 msleep(1);
4500
4501         /* igb_down has a dependency on max_frame_size */
4502         adapter->max_frame_size = max_frame;
4503
4504         if (netif_running(netdev))
4505                 igb_down(adapter);
4506
4507         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4508                  netdev->mtu, new_mtu);
4509         netdev->mtu = new_mtu;
4510
4511         if (netif_running(netdev))
4512                 igb_up(adapter);
4513         else
4514                 igb_reset(adapter);
4515
4516         clear_bit(__IGB_RESETTING, &adapter->state);
4517
4518         return 0;
4519 }
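
/*
 * Worked example of the max_frame computation above: a standard MTU of
 * 1500 gives 1500 + ETH_HLEN (14) + ETH_FCS_LEN (4) + VLAN_HLEN (4) =
 * 1522 bytes, while the largest supported MTU of 9216 gives the
 * MAX_STD_JUMBO_FRAME_SIZE of 9238 checked above.
 */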
4520
4521 /**
4522  * igb_update_stats - Update the board statistics counters
4523  * @adapter: board private structure
4524  **/
4525
4526 void igb_update_stats(struct igb_adapter *adapter,
4527                       struct rtnl_link_stats64 *net_stats)
4528 {
4529         struct e1000_hw *hw = &adapter->hw;
4530         struct pci_dev *pdev = adapter->pdev;
4531         u32 reg, mpc;
4532         u16 phy_tmp;
4533         int i;
4534         u64 bytes, packets;
4535         unsigned int start;
4536         u64 _bytes, _packets;
4537
4538 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4539
4540         /*
4541          * Prevent stats update while adapter is being reset, or if the pci
4542          * connection is down.
4543          */
4544         if (adapter->link_speed == 0)
4545                 return;
4546         if (pci_channel_offline(pdev))
4547                 return;
4548
4549         bytes = 0;
4550         packets = 0;
4551         for (i = 0; i < adapter->num_rx_queues; i++) {
4552                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4553                 struct igb_ring *ring = adapter->rx_ring[i];
4554
4555                 ring->rx_stats.drops += rqdpc_tmp;
4556                 net_stats->rx_fifo_errors += rqdpc_tmp;
4557
4558                 do {
4559                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4560                         _bytes = ring->rx_stats.bytes;
4561                         _packets = ring->rx_stats.packets;
4562                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4563                 bytes += _bytes;
4564                 packets += _packets;
4565         }
4566
4567         net_stats->rx_bytes = bytes;
4568         net_stats->rx_packets = packets;
4569
4570         bytes = 0;
4571         packets = 0;
4572         for (i = 0; i < adapter->num_tx_queues; i++) {
4573                 struct igb_ring *ring = adapter->tx_ring[i];
4574                 do {
4575                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4576                         _bytes = ring->tx_stats.bytes;
4577                         _packets = ring->tx_stats.packets;
4578                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4579                 bytes += _bytes;
4580                 packets += _packets;
4581         }
4582         net_stats->tx_bytes = bytes;
4583         net_stats->tx_packets = packets;
4584
4585         /* read stats registers */
4586         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4587         adapter->stats.gprc += rd32(E1000_GPRC);
4588         adapter->stats.gorc += rd32(E1000_GORCL);
4589         rd32(E1000_GORCH); /* clear GORCL */
4590         adapter->stats.bprc += rd32(E1000_BPRC);
4591         adapter->stats.mprc += rd32(E1000_MPRC);
4592         adapter->stats.roc += rd32(E1000_ROC);
4593
4594         adapter->stats.prc64 += rd32(E1000_PRC64);
4595         adapter->stats.prc127 += rd32(E1000_PRC127);
4596         adapter->stats.prc255 += rd32(E1000_PRC255);
4597         adapter->stats.prc511 += rd32(E1000_PRC511);
4598         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4599         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4600         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4601         adapter->stats.sec += rd32(E1000_SEC);
4602
4603         mpc = rd32(E1000_MPC);
4604         adapter->stats.mpc += mpc;
4605         net_stats->rx_fifo_errors += mpc;
4606         adapter->stats.scc += rd32(E1000_SCC);
4607         adapter->stats.ecol += rd32(E1000_ECOL);
4608         adapter->stats.mcc += rd32(E1000_MCC);
4609         adapter->stats.latecol += rd32(E1000_LATECOL);
4610         adapter->stats.dc += rd32(E1000_DC);
4611         adapter->stats.rlec += rd32(E1000_RLEC);
4612         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4613         adapter->stats.xontxc += rd32(E1000_XONTXC);
4614         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4615         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4616         adapter->stats.fcruc += rd32(E1000_FCRUC);
4617         adapter->stats.gptc += rd32(E1000_GPTC);
4618         adapter->stats.gotc += rd32(E1000_GOTCL);
4619         rd32(E1000_GOTCH); /* clear GOTCL */
4620         adapter->stats.rnbc += rd32(E1000_RNBC);
4621         adapter->stats.ruc += rd32(E1000_RUC);
4622         adapter->stats.rfc += rd32(E1000_RFC);
4623         adapter->stats.rjc += rd32(E1000_RJC);
4624         adapter->stats.tor += rd32(E1000_TORH);
4625         adapter->stats.tot += rd32(E1000_TOTH);
4626         adapter->stats.tpr += rd32(E1000_TPR);
4627
4628         adapter->stats.ptc64 += rd32(E1000_PTC64);
4629         adapter->stats.ptc127 += rd32(E1000_PTC127);
4630         adapter->stats.ptc255 += rd32(E1000_PTC255);
4631         adapter->stats.ptc511 += rd32(E1000_PTC511);
4632         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4633         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4634
4635         adapter->stats.mptc += rd32(E1000_MPTC);
4636         adapter->stats.bptc += rd32(E1000_BPTC);
4637
4638         adapter->stats.tpt += rd32(E1000_TPT);
4639         adapter->stats.colc += rd32(E1000_COLC);
4640
4641         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4642         /* read internal phy specific stats */
4643         reg = rd32(E1000_CTRL_EXT);
4644         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4645                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4646                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4647         }
4648
4649         adapter->stats.tsctc += rd32(E1000_TSCTC);
4650         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4651
4652         adapter->stats.iac += rd32(E1000_IAC);
4653         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4654         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4655         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4656         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4657         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4658         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4659         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4660         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4661
4662         /* Fill out the OS statistics structure */
4663         net_stats->multicast = adapter->stats.mprc;
4664         net_stats->collisions = adapter->stats.colc;
4665
4666         /* Rx Errors */
4667
4668         /* RLEC on some newer hardware can be incorrect so build
4669          * our own version based on RUC and ROC */
4670         net_stats->rx_errors = adapter->stats.rxerrc +
4671                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4672                 adapter->stats.ruc + adapter->stats.roc +
4673                 adapter->stats.cexterr;
4674         net_stats->rx_length_errors = adapter->stats.ruc +
4675                                       adapter->stats.roc;
4676         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4677         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4678         net_stats->rx_missed_errors = adapter->stats.mpc;
4679
4680         /* Tx Errors */
4681         net_stats->tx_errors = adapter->stats.ecol +
4682                                adapter->stats.latecol;
4683         net_stats->tx_aborted_errors = adapter->stats.ecol;
4684         net_stats->tx_window_errors = adapter->stats.latecol;
4685         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4686
4687         /* Tx Dropped needs to be maintained elsewhere */
4688
4689         /* Phy Stats */
4690         if (hw->phy.media_type == e1000_media_type_copper) {
4691                 if ((adapter->link_speed == SPEED_1000) &&
4692                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4693                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4694                         adapter->phy_stats.idle_errors += phy_tmp;
4695                 }
4696         }
4697
4698         /* Management Stats */
4699         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4700         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4701         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4702
4703         /* OS2BMC Stats */
4704         reg = rd32(E1000_MANC);
4705         if (reg & E1000_MANC_EN_BMC2OS) {
4706                 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4707                 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4708                 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4709                 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4710         }
4711 }
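
/*
 * The fetch_begin/fetch_retry loops above are the reader half of the
 * u64_stats_sync pattern; a minimal sketch of the matching writer side
 * (mirroring the restart_queue2 update in __igb_maybe_stop_tx()) is:
 *
 *   u64_stats_update_begin(&ring->tx_syncp);
 *   ring->tx_stats.packets += total_packets;
 *   ring->tx_stats.bytes   += total_bytes;
 *   u64_stats_update_end(&ring->tx_syncp);
 *
 * which guarantees the reader a consistent 64-bit snapshot even on
 * 32-bit machines.
 */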
4712
4713 static irqreturn_t igb_msix_other(int irq, void *data)
4714 {
4715         struct igb_adapter *adapter = data;
4716         struct e1000_hw *hw = &adapter->hw;
4717         u32 icr = rd32(E1000_ICR);
4718         /* reading ICR causes bit 31 of EICR to be cleared */
4719
4720         if (icr & E1000_ICR_DRSTA)
4721                 schedule_work(&adapter->reset_task);
4722
4723         if (icr & E1000_ICR_DOUTSYNC) {
4724                 /* HW is reporting DMA is out of sync */
4725                 adapter->stats.doosync++;
4726                 /* The DMA Out of Sync is also an indication of a spoof event
4727                  * in IOV mode. Check the Wrong VM Behavior register to
4728                  * see if it is really a spoof event. */
4729                 igb_check_wvbr(adapter);
4730         }
4731
4732         /* Check for a mailbox event */
4733         if (icr & E1000_ICR_VMMB)
4734                 igb_msg_task(adapter);
4735
4736         if (icr & E1000_ICR_LSC) {
4737                 hw->mac.get_link_status = 1;
4738                 /* guard against interrupt when we're going down */
4739                 if (!test_bit(__IGB_DOWN, &adapter->state))
4740                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4741         }
4742
4743         wr32(E1000_EIMS, adapter->eims_other);
4744
4745         return IRQ_HANDLED;
4746 }
4747
4748 static void igb_write_itr(struct igb_q_vector *q_vector)
4749 {
4750         struct igb_adapter *adapter = q_vector->adapter;
4751         u32 itr_val = q_vector->itr_val & 0x7FFC;
4752
4753         if (!q_vector->set_itr)
4754                 return;
4755
4756         if (!itr_val)
4757                 itr_val = 0x4;
4758
4759         if (adapter->hw.mac.type == e1000_82575)
4760                 itr_val |= itr_val << 16;
4761         else
4762                 itr_val |= E1000_EITR_CNT_IGNR;
4763
4764         writel(itr_val, q_vector->itr_register);
4765         q_vector->set_itr = 0;
4766 }
4767
4768 static irqreturn_t igb_msix_ring(int irq, void *data)
4769 {
4770         struct igb_q_vector *q_vector = data;
4771
4772         /* Write the ITR value calculated from the previous interrupt. */
4773         igb_write_itr(q_vector);
4774
4775         napi_schedule(&q_vector->napi);
4776
4777         return IRQ_HANDLED;
4778 }
4779
4780 #ifdef CONFIG_IGB_DCA
4781 static void igb_update_dca(struct igb_q_vector *q_vector)
4782 {
4783         struct igb_adapter *adapter = q_vector->adapter;
4784         struct e1000_hw *hw = &adapter->hw;
4785         int cpu = get_cpu();
4786
4787         if (q_vector->cpu == cpu)
4788                 goto out_no_update;
4789
4790         if (q_vector->tx.ring) {
4791                 int q = q_vector->tx.ring->reg_idx;
4792                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4793                 if (hw->mac.type == e1000_82575) {
4794                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4795                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4796                 } else {
4797                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4798                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4799                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4800                 }
4801                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4802                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4803         }
4804         if (q_vector->rx.ring) {
4805                 int q = q_vector->rx.ring->reg_idx;
4806                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4807                 if (hw->mac.type == e1000_82575) {
4808                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4809                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4810                 } else {
4811                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4812                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4813                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4814                 }
4815                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4816                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4817                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4818                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4819         }
4820         q_vector->cpu = cpu;
4821 out_no_update:
4822         put_cpu();
4823 }
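
/*
 * Background for the writes above: DCA (Direct Cache Access) tags a
 * queue's descriptor and data writes with the CPU currently servicing
 * it, so the chipset can place them in that CPU's cache; the TXCTRL/
 * RXCTRL updates simply re-tag a queue whenever its q_vector runs on a
 * different CPU than last time.
 */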
4824
4825 static void igb_setup_dca(struct igb_adapter *adapter)
4826 {
4827         struct e1000_hw *hw = &adapter->hw;
4828         int i;
4829
4830         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4831                 return;
4832
4833         /* Always use CB2 mode, difference is masked in the CB driver. */
4834         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4835
4836         for (i = 0; i < adapter->num_q_vectors; i++) {
4837                 adapter->q_vector[i]->cpu = -1;
4838                 igb_update_dca(adapter->q_vector[i]);
4839         }
4840 }
4841
4842 static int __igb_notify_dca(struct device *dev, void *data)
4843 {
4844         struct net_device *netdev = dev_get_drvdata(dev);
4845         struct igb_adapter *adapter = netdev_priv(netdev);
4846         struct pci_dev *pdev = adapter->pdev;
4847         struct e1000_hw *hw = &adapter->hw;
4848         unsigned long event = *(unsigned long *)data;
4849
4850         switch (event) {
4851         case DCA_PROVIDER_ADD:
4852                 /* if already enabled, don't do it again */
4853                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4854                         break;
4855                 if (dca_add_requester(dev) == 0) {
4856                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4857                         dev_info(&pdev->dev, "DCA enabled\n");
4858                         igb_setup_dca(adapter);
4859                         break;
4860                 }
4861                 /* Fall Through since DCA is disabled. */
4862         case DCA_PROVIDER_REMOVE:
4863                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4864                         /* without this a class_device is left
4865                          * hanging around in the sysfs model */
4866                         dca_remove_requester(dev);
4867                         dev_info(&pdev->dev, "DCA disabled\n");
4868                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4869                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4870                 }
4871                 break;
4872         }
4873
4874         return 0;
4875 }
4876
4877 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4878                           void *p)
4879 {
4880         int ret_val;
4881
4882         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4883                                          __igb_notify_dca);
4884
4885         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4886 }
4887 #endif /* CONFIG_IGB_DCA */
4888
4889 #ifdef CONFIG_PCI_IOV
4890 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4891 {
4892         unsigned char mac_addr[ETH_ALEN];
4893         struct pci_dev *pdev = adapter->pdev;
4894         struct e1000_hw *hw = &adapter->hw;
4895         struct pci_dev *pvfdev;
4896         unsigned int device_id;
4897         u16 thisvf_devfn;
4898
4899         random_ether_addr(mac_addr);
4900         igb_set_vf_mac(adapter, vf, mac_addr);
4901
4902         switch (adapter->hw.mac.type) {
4903         case e1000_82576:
4904                 device_id = IGB_82576_VF_DEV_ID;
4905                 /* VF Stride for 82576 is 2 */
4906                 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4907                         (pdev->devfn & 1);
4908                 break;
4909         case e1000_i350:
4910                 device_id = IGB_I350_VF_DEV_ID;
4911                 /* VF Stride for I350 is 4 */
4912                 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
4913                                 (pdev->devfn & 3);
4914                 break;
4915         default:
4916                 device_id = 0;
4917                 thisvf_devfn = 0;
4918                 break;
4919         }
4920
4921         pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4922         while (pvfdev) {
4923                 if (pvfdev->devfn == thisvf_devfn)
4924                         break;
4925                 pvfdev = pci_get_device(hw->vendor_id,
4926                                         device_id, pvfdev);
4927         }
4928
4929         if (pvfdev)
4930                 adapter->vf_data[vf].vfdev = pvfdev;
4931         else
4932                 dev_err(&pdev->dev,
4933                         "Couldn't find pci dev ptr for VF %4.4x\n",
4934                         thisvf_devfn);
4935         return pvfdev != NULL;
4936 }
4937
4938 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
4939 {
4940         struct e1000_hw *hw = &adapter->hw;
4941         struct pci_dev *pdev = adapter->pdev;
4942         struct pci_dev *pvfdev;
4943         u16 vf_devfn = 0;
4944         u16 vf_stride;
4945         unsigned int device_id;
4946         int vfs_found = 0;
4947
4948         switch (adapter->hw.mac.type) {
4949         case e1000_82576:
4950                 device_id = IGB_82576_VF_DEV_ID;
4951                 /* VF Stride for 82576 is 2 */
4952                 vf_stride = 2;
4953                 break;
4954         case e1000_i350:
4955                 device_id = IGB_I350_VF_DEV_ID;
4956                 /* VF Stride for I350 is 4 */
4957                 vf_stride = 4;
4958                 break;
4959         default:
4960                 device_id = 0;
4961                 vf_stride = 0;
4962                 break;
4963         }
4964
4965         vf_devfn = pdev->devfn + 0x80;
4966         pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4967         while (pvfdev) {
4968                 if (pvfdev->devfn == vf_devfn)
4969                         vfs_found++;
4970                 vf_devfn += vf_stride;
4971                 pvfdev = pci_get_device(hw->vendor_id,
4972                                         device_id, pvfdev);
4973         }
4974
4975         return vfs_found;
4976 }
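
/*
 * Example of the devfn layout assumed by the two functions above: with
 * the PF at devfn 0x0, an 82576 places VF 0 at devfn 0x80, VF 1 at
 * 0x82 and VF 2 at 0x84 (stride 2), while the i350 strides by 4
 * (0x80, 0x84, 0x88, ...).
 */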
4977
4978 static int igb_check_vf_assignment(struct igb_adapter *adapter)
4979 {
4980         int i;
4981         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4982                 if (adapter->vf_data[i].vfdev) {
4983                         if (adapter->vf_data[i].vfdev->dev_flags &
4984                             PCI_DEV_FLAGS_ASSIGNED)
4985                                 return true;
4986                 }
4987         }
4988         return false;
4989 }
4990
4991 #endif
4992 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4993 {
4994         struct e1000_hw *hw = &adapter->hw;
4995         u32 ping;
4996         int i;
4997
4998         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4999                 ping = E1000_PF_CONTROL_MSG;
5000                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5001                         ping |= E1000_VT_MSGTYPE_CTS;
5002                 igb_write_mbx(hw, &ping, 1, i);
5003         }
5004 }
5005
5006 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5007 {
5008         struct e1000_hw *hw = &adapter->hw;
5009         u32 vmolr = rd32(E1000_VMOLR(vf));
5010         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5011
5012         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5013                             IGB_VF_FLAG_MULTI_PROMISC);
5014         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5015
5016         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5017                 vmolr |= E1000_VMOLR_MPME;
5018                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5019                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5020         } else {
5021                 /*
5022                  * if we have hashes and we are clearing a multicast promisc
5023                  * flag we need to write the hashes to the MTA as this step
5024                  * was previously skipped
5025                  */
5026                 if (vf_data->num_vf_mc_hashes > 30) {
5027                         vmolr |= E1000_VMOLR_MPME;
5028                 } else if (vf_data->num_vf_mc_hashes) {
5029                         int j;
5030                         vmolr |= E1000_VMOLR_ROMPE;
5031                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5032                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5033                 }
5034         }
5035
5036         wr32(E1000_VMOLR(vf), vmolr);
5037
5038         /* there are flags left unprocessed, likely not supported */
5039         if (*msgbuf & E1000_VT_MSGINFO_MASK)
5040                 return -EINVAL;
5041
5042         return 0;
5043
5044 }
5045
5046 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5047                                   u32 *msgbuf, u32 vf)
5048 {
5049         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5050         u16 *hash_list = (u16 *)&msgbuf[1];
5051         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5052         int i;
5053
5054         /* salt away the number of multicast addresses assigned
5055          * to this VF for later use, to restore when the PF multicast
5056          * list changes
5057          */
5058         vf_data->num_vf_mc_hashes = n;
5059
5060         /* only up to 30 hash values supported */
5061         if (n > 30)
5062                 n = 30;
5063
5064         /* store the hashes for later use */
5065         for (i = 0; i < n; i++)
5066                 vf_data->vf_mc_hashes[i] = hash_list[i];
5067
5068         /* Flush and reset the mta with the new values */
5069         igb_set_rx_mode(adapter->netdev);
5070
5071         return 0;
5072 }
5073
5074 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5075 {
5076         struct e1000_hw *hw = &adapter->hw;
5077         struct vf_data_storage *vf_data;
5078         int i, j;
5079
5080         for (i = 0; i < adapter->vfs_allocated_count; i++) {
5081                 u32 vmolr = rd32(E1000_VMOLR(i));
5082                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5083
5084                 vf_data = &adapter->vf_data[i];
5085
5086                 if ((vf_data->num_vf_mc_hashes > 30) ||
5087                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5088                         vmolr |= E1000_VMOLR_MPME;
5089                 } else if (vf_data->num_vf_mc_hashes) {
5090                         vmolr |= E1000_VMOLR_ROMPE;
5091                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5092                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5093                 }
5094                 wr32(E1000_VMOLR(i), vmolr);
5095         }
5096 }
5097
5098 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5099 {
5100         struct e1000_hw *hw = &adapter->hw;
5101         u32 pool_mask, reg, vid;
5102         int i;
5103
5104         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5105
5106         /* Find the vlan filter for this id */
5107         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5108                 reg = rd32(E1000_VLVF(i));
5109
5110                 /* remove the vf from the pool */
5111                 reg &= ~pool_mask;
5112
5113                 /* if pool is empty then remove entry from vfta */
5114                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5115                     (reg & E1000_VLVF_VLANID_ENABLE)) {
5116                         vid = reg & E1000_VLVF_VLANID_MASK;
5117                         igb_vfta_set(hw, vid, false);
5118                         reg = 0;
5119                 }
5120
5121                 wr32(E1000_VLVF(i), reg);
5122         }
5123
5124         adapter->vf_data[vf].vlans_enabled = 0;
5125 }
5126
5127 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5128 {
5129         struct e1000_hw *hw = &adapter->hw;
5130         u32 reg, i;
5131
5132         /* The vlvf table only exists on 82576 hardware and newer */
5133         if (hw->mac.type < e1000_82576)
5134                 return -1;
5135
5136         /* we only need to do this if VMDq is enabled */
5137         if (!adapter->vfs_allocated_count)
5138                 return -1;
5139
5140         /* Find the vlan filter for this id */
5141         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5142                 reg = rd32(E1000_VLVF(i));
5143                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5144                     vid == (reg & E1000_VLVF_VLANID_MASK))
5145                         break;
5146         }
5147
5148         if (add) {
5149                 if (i == E1000_VLVF_ARRAY_SIZE) {
5150                         /* Did not find a matching VLAN ID entry that was
5151                          * enabled.  Search for a free filter entry, i.e.
5152                          * one without the enable bit set
5153                          */
5154                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5155                                 reg = rd32(E1000_VLVF(i));
5156                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5157                                         break;
5158                         }
5159                 }
5160                 if (i < E1000_VLVF_ARRAY_SIZE) {
5161                         /* Found an enabled/available entry */
5162                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5163
5164                         /* if !enabled we need to set this up in vfta */
5165                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5166                                 /* add VID to filter table */
5167                                 igb_vfta_set(hw, vid, true);
5168                                 reg |= E1000_VLVF_VLANID_ENABLE;
5169                         }
5170                         reg &= ~E1000_VLVF_VLANID_MASK;
5171                         reg |= vid;
5172                         wr32(E1000_VLVF(i), reg);
5173
5174                         /* do not modify RLPML for PF devices */
5175                         if (vf >= adapter->vfs_allocated_count)
5176                                 return 0;
5177
5178                         if (!adapter->vf_data[vf].vlans_enabled) {
5179                                 u32 size;
5180                                 reg = rd32(E1000_VMOLR(vf));
5181                                 size = reg & E1000_VMOLR_RLPML_MASK;
5182                                 size += 4;
5183                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5184                                 reg |= size;
5185                                 wr32(E1000_VMOLR(vf), reg);
5186                         }
5187
5188                         adapter->vf_data[vf].vlans_enabled++;
5189                 }
5190         } else {
5191                 if (i < E1000_VLVF_ARRAY_SIZE) {
5192                         /* remove vf from the pool */
5193                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5194                         /* if pool is empty then remove entry from vfta */
5195                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5196                                 reg = 0;
5197                                 igb_vfta_set(hw, vid, false);
5198                         }
5199                         wr32(E1000_VLVF(i), reg);
5200
5201                         /* do not modify RLPML for PF devices */
5202                         if (vf >= adapter->vfs_allocated_count)
5203                                 return 0;
5204
5205                         adapter->vf_data[vf].vlans_enabled--;
5206                         if (!adapter->vf_data[vf].vlans_enabled) {
5207                                 u32 size;
5208                                 reg = rd32(E1000_VMOLR(vf));
5209                                 size = reg & E1000_VMOLR_RLPML_MASK;
5210                                 size -= 4;
5211                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5212                                 reg |= size;
5213                                 wr32(E1000_VMOLR(vf), reg);
5214                         }
5215                 }
5216         }
5217         return 0;
5218 }
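
/*
 * Sketch of the VLVF entry layout manipulated above, assuming the
 * usual 82576-class defines (E1000_VLVF_POOLSEL_SHIFT == 12, enable
 * in bit 31):
 *
 *   bits 11:0   VLAN ID      (E1000_VLVF_VLANID_MASK)
 *   bits 12+    pool select  (one bit per VF/pool)
 *   bit  31     entry enable (E1000_VLVF_VLANID_ENABLE)
 *
 * so adding VF 3 to an entry sets bit (E1000_VLVF_POOLSEL_SHIFT + 3),
 * i.e. bit 15.
 */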
5219
5220 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5221 {
5222         struct e1000_hw *hw = &adapter->hw;
5223
5224         if (vid)
5225                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5226         else
5227                 wr32(E1000_VMVIR(vf), 0);
5228 }
5229
5230 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5231                                int vf, u16 vlan, u8 qos)
5232 {
5233         int err = 0;
5234         struct igb_adapter *adapter = netdev_priv(netdev);
5235
5236         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5237                 return -EINVAL;
5238         if (vlan || qos) {
5239                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5240                 if (err)
5241                         goto out;
5242                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5243                 igb_set_vmolr(adapter, vf, !vlan);
5244                 adapter->vf_data[vf].pf_vlan = vlan;
5245                 adapter->vf_data[vf].pf_qos = qos;
5246                 dev_info(&adapter->pdev->dev,
5247                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5248                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5249                         dev_warn(&adapter->pdev->dev,
5250                                  "The VF VLAN has been set,"
5251                                  " but the PF device is not up.\n");
5252                         dev_warn(&adapter->pdev->dev,
5253                                  "Bring the PF device up before"
5254                                  " attempting to use the VF device.\n");
5255                 }
5256         } else {
5257                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5258                                    false, vf);
5259                 igb_set_vmvir(adapter, vlan, vf);
5260                 igb_set_vmolr(adapter, vf, true);
5261                 adapter->vf_data[vf].pf_vlan = 0;
5262                 adapter->vf_data[vf].pf_qos = 0;
5263         }
5264 out:
5265         return err;
5266 }
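
/*
 * Usage note: this ndo is reached via the IFLA_VF_VLAN netlink
 * attribute, e.g. (interface name is only an example):
 *
 *   ip link set eth0 vf 0 vlan 100 qos 3
 *
 * which pins VLAN 100 / priority 3 on VF 0 and keeps the VF driver
 * from overriding the tag.
 */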
5267
5268 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5269 {
5270         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5271         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5272
5273         return igb_vlvf_set(adapter, vid, add, vf);
5274 }
5275
5276 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5277 {
5278         /* clear flags - except flag that indicates PF has set the MAC */
5279         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5280         adapter->vf_data[vf].last_nack = jiffies;
5281
5282         /* reset offloads to defaults */
5283         igb_set_vmolr(adapter, vf, true);
5284
5285         /* reset vlans for device */
5286         igb_clear_vf_vfta(adapter, vf);
5287         if (adapter->vf_data[vf].pf_vlan)
5288                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5289                                     adapter->vf_data[vf].pf_vlan,
5290                                     adapter->vf_data[vf].pf_qos);
5291         else
5292                 igb_clear_vf_vfta(adapter, vf);
5293
5294         /* reset multicast table array for vf */
5295         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5296
5297         /* Flush and reset the mta with the new values */
5298         igb_set_rx_mode(adapter->netdev);
5299 }
5300
5301 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5302 {
5303         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5304
5305         /* generate a new mac address as we were hotplug removed/added */
5306         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5307                 random_ether_addr(vf_mac);
5308
5309         /* process remaining reset events */
5310         igb_vf_reset(adapter, vf);
5311 }
5312
5313 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5314 {
5315         struct e1000_hw *hw = &adapter->hw;
5316         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5317         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5318         u32 reg, msgbuf[3];
5319         u8 *addr = (u8 *)(&msgbuf[1]);
5320
5321         /* process all the same items cleared in a function level reset */
5322         igb_vf_reset(adapter, vf);
5323
5324         /* set vf mac address */
5325         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5326
5327         /* enable transmit and receive for vf */
5328         reg = rd32(E1000_VFTE);
5329         wr32(E1000_VFTE, reg | (1 << vf));
5330         reg = rd32(E1000_VFRE);
5331         wr32(E1000_VFRE, reg | (1 << vf));
5332
5333         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5334
5335         /* reply to reset with ack and vf mac address */
5336         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5337         memcpy(addr, vf_mac, 6);
5338         igb_write_mbx(hw, msgbuf, 3, vf);
5339 }
5340
5341 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5342 {
5343         /*
5344          * The VF MAC Address is stored in a packed array of bytes
5345          * starting at the second 32 bit word of the msg array
5346          */
5347         unsigned char *addr = (unsigned char *)&msg[1];
5348         int err = -1;
5349
5350         if (is_valid_ether_addr(addr))
5351                 err = igb_set_vf_mac(adapter, vf, addr);
5352
5353         return err;
5354 }
5355
5356 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5357 {
5358         struct e1000_hw *hw = &adapter->hw;
5359         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5360         u32 msg = E1000_VT_MSGTYPE_NACK;
5361
5362         /* if device isn't clear to send it shouldn't be reading either */
5363         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5364             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5365                 igb_write_mbx(hw, &msg, 1, vf);
5366                 vf_data->last_nack = jiffies;
5367         }
5368 }
5369
5370 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5371 {
5372         struct pci_dev *pdev = adapter->pdev;
5373         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5374         struct e1000_hw *hw = &adapter->hw;
5375         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5376         s32 retval;
5377
5378         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5379
5380         if (retval) {
5381                 /* if receive failed revoke VF CTS stats and restart init */
5382                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5383                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5384                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5385                         return;
5386                 goto out;
5387         }
5388
5389         /* this is a message we already processed, do nothing */
5390         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5391                 return;
5392
5393         /*
5394          * until the vf completes a reset it should not be
5395          * allowed to start any configuration.
5396          */
5397
5398         if (msgbuf[0] == E1000_VF_RESET) {
5399                 igb_vf_reset_msg(adapter, vf);
5400                 return;
5401         }
5402
5403         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5404                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5405                         return;
5406                 retval = -1;
5407                 goto out;
5408         }
5409
5410         switch ((msgbuf[0] & 0xFFFF)) {
5411         case E1000_VF_SET_MAC_ADDR:
5412                 retval = -EINVAL;
5413                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5414                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5415                 else
5416                         dev_warn(&pdev->dev,
5417                                  "VF %d attempted to override administratively "
5418                                  "set MAC address\nReload the VF driver to "
5419                                  "resume operations\n", vf);
5420                 break;
5421         case E1000_VF_SET_PROMISC:
5422                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5423                 break;
5424         case E1000_VF_SET_MULTICAST:
5425                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5426                 break;
5427         case E1000_VF_SET_LPE:
5428                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5429                 break;
5430         case E1000_VF_SET_VLAN:
5431                 retval = -1;
5432                 if (vf_data->pf_vlan)
5433                         dev_warn(&pdev->dev,
5434                                  "VF %d attempted to override administratively "
5435                                  "set VLAN tag\nReload the VF driver to "
5436                                  "resume operations\n", vf);
5437                 else
5438                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5439                 break;
5440         default:
5441                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5442                 retval = -1;
5443                 break;
5444         }
5445
5446         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5447 out:
5448         /* notify the VF of the results of what it sent us */
5449         if (retval)
5450                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5451         else
5452                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5453
5454         igb_write_mbx(hw, msgbuf, 1, vf);
5455 }
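
/*
 * Sketch of the mailbox message decoded above: the low 16 bits of
 * msgbuf[0] carry the command and the E1000_VT_MSGINFO field carries a
 * per-command count, so a VF registering two multicast hashes would
 * send something like:
 *
 *   msgbuf[0] = E1000_VF_SET_MULTICAST | (2 << E1000_VT_MSGINFO_SHIFT);
 *   ((u16 *)&msgbuf[1])[0] = hash0;
 *   ((u16 *)&msgbuf[1])[1] = hash1;
 *
 * matching how igb_set_vf_multicasts() extracts the count and hash
 * list.
 */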
5456
5457 static void igb_msg_task(struct igb_adapter *adapter)
5458 {
5459         struct e1000_hw *hw = &adapter->hw;
5460         u32 vf;
5461
5462         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5463                 /* process any reset requests */
5464                 if (!igb_check_for_rst(hw, vf))
5465                         igb_vf_reset_event(adapter, vf);
5466
5467                 /* process any messages pending */
5468                 if (!igb_check_for_msg(hw, vf))
5469                         igb_rcv_msg_from_vf(adapter, vf);
5470
5471                 /* process any acks */
5472                 if (!igb_check_for_ack(hw, vf))
5473                         igb_rcv_ack_from_vf(adapter, vf);
5474         }
5475 }
5476
5477 /**
5478  *  igb_set_uta - Set unicast filter table address
5479  *  @adapter: board private structure
5480  *
5481  *  The unicast table address is a register array of 32-bit registers.
5482  *  The table is meant to be used in a way similar to how the MTA is used;
5483  *  however, due to certain limitations in the hardware it is necessary to
5484  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5485  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5486  **/
5487 static void igb_set_uta(struct igb_adapter *adapter)
5488 {
5489         struct e1000_hw *hw = &adapter->hw;
5490         int i;
5491
5492         /* The UTA table only exists on 82576 hardware and newer */
5493         if (hw->mac.type < e1000_82576)
5494                 return;
5495
5496         /* we only need to do this if VMDq is enabled */
5497         if (!adapter->vfs_allocated_count)
5498                 return;
5499
5500         for (i = 0; i < hw->mac.uta_reg_count; i++)
5501                 array_wr32(E1000_UTA, i, ~0);
5502 }
5503
5504 /**
5505  * igb_intr_msi - Interrupt Handler
5506  * @irq: interrupt number
5507  * @data: pointer to a network interface device structure
5508  **/
5509 static irqreturn_t igb_intr_msi(int irq, void *data)
5510 {
5511         struct igb_adapter *adapter = data;
5512         struct igb_q_vector *q_vector = adapter->q_vector[0];
5513         struct e1000_hw *hw = &adapter->hw;
5514         /* read ICR disables interrupts using IAM */
5515         u32 icr = rd32(E1000_ICR);
5516
5517         igb_write_itr(q_vector);
5518
5519         if (icr & E1000_ICR_DRSTA)
5520                 schedule_work(&adapter->reset_task);
5521
5522         if (icr & E1000_ICR_DOUTSYNC) {
5523                 /* HW is reporting DMA is out of sync */
5524                 adapter->stats.doosync++;
5525         }
5526
5527         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5528                 hw->mac.get_link_status = 1;
5529                 if (!test_bit(__IGB_DOWN, &adapter->state))
5530                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5531         }
5532
5533         napi_schedule(&q_vector->napi);
5534
5535         return IRQ_HANDLED;
5536 }
5537
5538 /**
5539  * igb_intr - Legacy Interrupt Handler
5540  * @irq: interrupt number
5541  * @data: pointer to a network interface device structure
5542  **/
5543 static irqreturn_t igb_intr(int irq, void *data)
5544 {
5545         struct igb_adapter *adapter = data;
5546         struct igb_q_vector *q_vector = adapter->q_vector[0];
5547         struct e1000_hw *hw = &adapter->hw;
5548         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5549          * need for the IMC write */
5550         u32 icr = rd32(E1000_ICR);
5551
5552         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5553          * not set, then the adapter didn't send an interrupt */
5554         if (!(icr & E1000_ICR_INT_ASSERTED))
5555                 return IRQ_NONE;
5556
5557         igb_write_itr(q_vector);
5558
5559         if (icr & E1000_ICR_DRSTA)
5560                 schedule_work(&adapter->reset_task);
5561
5562         if (icr & E1000_ICR_DOUTSYNC) {
5563                 /* HW is reporting DMA is out of sync */
5564                 adapter->stats.doosync++;
5565         }
5566
5567         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5568                 hw->mac.get_link_status = 1;
5569                 /* guard against interrupt when we're going down */
5570                 if (!test_bit(__IGB_DOWN, &adapter->state))
5571                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5572         }
5573
5574         napi_schedule(&q_vector->napi);
5575
5576         return IRQ_HANDLED;
5577 }
5578
5579 void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5580 {
5581         struct igb_adapter *adapter = q_vector->adapter;
5582         struct e1000_hw *hw = &adapter->hw;
5583
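        /* a single q_vector with no VFs uses the adapter-wide ITR setting;
         * any other configuration tunes each ring's ITR independently
         */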
5584         if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5585             (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5586                 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5587                         igb_set_itr(q_vector);
5588                 else
5589                         igb_update_ring_itr(q_vector);
5590         }
5591
5592         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5593                 if (adapter->msix_entries)
5594                         wr32(E1000_EIMS, q_vector->eims_value);
5595                 else
5596                         igb_irq_enable(adapter);
5597         }
5598 }
5599
5600 /**
5601  * igb_poll - NAPI Rx polling callback
5602  * @napi: napi polling structure
5603  * @budget: count of how many packets we should handle
5604  **/
5605 static int igb_poll(struct napi_struct *napi, int budget)
5606 {
5607         struct igb_q_vector *q_vector = container_of(napi,
5608                                                      struct igb_q_vector,
5609                                                      napi);
5610         bool clean_complete = true;
5611
5612 #ifdef CONFIG_IGB_DCA
5613         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5614                 igb_update_dca(q_vector);
5615 #endif
5616         if (q_vector->tx.ring)
5617                 clean_complete = igb_clean_tx_irq(q_vector);
5618
5619         if (q_vector->rx.ring)
5620                 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5621
5622         /* If all work not completed, return budget and keep polling */
5623         if (!clean_complete)
5624                 return budget;
5625
5626         /* If not enough Rx work done, exit the polling mode */
5627         napi_complete(napi);
5628         igb_ring_irq_enable(q_vector);
5629
5630         return 0;
5631 }
5632
5633 /**
5634  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5635  * @adapter: board private structure
5636  * @shhwtstamps: timestamp structure to update
5637  * @regval: unsigned 64bit system time value.
5638  *
5639  * We need to convert the system time value stored in the RX/TXSTMP registers
5640  * into a hwtstamp which can be used by the upper level timestamping functions
5641  */
5642 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5643                                    struct skb_shared_hwtstamps *shhwtstamps,
5644                                    u64 regval)
5645 {
5646         u64 ns;
5647
5648         /*
5649          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5650          * 24 to match clock shift we setup earlier.
5651          */
5652         if (adapter->hw.mac.type >= e1000_82580)
5653                 regval <<= IGB_82580_TSYNC_SHIFT;
5654
5655         ns = timecounter_cyc2time(&adapter->clock, regval);
5656         timecompare_update(&adapter->compare, ns);
5657         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5658         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5659         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5660 }
5661
5662 /**
5663  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5664  * @q_vector: pointer to q_vector containing needed info
 * @buffer_info: pointer to igb_tx_buffer structure
 *
 * If we were asked to do hardware stamping and such a time stamp is
 * available, then it must have been for this skb here because we allow
 * only one such packet into the queue.
5670  */
5671 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5672                             struct igb_tx_buffer *buffer_info)
5673 {
5674         struct igb_adapter *adapter = q_vector->adapter;
5675         struct e1000_hw *hw = &adapter->hw;
5676         struct skb_shared_hwtstamps shhwtstamps;
5677         u64 regval;
5678
5679         /* if skb does not support hw timestamp or TX stamp not valid exit */
5680         if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5681             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5682                 return;
5683
5684         regval = rd32(E1000_TXSTMPL);
5685         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5686
5687         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5688         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5689 }
5690
5691 /**
5692  * igb_clean_tx_irq - Reclaim resources after transmit completes
5693  * @q_vector: pointer to q_vector containing needed info
5694  * returns true if ring is completely cleaned
5695  **/
5696 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5697 {
5698         struct igb_adapter *adapter = q_vector->adapter;
5699         struct igb_ring *tx_ring = q_vector->tx.ring;
5700         struct igb_tx_buffer *tx_buffer;
5701         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5702         unsigned int total_bytes = 0, total_packets = 0;
5703         unsigned int budget = q_vector->tx.work_limit;
5704         unsigned int i = tx_ring->next_to_clean;
5705
5706         if (test_bit(__IGB_DOWN, &adapter->state))
5707                 return true;
5708
5709         tx_buffer = &tx_ring->tx_buffer_info[i];
5710         tx_desc = IGB_TX_DESC(tx_ring, i);
5711         i -= tx_ring->count;
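        /* i is biased negative here (-count..-1) so that the ring wrap
         * checks below reduce to a simple test for i reaching zero
         */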
5712
5713         for (; budget; budget--) {
5714                 eop_desc = tx_buffer->next_to_watch;
5715
5716                 /* prevent any other reads prior to eop_desc */
5717                 rmb();
5718
5719                 /* if next_to_watch is not set then there is no work pending */
5720                 if (!eop_desc)
5721                         break;
5722
5723                 /* if DD is not set pending work has not been completed */
5724                 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5725                         break;
5726
5727                 /* clear next_to_watch to prevent false hangs */
5728                 tx_buffer->next_to_watch = NULL;
5729
5730                 /* update the statistics for this packet */
5731                 total_bytes += tx_buffer->bytecount;
5732                 total_packets += tx_buffer->gso_segs;
5733
5734                 /* retrieve hardware timestamp */
5735                 igb_tx_hwtstamp(q_vector, tx_buffer);
5736
5737                 /* free the skb */
5738                 dev_kfree_skb_any(tx_buffer->skb);
5739                 tx_buffer->skb = NULL;
5740
5741                 /* unmap skb header data */
5742                 dma_unmap_single(tx_ring->dev,
5743                                  tx_buffer->dma,
5744                                  tx_buffer->length,
5745                                  DMA_TO_DEVICE);
5746
5747                 /* clear last DMA location and unmap remaining buffers */
5748                 while (tx_desc != eop_desc) {
5749                         tx_buffer->dma = 0;
5750
5751                         tx_buffer++;
5752                         tx_desc++;
5753                         i++;
5754                         if (unlikely(!i)) {
5755                                 i -= tx_ring->count;
5756                                 tx_buffer = tx_ring->tx_buffer_info;
5757                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
5758                         }
5759
5760                         /* unmap any remaining paged data */
5761                         if (tx_buffer->dma) {
5762                                 dma_unmap_page(tx_ring->dev,
5763                                                tx_buffer->dma,
5764                                                tx_buffer->length,
5765                                                DMA_TO_DEVICE);
5766                         }
5767                 }
5768
5769                 /* clear last DMA location */
5770                 tx_buffer->dma = 0;
5771
5772                 /* move us one more past the eop_desc for start of next pkt */
5773                 tx_buffer++;
5774                 tx_desc++;
5775                 i++;
5776                 if (unlikely(!i)) {
5777                         i -= tx_ring->count;
5778                         tx_buffer = tx_ring->tx_buffer_info;
5779                         tx_desc = IGB_TX_DESC(tx_ring, 0);
5780                 }
5781         }
5782
5783         i += tx_ring->count;
5784         tx_ring->next_to_clean = i;
5785         u64_stats_update_begin(&tx_ring->tx_syncp);
5786         tx_ring->tx_stats.bytes += total_bytes;
5787         tx_ring->tx_stats.packets += total_packets;
5788         u64_stats_update_end(&tx_ring->tx_syncp);
5789         q_vector->tx.total_bytes += total_bytes;
5790         q_vector->tx.total_packets += total_packets;
5791
5792         if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5793                 struct e1000_hw *hw = &adapter->hw;
5794
5795                 eop_desc = tx_buffer->next_to_watch;
5796
5797                 /* Detect a transmit hang in hardware, this serializes the
5798                  * check with the clearing of time_stamp and movement of i */
5799                 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5800                 if (eop_desc &&
5801                     time_after(jiffies, tx_buffer->time_stamp +
5802                                (adapter->tx_timeout_factor * HZ)) &&
5803                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5804
5805                         /* detected Tx unit hang */
5806                         dev_err(tx_ring->dev,
5807                                 "Detected Tx Unit Hang\n"
5808                                 "  Tx Queue             <%d>\n"
5809                                 "  TDH                  <%x>\n"
5810                                 "  TDT                  <%x>\n"
5811                                 "  next_to_use          <%x>\n"
5812                                 "  next_to_clean        <%x>\n"
5813                                 "buffer_info[next_to_clean]\n"
5814                                 "  time_stamp           <%lx>\n"
5815                                 "  next_to_watch        <%p>\n"
5816                                 "  jiffies              <%lx>\n"
5817                                 "  desc.status          <%x>\n",
5818                                 tx_ring->queue_index,
5819                                 rd32(E1000_TDH(tx_ring->reg_idx)),
5820                                 readl(tx_ring->tail),
5821                                 tx_ring->next_to_use,
5822                                 tx_ring->next_to_clean,
5823                                 tx_buffer->time_stamp,
5824                                 eop_desc,
5825                                 jiffies,
5826                                 eop_desc->wb.status);
5827                         netif_stop_subqueue(tx_ring->netdev,
5828                                             tx_ring->queue_index);
5829
5830                         /* we are about to reset, no point in enabling stuff */
5831                         return true;
5832                 }
5833         }
5834
5835         if (unlikely(total_packets &&
5836                      netif_carrier_ok(tx_ring->netdev) &&
5837                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5838                 /* Make sure that anybody stopping the queue after this
5839                  * sees the new next_to_clean.
5840                  */
5841                 smp_mb();
5842                 if (__netif_subqueue_stopped(tx_ring->netdev,
5843                                              tx_ring->queue_index) &&
5844                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5845                         netif_wake_subqueue(tx_ring->netdev,
5846                                             tx_ring->queue_index);
5847
5848                         u64_stats_update_begin(&tx_ring->tx_syncp);
5849                         tx_ring->tx_stats.restart_queue++;
5850                         u64_stats_update_end(&tx_ring->tx_syncp);
5851                 }
5852         }
5853
5854         return !!budget;
5855 }
5856
5857 static inline void igb_rx_checksum(struct igb_ring *ring,
5858                                    union e1000_adv_rx_desc *rx_desc,
5859                                    struct sk_buff *skb)
5860 {
5861         skb_checksum_none_assert(skb);
5862
5863         /* Ignore Checksum bit is set */
5864         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5865                 return;
5866
5867         /* Rx checksum disabled via ethtool */
5868         if (!(ring->netdev->features & NETIF_F_RXCSUM))
5869                 return;
5870
5871         /* TCP/UDP checksum error bit is set */
5872         if (igb_test_staterr(rx_desc,
5873                              E1000_RXDEXT_STATERR_TCPE |
5874                              E1000_RXDEXT_STATERR_IPE)) {
5875                 /*
5876                  * work around errata with sctp packets where the TCPE aka
5877                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
                 * packets; in that case let the stack verify the crc32c
5879                  */
5880                 if (!((skb->len == 60) &&
5881                       test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5882                         u64_stats_update_begin(&ring->rx_syncp);
5883                         ring->rx_stats.csum_err++;
5884                         u64_stats_update_end(&ring->rx_syncp);
5885                 }
5886                 /* let the stack verify checksum errors */
5887                 return;
5888         }
5889         /* It must be a TCP or UDP packet with a valid checksum */
5890         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5891                                       E1000_RXD_STAT_UDPCS))
5892                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5893
5894         dev_dbg(ring->dev, "cksum success: bits %08X\n",
5895                 le32_to_cpu(rx_desc->wb.upper.status_error));
5896 }
5897
5898 static inline void igb_rx_hash(struct igb_ring *ring,
5899                                union e1000_adv_rx_desc *rx_desc,
5900                                struct sk_buff *skb)
5901 {
5902         if (ring->netdev->features & NETIF_F_RXHASH)
5903                 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5904 }
5905
5906 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5907                             union e1000_adv_rx_desc *rx_desc,
5908                             struct sk_buff *skb)
5909 {
5910         struct igb_adapter *adapter = q_vector->adapter;
5911         struct e1000_hw *hw = &adapter->hw;
5912         u64 regval;
5913
5914         if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5915                                        E1000_RXDADV_STAT_TS))
5916                 return;
5917
5918         /*
5919          * If this bit is set, then the RX registers contain the time stamp. No
5920          * other packet will be time stamped until we read these registers, so
5921          * read the registers to make them available again. Because only one
5922          * packet can be time stamped at a time, we know that the register
5923          * values must belong to this one here and therefore we don't need to
5924          * compare any of the additional attributes stored for it.
5925          *
5926          * If nothing went wrong, then it should have a shared tx_flags that we
5927          * can turn into a skb_shared_hwtstamps.
5928          */
5929         if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
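                /* the timestamp header is prepended to the packet data;
                 * dwords 2 and 3 carry the low and high 32 bits of SYSTIM
                 */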
5930                 u32 *stamp = (u32 *)skb->data;
5931                 regval = le32_to_cpu(*(stamp + 2));
5932                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5933                 skb_pull(skb, IGB_TS_HDR_LEN);
5934         } else {
                if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5936                         return;
5937
5938                 regval = rd32(E1000_RXSTMPL);
5939                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5940         }
5941
5942         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5943 }
5944
5945 static void igb_rx_vlan(struct igb_ring *ring,
5946                         union e1000_adv_rx_desc *rx_desc,
5947                         struct sk_buff *skb)
5948 {
5949         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
5950                 u16 vid;
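                /* loopback (VM-to-VM) packets on some MACs carry the VLAN
                 * tag in network order, so byte-swap those
                 */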
5951                 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
5952                     test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
5953                         vid = be16_to_cpu(rx_desc->wb.upper.vlan);
5954                 else
5955                         vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5956
5957                 __vlan_hwaccel_put_tag(skb, vid);
5958         }
5959 }
5960
5961 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5962 {
5963         /* HW will not DMA in data larger than the given buffer, even if it
5964          * parses the (NFS, of course) header to be larger.  In that case, it
5965          * fills the header buffer and spills the rest into the page.
5966          */
5967         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5968                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5969         if (hlen > IGB_RX_HDR_LEN)
5970                 hlen = IGB_RX_HDR_LEN;
5971         return hlen;
5972 }
5973
5974 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5975 {
5976         struct igb_ring *rx_ring = q_vector->rx.ring;
5977         union e1000_adv_rx_desc *rx_desc;
5978         const int current_node = numa_node_id();
5979         unsigned int total_bytes = 0, total_packets = 0;
5980         u16 cleaned_count = igb_desc_unused(rx_ring);
5981         u16 i = rx_ring->next_to_clean;
5982
5983         rx_desc = IGB_RX_DESC(rx_ring, i);
5984
5985         while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
5986                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
5987                 struct sk_buff *skb = buffer_info->skb;
5988                 union e1000_adv_rx_desc *next_rxd;
5989
5990                 buffer_info->skb = NULL;
5991                 prefetch(skb->data);
5992
5993                 i++;
5994                 if (i == rx_ring->count)
5995                         i = 0;
5996
5997                 next_rxd = IGB_RX_DESC(rx_ring, i);
5998                 prefetch(next_rxd);
5999
6000                 /*
6001                  * This memory barrier is needed to keep us from reading
6002                  * any other fields out of the rx_desc until we know the
6003                  * RXD_STAT_DD bit is set
6004                  */
6005                 rmb();
6006
6007                 if (!skb_is_nonlinear(skb)) {
6008                         __skb_put(skb, igb_get_hlen(rx_desc));
6009                         dma_unmap_single(rx_ring->dev, buffer_info->dma,
6010                                          IGB_RX_HDR_LEN,
6011                                          DMA_FROM_DEVICE);
6012                         buffer_info->dma = 0;
6013                 }
6014
6015                 if (rx_desc->wb.upper.length) {
6016                         u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6017
6018                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6019                                                 buffer_info->page,
6020                                                 buffer_info->page_offset,
6021                                                 length);
6022
6023                         skb->len += length;
6024                         skb->data_len += length;
6025                         skb->truesize += PAGE_SIZE / 2;
6026
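                        /* keep the page for reuse only if we hold the sole
                         * reference and it is local to this NUMA node; the
                         * extra reference taken below lets both the skb and
                         * the ring hold onto it
                         */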
6027                         if ((page_count(buffer_info->page) != 1) ||
6028                             (page_to_nid(buffer_info->page) != current_node))
6029                                 buffer_info->page = NULL;
6030                         else
6031                                 get_page(buffer_info->page);
6032
6033                         dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6034                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
6035                         buffer_info->page_dma = 0;
6036                 }
6037
6038                 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6039                         struct igb_rx_buffer *next_buffer;
6040                         next_buffer = &rx_ring->rx_buffer_info[i];
6041                         buffer_info->skb = next_buffer->skb;
6042                         buffer_info->dma = next_buffer->dma;
6043                         next_buffer->skb = skb;
6044                         next_buffer->dma = 0;
6045                         goto next_desc;
6046                 }
6047
6048                 if (igb_test_staterr(rx_desc,
6049                                      E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
6050                         dev_kfree_skb_any(skb);
6051                         goto next_desc;
6052                 }
6053
6054                 igb_rx_hwtstamp(q_vector, rx_desc, skb);
6055                 igb_rx_hash(rx_ring, rx_desc, skb);
6056                 igb_rx_checksum(rx_ring, rx_desc, skb);
6057                 igb_rx_vlan(rx_ring, rx_desc, skb);
6058
6059                 total_bytes += skb->len;
6060                 total_packets++;
6061
6062                 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6063
6064                 napi_gro_receive(&q_vector->napi, skb);
6065
6066                 budget--;
6067 next_desc:
6068                 if (!budget)
6069                         break;
6070
6071                 cleaned_count++;
6072                 /* return some buffers to hardware, one at a time is too slow */
6073                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6074                         igb_alloc_rx_buffers(rx_ring, cleaned_count);
6075                         cleaned_count = 0;
6076                 }
6077
6078                 /* use prefetched values */
6079                 rx_desc = next_rxd;
6080         }
6081
6082         rx_ring->next_to_clean = i;
6083         u64_stats_update_begin(&rx_ring->rx_syncp);
6084         rx_ring->rx_stats.packets += total_packets;
6085         rx_ring->rx_stats.bytes += total_bytes;
6086         u64_stats_update_end(&rx_ring->rx_syncp);
6087         q_vector->rx.total_packets += total_packets;
6088         q_vector->rx.total_bytes += total_bytes;
6089
6090         if (cleaned_count)
6091                 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6092
6093         return !!budget;
6094 }
6095
6096 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6097                                  struct igb_rx_buffer *bi)
6098 {
6099         struct sk_buff *skb = bi->skb;
6100         dma_addr_t dma = bi->dma;
6101
6102         if (dma)
6103                 return true;
6104
6105         if (likely(!skb)) {
6106                 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6107                                                 IGB_RX_HDR_LEN);
6108                 bi->skb = skb;
6109                 if (!skb) {
6110                         rx_ring->rx_stats.alloc_failed++;
6111                         return false;
6112                 }
6113
6114                 /* initialize skb for ring */
6115                 skb_record_rx_queue(skb, rx_ring->queue_index);
6116         }
6117
6118         dma = dma_map_single(rx_ring->dev, skb->data,
6119                              IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6120
6121         if (dma_mapping_error(rx_ring->dev, dma)) {
6122                 rx_ring->rx_stats.alloc_failed++;
6123                 return false;
6124         }
6125
6126         bi->dma = dma;
6127         return true;
6128 }
6129
6130 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6131                                   struct igb_rx_buffer *bi)
6132 {
6133         struct page *page = bi->page;
6134         dma_addr_t page_dma = bi->page_dma;
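        /* switch to the half of the page not currently in use by hardware */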
6135         unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6136
6137         if (page_dma)
6138                 return true;
6139
6140         if (!page) {
6141                 page = netdev_alloc_page(rx_ring->netdev);
6142                 bi->page = page;
6143                 if (unlikely(!page)) {
6144                         rx_ring->rx_stats.alloc_failed++;
6145                         return false;
6146                 }
6147         }
6148
6149         page_dma = dma_map_page(rx_ring->dev, page,
6150                                 page_offset, PAGE_SIZE / 2,
6151                                 DMA_FROM_DEVICE);
6152
6153         if (dma_mapping_error(rx_ring->dev, page_dma)) {
6154                 rx_ring->rx_stats.alloc_failed++;
6155                 return false;
6156         }
6157
6158         bi->page_dma = page_dma;
6159         bi->page_offset = page_offset;
6160         return true;
6161 }
6162
6163 /**
6164  * igb_alloc_rx_buffers - Replace used receive buffers; packet split
 * @rx_ring: rx descriptor ring to place buffers on
 * @cleaned_count: number of buffers to replace
6166  **/
6167 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6168 {
6169         union e1000_adv_rx_desc *rx_desc;
6170         struct igb_rx_buffer *bi;
6171         u16 i = rx_ring->next_to_use;
6172
6173         rx_desc = IGB_RX_DESC(rx_ring, i);
6174         bi = &rx_ring->rx_buffer_info[i];
6175         i -= rx_ring->count;
6176
6177         while (cleaned_count--) {
6178                 if (!igb_alloc_mapped_skb(rx_ring, bi))
6179                         break;
6180
6181                 /* Refresh the desc even if buffer_addrs didn't change
6182                  * because each write-back erases this info. */
6183                 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6184
6185                 if (!igb_alloc_mapped_page(rx_ring, bi))
6186                         break;
6187
6188                 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6189
6190                 rx_desc++;
6191                 bi++;
6192                 i++;
6193                 if (unlikely(!i)) {
6194                         rx_desc = IGB_RX_DESC(rx_ring, 0);
6195                         bi = rx_ring->rx_buffer_info;
6196                         i -= rx_ring->count;
6197                 }
6198
6199                 /* clear the hdr_addr for the next_to_use descriptor */
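                /* (hdr_addr overlays the write-back status dword, so this
                 *  also clears any stale DD bit left from a previous use)
                 */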
6200                 rx_desc->read.hdr_addr = 0;
6201         }
6202
6203         i += rx_ring->count;
6204
6205         if (rx_ring->next_to_use != i) {
6206                 rx_ring->next_to_use = i;
6207
6208                 /* Force memory writes to complete before letting h/w
6209                  * know there are new descriptors to fetch.  (Only
6210                  * applicable for weak-ordered memory model archs,
6211                  * such as IA-64). */
6212                 wmb();
6213                 writel(i, rx_ring->tail);
6214         }
6215 }
6216
6217 /**
 * igb_mii_ioctl - handle MII register ioctls
 * @netdev: network interface device structure
 * @ifr: interface request structure holding the MII data
 * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6222  **/
6223 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6224 {
6225         struct igb_adapter *adapter = netdev_priv(netdev);
6226         struct mii_ioctl_data *data = if_mii(ifr);
6227
6228         if (adapter->hw.phy.media_type != e1000_media_type_copper)
6229                 return -EOPNOTSUPP;
6230
6231         switch (cmd) {
6232         case SIOCGMIIPHY:
6233                 data->phy_id = adapter->hw.phy.addr;
6234                 break;
6235         case SIOCGMIIREG:
6236                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6237                                      &data->val_out))
6238                         return -EIO;
6239                 break;
6240         case SIOCSMIIREG:
6241         default:
6242                 return -EOPNOTSUPP;
6243         }
6244         return 0;
6245 }
6246
6247 /**
6248  * igb_hwtstamp_ioctl - control hardware time stamping
 * @netdev: network interface device structure
 * @ifr: interface request structure holding the hwtstamp_config
 * @cmd: ioctl command (SIOCSHWTSTAMP)
6252  *
6253  * Outgoing time stamping can be enabled and disabled. Play nice and
 * disable it when requested, although it shouldn't cause any overhead
6255  * when no packet needs it. At most one packet in the queue may be
6256  * marked for time stamping, otherwise it would be impossible to tell
6257  * for sure to which packet the hardware time stamp belongs.
6258  *
6259  * Incoming time stamping has to be configured via the hardware
6260  * filters. Not all combinations are supported, in particular event
6261  * type has to be specified. Matching the kind of event packet is
6262  * not supported, with the exception of "all V2 events regardless of
 * layer 2 or 4".
6264  *
6265  **/
6266 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6267                               struct ifreq *ifr, int cmd)
6268 {
6269         struct igb_adapter *adapter = netdev_priv(netdev);
6270         struct e1000_hw *hw = &adapter->hw;
6271         struct hwtstamp_config config;
6272         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6273         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6274         u32 tsync_rx_cfg = 0;
6275         bool is_l4 = false;
6276         bool is_l2 = false;
6277         u32 regval;
6278
6279         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6280                 return -EFAULT;
6281
6282         /* reserved for future extensions */
6283         if (config.flags)
6284                 return -EINVAL;
6285
6286         switch (config.tx_type) {
6287         case HWTSTAMP_TX_OFF:
6288                 tsync_tx_ctl = 0;
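                /* fall through - HWTSTAMP_TX_OFF only clears the enable bit */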
6289         case HWTSTAMP_TX_ON:
6290                 break;
6291         default:
6292                 return -ERANGE;
6293         }
6294
6295         switch (config.rx_filter) {
6296         case HWTSTAMP_FILTER_NONE:
6297                 tsync_rx_ctl = 0;
6298                 break;
6299         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6300         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6301         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6302         case HWTSTAMP_FILTER_ALL:
6303                 /*
6304                  * register TSYNCRXCFG must be set, therefore it is not
6305                  * possible to time stamp both Sync and Delay_Req messages
6306                  * => fall back to time stamping all packets
6307                  */
6308                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6309                 config.rx_filter = HWTSTAMP_FILTER_ALL;
6310                 break;
6311         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6312                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6313                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6314                 is_l4 = true;
6315                 break;
6316         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6317                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6318                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6319                 is_l4 = true;
6320                 break;
6321         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6322         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6323                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6324                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6325                 is_l2 = true;
6326                 is_l4 = true;
6327                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6328                 break;
6329         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6330         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6331                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6332                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6333                 is_l2 = true;
6334                 is_l4 = true;
6335                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6336                 break;
6337         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6338         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6339         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6340                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6341                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6342                 is_l2 = true;
6343                 is_l4 = true;
6344                 break;
6345         default:
6346                 return -ERANGE;
6347         }
6348
6349         if (hw->mac.type == e1000_82575) {
                if (tsync_rx_ctl || tsync_tx_ctl)
6351                         return -EINVAL;
6352                 return 0;
6353         }
6354
6355         /*
6356          * Per-packet timestamping only works if all packets are
6357          * timestamped, so enable timestamping in all packets as
6358          * long as one rx filter was configured.
6359          */
6360         if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6361                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6362                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6363         }
6364
6365         /* enable/disable TX */
6366         regval = rd32(E1000_TSYNCTXCTL);
6367         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6368         regval |= tsync_tx_ctl;
6369         wr32(E1000_TSYNCTXCTL, regval);
6370
6371         /* enable/disable RX */
6372         regval = rd32(E1000_TSYNCRXCTL);
6373         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6374         regval |= tsync_rx_ctl;
6375         wr32(E1000_TSYNCRXCTL, regval);
6376
6377         /* define which PTP packets are time stamped */
6378         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6379
6380         /* define ethertype filter for timestamped packets */
6381         if (is_l2)
6382                 wr32(E1000_ETQF(3),
6383                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6384                                  E1000_ETQF_1588 | /* enable timestamping */
6385                                  ETH_P_1588));     /* 1588 eth protocol type */
6386         else
6387                 wr32(E1000_ETQF(3), 0);
6388
6389 #define PTP_PORT 319
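        /* 319 is the well-known UDP port for PTP event messages */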
6390         /* L4 Queue Filter[3]: filter by destination port and protocol */
6391         if (is_l4) {
6392                 u32 ftqf = (IPPROTO_UDP /* UDP */
6393                         | E1000_FTQF_VF_BP /* VF not compared */
6394                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6395                         | E1000_FTQF_MASK); /* mask all inputs */
6396                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6397
6398                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6399                 wr32(E1000_IMIREXT(3),
6400                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6401                 if (hw->mac.type == e1000_82576) {
6402                         /* enable source port check */
6403                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6404                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6405                 }
6406                 wr32(E1000_FTQF(3), ftqf);
6407         } else {
6408                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6409         }
6410         wrfl();
6411
6412         adapter->hwtstamp_config = config;
6413
6414         /* clear TX/RX time stamp registers, just to be sure */
6415         regval = rd32(E1000_TXSTMPH);
6416         regval = rd32(E1000_RXSTMPH);
6417
6418         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6419                 -EFAULT : 0;
6420 }
6421
6422 /**
 * igb_ioctl - dispatch device-specific ioctls
 * @netdev: network interface device structure
 * @ifr: interface request structure
 * @cmd: ioctl command
6427  **/
6428 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6429 {
6430         switch (cmd) {
6431         case SIOCGMIIPHY:
6432         case SIOCGMIIREG:
6433         case SIOCSMIIREG:
6434                 return igb_mii_ioctl(netdev, ifr, cmd);
6435         case SIOCSHWTSTAMP:
6436                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6437         default:
6438                 return -EOPNOTSUPP;
6439         }
6440 }
6441
6442 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6443 {
6444         struct igb_adapter *adapter = hw->back;
6445         u16 cap_offset;
6446
6447         cap_offset = adapter->pdev->pcie_cap;
6448         if (!cap_offset)
6449                 return -E1000_ERR_CONFIG;
6450
6451         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6452
6453         return 0;
6454 }
6455
6456 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6457 {
6458         struct igb_adapter *adapter = hw->back;
6459         u16 cap_offset;
6460
6461         cap_offset = adapter->pdev->pcie_cap;
6462         if (!cap_offset)
6463                 return -E1000_ERR_CONFIG;
6464
6465         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6466
6467         return 0;
6468 }
6469
6470 static void igb_vlan_mode(struct net_device *netdev, u32 features)
6471 {
6472         struct igb_adapter *adapter = netdev_priv(netdev);
6473         struct e1000_hw *hw = &adapter->hw;
6474         u32 ctrl, rctl;
6475         bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6476
6477         if (enable) {
6478                 /* enable VLAN tag insert/strip */
6479                 ctrl = rd32(E1000_CTRL);
6480                 ctrl |= E1000_CTRL_VME;
6481                 wr32(E1000_CTRL, ctrl);
6482
6483                 /* Disable CFI check */
6484                 rctl = rd32(E1000_RCTL);
6485                 rctl &= ~E1000_RCTL_CFIEN;
6486                 wr32(E1000_RCTL, rctl);
6487         } else {
6488                 /* disable VLAN tag insert/strip */
6489                 ctrl = rd32(E1000_CTRL);
6490                 ctrl &= ~E1000_CTRL_VME;
6491                 wr32(E1000_CTRL, ctrl);
6492         }
6493
6494         igb_rlpml_set(adapter);
6495 }
6496
6497 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6498 {
6499         struct igb_adapter *adapter = netdev_priv(netdev);
6500         struct e1000_hw *hw = &adapter->hw;
6501         int pf_id = adapter->vfs_allocated_count;
6502
6503         /* attempt to add filter to vlvf array */
6504         igb_vlvf_set(adapter, vid, true, pf_id);
6505
6506         /* add the filter since PF can receive vlans w/o entry in vlvf */
6507         igb_vfta_set(hw, vid, true);
6508
6509         set_bit(vid, adapter->active_vlans);
6510 }
6511
6512 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6513 {
6514         struct igb_adapter *adapter = netdev_priv(netdev);
6515         struct e1000_hw *hw = &adapter->hw;
6516         int pf_id = adapter->vfs_allocated_count;
6517         s32 err;
6518
6519         /* remove vlan from VLVF table array */
6520         err = igb_vlvf_set(adapter, vid, false, pf_id);
6521
6522         /* if vid was not present in VLVF just remove it from table */
6523         if (err)
6524                 igb_vfta_set(hw, vid, false);
6525
6526         clear_bit(vid, adapter->active_vlans);
6527 }
6528
6529 static void igb_restore_vlan(struct igb_adapter *adapter)
6530 {
6531         u16 vid;
6532
6533         igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6534
6535         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6536                 igb_vlan_rx_add_vid(adapter->netdev, vid);
6537 }
6538
6539 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6540 {
6541         struct pci_dev *pdev = adapter->pdev;
6542         struct e1000_mac_info *mac = &adapter->hw.mac;
6543
6544         mac->autoneg = 0;
6545
6546         /* Make sure dplx is at most 1 bit and lsb of speed is not set
6547          * for the switch() below to work */
6548         if ((spd & 1) || (dplx & ~1))
6549                 goto err_inval;
6550
        /* Fiber NICs only allow 1000 Mbps full duplex */
        if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
            (spd != SPEED_1000 || dplx != DUPLEX_FULL))
                goto err_inval;
6556
6557         switch (spd + dplx) {
6558         case SPEED_10 + DUPLEX_HALF:
6559                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6560                 break;
6561         case SPEED_10 + DUPLEX_FULL:
6562                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6563                 break;
6564         case SPEED_100 + DUPLEX_HALF:
6565                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6566                 break;
6567         case SPEED_100 + DUPLEX_FULL:
6568                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6569                 break;
6570         case SPEED_1000 + DUPLEX_FULL:
6571                 mac->autoneg = 1;
6572                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6573                 break;
6574         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6575         default:
6576                 goto err_inval;
6577         }
6578         return 0;
6579
6580 err_inval:
6581         dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6582         return -EINVAL;
6583 }
6584
6585 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6586 {
6587         struct net_device *netdev = pci_get_drvdata(pdev);
6588         struct igb_adapter *adapter = netdev_priv(netdev);
6589         struct e1000_hw *hw = &adapter->hw;
6590         u32 ctrl, rctl, status;
6591         u32 wufc = adapter->wol;
6592 #ifdef CONFIG_PM
6593         int retval = 0;
6594 #endif
6595
6596         netif_device_detach(netdev);
6597
6598         if (netif_running(netdev))
6599                 igb_close(netdev);
6600
6601         igb_clear_interrupt_scheme(adapter);
6602
6603 #ifdef CONFIG_PM
6604         retval = pci_save_state(pdev);
6605         if (retval)
6606                 return retval;
6607 #endif
6608
6609         status = rd32(E1000_STATUS);
6610         if (status & E1000_STATUS_LU)
6611                 wufc &= ~E1000_WUFC_LNKC;
6612
6613         if (wufc) {
6614                 igb_setup_rctl(adapter);
6615                 igb_set_rx_mode(netdev);
6616
6617                 /* turn on all-multi mode if wake on multicast is enabled */
6618                 if (wufc & E1000_WUFC_MC) {
6619                         rctl = rd32(E1000_RCTL);
6620                         rctl |= E1000_RCTL_MPE;
6621                         wr32(E1000_RCTL, rctl);
6622                 }
6623
6624                 ctrl = rd32(E1000_CTRL);
6625                 /* advertise wake from D3Cold */
6626                 #define E1000_CTRL_ADVD3WUC 0x00100000
6627                 /* phy power management enable */
6628                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6629                 ctrl |= E1000_CTRL_ADVD3WUC;
6630                 wr32(E1000_CTRL, ctrl);
6631
6632                 /* Allow time for pending master requests to run */
6633                 igb_disable_pcie_master(hw);
6634
6635                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6636                 wr32(E1000_WUFC, wufc);
6637         } else {
6638                 wr32(E1000_WUC, 0);
6639                 wr32(E1000_WUFC, 0);
6640         }
6641
6642         *enable_wake = wufc || adapter->en_mng_pt;
6643         if (!*enable_wake)
6644                 igb_power_down_link(adapter);
6645         else
6646                 igb_power_up_link(adapter);
6647
6648         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6649          * would have already happened in close and is redundant. */
6650         igb_release_hw_control(adapter);
6651
6652         pci_disable_device(pdev);
6653
6654         return 0;
6655 }
6656
6657 #ifdef CONFIG_PM
6658 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6659 {
6660         int retval;
6661         bool wake;
6662
6663         retval = __igb_shutdown(pdev, &wake);
6664         if (retval)
6665                 return retval;
6666
6667         if (wake) {
6668                 pci_prepare_to_sleep(pdev);
6669         } else {
6670                 pci_wake_from_d3(pdev, false);
6671                 pci_set_power_state(pdev, PCI_D3hot);
6672         }
6673
6674         return 0;
6675 }
6676
6677 static int igb_resume(struct pci_dev *pdev)
6678 {
6679         struct net_device *netdev = pci_get_drvdata(pdev);
6680         struct igb_adapter *adapter = netdev_priv(netdev);
6681         struct e1000_hw *hw = &adapter->hw;
        int err;
6683
6684         pci_set_power_state(pdev, PCI_D0);
6685         pci_restore_state(pdev);
6686         pci_save_state(pdev);
6687
6688         err = pci_enable_device_mem(pdev);
6689         if (err) {
6690                 dev_err(&pdev->dev,
6691                         "igb: Cannot enable PCI device from suspend\n");
6692                 return err;
6693         }
6694         pci_set_master(pdev);
6695
6696         pci_enable_wake(pdev, PCI_D3hot, 0);
6697         pci_enable_wake(pdev, PCI_D3cold, 0);
6698
6699         if (igb_init_interrupt_scheme(adapter)) {
6700                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6701                 return -ENOMEM;
6702         }
6703
6704         igb_reset(adapter);
6705
6706         /* let the f/w know that the h/w is now under the control of the
6707          * driver. */
6708         igb_get_hw_control(adapter);
6709
6710         wr32(E1000_WUS, ~0);
6711
6712         if (netif_running(netdev)) {
6713                 err = igb_open(netdev);
6714                 if (err)
6715                         return err;
6716         }
6717
6718         netif_device_attach(netdev);
6719
6720         return 0;
6721 }
6722 #endif
6723
6724 static void igb_shutdown(struct pci_dev *pdev)
6725 {
6726         bool wake;
6727
6728         __igb_shutdown(pdev, &wake);
6729
6730         if (system_state == SYSTEM_POWER_OFF) {
6731                 pci_wake_from_d3(pdev, wake);
6732                 pci_set_power_state(pdev, PCI_D3hot);
6733         }
6734 }
6735
6736 #ifdef CONFIG_NET_POLL_CONTROLLER
6737 /*
6738  * Polling 'interrupt' - used by things like netconsole to send skbs
6739  * without having to re-enable interrupts. It's not called while
6740  * the interrupt routine is executing.
6741  */
6742 static void igb_netpoll(struct net_device *netdev)
6743 {
6744         struct igb_adapter *adapter = netdev_priv(netdev);
6745         struct e1000_hw *hw = &adapter->hw;
6746         struct igb_q_vector *q_vector;
6747         int i;
6748
6749         for (i = 0; i < adapter->num_q_vectors; i++) {
6750                 q_vector = adapter->q_vector[i];
6751                 if (adapter->msix_entries)
6752                         wr32(E1000_EIMC, q_vector->eims_value);
6753                 else
6754                         igb_irq_disable(adapter);
6755                 napi_schedule(&q_vector->napi);
6756         }
6757 }
6758 #endif /* CONFIG_NET_POLL_CONTROLLER */
6759
6760 /**
6761  * igb_io_error_detected - called when PCI error is detected
6762  * @pdev: Pointer to PCI device
6763  * @state: The current pci connection state
6764  *
6765  * This function is called after a PCI bus error affecting
6766  * this device has been detected.
6767  */
6768 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6769                                               pci_channel_state_t state)
6770 {
6771         struct net_device *netdev = pci_get_drvdata(pdev);
6772         struct igb_adapter *adapter = netdev_priv(netdev);
6773
6774         netif_device_detach(netdev);
6775
6776         if (state == pci_channel_io_perm_failure)
6777                 return PCI_ERS_RESULT_DISCONNECT;
6778
6779         if (netif_running(netdev))
6780                 igb_down(adapter);
6781         pci_disable_device(pdev);
6782
        /* Request a slot reset. */
6784         return PCI_ERS_RESULT_NEED_RESET;
6785 }
6786
6787 /**
6788  * igb_io_slot_reset - called after the pci bus has been reset.
6789  * @pdev: Pointer to PCI device
6790  *
6791  * Restart the card from scratch, as if from a cold-boot. Implementation
6792  * resembles the first-half of the igb_resume routine.
6793  */
6794 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6795 {
6796         struct net_device *netdev = pci_get_drvdata(pdev);
6797         struct igb_adapter *adapter = netdev_priv(netdev);
6798         struct e1000_hw *hw = &adapter->hw;
6799         pci_ers_result_t result;
6800         int err;
6801
6802         if (pci_enable_device_mem(pdev)) {
6803                 dev_err(&pdev->dev,
6804                         "Cannot re-enable PCI device after reset.\n");
6805                 result = PCI_ERS_RESULT_DISCONNECT;
6806         } else {
6807                 pci_set_master(pdev);
6808                 pci_restore_state(pdev);
6809                 pci_save_state(pdev);
6810
6811                 pci_enable_wake(pdev, PCI_D3hot, 0);
6812                 pci_enable_wake(pdev, PCI_D3cold, 0);
6813
6814                 igb_reset(adapter);
6815                 wr32(E1000_WUS, ~0);
6816                 result = PCI_ERS_RESULT_RECOVERED;
6817         }
6818
6819         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6820         if (err) {
6821                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6822                         "failed 0x%0x\n", err);
6823                 /* non-fatal, continue */
6824         }
6825
6826         return result;
6827 }
6828
6829 /**
6830  * igb_io_resume - called when traffic can start flowing again.
6831  * @pdev: Pointer to PCI device
6832  *
6833  * This callback is called when the error recovery driver tells us that
 * it's OK to resume normal operation. Implementation resembles the
6835  * second-half of the igb_resume routine.
6836  */
6837 static void igb_io_resume(struct pci_dev *pdev)
6838 {
6839         struct net_device *netdev = pci_get_drvdata(pdev);
6840         struct igb_adapter *adapter = netdev_priv(netdev);
6841
6842         if (netif_running(netdev)) {
6843                 if (igb_up(adapter)) {
6844                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6845                         return;
6846                 }
6847         }
6848
6849         netif_device_attach(netdev);
6850
6851         /* let the f/w know that the h/w is now under the control of the
6852          * driver. */
6853         igb_get_hw_control(adapter);
6854 }
6855
6856 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6857                              u8 qsel)
6858 {
6859         u32 rar_low, rar_high;
6860         struct e1000_hw *hw = &adapter->hw;
6861
6862         /* HW expects these in little endian so we reverse the byte order
6863          * from network order (big endian) to little endian
6864          */
6865         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6866                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6867         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6868
6869         /* Indicate to hardware the Address is Valid. */
6870         rar_high |= E1000_RAH_AV;
6871
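        /* 82575 writes the pool number into the RAH pool field, while
         * later MACs treat the field as a bitmask with one bit per pool
         */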
6872         if (hw->mac.type == e1000_82575)
6873                 rar_high |= E1000_RAH_POOL_1 * qsel;
6874         else
6875                 rar_high |= E1000_RAH_POOL_1 << qsel;
6876
6877         wr32(E1000_RAL(index), rar_low);
6878         wrfl();
6879         wr32(E1000_RAH(index), rar_high);
6880         wrfl();
6881 }
6882
6883 static int igb_set_vf_mac(struct igb_adapter *adapter,
6884                           int vf, unsigned char *mac_addr)
6885 {
6886         struct e1000_hw *hw = &adapter->hw;
        /* VF MAC addresses start at the end of the receive addresses and
         * move towards the first, so a collision should not be possible */
6889         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6890
6891         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6892
6893         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6894
6895         return 0;
6896 }
6897
6898 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6899 {
6900         struct igb_adapter *adapter = netdev_priv(netdev);
6901         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6902                 return -EINVAL;
6903         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6904         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
        dev_info(&adapter->pdev->dev,
                 "Reload the VF driver to make this change effective.\n");
6907         if (test_bit(__IGB_DOWN, &adapter->state)) {
6908                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6909                          " but the PF device is not up.\n");
6910                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6911                          " attempting to use the VF device.\n");
6912         }
6913         return igb_set_vf_mac(adapter, vf, mac);
6914 }
6915
6916 static int igb_link_mbps(int internal_link_speed)
6917 {
6918         switch (internal_link_speed) {
6919         case SPEED_100:
6920                 return 100;
6921         case SPEED_1000:
6922                 return 1000;
6923         default:
6924                 return 0;
6925         }
6926 }
6927
6928 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6929                                   int link_speed)
6930 {
6931         int rf_dec, rf_int;
6932         u32 bcnrc_val;
6933
6934         if (tx_rate != 0) {
6935                 /* Calculate the rate factor values to set */
6936                 rf_int = link_speed / tx_rate;
6937                 rf_dec = (link_speed - (rf_int * tx_rate));
6938                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
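                /* rf_int.rf_dec forms the fixed-point divisor
                 * link_speed / tx_rate; e.g. limiting a 1000 Mbps link to
                 * 300 Mbps gives rf_int = 3 and rf_dec = (100 << shift) / 300,
                 * an effective divisor of ~3.33
                 */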
6939
6940                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6941                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6942                                E1000_RTTBCNRC_RF_INT_MASK);
6943                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6944         } else {
6945                 bcnrc_val = 0;
6946         }
6947
6948         wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6949         wr32(E1000_RTTBCNRC, bcnrc_val);
6950 }
6951
6952 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6953 {
6954         int actual_link_speed, i;
6955         bool reset_rate = false;
6956
6957         /* VF TX rate limit was not set or not supported */
6958         if ((adapter->vf_rate_link_speed == 0) ||
6959             (adapter->hw.mac.type != e1000_82576))
6960                 return;
6961
6962         actual_link_speed = igb_link_mbps(adapter->link_speed);
6963         if (actual_link_speed != adapter->vf_rate_link_speed) {
6964                 reset_rate = true;
6965                 adapter->vf_rate_link_speed = 0;
6966                 dev_info(&adapter->pdev->dev,
6967                          "Link speed has been changed. VF Transmit "
6968                          "rate is disabled\n");
6969         }
6970
6971         for (i = 0; i < adapter->vfs_allocated_count; i++) {
6972                 if (reset_rate)
6973                         adapter->vf_data[i].tx_rate = 0;
6974
6975                 igb_set_vf_rate_limit(&adapter->hw, i,
6976                                       adapter->vf_data[i].tx_rate,
6977                                       actual_link_speed);
6978         }
6979 }
6980
6981 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6982 {
6983         struct igb_adapter *adapter = netdev_priv(netdev);
6984         struct e1000_hw *hw = &adapter->hw;
6985         int actual_link_speed;
6986
6987         if (hw->mac.type != e1000_82576)
6988                 return -EOPNOTSUPP;
6989
6990         actual_link_speed = igb_link_mbps(adapter->link_speed);
6991         if ((vf >= adapter->vfs_allocated_count) ||
6992             (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6993             (tx_rate < 0) || (tx_rate > actual_link_speed))
6994                 return -EINVAL;
6995
6996         adapter->vf_rate_link_speed = actual_link_speed;
6997         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6998         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6999
7000         return 0;
7001 }

static int igb_ndo_get_vf_config(struct net_device *netdev,
                                 int vf, struct ifla_vf_info *ivi)
{
        struct igb_adapter *adapter = netdev_priv(netdev);

        if (vf >= adapter->vfs_allocated_count)
                return -EINVAL;
        ivi->vf = vf;
        memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
        ivi->tx_rate = adapter->vf_data[vf].tx_rate;
        ivi->vlan = adapter->vf_data[vf].pf_vlan;
        ivi->qos = adapter->vf_data[vf].pf_qos;
        return 0;
}
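
/*
 * Editor's note: the fields filled in above (MAC, tx_rate, vlan, qos) are
 * what rtnetlink reports back to userspace, e.g. in the per-VF lines of
 * "ip link show" output.
 */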

static void igb_vmm_control(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 reg;

        switch (hw->mac.type) {
        case e1000_82575:
        default:
                /* replication is not supported for 82575 */
                return;
        case e1000_82576:
                /* notify HW that the MAC is adding vlan tags */
                reg = rd32(E1000_DTXCTL);
                reg |= E1000_DTXCTL_VLAN_ADDED;
                wr32(E1000_DTXCTL, reg);
                /* fall through - 82576 also needs the vlan stripping below */
        case e1000_82580:
                /* enable replication vlan tag stripping */
                reg = rd32(E1000_RPLOLR);
                reg |= E1000_RPLOLR_STRVLAN;
                wr32(E1000_RPLOLR, reg);
                /* fall through */
        case e1000_i350:
                /* none of the above registers are supported by i350 */
                break;
        }

        if (adapter->vfs_allocated_count) {
                igb_vmdq_set_loopback_pf(hw, true);
                igb_vmdq_set_replication_pf(hw, true);
                igb_vmdq_set_anti_spoofing_pf(hw, true,
                                                adapter->vfs_allocated_count);
        } else {
                igb_vmdq_set_loopback_pf(hw, false);
                igb_vmdq_set_replication_pf(hw, false);
        }
}

static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 dmac_thr;
        u16 hwm;

        if (hw->mac.type > e1000_82580) {
                if (adapter->flags & IGB_FLAG_DMAC) {
                        u32 reg;

                        /* force threshold to 0 */
                        wr32(E1000_DMCTXTH, 0);

                        /*
                         * The DMA Coalescing high water mark must be above
                         * the Rx threshold; set hwm to PBA - 2 * max frame
                         * size.
                         */
                        hwm = pba - (2 * adapter->max_frame_size);
                        reg = rd32(E1000_DMACR);
                        reg &= ~E1000_DMACR_DMACTHR_MASK;
                        dmac_thr = pba - 4;

                        reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
                                & E1000_DMACR_DMACTHR_MASK);

                        /* transition to L0s or L1 if available */
                        reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);

                        /*
                         * watchdog timer of ~1000 usec in 32-usec units:
                         * 1000 >> 5 = 31 intervals * 32 usec ~= 992 usec
                         */
                        reg |= (1000 >> 5);
                        wr32(E1000_DMACR, reg);

                        /*
                         * no lower threshold to disable coalescing
                         * (smart FIFO); UTHRESH = 0
                         */
                        wr32(E1000_DMCRTRH, 0);
                        wr32(E1000_FCRTC, hwm);

                        reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);

                        wr32(E1000_DMCTLX, reg);

                        /*
                         * free space in the Tx packet buffer needed to
                         * wake from DMA coalescing
                         */
                        wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
                             (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
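
                        /*
                         * Illustrative arithmetic (editor's sketch; the
                         * constant values are assumptions about igb.h, e.g.
                         * IGB_MIN_TXPBSIZE ~= 20408 and IGB_TX_BUF_4096 ==
                         * 4096): with a 1522-byte max frame this writes
                         * (20408 - (4096 + 1522)) >> 6 = 14790 >> 6 = 231,
                         * apparently the wake threshold in 64-byte units.
                         */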

                        /*
                         * make the low power state decision controlled
                         * by DMA coalescing
                         */
                        reg = rd32(E1000_PCIEMISC);
                        reg &= ~E1000_PCIEMISC_LX_DECISION;
                        wr32(E1000_PCIEMISC, reg);
                } /* end if IGB_FLAG_DMAC is set */
        } else if (hw->mac.type == e1000_82580) {
                u32 reg = rd32(E1000_PCIEMISC);

                wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
                wr32(E1000_DMACR, 0);
        }
}

/* igb_main.c */