1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #ifdef CONFIG_IGB_DCA
49 #include <linux/dca.h>
50 #endif
51 #include "igb.h"
52
53 #define DRV_VERSION "2.1.0-k2"
54 char igb_driver_name[] = "igb";
55 char igb_driver_version[] = DRV_VERSION;
56 static const char igb_driver_string[] =
57                                 "Intel(R) Gigabit Ethernet Network Driver";
58 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
59
60 static const struct e1000_info *igb_info_tbl[] = {
61         [board_82575] = &e1000_82575_info,
62 };
63
64 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
87         /* required last entry */
88         {0, }
89 };
90
91 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
92
93 void igb_reset(struct igb_adapter *);
94 static int igb_setup_all_tx_resources(struct igb_adapter *);
95 static int igb_setup_all_rx_resources(struct igb_adapter *);
96 static void igb_free_all_tx_resources(struct igb_adapter *);
97 static void igb_free_all_rx_resources(struct igb_adapter *);
98 static void igb_setup_mrqc(struct igb_adapter *);
99 void igb_update_stats(struct igb_adapter *);
100 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
101 static void __devexit igb_remove(struct pci_dev *pdev);
102 static int igb_sw_init(struct igb_adapter *);
103 static int igb_open(struct net_device *);
104 static int igb_close(struct net_device *);
105 static void igb_configure_tx(struct igb_adapter *);
106 static void igb_configure_rx(struct igb_adapter *);
107 static void igb_clean_all_tx_rings(struct igb_adapter *);
108 static void igb_clean_all_rx_rings(struct igb_adapter *);
109 static void igb_clean_tx_ring(struct igb_ring *);
110 static void igb_clean_rx_ring(struct igb_ring *);
111 static void igb_set_rx_mode(struct net_device *);
112 static void igb_update_phy_info(unsigned long);
113 static void igb_watchdog(unsigned long);
114 static void igb_watchdog_task(struct work_struct *);
115 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
116 static struct net_device_stats *igb_get_stats(struct net_device *);
117 static int igb_change_mtu(struct net_device *, int);
118 static int igb_set_mac(struct net_device *, void *);
119 static void igb_set_uta(struct igb_adapter *adapter);
120 static irqreturn_t igb_intr(int irq, void *);
121 static irqreturn_t igb_intr_msi(int irq, void *);
122 static irqreturn_t igb_msix_other(int irq, void *);
123 static irqreturn_t igb_msix_ring(int irq, void *);
124 #ifdef CONFIG_IGB_DCA
125 static void igb_update_dca(struct igb_q_vector *);
126 static void igb_setup_dca(struct igb_adapter *);
127 #endif /* CONFIG_IGB_DCA */
128 static bool igb_clean_tx_irq(struct igb_q_vector *);
129 static int igb_poll(struct napi_struct *, int);
130 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
131 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
132 static void igb_tx_timeout(struct net_device *);
133 static void igb_reset_task(struct work_struct *);
134 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
135 static void igb_vlan_rx_add_vid(struct net_device *, u16);
136 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
137 static void igb_restore_vlan(struct igb_adapter *);
138 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
139 static void igb_ping_all_vfs(struct igb_adapter *);
140 static void igb_msg_task(struct igb_adapter *);
141 static void igb_vmm_control(struct igb_adapter *);
142 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
143 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
144 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
145 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
146                                int vf, u16 vlan, u8 qos);
147 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
148 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
149                                  struct ifla_vf_info *ivi);
150
151 #ifdef CONFIG_PM
152 static int igb_suspend(struct pci_dev *, pm_message_t);
153 static int igb_resume(struct pci_dev *);
154 #endif
155 static void igb_shutdown(struct pci_dev *);
156 #ifdef CONFIG_IGB_DCA
157 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
158 static struct notifier_block dca_notifier = {
159         .notifier_call  = igb_notify_dca,
160         .next           = NULL,
161         .priority       = 0
162 };
163 #endif
164 #ifdef CONFIG_NET_POLL_CONTROLLER
165 /* for netdump / net console */
166 static void igb_netpoll(struct net_device *);
167 #endif
168 #ifdef CONFIG_PCI_IOV
169 static unsigned int max_vfs = 0;
170 module_param(max_vfs, uint, 0);
171 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
172                  "per physical function");
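/*
 * Usage note (illustrative): when the kernel is built with CONFIG_PCI_IOV,
 * virtual functions can be requested at module load time, e.g.
 * "modprobe igb max_vfs=2".  The default of 0 leaves SR-IOV disabled.
 */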
173 #endif /* CONFIG_PCI_IOV */
174
175 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
176                      pci_channel_state_t);
177 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
178 static void igb_io_resume(struct pci_dev *);
179
180 static struct pci_error_handlers igb_err_handler = {
181         .error_detected = igb_io_error_detected,
182         .slot_reset = igb_io_slot_reset,
183         .resume = igb_io_resume,
184 };
185
186
187 static struct pci_driver igb_driver = {
188         .name     = igb_driver_name,
189         .id_table = igb_pci_tbl,
190         .probe    = igb_probe,
191         .remove   = __devexit_p(igb_remove),
192 #ifdef CONFIG_PM
193         /* Power Management Hooks */
194         .suspend  = igb_suspend,
195         .resume   = igb_resume,
196 #endif
197         .shutdown = igb_shutdown,
198         .err_handler = &igb_err_handler
199 };
200
201 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
202 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
203 MODULE_LICENSE("GPL");
204 MODULE_VERSION(DRV_VERSION);
205
206 struct igb_reg_info {
207         u32 ofs;
208         char *name;
209 };
210
211 static const struct igb_reg_info igb_reg_info_tbl[] = {
212
213         /* General Registers */
214         {E1000_CTRL, "CTRL"},
215         {E1000_STATUS, "STATUS"},
216         {E1000_CTRL_EXT, "CTRL_EXT"},
217
218         /* Interrupt Registers */
219         {E1000_ICR, "ICR"},
220
221         /* RX Registers */
222         {E1000_RCTL, "RCTL"},
223         {E1000_RDLEN(0), "RDLEN"},
224         {E1000_RDH(0), "RDH"},
225         {E1000_RDT(0), "RDT"},
226         {E1000_RXDCTL(0), "RXDCTL"},
227         {E1000_RDBAL(0), "RDBAL"},
228         {E1000_RDBAH(0), "RDBAH"},
229
230         /* TX Registers */
231         {E1000_TCTL, "TCTL"},
232         {E1000_TDBAL(0), "TDBAL"},
233         {E1000_TDBAH(0), "TDBAH"},
234         {E1000_TDLEN(0), "TDLEN"},
235         {E1000_TDH(0), "TDH"},
236         {E1000_TDT(0), "TDT"},
237         {E1000_TXDCTL(0), "TXDCTL"},
238         {E1000_TDFH, "TDFH"},
239         {E1000_TDFT, "TDFT"},
240         {E1000_TDFHS, "TDFHS"},
241         {E1000_TDFPC, "TDFPC"},
242
243         /* List Terminator */
244         {}
245 };
246
247 /*
248  * igb_regdump - register printout routine
249  */
250 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
251 {
252         int n = 0;
253         char rname[16];
254         u32 regs[8];
255
256         switch (reginfo->ofs) {
257         case E1000_RDLEN(0):
258                 for (n = 0; n < 4; n++)
259                         regs[n] = rd32(E1000_RDLEN(n));
260                 break;
261         case E1000_RDH(0):
262                 for (n = 0; n < 4; n++)
263                         regs[n] = rd32(E1000_RDH(n));
264                 break;
265         case E1000_RDT(0):
266                 for (n = 0; n < 4; n++)
267                         regs[n] = rd32(E1000_RDT(n));
268                 break;
269         case E1000_RXDCTL(0):
270                 for (n = 0; n < 4; n++)
271                         regs[n] = rd32(E1000_RXDCTL(n));
272                 break;
273         case E1000_RDBAL(0):
274                 for (n = 0; n < 4; n++)
275                         regs[n] = rd32(E1000_RDBAL(n));
276                 break;
277         case E1000_RDBAH(0):
278                 for (n = 0; n < 4; n++)
279                         regs[n] = rd32(E1000_RDBAH(n));
280                 break;
281         case E1000_TDBAL(0):
282                 for (n = 0; n < 4; n++)
283                         regs[n] = rd32(E1000_TDBAL(n));
284                 break;
285         case E1000_TDBAH(0):
286                 for (n = 0; n < 4; n++)
287                         regs[n] = rd32(E1000_TDBAH(n));
288                 break;
289         case E1000_TDLEN(0):
290                 for (n = 0; n < 4; n++)
291                         regs[n] = rd32(E1000_TDLEN(n));
292                 break;
293         case E1000_TDH(0):
294                 for (n = 0; n < 4; n++)
295                         regs[n] = rd32(E1000_TDH(n));
296                 break;
297         case E1000_TDT(0):
298                 for (n = 0; n < 4; n++)
299                         regs[n] = rd32(E1000_TDT(n));
300                 break;
301         case E1000_TXDCTL(0):
302                 for (n = 0; n < 4; n++)
303                         regs[n] = rd32(E1000_TXDCTL(n));
304                 break;
305         default:
306                 printk(KERN_INFO "%-15s %08x\n",
307                         reginfo->name, rd32(reginfo->ofs));
308                 return;
309         }
310
311         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
312         printk(KERN_INFO "%-15s ", rname);
313         for (n = 0; n < 4; n++)
314                 printk(KERN_CONT "%08x ", regs[n]);
315         printk(KERN_CONT "\n");
316 }
317
318 /*
319  * igb_dump - Print registers, tx-rings and rx-rings
320  */
321 static void igb_dump(struct igb_adapter *adapter)
322 {
323         struct net_device *netdev = adapter->netdev;
324         struct e1000_hw *hw = &adapter->hw;
325         struct igb_reg_info *reginfo;
326         int n = 0;
327         struct igb_ring *tx_ring;
328         union e1000_adv_tx_desc *tx_desc;
329         struct my_u0 { u64 a; u64 b; } *u0;
330         struct igb_buffer *buffer_info;
331         struct igb_ring *rx_ring;
332         union e1000_adv_rx_desc *rx_desc;
333         u32 staterr;
334         int i = 0;
335
336         if (!netif_msg_hw(adapter))
337                 return;
338
339         /* Print netdevice Info */
340         if (netdev) {
341                 dev_info(&adapter->pdev->dev, "Net device Info\n");
342                 printk(KERN_INFO "Device Name     state            "
343                         "trans_start      last_rx\n");
344                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
345                 netdev->name,
346                 netdev->state,
347                 netdev->trans_start,
348                 netdev->last_rx);
349         }
350
351         /* Print Registers */
352         dev_info(&adapter->pdev->dev, "Register Dump\n");
353         printk(KERN_INFO " Register Name   Value\n");
354         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
355              reginfo->name; reginfo++) {
356                 igb_regdump(hw, reginfo);
357         }
358
359         /* Print TX Ring Summary */
360         if (!netdev || !netif_running(netdev))
361                 goto exit;
362
363         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
364         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
365                 " leng ntw timestamp\n");
366         for (n = 0; n < adapter->num_tx_queues; n++) {
367                 tx_ring = adapter->tx_ring[n];
368                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
369                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
370                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
371                            (u64)buffer_info->dma,
372                            buffer_info->length,
373                            buffer_info->next_to_watch,
374                            (u64)buffer_info->time_stamp);
375         }
376
377         /* Print TX Rings */
378         if (!netif_msg_tx_done(adapter))
379                 goto rx_ring_summary;
380
381         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
382
383         /* Transmit Descriptor Formats
384          *
385          * Advanced Transmit Descriptor
386          *   +--------------------------------------------------------------+
387          * 0 |         Buffer Address [63:0]                                |
388          *   +--------------------------------------------------------------+
389          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
390          *   +--------------------------------------------------------------+
391          *   63      46 45    40 39 38 36 35 32 31   24             15       0
392          */
393
394         for (n = 0; n < adapter->num_tx_queues; n++) {
395                 tx_ring = adapter->tx_ring[n];
396                 printk(KERN_INFO "------------------------------------\n");
397                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
398                 printk(KERN_INFO "------------------------------------\n");
399                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
400                         "[PlPOCIStDDM Ln] [bi->dma       ] "
401                         "leng  ntw timestamp        bi->skb\n");
402
403                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
404                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
405                         buffer_info = &tx_ring->buffer_info[i];
406                         u0 = (struct my_u0 *)tx_desc;
407                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
408                                 " %04X  %3X %016llX %p", i,
409                                 le64_to_cpu(u0->a),
410                                 le64_to_cpu(u0->b),
411                                 (u64)buffer_info->dma,
412                                 buffer_info->length,
413                                 buffer_info->next_to_watch,
414                                 (u64)buffer_info->time_stamp,
415                                 buffer_info->skb);
416                         if (i == tx_ring->next_to_use &&
417                                 i == tx_ring->next_to_clean)
418                                 printk(KERN_CONT " NTC/U\n");
419                         else if (i == tx_ring->next_to_use)
420                                 printk(KERN_CONT " NTU\n");
421                         else if (i == tx_ring->next_to_clean)
422                                 printk(KERN_CONT " NTC\n");
423                         else
424                                 printk(KERN_CONT "\n");
425
426                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
427                                 print_hex_dump(KERN_INFO, "",
428                                         DUMP_PREFIX_ADDRESS,
429                                         16, 1, phys_to_virt(buffer_info->dma),
430                                         buffer_info->length, true);
431                 }
432         }
433
434         /* Print RX Rings Summary */
435 rx_ring_summary:
436         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
437         printk(KERN_INFO "Queue [NTU] [NTC]\n");
438         for (n = 0; n < adapter->num_rx_queues; n++) {
439                 rx_ring = adapter->rx_ring[n];
440                 printk(KERN_INFO " %5d %5X %5X\n", n,
441                            rx_ring->next_to_use, rx_ring->next_to_clean);
442         }
443
444         /* Print RX Rings */
445         if (!netif_msg_rx_status(adapter))
446                 goto exit;
447
448         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
449
450         /* Advanced Receive Descriptor (Read) Format
451          *    63                                           1        0
452          *    +-----------------------------------------------------+
453          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
454          *    +----------------------------------------------+------+
455          *  8 |       Header Buffer Address [63:1]           |  DD  |
456          *    +-----------------------------------------------------+
457          *
458          *
459          * Advanced Receive Descriptor (Write-Back) Format
460          *
461          *   63       48 47    32 31  30      21 20 17 16   4 3     0
462          *   +------------------------------------------------------+
463          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
464          *   | Checksum   Ident  |   |           |    | Type | Type |
465          *   +------------------------------------------------------+
466          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
467          *   +------------------------------------------------------+
468          *   63       48 47    32 31            20 19               0
469          */
470
471         for (n = 0; n < adapter->num_rx_queues; n++) {
472                 rx_ring = adapter->rx_ring[n];
473                 printk(KERN_INFO "------------------------------------\n");
474                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
475                 printk(KERN_INFO "------------------------------------\n");
476                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
477                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
478                         "<-- Adv Rx Read format\n");
479                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
480                         "[vl er S cks ln] ---------------- [bi->skb] "
481                         "<-- Adv Rx Write-Back format\n");
482
483                 for (i = 0; i < rx_ring->count; i++) {
484                         buffer_info = &rx_ring->buffer_info[i];
485                         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
486                         u0 = (struct my_u0 *)rx_desc;
487                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
488                         if (staterr & E1000_RXD_STAT_DD) {
489                                 /* Descriptor Done */
490                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
491                                         "%016llX ---------------- %p", i,
492                                         le64_to_cpu(u0->a),
493                                         le64_to_cpu(u0->b),
494                                         buffer_info->skb);
495                         } else {
496                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
497                                         "%016llX %016llX %p", i,
498                                         le64_to_cpu(u0->a),
499                                         le64_to_cpu(u0->b),
500                                         (u64)buffer_info->dma,
501                                         buffer_info->skb);
502
503                                 if (netif_msg_pktdata(adapter)) {
504                                         print_hex_dump(KERN_INFO, "",
505                                                 DUMP_PREFIX_ADDRESS,
506                                                 16, 1,
507                                                 phys_to_virt(buffer_info->dma),
508                                                 rx_ring->rx_buffer_len, true);
509                                         if (rx_ring->rx_buffer_len
510                                                 < IGB_RXBUFFER_1024)
511                                                 print_hex_dump(KERN_INFO, "",
512                                                   DUMP_PREFIX_ADDRESS,
513                                                   16, 1,
514                                                   phys_to_virt(
515                                                     buffer_info->page_dma +
516                                                     buffer_info->page_offset),
517                                                   PAGE_SIZE/2, true);
518                                 }
519                         }
520
521                         if (i == rx_ring->next_to_use)
522                                 printk(KERN_CONT " NTU\n");
523                         else if (i == rx_ring->next_to_clean)
524                                 printk(KERN_CONT " NTC\n");
525                         else
526                                 printk(KERN_CONT "\n");
527
528                 }
529         }
530
531 exit:
532         return;
533 }
534
535
536 /**
537  * igb_read_clock - read raw cycle counter (to be used by time counter)
538  */
539 static cycle_t igb_read_clock(const struct cyclecounter *tc)
540 {
541         struct igb_adapter *adapter =
542                 container_of(tc, struct igb_adapter, cycles);
543         struct e1000_hw *hw = &adapter->hw;
544         u64 stamp = 0;
545         int shift = 0;
546
547         /*
548          * The timestamp latches on the lowest register read. For the 82580
549          * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
550          * adjusted TIMINCA, so SYSTIMR just reads as all 0s and can be ignored.
551          */
552         if (hw->mac.type == e1000_82580) {
553                 stamp = rd32(E1000_SYSTIMR) >> 8;
554                 shift = IGB_82580_TSYNC_SHIFT;
555         }
556
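        /*
         * Illustrative note: on non-82580 parts shift stays 0, so the value
         * assembled below is simply SYSTIMH:SYSTIML.  On the 82580 the
         * SYSTIMR bits latched above form the low part and SYSTIML/SYSTIMH
         * are shifted up by IGB_82580_TSYNC_SHIFT.
         */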
557         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
558         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
559         return stamp;
560 }
561
562 /**
563  * igb_get_hw_dev - return device
564  * used by hardware layer to print debugging information
565  **/
566 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
567 {
568         struct igb_adapter *adapter = hw->back;
569         return adapter->netdev;
570 }
571
572 /**
573  * igb_init_module - Driver Registration Routine
574  *
575  * igb_init_module is the first routine called when the driver is
576  * loaded. All it does is register with the PCI subsystem.
577  **/
578 static int __init igb_init_module(void)
579 {
580         int ret;
581         printk(KERN_INFO "%s - version %s\n",
582                igb_driver_string, igb_driver_version);
583
584         printk(KERN_INFO "%s\n", igb_copyright);
585
586 #ifdef CONFIG_IGB_DCA
587         dca_register_notify(&dca_notifier);
588 #endif
589         ret = pci_register_driver(&igb_driver);
590         return ret;
591 }
592
593 module_init(igb_init_module);
594
595 /**
596  * igb_exit_module - Driver Exit Cleanup Routine
597  *
598  * igb_exit_module is called just before the driver is removed
599  * from memory.
600  **/
601 static void __exit igb_exit_module(void)
602 {
603 #ifdef CONFIG_IGB_DCA
604         dca_unregister_notify(&dca_notifier);
605 #endif
606         pci_unregister_driver(&igb_driver);
607 }
608
609 module_exit(igb_exit_module);
610
611 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
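/*
 * Worked example for Q_IDX_82576() (values chosen for illustration): the
 * macro interleaves queue indices so that each VF's queue pair lands on
 * registers n and n + 8:
 *
 *   i             : 0  1  2  3  4  5 ...
 *   Q_IDX_82576(i): 0  8  1  9  2 10 ...
 */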
612 /**
613  * igb_cache_ring_register - Descriptor ring to register mapping
614  * @adapter: board private structure to initialize
615  *
616  * Once we know the feature-set enabled for the device, we'll cache
617  * the register offset the descriptor ring is assigned to.
618  **/
619 static void igb_cache_ring_register(struct igb_adapter *adapter)
620 {
621         int i = 0, j = 0;
622         u32 rbase_offset = adapter->vfs_allocated_count;
623
624         switch (adapter->hw.mac.type) {
625         case e1000_82576:
626                 /* The queues are allocated for virtualization such that VF 0
627                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
628                  * In order to avoid collision we start at the first free queue
629                  * and continue consuming queues in the same sequence
630                  */
631                 if (adapter->vfs_allocated_count) {
632                         for (; i < adapter->rss_queues; i++)
633                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
634                                                                Q_IDX_82576(i);
635                 }
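                /* fall through: tx rings and any remaining rx rings are
                   mapped sequentially below */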
636         case e1000_82575:
637         case e1000_82580:
638         case e1000_i350:
639         default:
640                 for (; i < adapter->num_rx_queues; i++)
641                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
642                 for (; j < adapter->num_tx_queues; j++)
643                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
644                 break;
645         }
646 }
647
648 static void igb_free_queues(struct igb_adapter *adapter)
649 {
650         int i;
651
652         for (i = 0; i < adapter->num_tx_queues; i++) {
653                 kfree(adapter->tx_ring[i]);
654                 adapter->tx_ring[i] = NULL;
655         }
656         for (i = 0; i < adapter->num_rx_queues; i++) {
657                 kfree(adapter->rx_ring[i]);
658                 adapter->rx_ring[i] = NULL;
659         }
660         adapter->num_rx_queues = 0;
661         adapter->num_tx_queues = 0;
662 }
663
664 /**
665  * igb_alloc_queues - Allocate memory for all rings
666  * @adapter: board private structure to initialize
667  *
668  * We allocate one ring per queue at run-time since we don't know the
669  * number of queues at compile-time.
670  **/
671 static int igb_alloc_queues(struct igb_adapter *adapter)
672 {
673         struct igb_ring *ring;
674         int i;
675
676         for (i = 0; i < adapter->num_tx_queues; i++) {
677                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
678                 if (!ring)
679                         goto err;
680                 ring->count = adapter->tx_ring_count;
681                 ring->queue_index = i;
682                 ring->dev = &adapter->pdev->dev;
683                 ring->netdev = adapter->netdev;
684                 /* For 82575, context index must be unique per ring. */
685                 if (adapter->hw.mac.type == e1000_82575)
686                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
687                 adapter->tx_ring[i] = ring;
688         }
689
690         for (i = 0; i < adapter->num_rx_queues; i++) {
691                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
692                 if (!ring)
693                         goto err;
694                 ring->count = adapter->rx_ring_count;
695                 ring->queue_index = i;
696                 ring->dev = &adapter->pdev->dev;
697                 ring->netdev = adapter->netdev;
698                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
699                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
700                 /* set flag indicating ring supports SCTP checksum offload */
701                 if (adapter->hw.mac.type >= e1000_82576)
702                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
703                 adapter->rx_ring[i] = ring;
704         }
705
706         igb_cache_ring_register(adapter);
707
708         return 0;
709
710 err:
711         igb_free_queues(adapter);
712
713         return -ENOMEM;
714 }
715
716 #define IGB_N0_QUEUE -1
717 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
718 {
719         u32 msixbm = 0;
720         struct igb_adapter *adapter = q_vector->adapter;
721         struct e1000_hw *hw = &adapter->hw;
722         u32 ivar, index;
723         int rx_queue = IGB_N0_QUEUE;
724         int tx_queue = IGB_N0_QUEUE;
725
726         if (q_vector->rx_ring)
727                 rx_queue = q_vector->rx_ring->reg_idx;
728         if (q_vector->tx_ring)
729                 tx_queue = q_vector->tx_ring->reg_idx;
730
731         switch (hw->mac.type) {
732         case e1000_82575:
733                 /* The 82575 assigns vectors using a bitmask, which matches the
734                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
735                    or more queues to a vector, we write the appropriate bits
736                    into the MSIXBM register for that vector. */
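                /* Illustrative example: a vector serving rx queue 1 and tx
                   queue 1 ends up with msixbm = E1000_EICR_RX_QUEUE1 |
                   E1000_EICR_TX_QUEUE1, the same bits later used in
                   EIMS/EIMC for this vector. */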
737                 if (rx_queue > IGB_N0_QUEUE)
738                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
739                 if (tx_queue > IGB_N0_QUEUE)
740                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
741                 if (!adapter->msix_entries && msix_vector == 0)
742                         msixbm |= E1000_EIMS_OTHER;
743                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
744                 q_vector->eims_value = msixbm;
745                 break;
746         case e1000_82576:
747                 /* 82576 uses a table-based method for assigning vectors.
748                    Each queue has a single entry in the table to which we write
749                    a vector number along with a "valid" bit.  Sadly, the layout
750                    of the table is somewhat counterintuitive. */
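                /* Illustrative example: with msix_vector = 2 and
                   rx_queue = 9, index is 1 (9 & 0x7) and the value
                   (2 | E1000_IVAR_VALID) lands in the third byte of
                   IVAR0[1], since queue 9 is in the upper half. */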
751                 if (rx_queue > IGB_N0_QUEUE) {
752                         index = (rx_queue & 0x7);
753                         ivar = array_rd32(E1000_IVAR0, index);
754                         if (rx_queue < 8) {
755                                 /* vector goes into low byte of register */
756                                 ivar = ivar & 0xFFFFFF00;
757                                 ivar |= msix_vector | E1000_IVAR_VALID;
758                         } else {
759                                 /* vector goes into third byte of register */
760                                 ivar = ivar & 0xFF00FFFF;
761                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
762                         }
763                         array_wr32(E1000_IVAR0, index, ivar);
764                 }
765                 if (tx_queue > IGB_N0_QUEUE) {
766                         index = (tx_queue & 0x7);
767                         ivar = array_rd32(E1000_IVAR0, index);
768                         if (tx_queue < 8) {
769                                 /* vector goes into second byte of register */
770                                 ivar = ivar & 0xFFFF00FF;
771                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
772                         } else {
773                                 /* vector goes into high byte of register */
774                                 ivar = ivar & 0x00FFFFFF;
775                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
776                         }
777                         array_wr32(E1000_IVAR0, index, ivar);
778                 }
779                 q_vector->eims_value = 1 << msix_vector;
780                 break;
781         case e1000_82580:
782         case e1000_i350:
783                 /* The 82580 uses the same table-based approach as the 82576, but has
784                    fewer entries; as a result, two queues share each table entry. */
785                 if (rx_queue > IGB_N0_QUEUE) {
786                         index = (rx_queue >> 1);
787                         ivar = array_rd32(E1000_IVAR0, index);
788                         if (rx_queue & 0x1) {
789                                 /* vector goes into third byte of register */
790                                 ivar = ivar & 0xFF00FFFF;
791                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
792                         } else {
793                                 /* vector goes into low byte of register */
794                                 ivar = ivar & 0xFFFFFF00;
795                                 ivar |= msix_vector | E1000_IVAR_VALID;
796                         }
797                         array_wr32(E1000_IVAR0, index, ivar);
798                 }
799                 if (tx_queue > IGB_N0_QUEUE) {
800                         index = (tx_queue >> 1);
801                         ivar = array_rd32(E1000_IVAR0, index);
802                         if (tx_queue & 0x1) {
803                                 /* vector goes into high byte of register */
804                                 ivar = ivar & 0x00FFFFFF;
805                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
806                         } else {
807                                 /* vector goes into second byte of register */
808                                 ivar = ivar & 0xFFFF00FF;
809                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
810                         }
811                         array_wr32(E1000_IVAR0, index, ivar);
812                 }
813                 q_vector->eims_value = 1 << msix_vector;
814                 break;
815         default:
816                 BUG();
817                 break;
818         }
819
820         /* add q_vector eims value to global eims_enable_mask */
821         adapter->eims_enable_mask |= q_vector->eims_value;
822
823         /* configure q_vector to set itr on first interrupt */
824         q_vector->set_itr = 1;
825 }
826
827 /**
828  * igb_configure_msix - Configure MSI-X hardware
829  *
830  * igb_configure_msix sets up the hardware to properly
831  * generate MSI-X interrupts.
832  **/
833 static void igb_configure_msix(struct igb_adapter *adapter)
834 {
835         u32 tmp;
836         int i, vector = 0;
837         struct e1000_hw *hw = &adapter->hw;
838
839         adapter->eims_enable_mask = 0;
840
841         /* set vector for other causes, i.e. link changes */
842         switch (hw->mac.type) {
843         case e1000_82575:
844                 tmp = rd32(E1000_CTRL_EXT);
845                 /* enable MSI-X PBA support */
846                 tmp |= E1000_CTRL_EXT_PBA_CLR;
847
848                 /* Auto-Mask interrupts upon ICR read. */
849                 tmp |= E1000_CTRL_EXT_EIAME;
850                 tmp |= E1000_CTRL_EXT_IRCA;
851
852                 wr32(E1000_CTRL_EXT, tmp);
853
854                 /* enable msix_other interrupt */
855                 array_wr32(E1000_MSIXBM(0), vector++,
856                                       E1000_EIMS_OTHER);
857                 adapter->eims_other = E1000_EIMS_OTHER;
858
859                 break;
860
861         case e1000_82576:
862         case e1000_82580:
863         case e1000_i350:
864                 /* Turn on MSI-X capability first, or our settings
865                  * won't stick.  And it will take days to debug. */
866                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
867                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
868                                 E1000_GPIE_NSICR);
869
870                 /* enable msix_other interrupt */
871                 adapter->eims_other = 1 << vector;
872                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
873
874                 wr32(E1000_IVAR_MISC, tmp);
875                 break;
876         default:
877                 /* do nothing, since nothing else supports MSI-X */
878                 break;
879         } /* switch (hw->mac.type) */
880
881         adapter->eims_enable_mask |= adapter->eims_other;
882
883         for (i = 0; i < adapter->num_q_vectors; i++)
884                 igb_assign_vector(adapter->q_vector[i], vector++);
885
886         wrfl();
887 }
888
889 /**
890  * igb_request_msix - Initialize MSI-X interrupts
891  *
892  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
893  * kernel.
894  **/
895 static int igb_request_msix(struct igb_adapter *adapter)
896 {
897         struct net_device *netdev = adapter->netdev;
898         struct e1000_hw *hw = &adapter->hw;
899         int i, err = 0, vector = 0;
900
901         err = request_irq(adapter->msix_entries[vector].vector,
902                           igb_msix_other, 0, netdev->name, adapter);
903         if (err)
904                 goto out;
905         vector++;
906
907         for (i = 0; i < adapter->num_q_vectors; i++) {
908                 struct igb_q_vector *q_vector = adapter->q_vector[i];
909
910                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
911
912                 if (q_vector->rx_ring && q_vector->tx_ring)
913                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
914                                 q_vector->rx_ring->queue_index);
915                 else if (q_vector->tx_ring)
916                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
917                                 q_vector->tx_ring->queue_index);
918                 else if (q_vector->rx_ring)
919                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
920                                 q_vector->rx_ring->queue_index);
921                 else
922                         sprintf(q_vector->name, "%s-unused", netdev->name);
923
924                 err = request_irq(adapter->msix_entries[vector].vector,
925                                   igb_msix_ring, 0, q_vector->name,
926                                   q_vector);
927                 if (err)
928                         goto out;
929                 vector++;
930         }
931
932         igb_configure_msix(adapter);
933         return 0;
934 out:
935         return err;
936 }
937
938 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
939 {
940         if (adapter->msix_entries) {
941                 pci_disable_msix(adapter->pdev);
942                 kfree(adapter->msix_entries);
943                 adapter->msix_entries = NULL;
944         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
945                 pci_disable_msi(adapter->pdev);
946         }
947 }
948
949 /**
950  * igb_free_q_vectors - Free memory allocated for interrupt vectors
951  * @adapter: board private structure to initialize
952  *
953  * This function frees the memory allocated to the q_vectors.  In addition if
954  * NAPI is enabled it will delete any references to the NAPI struct prior
955  * to freeing the q_vector.
956  **/
957 static void igb_free_q_vectors(struct igb_adapter *adapter)
958 {
959         int v_idx;
960
961         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
962                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
963                 adapter->q_vector[v_idx] = NULL;
964                 if (!q_vector)
965                         continue;
966                 netif_napi_del(&q_vector->napi);
967                 kfree(q_vector);
968         }
969         adapter->num_q_vectors = 0;
970 }
971
972 /**
973  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
974  *
975  * This function resets the device so that it has 0 rx queues, tx queues, and
976  * MSI-X interrupts allocated.
977  */
978 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
979 {
980         igb_free_queues(adapter);
981         igb_free_q_vectors(adapter);
982         igb_reset_interrupt_capability(adapter);
983 }
984
985 /**
986  * igb_set_interrupt_capability - set MSI or MSI-X if supported
987  *
988  * Attempt to configure interrupts using the best available
989  * capabilities of the hardware and kernel.
990  **/
991 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
992 {
993         int err;
994         int numvecs, i;
995
996         /* Number of supported queues. */
997         adapter->num_rx_queues = adapter->rss_queues;
998         if (adapter->vfs_allocated_count)
999                 adapter->num_tx_queues = 1;
1000         else
1001                 adapter->num_tx_queues = adapter->rss_queues;
1002
1003         /* start with one vector for every rx queue */
1004         numvecs = adapter->num_rx_queues;
1005
1006         /* if tx handler is separate add 1 for every tx queue */
1007         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1008                 numvecs += adapter->num_tx_queues;
1009
1010         /* store the number of vectors reserved for queues */
1011         adapter->num_q_vectors = numvecs;
1012
1013         /* add 1 vector for link status interrupts */
1014         numvecs++;
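        /* Worked example (values chosen for illustration): with
         * rss_queues = 4, no VFs and queue pairing disabled, this asks for
         * 4 rx + 4 tx + 1 link/other = 9 MSI-X vectors; with queue pairing
         * enabled it would be 4 + 1 = 5.
         */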
1015         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1016                                         GFP_KERNEL);
1017         if (!adapter->msix_entries)
1018                 goto msi_only;
1019
1020         for (i = 0; i < numvecs; i++)
1021                 adapter->msix_entries[i].entry = i;
1022
1023         err = pci_enable_msix(adapter->pdev,
1024                               adapter->msix_entries,
1025                               numvecs);
1026         if (err == 0)
1027                 goto out;
1028
1029         igb_reset_interrupt_capability(adapter);
1030
1031         /* If we can't do MSI-X, try MSI */
1032 msi_only:
1033 #ifdef CONFIG_PCI_IOV
1034         /* disable SR-IOV for non MSI-X configurations */
1035         if (adapter->vf_data) {
1036                 struct e1000_hw *hw = &adapter->hw;
1037                 /* disable iov and allow time for transactions to clear */
1038                 pci_disable_sriov(adapter->pdev);
1039                 msleep(500);
1040
1041                 kfree(adapter->vf_data);
1042                 adapter->vf_data = NULL;
1043                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1044                 msleep(100);
1045                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1046         }
1047 #endif
1048         adapter->vfs_allocated_count = 0;
1049         adapter->rss_queues = 1;
1050         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1051         adapter->num_rx_queues = 1;
1052         adapter->num_tx_queues = 1;
1053         adapter->num_q_vectors = 1;
1054         if (!pci_enable_msi(adapter->pdev))
1055                 adapter->flags |= IGB_FLAG_HAS_MSI;
1056 out:
1057         /* Notify the stack of the (possibly) reduced queue counts. */
1058         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1059         return netif_set_real_num_rx_queues(adapter->netdev,
1060                                             adapter->num_rx_queues);
1061 }
1062
1063 /**
1064  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1065  * @adapter: board private structure to initialize
1066  *
1067  * We allocate one q_vector per queue interrupt.  If allocation fails we
1068  * return -ENOMEM.
1069  **/
1070 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1071 {
1072         struct igb_q_vector *q_vector;
1073         struct e1000_hw *hw = &adapter->hw;
1074         int v_idx;
1075
1076         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1077                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1078                 if (!q_vector)
1079                         goto err_out;
1080                 q_vector->adapter = adapter;
1081                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1082                 q_vector->itr_val = IGB_START_ITR;
1083                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1084                 adapter->q_vector[v_idx] = q_vector;
1085         }
1086         return 0;
1087
1088 err_out:
1089         igb_free_q_vectors(adapter);
1090         return -ENOMEM;
1091 }
1092
1093 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1094                                       int ring_idx, int v_idx)
1095 {
1096         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1097
1098         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1099         q_vector->rx_ring->q_vector = q_vector;
1100         q_vector->itr_val = adapter->rx_itr_setting;
1101         if (q_vector->itr_val && q_vector->itr_val <= 3)
1102                 q_vector->itr_val = IGB_START_ITR;
1103 }
1104
1105 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1106                                       int ring_idx, int v_idx)
1107 {
1108         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1109
1110         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1111         q_vector->tx_ring->q_vector = q_vector;
1112         q_vector->itr_val = adapter->tx_itr_setting;
1113         if (q_vector->itr_val && q_vector->itr_val <= 3)
1114                 q_vector->itr_val = IGB_START_ITR;
1115 }
1116
1117 /**
1118  * igb_map_ring_to_vector - maps allocated queues to vectors
1119  *
1120  * This function maps the recently allocated queues to vectors.
1121  **/
1122 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1123 {
1124         int i;
1125         int v_idx = 0;
1126
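        /* Mapping example (values chosen for illustration): with 4 rx and
         * 4 tx queues and 8 q_vectors, each ring gets its own vector; with
         * only 4 q_vectors (queue pairing), rx ring i and tx ring i share
         * vector i.
         */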
1127         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1128             (adapter->num_q_vectors < adapter->num_tx_queues))
1129                 return -ENOMEM;
1130
1131         if (adapter->num_q_vectors >=
1132             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1133                 for (i = 0; i < adapter->num_rx_queues; i++)
1134                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1135                 for (i = 0; i < adapter->num_tx_queues; i++)
1136                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1137         } else {
1138                 for (i = 0; i < adapter->num_rx_queues; i++) {
1139                         if (i < adapter->num_tx_queues)
1140                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1141                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1142                 }
1143                 for (; i < adapter->num_tx_queues; i++)
1144                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1145         }
1146         return 0;
1147 }
1148
1149 /**
1150  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1151  *
1152  * This function initializes the interrupts and allocates all of the queues.
1153  **/
1154 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1155 {
1156         struct pci_dev *pdev = adapter->pdev;
1157         int err;
1158
1159         err = igb_set_interrupt_capability(adapter);
1160         if (err)
1161                 return err;
1162
1163         err = igb_alloc_q_vectors(adapter);
1164         if (err) {
1165                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1166                 goto err_alloc_q_vectors;
1167         }
1168
1169         err = igb_alloc_queues(adapter);
1170         if (err) {
1171                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1172                 goto err_alloc_queues;
1173         }
1174
1175         err = igb_map_ring_to_vector(adapter);
1176         if (err) {
1177                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1178                 goto err_map_queues;
1179         }
1180
1181
1182         return 0;
1183 err_map_queues:
1184         igb_free_queues(adapter);
1185 err_alloc_queues:
1186         igb_free_q_vectors(adapter);
1187 err_alloc_q_vectors:
1188         igb_reset_interrupt_capability(adapter);
1189         return err;
1190 }
1191
1192 /**
1193  * igb_request_irq - initialize interrupts
1194  *
1195  * Attempts to configure interrupts using the best available
1196  * capabilities of the hardware and kernel.
1197  **/
1198 static int igb_request_irq(struct igb_adapter *adapter)
1199 {
1200         struct net_device *netdev = adapter->netdev;
1201         struct pci_dev *pdev = adapter->pdev;
1202         int err = 0;
1203
1204         if (adapter->msix_entries) {
1205                 err = igb_request_msix(adapter);
1206                 if (!err)
1207                         goto request_done;
1208                 /* fall back to MSI */
1209                 igb_clear_interrupt_scheme(adapter);
1210                 if (!pci_enable_msi(adapter->pdev))
1211                         adapter->flags |= IGB_FLAG_HAS_MSI;
1212                 igb_free_all_tx_resources(adapter);
1213                 igb_free_all_rx_resources(adapter);
1214                 adapter->num_tx_queues = 1;
1215                 adapter->num_rx_queues = 1;
1216                 adapter->num_q_vectors = 1;
1217                 err = igb_alloc_q_vectors(adapter);
1218                 if (err) {
1219                         dev_err(&pdev->dev,
1220                                 "Unable to allocate memory for vectors\n");
1221                         goto request_done;
1222                 }
1223                 err = igb_alloc_queues(adapter);
1224                 if (err) {
1225                         dev_err(&pdev->dev,
1226                                 "Unable to allocate memory for queues\n");
1227                         igb_free_q_vectors(adapter);
1228                         goto request_done;
1229                 }
1230                 igb_setup_all_tx_resources(adapter);
1231                 igb_setup_all_rx_resources(adapter);
1232         } else {
1233                 igb_assign_vector(adapter->q_vector[0], 0);
1234         }
1235
1236         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1237                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1238                                   netdev->name, adapter);
1239                 if (!err)
1240                         goto request_done;
1241
1242                 /* fall back to legacy interrupts */
1243                 igb_reset_interrupt_capability(adapter);
1244                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1245         }
1246
1247         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1248                           netdev->name, adapter);
1249
1250         if (err)
1251                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1252                         err);
1253
1254 request_done:
1255         return err;
1256 }
1257
1258 static void igb_free_irq(struct igb_adapter *adapter)
1259 {
1260         if (adapter->msix_entries) {
1261                 int vector = 0, i;
1262
1263                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1264
1265                 for (i = 0; i < adapter->num_q_vectors; i++) {
1266                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1267                         free_irq(adapter->msix_entries[vector++].vector,
1268                                  q_vector);
1269                 }
1270         } else {
1271                 free_irq(adapter->pdev->irq, adapter);
1272         }
1273 }
1274
1275 /**
1276  * igb_irq_disable - Mask off interrupt generation on the NIC
1277  * @adapter: board private structure
1278  **/
1279 static void igb_irq_disable(struct igb_adapter *adapter)
1280 {
1281         struct e1000_hw *hw = &adapter->hw;
1282
1283         /*
1284          * we need to be careful when disabling interrupts.  The VFs are also
1285          * mapped into these registers and so clearing the bits can cause
1286          * issues on the VF drivers, so we only clear the bits we set
1287          */
1288         if (adapter->msix_entries) {
1289                 u32 regval = rd32(E1000_EIAM);
1290                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1291                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1292                 regval = rd32(E1000_EIAC);
1293                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1294         }
1295
1296         wr32(E1000_IAM, 0);
1297         wr32(E1000_IMC, ~0);
1298         wrfl();
1299         if (adapter->msix_entries) {
1300                 int i;
1301                 for (i = 0; i < adapter->num_q_vectors; i++)
1302                         synchronize_irq(adapter->msix_entries[i].vector);
1303         } else {
1304                 synchronize_irq(adapter->pdev->irq);
1305         }
1306 }
1307
1308 /**
1309  * igb_irq_enable - Enable default interrupt generation settings
1310  * @adapter: board private structure
1311  **/
1312 static void igb_irq_enable(struct igb_adapter *adapter)
1313 {
1314         struct e1000_hw *hw = &adapter->hw;
1315
1316         if (adapter->msix_entries) {
1317                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1318                 u32 regval = rd32(E1000_EIAC);
1319                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1320                 regval = rd32(E1000_EIAM);
1321                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1322                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1323                 if (adapter->vfs_allocated_count) {
1324                         wr32(E1000_MBVFIMR, 0xFF);
1325                         ims |= E1000_IMS_VMMB;
1326                 }
1327                 if (adapter->hw.mac.type == e1000_82580)
1328                         ims |= E1000_IMS_DRSTA;
1329
1330                 wr32(E1000_IMS, ims);
1331         } else {
1332                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1333                                 E1000_IMS_DRSTA);
1334                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1335                                 E1000_IMS_DRSTA);
1336         }
1337 }
1338
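     /**
      * igb_update_mng_vlan - update the VLAN filter for the manageability VLAN
      * @adapter: board private structure
      *
      * Adds the VLAN id from the manageability (DHCP) cookie to the VLAN filter
      * table and removes the previously used id when it is no longer needed.
      **/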
1339 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1340 {
1341         struct e1000_hw *hw = &adapter->hw;
1342         u16 vid = adapter->hw.mng_cookie.vlan_id;
1343         u16 old_vid = adapter->mng_vlan_id;
1344
1345         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1346                 /* add VID to filter table */
1347                 igb_vfta_set(hw, vid, true);
1348                 adapter->mng_vlan_id = vid;
1349         } else {
1350                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1351         }
1352
1353         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1354             (vid != old_vid) &&
1355             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1356                 /* remove VID from filter table */
1357                 igb_vfta_set(hw, old_vid, false);
1358         }
1359 }
1360
1361 /**
1362  * igb_release_hw_control - release control of the h/w to f/w
1363  * @adapter: address of board private structure
1364  *
1365  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1366  * For ASF and Pass Through versions of f/w this means that the
1367  * driver is no longer loaded.
1368  *
1369  **/
1370 static void igb_release_hw_control(struct igb_adapter *adapter)
1371 {
1372         struct e1000_hw *hw = &adapter->hw;
1373         u32 ctrl_ext;
1374
1375         /* Let firmware take over control of h/w */
1376         ctrl_ext = rd32(E1000_CTRL_EXT);
1377         wr32(E1000_CTRL_EXT,
1378                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1379 }
1380
1381 /**
1382  * igb_get_hw_control - get control of the h/w from f/w
1383  * @adapter: address of board private structure
1384  *
1385  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1386  * For ASF and Pass Through versions of f/w this means that
1387  * the driver is loaded.
1388  *
1389  **/
1390 static void igb_get_hw_control(struct igb_adapter *adapter)
1391 {
1392         struct e1000_hw *hw = &adapter->hw;
1393         u32 ctrl_ext;
1394
1395         /* Let firmware know the driver has taken over */
1396         ctrl_ext = rd32(E1000_CTRL_EXT);
1397         wr32(E1000_CTRL_EXT,
1398                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1399 }
1400
1401 /**
1402  * igb_configure - configure the hardware for RX and TX
1403  * @adapter: private board structure
1404  **/
1405 static void igb_configure(struct igb_adapter *adapter)
1406 {
1407         struct net_device *netdev = adapter->netdev;
1408         int i;
1409
1410         igb_get_hw_control(adapter);
1411         igb_set_rx_mode(netdev);
1412
1413         igb_restore_vlan(adapter);
1414
1415         igb_setup_tctl(adapter);
1416         igb_setup_mrqc(adapter);
1417         igb_setup_rctl(adapter);
1418
1419         igb_configure_tx(adapter);
1420         igb_configure_rx(adapter);
1421
1422         igb_rx_fifo_flush_82575(&adapter->hw);
1423
1424         /* call igb_desc_unused which always leaves
1425          * at least 1 descriptor unused to make sure
1426          * next_to_use != next_to_clean */
1427         for (i = 0; i < adapter->num_rx_queues; i++) {
1428                 struct igb_ring *ring = adapter->rx_ring[i];
1429                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1430         }
1431 }
1432
1433 /**
1434  * igb_power_up_link - Power up the phy/serdes link
1435  * @adapter: address of board private structure
1436  **/
1437 void igb_power_up_link(struct igb_adapter *adapter)
1438 {
1439         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1440                 igb_power_up_phy_copper(&adapter->hw);
1441         else
1442                 igb_power_up_serdes_link_82575(&adapter->hw);
1443 }
1444
1445 /**
1446  * igb_power_down_link - Power down the phy/serdes link
1447  * @adapter: address of board private structure
1448  */
1449 static void igb_power_down_link(struct igb_adapter *adapter)
1450 {
1451         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1452                 igb_power_down_phy_copper_82575(&adapter->hw);
1453         else
1454                 igb_shutdown_serdes_link_82575(&adapter->hw);
1455 }
1456
1457 /**
1458  * igb_up - Open the interface and prepare it to handle traffic
1459  * @adapter: board private structure
1460  **/
1461 int igb_up(struct igb_adapter *adapter)
1462 {
1463         struct e1000_hw *hw = &adapter->hw;
1464         int i;
1465
1466         /* hardware has been reset, we need to reload some things */
1467         igb_configure(adapter);
1468
1469         clear_bit(__IGB_DOWN, &adapter->state);
1470
1471         for (i = 0; i < adapter->num_q_vectors; i++) {
1472                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1473                 napi_enable(&q_vector->napi);
1474         }
1475         if (adapter->msix_entries)
1476                 igb_configure_msix(adapter);
1477         else
1478                 igb_assign_vector(adapter->q_vector[0], 0);
1479
1480         /* Clear any pending interrupts. */
1481         rd32(E1000_ICR);
1482         igb_irq_enable(adapter);
1483
1484         /* notify VFs that reset has been completed */
1485         if (adapter->vfs_allocated_count) {
1486                 u32 reg_data = rd32(E1000_CTRL_EXT);
1487                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1488                 wr32(E1000_CTRL_EXT, reg_data);
1489         }
1490
1491         netif_tx_start_all_queues(adapter->netdev);
1492
1493         /* start the watchdog. */
1494         hw->mac.get_link_status = 1;
1495         schedule_work(&adapter->watchdog_task);
1496
1497         return 0;
1498 }
1499
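     /**
      * igb_down - Shut down the interface and stop handling traffic
      * @adapter: board private structure
      **/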
1500 void igb_down(struct igb_adapter *adapter)
1501 {
1502         struct net_device *netdev = adapter->netdev;
1503         struct e1000_hw *hw = &adapter->hw;
1504         u32 tctl, rctl;
1505         int i;
1506
1507         /* signal that we're down so the interrupt handler does not
1508          * reschedule our watchdog timer */
1509         set_bit(__IGB_DOWN, &adapter->state);
1510
1511         /* disable receives in the hardware */
1512         rctl = rd32(E1000_RCTL);
1513         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1514         /* flush and sleep below */
1515
1516         netif_tx_stop_all_queues(netdev);
1517
1518         /* disable transmits in the hardware */
1519         tctl = rd32(E1000_TCTL);
1520         tctl &= ~E1000_TCTL_EN;
1521         wr32(E1000_TCTL, tctl);
1522         /* flush both disables and wait for them to finish */
1523         wrfl();
1524         msleep(10);
1525
1526         for (i = 0; i < adapter->num_q_vectors; i++) {
1527                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1528                 napi_disable(&q_vector->napi);
1529         }
1530
1531         igb_irq_disable(adapter);
1532
1533         del_timer_sync(&adapter->watchdog_timer);
1534         del_timer_sync(&adapter->phy_info_timer);
1535
1536         netif_carrier_off(netdev);
1537
1538         /* record the stats before reset*/
1539         igb_update_stats(adapter);
1540
1541         adapter->link_speed = 0;
1542         adapter->link_duplex = 0;
1543
1544         if (!pci_channel_offline(adapter->pdev))
1545                 igb_reset(adapter);
1546         igb_clean_all_tx_rings(adapter);
1547         igb_clean_all_rx_rings(adapter);
1548 #ifdef CONFIG_IGB_DCA
1549
1550         /* since we reset the hardware, DCA settings were cleared */
1551         igb_setup_dca(adapter);
1552 #endif
1553 }
1554
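     /**
      * igb_reinit_locked - re-initialize the device, serialized against other resets
      * @adapter: board private structure
      *
      * Holds the __IGB_RESETTING bit around igb_down()/igb_up() so only one
      * reset runs at a time.
      **/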
1555 void igb_reinit_locked(struct igb_adapter *adapter)
1556 {
1557         WARN_ON(in_interrupt());
1558         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1559                 msleep(1);
1560         igb_down(adapter);
1561         igb_up(adapter);
1562         clear_bit(__IGB_RESETTING, &adapter->state);
1563 }
1564
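     /**
      * igb_reset - reset the hardware and restore a known good configuration
      * @adapter: board private structure
      *
      * Repartitions the packet buffer, reprograms flow control and resets the
      * MAC, then restores the settings the driver depends on.
      **/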
1565 void igb_reset(struct igb_adapter *adapter)
1566 {
1567         struct pci_dev *pdev = adapter->pdev;
1568         struct e1000_hw *hw = &adapter->hw;
1569         struct e1000_mac_info *mac = &hw->mac;
1570         struct e1000_fc_info *fc = &hw->fc;
1571         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1572         u16 hwm;
1573
1574         /* Repartition PBA for greater than 9k MTU.
1575          * CTRL.RST is required for this to take effect.
1576          */
1577         switch (mac->type) {
1578         case e1000_i350:
1579         case e1000_82580:
1580                 pba = rd32(E1000_RXPBS);
1581                 pba = igb_rxpbs_adjust_82580(pba);
1582                 break;
1583         case e1000_82576:
1584                 pba = rd32(E1000_RXPBS);
1585                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1586                 break;
1587         case e1000_82575:
1588         default:
1589                 pba = E1000_PBA_34K;
1590                 break;
1591         }
1592
1593         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1594             (mac->type < e1000_82576)) {
1595                 /* adjust PBA for jumbo frames */
1596                 wr32(E1000_PBA, pba);
1597
1598                 /* To maintain wire speed transmits, the Tx FIFO should be
1599                  * large enough to accommodate two full transmit packets,
1600                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1601                  * the Rx FIFO should be large enough to accommodate at least
1602                  * one full receive packet and is similarly rounded up and
1603                  * expressed in KB. */
1604                 pba = rd32(E1000_PBA);
1605                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1606                 tx_space = pba >> 16;
1607                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1608                 pba &= 0xffff;
1609                 /* the Tx FIFO also stores 16 bytes of information about the Tx,
1610                  * but don't include the Ethernet FCS because hardware appends it */
1611                 min_tx_space = (adapter->max_frame_size +
1612                                 sizeof(union e1000_adv_tx_desc) -
1613                                 ETH_FCS_LEN) * 2;
1614                 min_tx_space = ALIGN(min_tx_space, 1024);
1615                 min_tx_space >>= 10;
1616                 /* software strips receive CRC, so leave room for it */
1617                 min_rx_space = adapter->max_frame_size;
1618                 min_rx_space = ALIGN(min_rx_space, 1024);
1619                 min_rx_space >>= 10;
1620
1621                 /* If current Tx allocation is less than the min Tx FIFO size,
1622                  * and the min Tx FIFO size is less than the current Rx FIFO
1623                  * allocation, take space away from current Rx allocation */
1624                 if (tx_space < min_tx_space &&
1625                     ((min_tx_space - tx_space) < pba)) {
1626                         pba = pba - (min_tx_space - tx_space);
1627
1628                         /* if short on rx space, rx wins and must trump tx
1629                          * adjustment */
1630                         if (pba < min_rx_space)
1631                                 pba = min_rx_space;
1632                 }
1633                 wr32(E1000_PBA, pba);
1634         }
1635
1636         /* flow control settings */
1637         /* The high water mark must be low enough to fit one full frame
1638          * (or the size used for early receive) above it in the Rx FIFO.
1639          * Set it to the lower of:
1640          * - 90% of the Rx FIFO size, or
1641          * - the full Rx FIFO size minus one full frame */
1642         hwm = min(((pba << 10) * 9 / 10),
1643                         ((pba << 10) - 2 * adapter->max_frame_size));
1644
1645         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1646         fc->low_water = fc->high_water - 16;
1647         fc->pause_time = 0xFFFF;
1648         fc->send_xon = 1;
1649         fc->current_mode = fc->requested_mode;
1650
1651         /* disable receive for all VFs and wait one second */
1652         if (adapter->vfs_allocated_count) {
1653                 int i;
1654                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1655                         adapter->vf_data[i].flags = 0;
1656
1657                 /* ping all the active vfs to let them know we are going down */
1658                 igb_ping_all_vfs(adapter);
1659
1660                 /* disable transmits and receives */
1661                 wr32(E1000_VFRE, 0);
1662                 wr32(E1000_VFTE, 0);
1663         }
1664
1665         /* Allow time for pending master requests to run */
1666         hw->mac.ops.reset_hw(hw);
1667         wr32(E1000_WUC, 0);
1668
1669         if (hw->mac.ops.init_hw(hw))
1670                 dev_err(&pdev->dev, "Hardware Error\n");
1671
1672         if (hw->mac.type == e1000_82580) {
1673                 u32 reg = rd32(E1000_PCIEMISC);
1674                 wr32(E1000_PCIEMISC,
1675                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1676         }
1677         if (!netif_running(adapter->netdev))
1678                 igb_power_down_link(adapter);
1679
1680         igb_update_mng_vlan(adapter);
1681
1682         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1683         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1684
1685         igb_get_phy_info(hw);
1686 }
1687
1688 static const struct net_device_ops igb_netdev_ops = {
1689         .ndo_open               = igb_open,
1690         .ndo_stop               = igb_close,
1691         .ndo_start_xmit         = igb_xmit_frame_adv,
1692         .ndo_get_stats          = igb_get_stats,
1693         .ndo_set_rx_mode        = igb_set_rx_mode,
1694         .ndo_set_multicast_list = igb_set_rx_mode,
1695         .ndo_set_mac_address    = igb_set_mac,
1696         .ndo_change_mtu         = igb_change_mtu,
1697         .ndo_do_ioctl           = igb_ioctl,
1698         .ndo_tx_timeout         = igb_tx_timeout,
1699         .ndo_validate_addr      = eth_validate_addr,
1700         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1701         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1702         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1703         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1704         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1705         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1706         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1707 #ifdef CONFIG_NET_POLL_CONTROLLER
1708         .ndo_poll_controller    = igb_netpoll,
1709 #endif
1710 };
1711
1712 /**
1713  * igb_probe - Device Initialization Routine
1714  * @pdev: PCI device information struct
1715  * @ent: entry in igb_pci_tbl
1716  *
1717  * Returns 0 on success, negative on failure
1718  *
1719  * igb_probe initializes an adapter identified by a pci_dev structure.
1720  * The OS initialization, configuring of the adapter private structure,
1721  * and a hardware reset occur.
1722  **/
1723 static int __devinit igb_probe(struct pci_dev *pdev,
1724                                const struct pci_device_id *ent)
1725 {
1726         struct net_device *netdev;
1727         struct igb_adapter *adapter;
1728         struct e1000_hw *hw;
1729         u16 eeprom_data = 0;
1730         static int global_quad_port_a; /* global quad port a indication */
1731         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1732         unsigned long mmio_start, mmio_len;
1733         int err, pci_using_dac;
1734         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1735         u32 part_num;
1736
1737         /* Catch broken hardware that put the wrong VF device ID in
1738          * the PCIe SR-IOV capability.
1739          */
1740         if (pdev->is_virtfn) {
1741                 WARN(1, "%s (%hx:%hx) should not be a VF!\n",
1742                      pci_name(pdev), pdev->vendor, pdev->device);
1743                 return -EINVAL;
1744         }
1745
1746         err = pci_enable_device_mem(pdev);
1747         if (err)
1748                 return err;
1749
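             /* try 64-bit DMA first and fall back to 32-bit if that fails */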
1750         pci_using_dac = 0;
1751         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1752         if (!err) {
1753                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1754                 if (!err)
1755                         pci_using_dac = 1;
1756         } else {
1757                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1758                 if (err) {
1759                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1760                         if (err) {
1761                                 dev_err(&pdev->dev, "No usable DMA "
1762                                         "configuration, aborting\n");
1763                                 goto err_dma;
1764                         }
1765                 }
1766         }
1767
1768         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1769                                            IORESOURCE_MEM),
1770                                            igb_driver_name);
1771         if (err)
1772                 goto err_pci_reg;
1773
1774         pci_enable_pcie_error_reporting(pdev);
1775
1776         pci_set_master(pdev);
1777         pci_save_state(pdev);
1778
1779         err = -ENOMEM;
1780         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1781                                    IGB_ABS_MAX_TX_QUEUES);
1782         if (!netdev)
1783                 goto err_alloc_etherdev;
1784
1785         SET_NETDEV_DEV(netdev, &pdev->dev);
1786
1787         pci_set_drvdata(pdev, netdev);
1788         adapter = netdev_priv(netdev);
1789         adapter->netdev = netdev;
1790         adapter->pdev = pdev;
1791         hw = &adapter->hw;
1792         hw->back = adapter;
1793         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1794
1795         mmio_start = pci_resource_start(pdev, 0);
1796         mmio_len = pci_resource_len(pdev, 0);
1797
1798         err = -EIO;
1799         hw->hw_addr = ioremap(mmio_start, mmio_len);
1800         if (!hw->hw_addr)
1801                 goto err_ioremap;
1802
1803         netdev->netdev_ops = &igb_netdev_ops;
1804         igb_set_ethtool_ops(netdev);
1805         netdev->watchdog_timeo = 5 * HZ;
1806
1807         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1808
1809         netdev->mem_start = mmio_start;
1810         netdev->mem_end = mmio_start + mmio_len;
1811
1812         /* PCI config space info */
1813         hw->vendor_id = pdev->vendor;
1814         hw->device_id = pdev->device;
1815         hw->revision_id = pdev->revision;
1816         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1817         hw->subsystem_device_id = pdev->subsystem_device;
1818
1819         /* Copy the default MAC, PHY and NVM function pointers */
1820         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1821         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1822         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1823         /* Initialize skew-specific constants */
1824         err = ei->get_invariants(hw);
1825         if (err)
1826                 goto err_sw_init;
1827
1828         /* setup the private structure */
1829         err = igb_sw_init(adapter);
1830         if (err)
1831                 goto err_sw_init;
1832
1833         igb_get_bus_info_pcie(hw);
1834
1835         hw->phy.autoneg_wait_to_complete = false;
1836
1837         /* Copper options */
1838         if (hw->phy.media_type == e1000_media_type_copper) {
1839                 hw->phy.mdix = AUTO_ALL_MODES;
1840                 hw->phy.disable_polarity_correction = false;
1841                 hw->phy.ms_type = e1000_ms_hw_default;
1842         }
1843
1844         if (igb_check_reset_block(hw))
1845                 dev_info(&pdev->dev,
1846                         "PHY reset is blocked due to SOL/IDER session.\n");
1847
1848         netdev->features = NETIF_F_SG |
1849                            NETIF_F_IP_CSUM |
1850                            NETIF_F_HW_VLAN_TX |
1851                            NETIF_F_HW_VLAN_RX |
1852                            NETIF_F_HW_VLAN_FILTER;
1853
1854         netdev->features |= NETIF_F_IPV6_CSUM;
1855         netdev->features |= NETIF_F_TSO;
1856         netdev->features |= NETIF_F_TSO6;
1857         netdev->features |= NETIF_F_GRO;
1858
1859         netdev->vlan_features |= NETIF_F_TSO;
1860         netdev->vlan_features |= NETIF_F_TSO6;
1861         netdev->vlan_features |= NETIF_F_IP_CSUM;
1862         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1863         netdev->vlan_features |= NETIF_F_SG;
1864
1865         if (pci_using_dac) {
1866                 netdev->features |= NETIF_F_HIGHDMA;
1867                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1868         }
1869
1870         if (hw->mac.type >= e1000_82576)
1871                 netdev->features |= NETIF_F_SCTP_CSUM;
1872
1873         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1874
1875         /* before reading the NVM, reset the controller to put the device in a
1876          * known good starting state */
1877         hw->mac.ops.reset_hw(hw);
1878
1879         /* make sure the NVM is good */
1880         if (igb_validate_nvm_checksum(hw) < 0) {
1881                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1882                 err = -EIO;
1883                 goto err_eeprom;
1884         }
1885
1886         /* copy the MAC address out of the NVM */
1887         if (hw->mac.ops.read_mac_addr(hw))
1888                 dev_err(&pdev->dev, "NVM Read Error\n");
1889
1890         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1891         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1892
1893         if (!is_valid_ether_addr(netdev->perm_addr)) {
1894                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1895                 err = -EIO;
1896                 goto err_eeprom;
1897         }
1898
1899         setup_timer(&adapter->watchdog_timer, igb_watchdog,
1900                     (unsigned long) adapter);
1901         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1902                     (unsigned long) adapter);
1903
1904         INIT_WORK(&adapter->reset_task, igb_reset_task);
1905         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1906
1907         /* Initialize link properties that are user-changeable */
1908         adapter->fc_autoneg = true;
1909         hw->mac.autoneg = true;
1910         hw->phy.autoneg_advertised = 0x2f;
1911
1912         hw->fc.requested_mode = e1000_fc_default;
1913         hw->fc.current_mode = e1000_fc_default;
1914
1915         igb_validate_mdi_setting(hw);
1916
1917         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1918          * enable the ACPI Magic Packet filter
1919          */
1920
1921         if (hw->bus.func == 0)
1922                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1923         else if (hw->mac.type == e1000_82580)
1924                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1925                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1926                                  &eeprom_data);
1927         else if (hw->bus.func == 1)
1928                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1929
1930         if (eeprom_data & eeprom_apme_mask)
1931                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1932
1933         /* now that we have the eeprom settings, apply the special cases where
1934          * the eeprom may be wrong or the board simply won't support wake on
1935          * lan on a particular port */
1936         switch (pdev->device) {
1937         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1938                 adapter->eeprom_wol = 0;
1939                 break;
1940         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1941         case E1000_DEV_ID_82576_FIBER:
1942         case E1000_DEV_ID_82576_SERDES:
1943                 /* Wake events only supported on port A for dual fiber
1944                  * regardless of eeprom setting */
1945                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1946                         adapter->eeprom_wol = 0;
1947                 break;
1948         case E1000_DEV_ID_82576_QUAD_COPPER:
1949         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
1950                 /* if quad port adapter, disable WoL on all but port A */
1951                 if (global_quad_port_a != 0)
1952                         adapter->eeprom_wol = 0;
1953                 else
1954                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1955                 /* Reset for multiple quad port adapters */
1956                 if (++global_quad_port_a == 4)
1957                         global_quad_port_a = 0;
1958                 break;
1959         }
1960
1961         /* initialize the wol settings based on the eeprom settings */
1962         adapter->wol = adapter->eeprom_wol;
1963         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1964
1965         /* reset the hardware with the new settings */
1966         igb_reset(adapter);
1967
1968         /* let the f/w know that the h/w is now under the control of the
1969          * driver. */
1970         igb_get_hw_control(adapter);
1971
1972         strcpy(netdev->name, "eth%d");
1973         err = register_netdev(netdev);
1974         if (err)
1975                 goto err_register;
1976
1977         /* carrier off reporting is important to ethtool even BEFORE open */
1978         netif_carrier_off(netdev);
1979
1980 #ifdef CONFIG_IGB_DCA
1981         if (dca_add_requester(&pdev->dev) == 0) {
1982                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1983                 dev_info(&pdev->dev, "DCA enabled\n");
1984                 igb_setup_dca(adapter);
1985         }
1986
1987 #endif
1988         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1989         /* print bus type/speed/width info */
1990         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1991                  netdev->name,
1992                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1993                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
1994                                                             "unknown"),
1995                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1996                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1997                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1998                    "unknown"),
1999                  netdev->dev_addr);
2000
2001         igb_read_part_num(hw, &part_num);
2002         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
2003                 (part_num >> 8), (part_num & 0xff));
2004
2005         dev_info(&pdev->dev,
2006                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2007                 adapter->msix_entries ? "MSI-X" :
2008                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2009                 adapter->num_rx_queues, adapter->num_tx_queues);
2010
2011         return 0;
2012
2013 err_register:
2014         igb_release_hw_control(adapter);
2015 err_eeprom:
2016         if (!igb_check_reset_block(hw))
2017                 igb_reset_phy(hw);
2018
2019         if (hw->flash_address)
2020                 iounmap(hw->flash_address);
2021 err_sw_init:
2022         igb_clear_interrupt_scheme(adapter);
2023         iounmap(hw->hw_addr);
2024 err_ioremap:
2025         free_netdev(netdev);
2026 err_alloc_etherdev:
2027         pci_release_selected_regions(pdev,
2028                                      pci_select_bars(pdev, IORESOURCE_MEM));
2029 err_pci_reg:
2030 err_dma:
2031         pci_disable_device(pdev);
2032         return err;
2033 }
2034
2035 /**
2036  * igb_remove - Device Removal Routine
2037  * @pdev: PCI device information struct
2038  *
2039  * igb_remove is called by the PCI subsystem to alert the driver
2040  * that it should release a PCI device.  The could be caused by a
2041  * that it should release a PCI device.  This could be caused by a
2042  * memory.
2043  **/
2044 static void __devexit igb_remove(struct pci_dev *pdev)
2045 {
2046         struct net_device *netdev = pci_get_drvdata(pdev);
2047         struct igb_adapter *adapter = netdev_priv(netdev);
2048         struct e1000_hw *hw = &adapter->hw;
2049
2050         /* flush_scheduled_work() may reschedule our watchdog task, so
2051          * explicitly prevent the watchdog task from being rescheduled  */
2052         set_bit(__IGB_DOWN, &adapter->state);
2053         del_timer_sync(&adapter->watchdog_timer);
2054         del_timer_sync(&adapter->phy_info_timer);
2055
2056         flush_scheduled_work();
2057
2058 #ifdef CONFIG_IGB_DCA
2059         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2060                 dev_info(&pdev->dev, "DCA disabled\n");
2061                 dca_remove_requester(&pdev->dev);
2062                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2063                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2064         }
2065 #endif
2066
2067         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2068          * would have already happened in close and is redundant. */
2069         igb_release_hw_control(adapter);
2070
2071         unregister_netdev(netdev);
2072
2073         igb_clear_interrupt_scheme(adapter);
2074
2075 #ifdef CONFIG_PCI_IOV
2076         /* reclaim resources allocated to VFs */
2077         if (adapter->vf_data) {
2078                 /* disable iov and allow time for transactions to clear */
2079                 pci_disable_sriov(pdev);
2080                 msleep(500);
2081
2082                 kfree(adapter->vf_data);
2083                 adapter->vf_data = NULL;
2084                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2085                 msleep(100);
2086                 dev_info(&pdev->dev, "IOV Disabled\n");
2087         }
2088 #endif
2089
2090         iounmap(hw->hw_addr);
2091         if (hw->flash_address)
2092                 iounmap(hw->flash_address);
2093         pci_release_selected_regions(pdev,
2094                                      pci_select_bars(pdev, IORESOURCE_MEM));
2095
2096         free_netdev(netdev);
2097
2098         pci_disable_pcie_error_reporting(pdev);
2099
2100         pci_disable_device(pdev);
2101 }
2102
2103 /**
2104  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2105  * @adapter: board private structure to initialize
2106  *
2107  * This function initializes the vf specific data storage and then attempts to
2108  * allocate the VFs.  The reason for ordering it this way is because it is much
2109  * allocate the VFs.  The reason for this ordering is that it is much
2110  * more expensive, time wise, to disable SR-IOV than it is to allocate and free
2111  **/
2112 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2113 {
2114 #ifdef CONFIG_PCI_IOV
2115         struct pci_dev *pdev = adapter->pdev;
2116
2117         if (adapter->vfs_allocated_count) {
2118                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2119                                            sizeof(struct vf_data_storage),
2120                                            GFP_KERNEL);
2121                 /* if allocation failed then we do not support SR-IOV */
2122                 if (!adapter->vf_data) {
2123                         adapter->vfs_allocated_count = 0;
2124                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2125                                 "Data Storage\n");
2126                 }
2127         }
2128
2129         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2130                 kfree(adapter->vf_data);
2131                 adapter->vf_data = NULL;
2132 #endif /* CONFIG_PCI_IOV */
2133                 adapter->vfs_allocated_count = 0;
2134 #ifdef CONFIG_PCI_IOV
2135         } else {
2136                 unsigned char mac_addr[ETH_ALEN];
2137                 int i;
2138                 dev_info(&pdev->dev, "%d vfs allocated\n",
2139                          adapter->vfs_allocated_count);
2140                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2141                         random_ether_addr(mac_addr);
2142                         igb_set_vf_mac(adapter, i, mac_addr);
2143                 }
2144         }
2145 #endif /* CONFIG_PCI_IOV */
2146 }
2147
2148
2149 /**
2150  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2151  * @adapter: board private structure to initialize
2152  *
2153  * igb_init_hw_timer initializes the function pointer and values for the hw
2154  * timer found in hardware.
2155  **/
2156 static void igb_init_hw_timer(struct igb_adapter *adapter)
2157 {
2158         struct e1000_hw *hw = &adapter->hw;
2159
2160         switch (hw->mac.type) {
2161         case e1000_i350:
2162         case e1000_82580:
2163                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2164                 adapter->cycles.read = igb_read_clock;
2165                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2166                 adapter->cycles.mult = 1;
2167                 /*
2168                  * The 82580 timesync updates the system timer every 8ns by 8ns
2169                  * and the value cannot be shifted.  Instead we need to shift
2170                  * the registers to generate a 64bit timer value.  As a result
2171                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2172                  * 24 in order to generate a larger value for synchronization.
2173                  */
2174                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2175                 /* disable system timer temporarily by setting bit 31 */
2176                 wr32(E1000_TSAUXC, 0x80000000);
2177                 wrfl();
2178
2179                 /* Set registers so that rollover occurs soon to test this. */
2180                 wr32(E1000_SYSTIMR, 0x00000000);
2181                 wr32(E1000_SYSTIML, 0x80000000);
2182                 wr32(E1000_SYSTIMH, 0x000000FF);
2183                 wrfl();
2184
2185                 /* enable system timer by clearing bit 31 */
2186                 wr32(E1000_TSAUXC, 0x0);
2187                 wrfl();
2188
2189                 timecounter_init(&adapter->clock,
2190                                  &adapter->cycles,
2191                                  ktime_to_ns(ktime_get_real()));
2192                 /*
2193                  * Synchronize our NIC clock against system wall clock. NIC
2194                  * time stamp reading requires ~3us per sample, each sample
2195                  * was pretty stable even under load => only require 10
2196                  * samples for each offset comparison.
2197                  */
2198                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2199                 adapter->compare.source = &adapter->clock;
2200                 adapter->compare.target = ktime_get_real;
2201                 adapter->compare.num_samples = 10;
2202                 timecompare_update(&adapter->compare, 0);
2203                 break;
2204         case e1000_82576:
2205                 /*
2206                  * Initialize hardware timer: we keep it running just in case
2207                  * some program needs it later on.
2208                  */
2209                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2210                 adapter->cycles.read = igb_read_clock;
2211                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2212                 adapter->cycles.mult = 1;
2213                 /*
2214                  * Scale the NIC clock cycle by a large factor so that
2215                  * relatively small clock corrections can be added or
2216                  * subtracted at each clock tick. The drawbacks of a large
2217                  * factor are a) that the clock register overflows more quickly
2218                  * (not such a big deal) and b) that the increment per tick has
2219                  * to fit into 24 bits.  As a result we need to use a shift of
2220                  * 19 so we can fit a value of 16 into the TIMINCA register.
2221                  */
2222                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2223                 wr32(E1000_TIMINCA,
2224                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2225                                 (16 << IGB_82576_TSYNC_SHIFT));
2226
2227                 /* Set registers so that rollover occurs soon to test this. */
2228                 wr32(E1000_SYSTIML, 0x00000000);
2229                 wr32(E1000_SYSTIMH, 0xFF800000);
2230                 wrfl();
2231
2232                 timecounter_init(&adapter->clock,
2233                                  &adapter->cycles,
2234                                  ktime_to_ns(ktime_get_real()));
2235                 /*
2236                  * Synchronize our NIC clock against system wall clock. NIC
2237                  * time stamp reading requires ~3us per sample, each sample
2238                  * was pretty stable even under load => only require 10
2239                  * samples for each offset comparison.
2240                  */
2241                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2242                 adapter->compare.source = &adapter->clock;
2243                 adapter->compare.target = ktime_get_real;
2244                 adapter->compare.num_samples = 10;
2245                 timecompare_update(&adapter->compare, 0);
2246                 break;
2247         case e1000_82575:
2248                 /* 82575 does not support timesync */
2249         default:
2250                 break;
2251         }
2252
2253 }
2254
2255 /**
2256  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2257  * @adapter: board private structure to initialize
2258  *
2259  * igb_sw_init initializes the Adapter private data structure.
2260  * Fields are initialized based on PCI device information and
2261  * OS network device settings (MTU size).
2262  **/
2263 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2264 {
2265         struct e1000_hw *hw = &adapter->hw;
2266         struct net_device *netdev = adapter->netdev;
2267         struct pci_dev *pdev = adapter->pdev;
2268
2269         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2270
2271         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2272         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2273         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2274         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2275
2276         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2277         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2278
2279 #ifdef CONFIG_PCI_IOV
2280         if (hw->mac.type == e1000_82576)
2281                 adapter->vfs_allocated_count = (max_vfs > 7) ? 7 : max_vfs;
2282
2283 #endif /* CONFIG_PCI_IOV */
2284         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2285
2286         /*
2287          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2288          * then we should combine the queues into a queue pair in order to
2289          * conserve interrupts due to limited supply
2290          */
2291         if ((adapter->rss_queues > 4) ||
2292             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2293                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2294
2295         /* This call may decrease the number of queues */
2296         if (igb_init_interrupt_scheme(adapter)) {
2297                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2298                 return -ENOMEM;
2299         }
2300
2301         igb_init_hw_timer(adapter);
2302         igb_probe_vfs(adapter);
2303
2304         /* Explicitly disable IRQ since the NIC can be in any state. */
2305         igb_irq_disable(adapter);
2306
2307         set_bit(__IGB_DOWN, &adapter->state);
2308         return 0;
2309 }
2310
2311 /**
2312  * igb_open - Called when a network interface is made active
2313  * @netdev: network interface device structure
2314  *
2315  * Returns 0 on success, negative value on failure
2316  *
2317  * The open entry point is called when a network interface is made
2318  * active by the system (IFF_UP).  At this point all resources needed
2319  * for transmit and receive operations are allocated, the interrupt
2320  * handler is registered with the OS, the watchdog timer is started,
2321  * and the stack is notified that the interface is ready.
2322  **/
2323 static int igb_open(struct net_device *netdev)
2324 {
2325         struct igb_adapter *adapter = netdev_priv(netdev);
2326         struct e1000_hw *hw = &adapter->hw;
2327         int err;
2328         int i;
2329
2330         /* disallow open during test */
2331         if (test_bit(__IGB_TESTING, &adapter->state))
2332                 return -EBUSY;
2333
2334         netif_carrier_off(netdev);
2335
2336         /* allocate transmit descriptors */
2337         err = igb_setup_all_tx_resources(adapter);
2338         if (err)
2339                 goto err_setup_tx;
2340
2341         /* allocate receive descriptors */
2342         err = igb_setup_all_rx_resources(adapter);
2343         if (err)
2344                 goto err_setup_rx;
2345
2346         igb_power_up_link(adapter);
2347
2348         /* before we allocate an interrupt, we must be ready to handle it.
2349          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2350          * as soon as we call request_irq, so we have to set up our
2351          * clean_rx handler before we do so.  */
2352         igb_configure(adapter);
2353
2354         err = igb_request_irq(adapter);
2355         if (err)
2356                 goto err_req_irq;
2357
2358         /* From here on the code is the same as igb_up() */
2359         clear_bit(__IGB_DOWN, &adapter->state);
2360
2361         for (i = 0; i < adapter->num_q_vectors; i++) {
2362                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2363                 napi_enable(&q_vector->napi);
2364         }
2365
2366         /* Clear any pending interrupts. */
2367         rd32(E1000_ICR);
2368
2369         igb_irq_enable(adapter);
2370
2371         /* notify VFs that reset has been completed */
2372         if (adapter->vfs_allocated_count) {
2373                 u32 reg_data = rd32(E1000_CTRL_EXT);
2374                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2375                 wr32(E1000_CTRL_EXT, reg_data);
2376         }
2377
2378         netif_tx_start_all_queues(netdev);
2379
2380         /* start the watchdog. */
2381         hw->mac.get_link_status = 1;
2382         schedule_work(&adapter->watchdog_task);
2383
2384         return 0;
2385
2386 err_req_irq:
2387         igb_release_hw_control(adapter);
2388         igb_power_down_link(adapter);
2389         igb_free_all_rx_resources(adapter);
2390 err_setup_rx:
2391         igb_free_all_tx_resources(adapter);
2392 err_setup_tx:
2393         igb_reset(adapter);
2394
2395         return err;
2396 }
2397
2398 /**
2399  * igb_close - Disables a network interface
2400  * @netdev: network interface device structure
2401  *
2402  * Returns 0, this is not allowed to fail
2403  *
2404  * The close entry point is called when an interface is de-activated
2405  * by the OS.  The hardware is still under the driver's control, but
2406  * needs to be disabled.  A global MAC reset is issued to stop the
2407  * hardware, and all transmit and receive resources are freed.
2408  **/
2409 static int igb_close(struct net_device *netdev)
2410 {
2411         struct igb_adapter *adapter = netdev_priv(netdev);
2412
2413         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2414         igb_down(adapter);
2415
2416         igb_free_irq(adapter);
2417
2418         igb_free_all_tx_resources(adapter);
2419         igb_free_all_rx_resources(adapter);
2420
2421         return 0;
2422 }
2423
2424 /**
2425  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2426  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2427  *
2428  * Return 0 on success, negative on failure
2429  **/
2430 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2431 {
2432         struct device *dev = tx_ring->dev;
2433         int size;
2434
2435         size = sizeof(struct igb_buffer) * tx_ring->count;
2436         tx_ring->buffer_info = vmalloc(size);
2437         if (!tx_ring->buffer_info)
2438                 goto err;
2439         memset(tx_ring->buffer_info, 0, size);
2440
2441         /* round up to nearest 4K */
2442         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2443         tx_ring->size = ALIGN(tx_ring->size, 4096);
2444
2445         tx_ring->desc = dma_alloc_coherent(dev,
2446                                            tx_ring->size,
2447                                            &tx_ring->dma,
2448                                            GFP_KERNEL);
2449
2450         if (!tx_ring->desc)
2451                 goto err;
2452
2453         tx_ring->next_to_use = 0;
2454         tx_ring->next_to_clean = 0;
2455         return 0;
2456
2457 err:
2458         vfree(tx_ring->buffer_info);
2459         dev_err(dev,
2460                 "Unable to allocate memory for the transmit descriptor ring\n");
2461         return -ENOMEM;
2462 }
2463
2464 /**
2465  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2466  *                                (Descriptors) for all queues
2467  * @adapter: board private structure
2468  *
2469  * Return 0 on success, negative on failure
2470  **/
2471 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2472 {
2473         struct pci_dev *pdev = adapter->pdev;
2474         int i, err = 0;
2475
2476         for (i = 0; i < adapter->num_tx_queues; i++) {
2477                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2478                 if (err) {
2479                         dev_err(&pdev->dev,
2480                                 "Allocation for Tx Queue %u failed\n", i);
2481                         for (i--; i >= 0; i--)
2482                                 igb_free_tx_resources(adapter->tx_ring[i]);
2483                         break;
2484                 }
2485         }
2486
2487         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2488                 int r_idx = i % adapter->num_tx_queues;
2489                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2490         }
2491         return err;
2492 }
2493
2494 /**
2495  * igb_setup_tctl - configure the transmit control registers
2496  * @adapter: Board private structure
2497  **/
2498 void igb_setup_tctl(struct igb_adapter *adapter)
2499 {
2500         struct e1000_hw *hw = &adapter->hw;
2501         u32 tctl;
2502
2503         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2504         wr32(E1000_TXDCTL(0), 0);
2505
2506         /* Program the Transmit Control Register */
2507         tctl = rd32(E1000_TCTL);
2508         tctl &= ~E1000_TCTL_CT;
2509         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2510                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2511
2512         igb_config_collision_dist(hw);
2513
2514         /* Enable transmits */
2515         tctl |= E1000_TCTL_EN;
2516
2517         wr32(E1000_TCTL, tctl);
2518 }
2519
2520 /**
2521  * igb_configure_tx_ring - Configure transmit ring after Reset
2522  * @adapter: board private structure
2523  * @ring: tx ring to configure
2524  *
2525  * Configure a transmit ring after a reset.
2526  **/
2527 void igb_configure_tx_ring(struct igb_adapter *adapter,
2528                            struct igb_ring *ring)
2529 {
2530         struct e1000_hw *hw = &adapter->hw;
2531         u32 txdctl;
2532         u64 tdba = ring->dma;
2533         int reg_idx = ring->reg_idx;
2534
2535         /* disable the queue */
2536         txdctl = rd32(E1000_TXDCTL(reg_idx));
2537         wr32(E1000_TXDCTL(reg_idx),
2538                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2539         wrfl();
2540         mdelay(10);
2541
2542         wr32(E1000_TDLEN(reg_idx),
2543                         ring->count * sizeof(union e1000_adv_tx_desc));
2544         wr32(E1000_TDBAL(reg_idx),
2545                         tdba & 0x00000000ffffffffULL);
2546         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2547
2548         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2549         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2550         writel(0, ring->head);
2551         writel(0, ring->tail);
2552
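             /* fill in the TXDCTL prefetch, host and write-back threshold fields */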
2553         txdctl |= IGB_TX_PTHRESH;
2554         txdctl |= IGB_TX_HTHRESH << 8;
2555         txdctl |= IGB_TX_WTHRESH << 16;
2556
2557         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2558         wr32(E1000_TXDCTL(reg_idx), txdctl);
2559 }
2560
2561 /**
2562  * igb_configure_tx - Configure transmit Unit after Reset
2563  * @adapter: board private structure
2564  *
2565  * Configure the Tx unit of the MAC after a reset.
2566  **/
2567 static void igb_configure_tx(struct igb_adapter *adapter)
2568 {
2569         int i;
2570
2571         for (i = 0; i < adapter->num_tx_queues; i++)
2572                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2573 }
2574
2575 /**
2576  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2577  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2578  *
2579  * Returns 0 on success, negative on failure
2580  **/
2581 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2582 {
2583         struct device *dev = rx_ring->dev;
2584         int size, desc_len;
2585
2586         size = sizeof(struct igb_buffer) * rx_ring->count;
2587         rx_ring->buffer_info = vmalloc(size);
2588         if (!rx_ring->buffer_info)
2589                 goto err;
2590         memset(rx_ring->buffer_info, 0, size);
2591
2592         desc_len = sizeof(union e1000_adv_rx_desc);
2593
2594         /* Round up to nearest 4K */
2595         rx_ring->size = rx_ring->count * desc_len;
2596         rx_ring->size = ALIGN(rx_ring->size, 4096);
2597
2598         rx_ring->desc = dma_alloc_coherent(dev,
2599                                            rx_ring->size,
2600                                            &rx_ring->dma,
2601                                            GFP_KERNEL);
2602
2603         if (!rx_ring->desc)
2604                 goto err;
2605
2606         rx_ring->next_to_clean = 0;
2607         rx_ring->next_to_use = 0;
2608
2609         return 0;
2610
2611 err:
2612         vfree(rx_ring->buffer_info);
2613         rx_ring->buffer_info = NULL;
2614         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2615                 " ring\n");
2616         return -ENOMEM;
2617 }
2618
2619 /**
2620  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2621  *                                (Descriptors) for all queues
2622  * @adapter: board private structure
2623  *
2624  * Return 0 on success, negative on failure
2625  **/
2626 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2627 {
2628         struct pci_dev *pdev = adapter->pdev;
2629         int i, err = 0;
2630
2631         for (i = 0; i < adapter->num_rx_queues; i++) {
2632                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2633                 if (err) {
2634                         dev_err(&pdev->dev,
2635                                 "Allocation for Rx Queue %u failed\n", i);
2636                         for (i--; i >= 0; i--)
2637                                 igb_free_rx_resources(adapter->rx_ring[i]);
2638                         break;
2639                 }
2640         }
2641
2642         return err;
2643 }
2644
2645 /**
2646  * igb_setup_mrqc - configure the multiple receive queue control registers
2647  * @adapter: Board private structure
2648  **/
2649 static void igb_setup_mrqc(struct igb_adapter *adapter)
2650 {
2651         struct e1000_hw *hw = &adapter->hw;
2652         u32 mrqc, rxcsum;
2653         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2654         union e1000_reta {
2655                 u32 dword;
2656                 u8  bytes[4];
2657         } reta;
2658         static const u8 rsshash[40] = {
2659                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2660                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2661                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2662                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2663
2664         /* Fill out hash function seeds */
2665         for (j = 0; j < 10; j++) {
2666                 u32 rsskey = rsshash[(j * 4)];
2667                 rsskey |= rsshash[(j * 4) + 1] << 8;
2668                 rsskey |= rsshash[(j * 4) + 2] << 16;
2669                 rsskey |= rsshash[(j * 4) + 3] << 24;
2670                 array_wr32(E1000_RSSRK(0), j, rsskey);
2671         }
2672
2673         num_rx_queues = adapter->rss_queues;
2674
2675         if (adapter->vfs_allocated_count) {
2676                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2677                 switch (hw->mac.type) {
2678                 case e1000_i350:
2679                 case e1000_82580:
2680                         num_rx_queues = 1;
2681                         shift = 0;
2682                         break;
2683                 case e1000_82576:
2684                         shift = 3;
2685                         num_rx_queues = 2;
2686                         break;
2687                 case e1000_82575:
2688                         shift = 2;
2689                         shift2 = 6;
2690                 default:
2691                         break;
2692                 }
2693         } else {
2694                 if (hw->mac.type == e1000_82575)
2695                         shift = 6;
2696         }
2697
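             /* program the 128-entry RSS redirection table, four entries per register */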
2698         for (j = 0; j < (32 * 4); j++) {
2699                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2700                 if (shift2)
2701                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2702                 if ((j & 3) == 3)
2703                         wr32(E1000_RETA(j >> 2), reta.dword);
2704         }
2705
2706         /*
2707          * Disable raw packet checksumming so that RSS hash is placed in
2708          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2709          * offloads as they are enabled by default
2710          */
2711         rxcsum = rd32(E1000_RXCSUM);
2712         rxcsum |= E1000_RXCSUM_PCSD;
2713
2714         if (adapter->hw.mac.type >= e1000_82576)
2715                 /* Enable Receive Checksum Offload for SCTP */
2716                 rxcsum |= E1000_RXCSUM_CRCOFL;
2717
2718         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2719         wr32(E1000_RXCSUM, rxcsum);
2720
2721         /* If VMDq is enabled then we set the appropriate mode for that, else
2722          * we default to RSS so that an RSS hash is calculated per packet even
2723          * if we are only using one queue */
2724         if (adapter->vfs_allocated_count) {
2725                 if (hw->mac.type > e1000_82575) {
2726                         /* Set the default pool for the PF's first queue */
2727                         u32 vtctl = rd32(E1000_VT_CTL);
2728                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2729                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2730                         vtctl |= adapter->vfs_allocated_count <<
2731                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2732                         wr32(E1000_VT_CTL, vtctl);
2733                 }
2734                 if (adapter->rss_queues > 1)
2735                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2736                 else
2737                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2738         } else {
2739                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2740         }
2741         igb_vmm_control(adapter);
2742
2743         /*
2744          * Generate RSS hash based on TCP port numbers and/or
2745          * IPv4/v6 src and dst addresses since UDP cannot be
2746          * hashed reliably due to IP fragmentation
2747          */
2748         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2749                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2750                 E1000_MRQC_RSS_FIELD_IPV6 |
2751                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2752                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2753
2754         wr32(E1000_MRQC, mrqc);
2755 }
2756
2757 /**
2758  * igb_setup_rctl - configure the receive control registers
2759  * @adapter: board private structure
2760  **/
2761 void igb_setup_rctl(struct igb_adapter *adapter)
2762 {
2763         struct e1000_hw *hw = &adapter->hw;
2764         u32 rctl;
2765
2766         rctl = rd32(E1000_RCTL);
2767
2768         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2769         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2770
2771         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2772                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2773
2774         /*
2775          * enable stripping of CRC. It's unlikely this will break BMC
2776          * redirection as it did with e1000. Newer features require
2777          * that the HW strips the CRC.
2778          */
2779         rctl |= E1000_RCTL_SECRC;
2780
2781         /* disable store bad packets and clear size bits. */
2782         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2783
2784         /* enable LPE to prevent packets larger than max_frame_size */
2785         rctl |= E1000_RCTL_LPE;
2786
2787         /* disable queue 0 to prevent tail write w/o re-config */
2788         wr32(E1000_RXDCTL(0), 0);
2789
2790         /* Attention!!!  For SR-IOV PF driver operations you must enable
2791          * queue drop for all VF and PF queues to prevent head-of-line blocking
2792          * if an untrusted VF does not provide descriptors to hardware.
2793          */
2794         if (adapter->vfs_allocated_count) {
2795                 /* set all queue drop enable bits */
2796                 wr32(E1000_QDE, ALL_QUEUES);
2797         }
2798
2799         wr32(E1000_RCTL, rctl);
2800 }
2801
2802 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2803                                    int vfn)
2804 {
2805         struct e1000_hw *hw = &adapter->hw;
2806         u32 vmolr;
2807
2808         /* if this is a VF (not the PF) and VLANs are enabled for it,
2809          * increase the size to make room for the vlan tag */
2810         if (vfn < adapter->vfs_allocated_count &&
2811             adapter->vf_data[vfn].vlans_enabled)
2812                 size += VLAN_TAG_SIZE;
2813
2814         vmolr = rd32(E1000_VMOLR(vfn));
2815         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2816         vmolr |= size | E1000_VMOLR_LPE;
2817         wr32(E1000_VMOLR(vfn), vmolr);
2818
2819         return 0;
2820 }
2821
2822 /**
2823  * igb_rlpml_set - set maximum receive packet size
2824  * @adapter: board private structure
2825  *
2826  * Configure maximum receivable packet size.
2827  **/
2828 static void igb_rlpml_set(struct igb_adapter *adapter)
2829 {
2830         u32 max_frame_size = adapter->max_frame_size;
2831         struct e1000_hw *hw = &adapter->hw;
2832         u16 pf_id = adapter->vfs_allocated_count;
2833
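             /*
              * As a rough example, with the default 1500 byte MTU
              * max_frame_size is typically 1518 bytes (payload plus Ethernet
              * header and FCS); registering a VLAN group grows it by
              * VLAN_TAG_SIZE below, and jumbo MTUs scale it accordingly.
              */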
2834         if (adapter->vlgrp)
2835                 max_frame_size += VLAN_TAG_SIZE;
2836
2837         /* if vfs are enabled we set RLPML to the largest possible request
2838          * size and set the VMOLR RLPML to the size we need */
2839         if (pf_id) {
2840                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2841                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2842         }
2843
2844         wr32(E1000_RLPML, max_frame_size);
2845 }
2846
2847 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2848                                  int vfn, bool aupe)
2849 {
2850         struct e1000_hw *hw = &adapter->hw;
2851         u32 vmolr;
2852
2853         /*
2854          * This register exists only on 82576 and newer, so on older parts
2855          * we should exit and do nothing
2856          */
2857         if (hw->mac.type < e1000_82576)
2858                 return;
2859
2860         vmolr = rd32(E1000_VMOLR(vfn));
2861         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2862         if (aupe)
2863                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2864         else
2865                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2866
2867         /* clear all bits that might not be set */
2868         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2869
2870         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2871                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2872         /*
2873          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2874          * multicast packets
2875          */
2876         if (vfn <= adapter->vfs_allocated_count)
2877                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2878
2879         wr32(E1000_VMOLR(vfn), vmolr);
2880 }
2881
2882 /**
2883  * igb_configure_rx_ring - Configure a receive ring after Reset
2884  * @adapter: board private structure
2885  * @ring: receive ring to be configured
2886  *
2887  * Configure the Rx unit of the MAC after a reset.
2888  **/
2889 void igb_configure_rx_ring(struct igb_adapter *adapter,
2890                            struct igb_ring *ring)
2891 {
2892         struct e1000_hw *hw = &adapter->hw;
2893         u64 rdba = ring->dma;
2894         int reg_idx = ring->reg_idx;
2895         u32 srrctl, rxdctl;
2896
2897         /* disable the queue */
2898         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2899         wr32(E1000_RXDCTL(reg_idx),
2900                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2901
2902         /* Set DMA base address registers */
2903         wr32(E1000_RDBAL(reg_idx),
2904              rdba & 0x00000000ffffffffULL);
2905         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2906         wr32(E1000_RDLEN(reg_idx),
2907                        ring->count * sizeof(union e1000_adv_rx_desc));
2908
2909         /* initialize head and tail */
2910         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2911         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2912         writel(0, ring->head);
2913         writel(0, ring->tail);
2914
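             /*
              * Two receive descriptor layouts are used below: small buffers
              * (under IGB_RXBUFFER_1024) use the header-split format, with the
              * header landing in a buffer of rx_buffer_len bytes and the
              * payload in a half-page buffer (capped at 16KB), while larger
              * buffers use the simple one-buffer advanced format.
              */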
2915         /* set descriptor configuration */
2916         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2917                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2918                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2919 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2920                 srrctl |= IGB_RXBUFFER_16384 >>
2921                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2922 #else
2923                 srrctl |= (PAGE_SIZE / 2) >>
2924                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2925 #endif
2926                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2927         } else {
2928                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2929                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2930                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2931         }
2932         if (hw->mac.type == e1000_82580)
2933                 srrctl |= E1000_SRRCTL_TIMESTAMP;
2934         /* Only set Drop Enable if we are supporting multiple queues */
2935         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2936                 srrctl |= E1000_SRRCTL_DROP_EN;
2937
2938         wr32(E1000_SRRCTL(reg_idx), srrctl);
2939
2940         /* set filtering for VMDQ pools */
2941         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2942
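             /*
              * RXDCTL carries the descriptor prefetch, host and write-back
              * thresholds at bit offsets 0, 8 and 16; the 0xFFF00000 mask
              * below clears those fields while leaving the queue enable and
              * other high bits intact, and the IGB_RX_*THRESH values then
              * fill them back in.
              */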
2943         /* enable receive descriptor fetching */
2944         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2945         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2946         rxdctl &= 0xFFF00000;
2947         rxdctl |= IGB_RX_PTHRESH;
2948         rxdctl |= IGB_RX_HTHRESH << 8;
2949         rxdctl |= IGB_RX_WTHRESH << 16;
2950         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2951 }
2952
2953 /**
2954  * igb_configure_rx - Configure receive Unit after Reset
2955  * @adapter: board private structure
2956  *
2957  * Configure the Rx unit of the MAC after a reset.
2958  **/
2959 static void igb_configure_rx(struct igb_adapter *adapter)
2960 {
2961         int i;
2962
2963         /* set UTA to appropriate mode */
2964         igb_set_uta(adapter);
2965
2966         /* set the correct pool for the PF default MAC address in entry 0 */
2967         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2968                          adapter->vfs_allocated_count);
2969
2970         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2971          * the Base and Length of the Rx Descriptor Ring */
2972         for (i = 0; i < adapter->num_rx_queues; i++)
2973                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2974 }
2975
2976 /**
2977  * igb_free_tx_resources - Free Tx Resources per Queue
2978  * @tx_ring: Tx descriptor ring for a specific queue
2979  *
2980  * Free all transmit software resources
2981  **/
2982 void igb_free_tx_resources(struct igb_ring *tx_ring)
2983 {
2984         igb_clean_tx_ring(tx_ring);
2985
2986         vfree(tx_ring->buffer_info);
2987         tx_ring->buffer_info = NULL;
2988
2989         /* if not set, then don't free */
2990         if (!tx_ring->desc)
2991                 return;
2992
2993         dma_free_coherent(tx_ring->dev, tx_ring->size,
2994                           tx_ring->desc, tx_ring->dma);
2995
2996         tx_ring->desc = NULL;
2997 }
2998
2999 /**
3000  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3001  * @adapter: board private structure
3002  *
3003  * Free all transmit software resources
3004  **/
3005 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3006 {
3007         int i;
3008
3009         for (i = 0; i < adapter->num_tx_queues; i++)
3010                 igb_free_tx_resources(adapter->tx_ring[i]);
3011 }
3012
3013 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3014                                     struct igb_buffer *buffer_info)
3015 {
3016         if (buffer_info->dma) {
3017                 if (buffer_info->mapped_as_page)
3018                         dma_unmap_page(tx_ring->dev,
3019                                         buffer_info->dma,
3020                                         buffer_info->length,
3021                                         DMA_TO_DEVICE);
3022                 else
3023                         dma_unmap_single(tx_ring->dev,
3024                                         buffer_info->dma,
3025                                         buffer_info->length,
3026                                         DMA_TO_DEVICE);
3027                 buffer_info->dma = 0;
3028         }
3029         if (buffer_info->skb) {
3030                 dev_kfree_skb_any(buffer_info->skb);
3031                 buffer_info->skb = NULL;
3032         }
3033         buffer_info->time_stamp = 0;
3034         buffer_info->length = 0;
3035         buffer_info->next_to_watch = 0;
3036         buffer_info->mapped_as_page = false;
3037 }
3038
3039 /**
3040  * igb_clean_tx_ring - Free Tx Buffers
3041  * @tx_ring: ring to be cleaned
3042  **/
3043 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3044 {
3045         struct igb_buffer *buffer_info;
3046         unsigned long size;
3047         unsigned int i;
3048
3049         if (!tx_ring->buffer_info)
3050                 return;
3051         /* Free all the Tx ring sk_buffs */
3052
3053         for (i = 0; i < tx_ring->count; i++) {
3054                 buffer_info = &tx_ring->buffer_info[i];
3055                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3056         }
3057
3058         size = sizeof(struct igb_buffer) * tx_ring->count;
3059         memset(tx_ring->buffer_info, 0, size);
3060
3061         /* Zero out the descriptor ring */
3062         memset(tx_ring->desc, 0, tx_ring->size);
3063
3064         tx_ring->next_to_use = 0;
3065         tx_ring->next_to_clean = 0;
3066 }
3067
3068 /**
3069  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3070  * @adapter: board private structure
3071  **/
3072 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3073 {
3074         int i;
3075
3076         for (i = 0; i < adapter->num_tx_queues; i++)
3077                 igb_clean_tx_ring(adapter->tx_ring[i]);
3078 }
3079
3080 /**
3081  * igb_free_rx_resources - Free Rx Resources
3082  * @rx_ring: ring to clean the resources from
3083  *
3084  * Free all receive software resources
3085  **/
3086 void igb_free_rx_resources(struct igb_ring *rx_ring)
3087 {
3088         igb_clean_rx_ring(rx_ring);
3089
3090         vfree(rx_ring->buffer_info);
3091         rx_ring->buffer_info = NULL;
3092
3093         /* if not set, then don't free */
3094         if (!rx_ring->desc)
3095                 return;
3096
3097         dma_free_coherent(rx_ring->dev, rx_ring->size,
3098                           rx_ring->desc, rx_ring->dma);
3099
3100         rx_ring->desc = NULL;
3101 }
3102
3103 /**
3104  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3105  * @adapter: board private structure
3106  *
3107  * Free all receive software resources
3108  **/
3109 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3110 {
3111         int i;
3112
3113         for (i = 0; i < adapter->num_rx_queues; i++)
3114                 igb_free_rx_resources(adapter->rx_ring[i]);
3115 }
3116
3117 /**
3118  * igb_clean_rx_ring - Free Rx Buffers per Queue
3119  * @rx_ring: ring to free buffers from
3120  **/
3121 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3122 {
3123         struct igb_buffer *buffer_info;
3124         unsigned long size;
3125         unsigned int i;
3126
3127         if (!rx_ring->buffer_info)
3128                 return;
3129
3130         /* Free all the Rx ring sk_buffs */
3131         for (i = 0; i < rx_ring->count; i++) {
3132                 buffer_info = &rx_ring->buffer_info[i];
3133                 if (buffer_info->dma) {
3134                         dma_unmap_single(rx_ring->dev,
3135                                          buffer_info->dma,
3136                                          rx_ring->rx_buffer_len,
3137                                          DMA_FROM_DEVICE);
3138                         buffer_info->dma = 0;
3139                 }
3140
3141                 if (buffer_info->skb) {
3142                         dev_kfree_skb(buffer_info->skb);
3143                         buffer_info->skb = NULL;
3144                 }
3145                 if (buffer_info->page_dma) {
3146                         dma_unmap_page(rx_ring->dev,
3147                                        buffer_info->page_dma,
3148                                        PAGE_SIZE / 2,
3149                                        DMA_FROM_DEVICE);
3150                         buffer_info->page_dma = 0;
3151                 }
3152                 if (buffer_info->page) {
3153                         put_page(buffer_info->page);
3154                         buffer_info->page = NULL;
3155                         buffer_info->page_offset = 0;
3156                 }
3157         }
3158
3159         size = sizeof(struct igb_buffer) * rx_ring->count;
3160         memset(rx_ring->buffer_info, 0, size);
3161
3162         /* Zero out the descriptor ring */
3163         memset(rx_ring->desc, 0, rx_ring->size);
3164
3165         rx_ring->next_to_clean = 0;
3166         rx_ring->next_to_use = 0;
3167 }
3168
3169 /**
3170  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3171  * @adapter: board private structure
3172  **/
3173 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3174 {
3175         int i;
3176
3177         for (i = 0; i < adapter->num_rx_queues; i++)
3178                 igb_clean_rx_ring(adapter->rx_ring[i]);
3179 }
3180
3181 /**
3182  * igb_set_mac - Change the Ethernet Address of the NIC
3183  * @netdev: network interface device structure
3184  * @p: pointer to an address structure
3185  *
3186  * Returns 0 on success, negative on failure
3187  **/
3188 static int igb_set_mac(struct net_device *netdev, void *p)
3189 {
3190         struct igb_adapter *adapter = netdev_priv(netdev);
3191         struct e1000_hw *hw = &adapter->hw;
3192         struct sockaddr *addr = p;
3193
3194         if (!is_valid_ether_addr(addr->sa_data))
3195                 return -EADDRNOTAVAIL;
3196
3197         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3198         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3199
3200         /* set the correct pool for the new PF MAC address in entry 0 */
3201         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3202                          adapter->vfs_allocated_count);
3203
3204         return 0;
3205 }
3206
3207 /**
3208  * igb_write_mc_addr_list - write multicast addresses to MTA
3209  * @netdev: network interface device structure
3210  *
3211  * Writes multicast address list to the MTA hash table.
3212  * Returns: -ENOMEM on failure
3213  *          0 on no addresses written
3214  *          X on writing X addresses to MTA
3215  **/
3216 static int igb_write_mc_addr_list(struct net_device *netdev)
3217 {
3218         struct igb_adapter *adapter = netdev_priv(netdev);
3219         struct e1000_hw *hw = &adapter->hw;
3220         struct netdev_hw_addr *ha;
3221         u8  *mta_list;
3222         int i;
3223
3224         if (netdev_mc_empty(netdev)) {
3225                 /* nothing to program, so clear mc list */
3226                 igb_update_mc_addr_list(hw, NULL, 0);
3227                 igb_restore_vf_multicasts(adapter);
3228                 return 0;
3229         }
3230
3231         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3232         if (!mta_list)
3233                 return -ENOMEM;
3234
3235         /* The shared function expects a packed array of only addresses. */
3236         i = 0;
3237         netdev_for_each_mc_addr(ha, netdev)
3238                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3239
3240         igb_update_mc_addr_list(hw, mta_list, i);
3241         kfree(mta_list);
3242
3243         return netdev_mc_count(netdev);
3244 }
3245
3246 /**
3247  * igb_write_uc_addr_list - write unicast addresses to RAR table
3248  * @netdev: network interface device structure
3249  *
3250  * Writes unicast address list to the RAR table.
3251  * Returns: -ENOMEM on failure/insufficient address space
3252  *          0 on no addresses written
3253  *          X on writing X addresses to the RAR table
3254  **/
3255 static int igb_write_uc_addr_list(struct net_device *netdev)
3256 {
3257         struct igb_adapter *adapter = netdev_priv(netdev);
3258         struct e1000_hw *hw = &adapter->hw;
3259         unsigned int vfn = adapter->vfs_allocated_count;
3260         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3261         int count = 0;
3262
3263         /* return ENOMEM indicating insufficient memory for addresses */
3264         if (netdev_uc_count(netdev) > rar_entries)
3265                 return -ENOMEM;
3266
3267         if (!netdev_uc_empty(netdev) && rar_entries) {
3268                 struct netdev_hw_addr *ha;
3269
3270                 netdev_for_each_uc_addr(ha, netdev) {
3271                         if (!rar_entries)
3272                                 break;
3273                         igb_rar_set_qsel(adapter, ha->addr,
3274                                          rar_entries--,
3275                                          vfn);
3276                         count++;
3277                 }
3278         }
3279         /* clear the unused RAR entries in reverse order to avoid write combining */
3280         for (; rar_entries > 0 ; rar_entries--) {
3281                 wr32(E1000_RAH(rar_entries), 0);
3282                 wr32(E1000_RAL(rar_entries), 0);
3283         }
3284         wrfl();
3285
3286         return count;
3287 }
3288
3289 /**
3290  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3291  * @netdev: network interface device structure
3292  *
3293  * The set_rx_mode entry point is called whenever the unicast or multicast
3294  * address lists or the network interface flags are updated.  This routine is
3295  * responsible for configuring the hardware for proper unicast, multicast,
3296  * promiscuous mode, and all-multi behavior.
3297  **/
3298 static void igb_set_rx_mode(struct net_device *netdev)
3299 {
3300         struct igb_adapter *adapter = netdev_priv(netdev);
3301         struct e1000_hw *hw = &adapter->hw;
3302         unsigned int vfn = adapter->vfs_allocated_count;
3303         u32 rctl, vmolr = 0;
3304         int count;
3305
3306         /* Check for Promiscuous and All Multicast modes */
3307         rctl = rd32(E1000_RCTL);
3308
3309         /* clear the affected bits */
3310         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3311
3312         if (netdev->flags & IFF_PROMISC) {
3313                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3314                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3315         } else {
3316                 if (netdev->flags & IFF_ALLMULTI) {
3317                         rctl |= E1000_RCTL_MPE;
3318                         vmolr |= E1000_VMOLR_MPME;
3319                 } else {
3320                         /*
3321                          * Write addresses to the MTA; if the attempt fails
3322                          * then just turn on multicast promiscuous mode so
3323                          * that we can at least receive multicast traffic
3324                          */
3325                         count = igb_write_mc_addr_list(netdev);
3326                         if (count < 0) {
3327                                 rctl |= E1000_RCTL_MPE;
3328                                 vmolr |= E1000_VMOLR_MPME;
3329                         } else if (count) {
3330                                 vmolr |= E1000_VMOLR_ROMPE;
3331                         }
3332                 }
3333                 /*
3334                  * Write addresses to available RAR registers; if there is not
3335                  * sufficient space to store all the addresses then enable
3336                  * unicast promiscuous mode
3337                  */
3338                 count = igb_write_uc_addr_list(netdev);
3339                 if (count < 0) {
3340                         rctl |= E1000_RCTL_UPE;
3341                         vmolr |= E1000_VMOLR_ROPE;
3342                 }
3343                 rctl |= E1000_RCTL_VFE;
3344         }
3345         wr32(E1000_RCTL, rctl);
3346
3347         /*
3348          * In order to support SR-IOV and eventually VMDq it is necessary to set
3349          * the VMOLR to enable the appropriate modes.  Without this workaround
3350          * we will have issues with VLAN tag stripping not being done for frames
3351          * that are only arriving because we are the default pool
3352          */
3353         if (hw->mac.type < e1000_82576)
3354                 return;
3355
3356         vmolr |= rd32(E1000_VMOLR(vfn)) &
3357                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3358         wr32(E1000_VMOLR(vfn), vmolr);
3359         igb_restore_vf_multicasts(adapter);
3360 }
3361
3362 /* Need to wait a few seconds after link up to get diagnostic information from
3363  * the phy */
3364 static void igb_update_phy_info(unsigned long data)
3365 {
3366         struct igb_adapter *adapter = (struct igb_adapter *) data;
3367         igb_get_phy_info(&adapter->hw);
3368 }
3369
3370 /**
3371  * igb_has_link - check shared code for link and determine up/down
3372  * @adapter: pointer to driver private info
3373  **/
3374 bool igb_has_link(struct igb_adapter *adapter)
3375 {
3376         struct e1000_hw *hw = &adapter->hw;
3377         bool link_active = false;
3378         s32 ret_val = 0;
3379
3380         /* get_link_status is set on LSC (link status) interrupt or
3381          * rx sequence error interrupt.  get_link_status will stay
3382          * true until the e1000_check_for_link establishes link
3383          * for copper adapters ONLY
3384          */
3385         switch (hw->phy.media_type) {
3386         case e1000_media_type_copper:
3387                 if (hw->mac.get_link_status) {
3388                         ret_val = hw->mac.ops.check_for_link(hw);
3389                         link_active = !hw->mac.get_link_status;
3390                 } else {
3391                         link_active = true;
3392                 }
3393                 break;
3394         case e1000_media_type_internal_serdes:
3395                 ret_val = hw->mac.ops.check_for_link(hw);
3396                 link_active = hw->mac.serdes_has_link;
3397                 break;
3398         default:
3399         case e1000_media_type_unknown:
3400                 break;
3401         }
3402
3403         return link_active;
3404 }
3405
3406 /**
3407  * igb_watchdog - Timer Call-back
3408  * @data: pointer to adapter cast into an unsigned long
3409  **/
3410 static void igb_watchdog(unsigned long data)
3411 {
3412         struct igb_adapter *adapter = (struct igb_adapter *)data;
3413         /* Do the rest outside of interrupt context */
3414         schedule_work(&adapter->watchdog_task);
3415 }
3416
3417 static void igb_watchdog_task(struct work_struct *work)
3418 {
3419         struct igb_adapter *adapter = container_of(work,
3420                                                    struct igb_adapter,
3421                                                    watchdog_task);
3422         struct e1000_hw *hw = &adapter->hw;
3423         struct net_device *netdev = adapter->netdev;
3424         u32 link;
3425         int i;
3426
3427         link = igb_has_link(adapter);
3428         if (link) {
3429                 if (!netif_carrier_ok(netdev)) {
3430                         u32 ctrl;
3431                         hw->mac.ops.get_speed_and_duplex(hw,
3432                                                          &adapter->link_speed,
3433                                                          &adapter->link_duplex);
3434
3435                         ctrl = rd32(E1000_CTRL);
3436                         /* Link status message must follow this format */
3437                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3438                                  "Flow Control: %s\n",
3439                                netdev->name,
3440                                adapter->link_speed,
3441                                adapter->link_duplex == FULL_DUPLEX ?
3442                                  "Full Duplex" : "Half Duplex",
3443                                ((ctrl & E1000_CTRL_TFCE) &&
3444                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3445                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3446                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3447
3448                         /* adjust timeout factor according to speed/duplex */
3449                         adapter->tx_timeout_factor = 1;
3450                         switch (adapter->link_speed) {
3451                         case SPEED_10:
3452                                 adapter->tx_timeout_factor = 14;
3453                                 break;
3454                         case SPEED_100:
3455                                 /* maybe add some timeout factor ? */
3456                                 break;
3457                         }
3458
3459                         netif_carrier_on(netdev);
3460
3461                         igb_ping_all_vfs(adapter);
3462
3463                         /* link state has changed, schedule phy info update */
3464                         if (!test_bit(__IGB_DOWN, &adapter->state))
3465                                 mod_timer(&adapter->phy_info_timer,
3466                                           round_jiffies(jiffies + 2 * HZ));
3467                 }
3468         } else {
3469                 if (netif_carrier_ok(netdev)) {
3470                         adapter->link_speed = 0;
3471                         adapter->link_duplex = 0;
3472                         /* Link status message must follow this format */
3473                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3474                                netdev->name);
3475                         netif_carrier_off(netdev);
3476
3477                         igb_ping_all_vfs(adapter);
3478
3479                         /* link state has changed, schedule phy info update */
3480                         if (!test_bit(__IGB_DOWN, &adapter->state))
3481                                 mod_timer(&adapter->phy_info_timer,
3482                                           round_jiffies(jiffies + 2 * HZ));
3483                 }
3484         }
3485
3486         igb_update_stats(adapter);
3487
3488         for (i = 0; i < adapter->num_tx_queues; i++) {
3489                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3490                 if (!netif_carrier_ok(netdev)) {
3491                         /* We've lost link, so the controller stops DMA,
3492                          * but we've got queued Tx work that's never going
3493                          * to get done, so reset controller to flush Tx.
3494                          * (Do the reset outside of interrupt context). */
3495                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3496                                 adapter->tx_timeout_count++;
3497                                 schedule_work(&adapter->reset_task);
3498                                 /* return immediately since reset is imminent */
3499                                 return;
3500                         }
3501                 }
3502
3503                 /* Force detection of hung controller every watchdog period */
3504                 tx_ring->detect_tx_hung = true;
3505         }
3506
3507         /* Cause software interrupt to ensure rx ring is cleaned */
3508         if (adapter->msix_entries) {
3509                 u32 eics = 0;
3510                 for (i = 0; i < adapter->num_q_vectors; i++) {
3511                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3512                         eics |= q_vector->eims_value;
3513                 }
3514                 wr32(E1000_EICS, eics);
3515         } else {
3516                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3517         }
3518
3519         /* Reset the timer */
3520         if (!test_bit(__IGB_DOWN, &adapter->state))
3521                 mod_timer(&adapter->watchdog_timer,
3522                           round_jiffies(jiffies + 2 * HZ));
3523 }
3524
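     /*
      * The latency ranges below map to the target interrupt rates chosen in
      * igb_set_itr: roughly 70,000 ints/sec for lowest_latency, 20,000 for
      * low_latency and 4,000 for bulk_latency.
      */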
3525 enum latency_range {
3526         lowest_latency = 0,
3527         low_latency = 1,
3528         bulk_latency = 2,
3529         latency_invalid = 255
3530 };
3531
3532 /**
3533  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3534  *
3535  *      Stores a new ITR value based strictly on packet size.  This
3536  *      algorithm is less sophisticated than that used in igb_update_itr,
3537  *      due to the difficulty of synchronizing statistics across multiple
3538  *      receive rings.  The divisors and thresholds used by this function
3539  *      were determined based on theoretical maximum wire speed and testing
3540  *      data, in order to minimize response time while increasing bulk
3541  *      throughput.
3542  *      This functionality is controlled by the InterruptThrottleRate module
3543  *      parameter (see igb_param.c)
3544  *      NOTE:  This function is called only when operating in a multiqueue
3545  *             receive environment.
3546  * @q_vector: pointer to q_vector
3547  **/
3548 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3549 {
3550         int new_val = q_vector->itr_val;
3551         int avg_wire_size = 0;
3552         struct igb_adapter *adapter = q_vector->adapter;
3553
3554         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3555          * ints/sec - an ITR value of 976 (~250 usec intervals).
3556          */
3557         if (adapter->link_speed != SPEED_1000) {
3558                 new_val = 976;
3559                 goto set_itr_val;
3560         }
3561
3562         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3563                 struct igb_ring *ring = q_vector->rx_ring;
3564                 avg_wire_size = ring->total_bytes / ring->total_packets;
3565         }
3566
3567         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3568                 struct igb_ring *ring = q_vector->tx_ring;
3569                 avg_wire_size = max_t(u32, avg_wire_size,
3570                                       (ring->total_bytes /
3571                                        ring->total_packets));
3572         }
3573
3574         /* if avg_wire_size isn't set no work was done */
3575         if (!avg_wire_size)
3576                 goto clear_counts;
3577
3578         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3579         avg_wire_size += 24;
3580
3581         /* Don't starve jumbo frames */
3582         avg_wire_size = min(avg_wire_size, 3000);
3583
3584         /* Give a little boost to mid-size frames */
3585         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3586                 new_val = avg_wire_size / 3;
3587         else
3588                 new_val = avg_wire_size / 2;
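             /*
              * Worked example: an average wire size of 600 bytes becomes 624
              * after the 24 byte overhead adjustment above and falls in the
              * mid-size range, so new_val is 208 - roughly the 20K ints/sec
              * point on the scale used by igb_set_itr (where 196 ~ 20,000).
              */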
3589
3590         /* when in itr mode 3 do not exceed 20K ints/sec */
3591         if (adapter->rx_itr_setting == 3 && new_val < 196)
3592                 new_val = 196;
3593
3594 set_itr_val:
3595         if (new_val != q_vector->itr_val) {
3596                 q_vector->itr_val = new_val;
3597                 q_vector->set_itr = 1;
3598         }
3599 clear_counts:
3600         if (q_vector->rx_ring) {
3601                 q_vector->rx_ring->total_bytes = 0;
3602                 q_vector->rx_ring->total_packets = 0;
3603         }
3604         if (q_vector->tx_ring) {
3605                 q_vector->tx_ring->total_bytes = 0;
3606                 q_vector->tx_ring->total_packets = 0;
3607         }
3608 }
3609
3610 /**
3611  * igb_update_itr - update the dynamic ITR value based on statistics
3612  *      Stores a new ITR value based on packets and byte
3613  *      counts during the last interrupt.  The advantage of per interrupt
3614  *      computation is faster updates and more accurate ITR for the current
3615  *      traffic pattern.  Constants in this function were computed
3616  *      based on theoretical maximum wire speed and thresholds were set based
3617  *      on testing data as well as attempting to minimize response time
3618  *      while increasing bulk throughput.
3619  *      this functionality is controlled by the InterruptThrottleRate module
3620  *      parameter (see igb_param.c)
3621  *      NOTE:  These calculations are only valid when operating in a single-
3622  *             queue environment.
3623  * @adapter: pointer to adapter
3624  * @itr_setting: current q_vector->itr_val
3625  * @packets: the number of packets during this measurement interval
3626  * @bytes: the number of bytes during this measurement interval
3627  **/
3628 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3629                                    int packets, int bytes)
3630 {
3631         unsigned int retval = itr_setting;
3632
3633         if (packets == 0)
3634                 goto update_itr_done;
3635
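             /*
              * Worked example: in the low_latency state, an interval that saw
              * 20 packets totalling 30,000 bytes averages 1500 bytes per
              * packet; the byte count exceeds 10,000 and bytes/packets exceeds
              * 1200, so the setting moves to bulk_latency.
              */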
3636         switch (itr_setting) {
3637         case lowest_latency:
3638                 /* handle TSO and jumbo frames */
3639                 if (bytes/packets > 8000)
3640                         retval = bulk_latency;
3641                 else if ((packets < 5) && (bytes > 512))
3642                         retval = low_latency;
3643                 break;
3644         case low_latency:  /* 50 usec aka 20000 ints/s */
3645                 if (bytes > 10000) {
3646                         /* this if handles the TSO accounting */
3647                         if (bytes/packets > 8000) {
3648                                 retval = bulk_latency;
3649                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3650                                 retval = bulk_latency;
3651                         } else if (packets > 35) {
3652                                 retval = lowest_latency;
3653                         }
3654                 } else if (bytes/packets > 2000) {
3655                         retval = bulk_latency;
3656                 } else if (packets <= 2 && bytes < 512) {
3657                         retval = lowest_latency;
3658                 }
3659                 break;
3660         case bulk_latency: /* 250 usec aka 4000 ints/s */
3661                 if (bytes > 25000) {
3662                         if (packets > 35)
3663                                 retval = low_latency;
3664                 } else if (bytes < 1500) {
3665                         retval = low_latency;
3666                 }
3667                 break;
3668         }
3669
3670 update_itr_done:
3671         return retval;
3672 }
3673
3674 static void igb_set_itr(struct igb_adapter *adapter)
3675 {
3676         struct igb_q_vector *q_vector = adapter->q_vector[0];
3677         u16 current_itr;
3678         u32 new_itr = q_vector->itr_val;
3679
3680         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3681         if (adapter->link_speed != SPEED_1000) {
3682                 current_itr = 0;
3683                 new_itr = 4000;
3684                 goto set_itr_now;
3685         }
3686
3687         adapter->rx_itr = igb_update_itr(adapter,
3688                                     adapter->rx_itr,
3689                                     q_vector->rx_ring->total_packets,
3690                                     q_vector->rx_ring->total_bytes);
3691
3692         adapter->tx_itr = igb_update_itr(adapter,
3693                                     adapter->tx_itr,
3694                                     q_vector->tx_ring->total_packets,
3695                                     q_vector->tx_ring->total_bytes);
3696         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3697
3698         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3699         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3700                 current_itr = low_latency;
3701
3702         switch (current_itr) {
3703         /* counts and packets in update_itr are dependent on these numbers */
3704         case lowest_latency:
3705                 new_itr = 56;  /* aka 70,000 ints/sec */
3706                 break;
3707         case low_latency:
3708                 new_itr = 196; /* aka 20,000 ints/sec */
3709                 break;
3710         case bulk_latency:
3711                 new_itr = 980; /* aka 4,000 ints/sec */
3712                 break;
3713         default:
3714                 break;
3715         }
3716
3717 set_itr_now:
3718         q_vector->rx_ring->total_bytes = 0;
3719         q_vector->rx_ring->total_packets = 0;
3720         q_vector->tx_ring->total_bytes = 0;
3721         q_vector->tx_ring->total_packets = 0;
3722
3723         if (new_itr != q_vector->itr_val) {
3724                 /* this attempts to bias the interrupt moderation towards Bulk
3725                  * by adding intermediate steps when the ITR value (interval) is
3726                  * increasing */
3727                 new_itr = new_itr > q_vector->itr_val ?
3728                              max((new_itr * q_vector->itr_val) /
3729                                  (new_itr + (q_vector->itr_val >> 2)),
3730                                  new_itr) :
3731                              new_itr;
3732                 /* Don't write the value here; it resets the adapter's
3733                  * internal timer, and causes us to delay far longer than
3734                  * we should between interrupts.  Instead, we write the ITR
3735                  * value at the beginning of the next interrupt so the timing
3736                  * ends up being correct.
3737                  */
3738                 q_vector->itr_val = new_itr;
3739                 q_vector->set_itr = 1;
3740         }
3741 }
3742
3743 #define IGB_TX_FLAGS_CSUM               0x00000001
3744 #define IGB_TX_FLAGS_VLAN               0x00000002
3745 #define IGB_TX_FLAGS_TSO                0x00000004
3746 #define IGB_TX_FLAGS_IPV4               0x00000008
3747 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3748 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3749 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3750
3751 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3752                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3753 {
3754         struct e1000_adv_tx_context_desc *context_desc;
3755         unsigned int i;
3756         int err;
3757         struct igb_buffer *buffer_info;
3758         u32 info = 0, tu_cmd = 0;
3759         u32 mss_l4len_idx;
3760         u8 l4len;
3761
3762         if (skb_header_cloned(skb)) {
3763                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3764                 if (err)
3765                         return err;
3766         }
3767
3768         l4len = tcp_hdrlen(skb);
3769         *hdr_len += l4len;
3770
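             /*
              * For TSO the length fields are zeroed and the TCP checksum is
              * seeded with just the pseudo-header, so the hardware can fill
              * in the per-segment length and complete the checksum for each
              * segment it emits.
              */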
3771         if (skb->protocol == htons(ETH_P_IP)) {
3772                 struct iphdr *iph = ip_hdr(skb);
3773                 iph->tot_len = 0;
3774                 iph->check = 0;
3775                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3776                                                          iph->daddr, 0,
3777                                                          IPPROTO_TCP,
3778                                                          0);
3779         } else if (skb_is_gso_v6(skb)) {
3780                 ipv6_hdr(skb)->payload_len = 0;
3781                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3782                                                        &ipv6_hdr(skb)->daddr,
3783                                                        0, IPPROTO_TCP, 0);
3784         }
3785
3786         i = tx_ring->next_to_use;
3787
3788         buffer_info = &tx_ring->buffer_info[i];
3789         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3790         /* VLAN MACLEN IPLEN */
3791         if (tx_flags & IGB_TX_FLAGS_VLAN)
3792                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3793         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3794         *hdr_len += skb_network_offset(skb);
3795         info |= skb_network_header_len(skb);
3796         *hdr_len += skb_network_header_len(skb);
3797         context_desc->vlan_macip_lens = cpu_to_le32(info);
3798
3799         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3800         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3801
3802         if (skb->protocol == htons(ETH_P_IP))
3803                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3804         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3805
3806         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3807
3808         /* MSS L4LEN IDX */
3809         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3810         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3811
3812         /* For 82575, context index must be unique per ring. */
3813         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3814                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3815
3816         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3817         context_desc->seqnum_seed = 0;
3818
3819         buffer_info->time_stamp = jiffies;
3820         buffer_info->next_to_watch = i;
3821         buffer_info->dma = 0;
3822         i++;
3823         if (i == tx_ring->count)
3824                 i = 0;
3825
3826         tx_ring->next_to_use = i;
3827
3828         return true;
3829 }
3830
3831 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3832                                    struct sk_buff *skb, u32 tx_flags)
3833 {
3834         struct e1000_adv_tx_context_desc *context_desc;
3835         struct device *dev = tx_ring->dev;
3836         struct igb_buffer *buffer_info;
3837         u32 info = 0, tu_cmd = 0;
3838         unsigned int i;
3839
3840         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3841             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3842                 i = tx_ring->next_to_use;
3843                 buffer_info = &tx_ring->buffer_info[i];
3844                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3845
3846                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3847                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3848
3849                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3850                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3851                         info |= skb_network_header_len(skb);
3852
3853                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3854
3855                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3856
3857                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3858                         __be16 protocol;
3859
3860                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3861                                 const struct vlan_ethhdr *vhdr =
3862                                           (const struct vlan_ethhdr*)skb->data;
3863
3864                                 protocol = vhdr->h_vlan_encapsulated_proto;
3865                         } else {
3866                                 protocol = skb->protocol;
3867                         }
3868
3869                         switch (protocol) {
3870                         case cpu_to_be16(ETH_P_IP):
3871                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3872                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3873                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3874                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3875                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3876                                 break;
3877                         case cpu_to_be16(ETH_P_IPV6):
3878                                 /* XXX what about other V6 headers?? */
3879                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3880                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3881                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3882                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3883                                 break;
3884                         default:
3885                                 if (unlikely(net_ratelimit()))
3886                                         dev_warn(dev,
3887                                             "partial checksum but proto=%x!\n",
3888                                             skb->protocol);
3889                                 break;
3890                         }
3891                 }
3892
3893                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3894                 context_desc->seqnum_seed = 0;
3895                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3896                         context_desc->mss_l4len_idx =
3897                                 cpu_to_le32(tx_ring->reg_idx << 4);
3898
3899                 buffer_info->time_stamp = jiffies;
3900                 buffer_info->next_to_watch = i;
3901                 buffer_info->dma = 0;
3902
3903                 i++;
3904                 if (i == tx_ring->count)
3905                         i = 0;
3906                 tx_ring->next_to_use = i;
3907
3908                 return true;
3909         }
3910         return false;
3911 }
3912
3913 #define IGB_MAX_TXD_PWR 16
3914 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
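     /*
      * A single advanced data descriptor can carry at most
      * IGB_MAX_DATA_PER_TXD (64KB) of payload, hence the BUG_ON() checks in
      * igb_tx_map_adv() on the header and fragment lengths below.
      */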
3915
3916 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3917                                  unsigned int first)
3918 {
3919         struct igb_buffer *buffer_info;
3920         struct device *dev = tx_ring->dev;
3921         unsigned int hlen = skb_headlen(skb);
3922         unsigned int count = 0, i;
3923         unsigned int f;
3924         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3925
3926         i = tx_ring->next_to_use;
3927
3928         buffer_info = &tx_ring->buffer_info[i];
3929         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
3930         buffer_info->length = hlen;
3931         /* set time_stamp *before* dma to help avoid a possible race */
3932         buffer_info->time_stamp = jiffies;
3933         buffer_info->next_to_watch = i;
3934         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
3935                                           DMA_TO_DEVICE);
3936         if (dma_mapping_error(dev, buffer_info->dma))
3937                 goto dma_error;
3938
3939         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3940                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
3941                 unsigned int len = frag->size;
3942
3943                 count++;
3944                 i++;
3945                 if (i == tx_ring->count)
3946                         i = 0;
3947
3948                 buffer_info = &tx_ring->buffer_info[i];
3949                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3950                 buffer_info->length = len;
3951                 buffer_info->time_stamp = jiffies;
3952                 buffer_info->next_to_watch = i;
3953                 buffer_info->mapped_as_page = true;
3954                 buffer_info->dma = dma_map_page(dev,
3955                                                 frag->page,
3956                                                 frag->page_offset,
3957                                                 len,
3958                                                 DMA_TO_DEVICE);
3959                 if (dma_mapping_error(dev, buffer_info->dma))
3960                         goto dma_error;
3961
3962         }
3963
3964         tx_ring->buffer_info[i].skb = skb;
3965         tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
3966         /* bytecount includes the headers replicated in each extra gso segment */
3967         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
3968         tx_ring->buffer_info[i].gso_segs = gso_segs;
3969         tx_ring->buffer_info[first].next_to_watch = i;
3970
3971         return ++count;
3972
3973 dma_error:
3974         dev_err(dev, "TX DMA map failed\n");
3975
3976         /* clear timestamp and dma mappings for failed buffer_info mapping */
3977         buffer_info->dma = 0;
3978         buffer_info->time_stamp = 0;
3979         buffer_info->length = 0;
3980         buffer_info->next_to_watch = 0;
3981         buffer_info->mapped_as_page = false;
3982
3983         /* clear timestamp and dma mappings for remaining portion of packet */
3984         while (count--) {
3985                 if (i == 0)
3986                         i = tx_ring->count;
3987                 i--;
3988                 buffer_info = &tx_ring->buffer_info[i];
3989                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3990         }
3991
3992         return 0;
3993 }
3994
3995 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3996                                     u32 tx_flags, int count, u32 paylen,
3997                                     u8 hdr_len)
3998 {
3999         union e1000_adv_tx_desc *tx_desc;
4000         struct igb_buffer *buffer_info;
4001         u32 olinfo_status = 0, cmd_type_len;
4002         unsigned int i = tx_ring->next_to_use;
4003
4004         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4005                         E1000_ADVTXD_DCMD_DEXT);
4006
4007         if (tx_flags & IGB_TX_FLAGS_VLAN)
4008                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4009
4010         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4011                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4012
4013         if (tx_flags & IGB_TX_FLAGS_TSO) {
4014                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4015
4016                 /* insert tcp checksum */
4017                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4018
4019                 /* insert ip checksum */
4020                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4021                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4022
4023         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4024                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4025         }
4026
4027         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4028             (tx_flags & (IGB_TX_FLAGS_CSUM |
4029                          IGB_TX_FLAGS_TSO |
4030                          IGB_TX_FLAGS_VLAN)))
4031                 olinfo_status |= tx_ring->reg_idx << 4;
4032
4033         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4034
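             /*
              * All data descriptors for the packet share the same command and
              * option flags; only the buffer address and length differ.  The
              * final descriptor additionally picks up the closing command bits
              * (IGB_ADVTXD_DCMD) after the loop, marking the end of the packet
              * and requesting a status write-back.
              */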
4035         do {
4036                 buffer_info = &tx_ring->buffer_info[i];
4037                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4038                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4039                 tx_desc->read.cmd_type_len =
4040                         cpu_to_le32(cmd_type_len | buffer_info->length);
4041                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4042                 count--;
4043                 i++;
4044                 if (i == tx_ring->count)
4045                         i = 0;
4046         } while (count > 0);
4047
4048         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4049         /* Force memory writes to complete before letting h/w
4050          * know there are new descriptors to fetch.  (Only
4051          * applicable for weak-ordered memory model archs,
4052          * such as IA-64). */
4053         wmb();
4054
4055         tx_ring->next_to_use = i;
4056         writel(i, tx_ring->tail);
4057         /* we need this if more than one processor can write to our tail
4058          * at a time; it synchronizes IO on IA64/Altix systems */
4059         mmiowb();
4060 }
4061
4062 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4063 {
4064         struct net_device *netdev = tx_ring->netdev;
4065
4066         netif_stop_subqueue(netdev, tx_ring->queue_index);
4067
4068         /* Herbert's original patch had:
4069          *  smp_mb__after_netif_stop_queue();
4070          * but since that doesn't exist yet, just open code it. */
4071         smp_mb();
4072
4073         /* We need to check again in a case another CPU has just
4074          * made room available. */
4075         if (igb_desc_unused(tx_ring) < size)
4076                 return -EBUSY;
4077
4078         /* A reprieve! */
4079         netif_wake_subqueue(netdev, tx_ring->queue_index);
4080         tx_ring->tx_stats.restart_queue++;
4081         return 0;
4082 }
4083
4084 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4085 {
4086         if (igb_desc_unused(tx_ring) >= size)
4087                 return 0;
4088         return __igb_maybe_stop_tx(tx_ring, size);
4089 }
4090
4091 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4092                                     struct igb_ring *tx_ring)
4093 {
4094         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
4095         int tso = 0, count;
4096         u32 tx_flags = 0;
4097         u16 first;
4098         u8 hdr_len = 0;
4099
4100         /* need: 1 descriptor per page,
4101          *       + 2 desc gap to keep tail from touching head,
4102          *       + 1 desc for skb->data,
4103          *       + 1 desc for context descriptor,
4104          * otherwise try next time */
4105         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4106                 /* this is a hard error */
4107                 return NETDEV_TX_BUSY;
4108         }
4109
4110         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4111                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4112                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4113         }
4114
4115         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
4116                 tx_flags |= IGB_TX_FLAGS_VLAN;
4117                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4118         }
4119
4120         if (skb->protocol == htons(ETH_P_IP))
4121                 tx_flags |= IGB_TX_FLAGS_IPV4;
4122
4123         first = tx_ring->next_to_use;
4124         if (skb_is_gso(skb)) {
4125                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4126
4127                 if (tso < 0) {
4128                         dev_kfree_skb_any(skb);
4129                         return NETDEV_TX_OK;
4130                 }
4131         }
4132
4133         if (tso)
4134                 tx_flags |= IGB_TX_FLAGS_TSO;
4135         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4136                  (skb->ip_summed == CHECKSUM_PARTIAL))
4137                 tx_flags |= IGB_TX_FLAGS_CSUM;
4138
4139         /*
4140          * count reflects descriptors mapped; if it is 0 a mapping error
4141          * has occurred and we need to rewind the descriptor queue
4142          */
4143         count = igb_tx_map_adv(tx_ring, skb, first);
4144         if (!count) {
4145                 dev_kfree_skb_any(skb);
4146                 tx_ring->buffer_info[first].time_stamp = 0;
4147                 tx_ring->next_to_use = first;
4148                 return NETDEV_TX_OK;
4149         }
4150
4151         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4152
4153         /* Make sure there is space in the ring for the next send. */
4154         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4155
4156         return NETDEV_TX_OK;
4157 }
4158
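/**
 * igb_xmit_frame_adv - ndo_start_xmit entry point
 * @skb: frame to transmit
 * @netdev: network interface device structure
 *
 * Drops the frame if the adapter is down or the skb is empty, otherwise
 * selects a Tx ring from the skb's queue mapping and hands the frame to
 * igb_xmit_frame_ring_adv().
 **/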
4159 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4160                                       struct net_device *netdev)
4161 {
4162         struct igb_adapter *adapter = netdev_priv(netdev);
4163         struct igb_ring *tx_ring;
4164         int r_idx = 0;
4165
4166         if (test_bit(__IGB_DOWN, &adapter->state)) {
4167                 dev_kfree_skb_any(skb);
4168                 return NETDEV_TX_OK;
4169         }
4170
4171         if (skb->len <= 0) {
4172                 dev_kfree_skb_any(skb);
4173                 return NETDEV_TX_OK;
4174         }
4175
4176         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4177         tx_ring = adapter->multi_tx_table[r_idx];
4178
4179         /* This goes back to the question of how to logically map a tx queue
4180          * to a flow.  Right now, performance is slightly reduced when
4181          * using multiple tx queues.  If the stack breaks away from a
4182          * single qdisc implementation, we can look at this again. */
4183         return igb_xmit_frame_ring_adv(skb, tx_ring);
4184 }
4185
4186 /**
4187  * igb_tx_timeout - Respond to a Tx Hang
4188  * @netdev: network interface device structure
4189  **/
4190 static void igb_tx_timeout(struct net_device *netdev)
4191 {
4192         struct igb_adapter *adapter = netdev_priv(netdev);
4193         struct e1000_hw *hw = &adapter->hw;
4194
4195         /* Do the reset outside of interrupt context */
4196         adapter->tx_timeout_count++;
4197
4198         if (hw->mac.type == e1000_82580)
4199                 hw->dev_spec._82575.global_device_reset = true;
4200
4201         schedule_work(&adapter->reset_task);
4202         wr32(E1000_EICS,
4203              (adapter->eims_enable_mask & ~adapter->eims_other));
4204 }
4205
4206 static void igb_reset_task(struct work_struct *work)
4207 {
4208         struct igb_adapter *adapter;
4209         adapter = container_of(work, struct igb_adapter, reset_task);
4210
4211         igb_dump(adapter);
4212         netdev_err(adapter->netdev, "Reset adapter\n");
4213         igb_reinit_locked(adapter);
4214 }
4215
4216 /**
4217  * igb_get_stats - Get System Network Statistics
4218  * @netdev: network interface device structure
4219  *
4220  * Returns the address of the device statistics structure.
4221  * The statistics are actually updated from the timer callback.
4222  **/
4223 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
4224 {
4225         /* only return the current stats */
4226         return &netdev->stats;
4227 }
4228
4229 /**
4230  * igb_change_mtu - Change the Maximum Transfer Unit
4231  * @netdev: network interface device structure
4232  * @new_mtu: new value for maximum frame size
4233  *
4234  * Returns 0 on success, negative on failure
4235  **/
4236 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4237 {
4238         struct igb_adapter *adapter = netdev_priv(netdev);
4239         struct pci_dev *pdev = adapter->pdev;
4240         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4241         u32 rx_buffer_len, i;
4242
4243         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4244                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4245                 return -EINVAL;
4246         }
4247
4248         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4249                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4250                 return -EINVAL;
4251         }
4252
4253         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4254                 msleep(1);
4255
4256         /* igb_down has a dependency on max_frame_size */
4257         adapter->max_frame_size = max_frame;
4258
4259         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4260          * means we reserve 2 more, this pushes us to allocate from the next
4261          * larger slab size.
4262          * i.e. RXBUFFER_2048 --> size-4096 slab
4263          */
4264
4265         if (adapter->hw.mac.type == e1000_82580)
4266                 max_frame += IGB_TS_HDR_LEN;
4267
4268         if (max_frame <= IGB_RXBUFFER_1024)
4269                 rx_buffer_len = IGB_RXBUFFER_1024;
4270         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4271                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4272         else
4273                 rx_buffer_len = IGB_RXBUFFER_128;
4274
4275         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4276              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4277                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4278
4279         if ((adapter->hw.mac.type == e1000_82580) &&
4280             (rx_buffer_len == IGB_RXBUFFER_128))
4281                 rx_buffer_len += IGB_RXBUFFER_64;
4282
4283         if (netif_running(netdev))
4284                 igb_down(adapter);
4285
4286         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4287                  netdev->mtu, new_mtu);
4288         netdev->mtu = new_mtu;
4289
4290         for (i = 0; i < adapter->num_rx_queues; i++)
4291                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4292
4293         if (netif_running(netdev))
4294                 igb_up(adapter);
4295         else
4296                 igb_reset(adapter);
4297
4298         clear_bit(__IGB_RESETTING, &adapter->state);
4299
4300         return 0;
4301 }
4302
4303 /**
4304  * igb_update_stats - Update the board statistics counters
4305  * @adapter: board private structure
4306  **/
4307
4308 void igb_update_stats(struct igb_adapter *adapter)
4309 {
4310         struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
4311         struct e1000_hw *hw = &adapter->hw;
4312         struct pci_dev *pdev = adapter->pdev;
4313         u32 reg, mpc;
4314         u16 phy_tmp;
4315         int i;
4316         u64 bytes, packets;
4317
4318 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4319
4320         /*
4321          * Prevent stats update while adapter is being reset, or if the pci
4322          * connection is down.
4323          */
4324         if (adapter->link_speed == 0)
4325                 return;
4326         if (pci_channel_offline(pdev))
4327                 return;
4328
4329         bytes = 0;
4330         packets = 0;
4331         for (i = 0; i < adapter->num_rx_queues; i++) {
4332                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4333                 struct igb_ring *ring = adapter->rx_ring[i];
4334                 ring->rx_stats.drops += rqdpc_tmp;
4335                 net_stats->rx_fifo_errors += rqdpc_tmp;
4336                 bytes += ring->rx_stats.bytes;
4337                 packets += ring->rx_stats.packets;
4338         }
4339
4340         net_stats->rx_bytes = bytes;
4341         net_stats->rx_packets = packets;
4342
4343         bytes = 0;
4344         packets = 0;
4345         for (i = 0; i < adapter->num_tx_queues; i++) {
4346                 struct igb_ring *ring = adapter->tx_ring[i];
4347                 bytes += ring->tx_stats.bytes;
4348                 packets += ring->tx_stats.packets;
4349         }
4350         net_stats->tx_bytes = bytes;
4351         net_stats->tx_packets = packets;
4352
4353         /* read stats registers */
4354         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4355         adapter->stats.gprc += rd32(E1000_GPRC);
4356         adapter->stats.gorc += rd32(E1000_GORCL);
4357         rd32(E1000_GORCH); /* clear GORCL */
4358         adapter->stats.bprc += rd32(E1000_BPRC);
4359         adapter->stats.mprc += rd32(E1000_MPRC);
4360         adapter->stats.roc += rd32(E1000_ROC);
4361
4362         adapter->stats.prc64 += rd32(E1000_PRC64);
4363         adapter->stats.prc127 += rd32(E1000_PRC127);
4364         adapter->stats.prc255 += rd32(E1000_PRC255);
4365         adapter->stats.prc511 += rd32(E1000_PRC511);
4366         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4367         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4368         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4369         adapter->stats.sec += rd32(E1000_SEC);
4370
4371         mpc = rd32(E1000_MPC);
4372         adapter->stats.mpc += mpc;
4373         net_stats->rx_fifo_errors += mpc;
4374         adapter->stats.scc += rd32(E1000_SCC);
4375         adapter->stats.ecol += rd32(E1000_ECOL);
4376         adapter->stats.mcc += rd32(E1000_MCC);
4377         adapter->stats.latecol += rd32(E1000_LATECOL);
4378         adapter->stats.dc += rd32(E1000_DC);
4379         adapter->stats.rlec += rd32(E1000_RLEC);
4380         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4381         adapter->stats.xontxc += rd32(E1000_XONTXC);
4382         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4383         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4384         adapter->stats.fcruc += rd32(E1000_FCRUC);
4385         adapter->stats.gptc += rd32(E1000_GPTC);
4386         adapter->stats.gotc += rd32(E1000_GOTCL);
4387         rd32(E1000_GOTCH); /* clear GOTCL */
4388         adapter->stats.rnbc += rd32(E1000_RNBC);
4389         adapter->stats.ruc += rd32(E1000_RUC);
4390         adapter->stats.rfc += rd32(E1000_RFC);
4391         adapter->stats.rjc += rd32(E1000_RJC);
4392         adapter->stats.tor += rd32(E1000_TORH);
4393         adapter->stats.tot += rd32(E1000_TOTH);
4394         adapter->stats.tpr += rd32(E1000_TPR);
4395
4396         adapter->stats.ptc64 += rd32(E1000_PTC64);
4397         adapter->stats.ptc127 += rd32(E1000_PTC127);
4398         adapter->stats.ptc255 += rd32(E1000_PTC255);
4399         adapter->stats.ptc511 += rd32(E1000_PTC511);
4400         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4401         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4402
4403         adapter->stats.mptc += rd32(E1000_MPTC);
4404         adapter->stats.bptc += rd32(E1000_BPTC);
4405
4406         adapter->stats.tpt += rd32(E1000_TPT);
4407         adapter->stats.colc += rd32(E1000_COLC);
4408
4409         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4410         /* read internal phy specific stats */
4411         reg = rd32(E1000_CTRL_EXT);
4412         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4413                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4414                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4415         }
4416
4417         adapter->stats.tsctc += rd32(E1000_TSCTC);
4418         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4419
4420         adapter->stats.iac += rd32(E1000_IAC);
4421         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4422         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4423         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4424         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4425         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4426         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4427         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4428         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4429
4430         /* Fill out the OS statistics structure */
4431         net_stats->multicast = adapter->stats.mprc;
4432         net_stats->collisions = adapter->stats.colc;
4433
4434         /* Rx Errors */
4435
4436         /* RLEC on some newer hardware can be incorrect so build
4437          * our own version based on RUC and ROC */
4438         net_stats->rx_errors = adapter->stats.rxerrc +
4439                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4440                 adapter->stats.ruc + adapter->stats.roc +
4441                 adapter->stats.cexterr;
4442         net_stats->rx_length_errors = adapter->stats.ruc +
4443                                       adapter->stats.roc;
4444         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4445         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4446         net_stats->rx_missed_errors = adapter->stats.mpc;
4447
4448         /* Tx Errors */
4449         net_stats->tx_errors = adapter->stats.ecol +
4450                                adapter->stats.latecol;
4451         net_stats->tx_aborted_errors = adapter->stats.ecol;
4452         net_stats->tx_window_errors = adapter->stats.latecol;
4453         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4454
4455         /* Tx Dropped needs to be maintained elsewhere */
4456
4457         /* Phy Stats */
4458         if (hw->phy.media_type == e1000_media_type_copper) {
4459                 if ((adapter->link_speed == SPEED_1000) &&
4460                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4461                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4462                         adapter->phy_stats.idle_errors += phy_tmp;
4463                 }
4464         }
4465
4466         /* Management Stats */
4467         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4468         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4469         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4470 }
4471
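/**
 * igb_msix_other - MSI-X handler for link, mailbox and other causes
 * @irq: interrupt number
 * @data: pointer to the adapter structure
 **/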
4472 static irqreturn_t igb_msix_other(int irq, void *data)
4473 {
4474         struct igb_adapter *adapter = data;
4475         struct e1000_hw *hw = &adapter->hw;
4476         u32 icr = rd32(E1000_ICR);
4477         /* reading ICR causes bit 31 of EICR to be cleared */
4478
4479         if (icr & E1000_ICR_DRSTA)
4480                 schedule_work(&adapter->reset_task);
4481
4482         if (icr & E1000_ICR_DOUTSYNC) {
4483                 /* HW is reporting DMA is out of sync */
4484                 adapter->stats.doosync++;
4485         }
4486
4487         /* Check for a mailbox event */
4488         if (icr & E1000_ICR_VMMB)
4489                 igb_msg_task(adapter);
4490
4491         if (icr & E1000_ICR_LSC) {
4492                 hw->mac.get_link_status = 1;
4493                 /* guard against interrupt when we're going down */
4494                 if (!test_bit(__IGB_DOWN, &adapter->state))
4495                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4496         }
4497
4498         if (adapter->vfs_allocated_count)
4499                 wr32(E1000_IMS, E1000_IMS_LSC |
4500                                 E1000_IMS_VMMB |
4501                                 E1000_IMS_DOUTSYNC);
4502         else
4503                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4504         wr32(E1000_EIMS, adapter->eims_other);
4505
4506         return IRQ_HANDLED;
4507 }
4508
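/**
 * igb_write_itr - write a pending interrupt throttle rate to hardware
 * @q_vector: vector whose ITR register should be updated
 **/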
4509 static void igb_write_itr(struct igb_q_vector *q_vector)
4510 {
4511         struct igb_adapter *adapter = q_vector->adapter;
4512         u32 itr_val = q_vector->itr_val & 0x7FFC;
4513
4514         if (!q_vector->set_itr)
4515                 return;
4516
4517         if (!itr_val)
4518                 itr_val = 0x4;
4519
4520         if (adapter->hw.mac.type == e1000_82575)
4521                 itr_val |= itr_val << 16;
4522         else
4523                 itr_val |= 0x8000000;
4524
4525         writel(itr_val, q_vector->itr_register);
4526         q_vector->set_itr = 0;
4527 }
4528
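/**
 * igb_msix_ring - MSI-X handler for a single Tx/Rx queue pair
 * @irq: interrupt number
 * @data: pointer to the q_vector that owns the ring(s)
 **/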
4529 static irqreturn_t igb_msix_ring(int irq, void *data)
4530 {
4531         struct igb_q_vector *q_vector = data;
4532
4533         /* Write the ITR value calculated from the previous interrupt. */
4534         igb_write_itr(q_vector);
4535
4536         napi_schedule(&q_vector->napi);
4537
4538         return IRQ_HANDLED;
4539 }
4540
4541 #ifdef CONFIG_IGB_DCA
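/**
 * igb_update_dca - retarget DCA tags for a q_vector's rings to the current CPU
 * @q_vector: vector whose Tx/Rx DCA control registers should be updated
 **/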
4542 static void igb_update_dca(struct igb_q_vector *q_vector)
4543 {
4544         struct igb_adapter *adapter = q_vector->adapter;
4545         struct e1000_hw *hw = &adapter->hw;
4546         int cpu = get_cpu();
4547
4548         if (q_vector->cpu == cpu)
4549                 goto out_no_update;
4550
4551         if (q_vector->tx_ring) {
4552                 int q = q_vector->tx_ring->reg_idx;
4553                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4554                 if (hw->mac.type == e1000_82575) {
4555                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4556                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4557                 } else {
4558                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4559                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4560                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4561                 }
4562                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4563                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4564         }
4565         if (q_vector->rx_ring) {
4566                 int q = q_vector->rx_ring->reg_idx;
4567                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4568                 if (hw->mac.type == e1000_82575) {
4569                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4570                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4571                 } else {
4572                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4573                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4574                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4575                 }
4576                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4577                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4578                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4579                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4580         }
4581         q_vector->cpu = cpu;
4582 out_no_update:
4583         put_cpu();
4584 }
4585
4586 static void igb_setup_dca(struct igb_adapter *adapter)
4587 {
4588         struct e1000_hw *hw = &adapter->hw;
4589         int i;
4590
4591         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4592                 return;
4593
4594         /* Always use CB2 mode; the difference is masked in the CB driver. */
4595         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4596
4597         for (i = 0; i < adapter->num_q_vectors; i++) {
4598                 adapter->q_vector[i]->cpu = -1;
4599                 igb_update_dca(adapter->q_vector[i]);
4600         }
4601 }
4602
4603 static int __igb_notify_dca(struct device *dev, void *data)
4604 {
4605         struct net_device *netdev = dev_get_drvdata(dev);
4606         struct igb_adapter *adapter = netdev_priv(netdev);
4607         struct pci_dev *pdev = adapter->pdev;
4608         struct e1000_hw *hw = &adapter->hw;
4609         unsigned long event = *(unsigned long *)data;
4610
4611         switch (event) {
4612         case DCA_PROVIDER_ADD:
4613                 /* if already enabled, don't do it again */
4614                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4615                         break;
4616                 if (dca_add_requester(dev) == 0) {
4617                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4618                         dev_info(&pdev->dev, "DCA enabled\n");
4619                         igb_setup_dca(adapter);
4620                         break;
4621                 }
4622                 /* Fall Through since DCA is disabled. */
4623         case DCA_PROVIDER_REMOVE:
4624                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4625                         /* without this a class_device is left
4626                          * hanging around in the sysfs model */
4627                         dca_remove_requester(dev);
4628                         dev_info(&pdev->dev, "DCA disabled\n");
4629                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4630                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4631                 }
4632                 break;
4633         }
4634
4635         return 0;
4636 }
4637
4638 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4639                           void *p)
4640 {
4641         int ret_val;
4642
4643         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4644                                          __igb_notify_dca);
4645
4646         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4647 }
4648 #endif /* CONFIG_IGB_DCA */
4649
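/**
 * igb_ping_all_vfs - send a control message to every allocated VF
 * @adapter: board private structure
 **/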
4650 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4651 {
4652         struct e1000_hw *hw = &adapter->hw;
4653         u32 ping;
4654         int i;
4655
4656         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4657                 ping = E1000_PF_CONTROL_MSG;
4658                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4659                         ping |= E1000_VT_MSGTYPE_CTS;
4660                 igb_write_mbx(hw, &ping, 1, i);
4661         }
4662 }
4663
4664 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4665 {
4666         struct e1000_hw *hw = &adapter->hw;
4667         u32 vmolr = rd32(E1000_VMOLR(vf));
4668         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4669
4670         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4671                             IGB_VF_FLAG_MULTI_PROMISC);
4672         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4673
4674         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4675                 vmolr |= E1000_VMOLR_MPME;
4676                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4677                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4678         } else {
4679                 /*
4680                  * if we have hashes and we are clearing a multicast promisc
4681                  * flag, we need to write the hashes to the MTA, as this step
4682                  * was previously skipped
4683                  */
4684                 if (vf_data->num_vf_mc_hashes > 30) {
4685                         vmolr |= E1000_VMOLR_MPME;
4686                 } else if (vf_data->num_vf_mc_hashes) {
4687                         int j;
4688                         vmolr |= E1000_VMOLR_ROMPE;
4689                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4690                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4691                 }
4692         }
4693
4694         wr32(E1000_VMOLR(vf), vmolr);
4695
4696         /* there are flags left unprocessed, likely not supported */
4697         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4698                 return -EINVAL;
4699
4700         return 0;
4701
4702 }
4703
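/**
 * igb_set_vf_multicasts - store and program a VF's multicast hash list
 * @adapter: board private structure
 * @msgbuf: mailbox message containing the hash count and hash values
 * @vf: VF index
 *
 * Saves up to 30 hash values for later restoration and refreshes the MTA.
 **/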
4704 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4705                                   u32 *msgbuf, u32 vf)
4706 {
4707         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4708         u16 *hash_list = (u16 *)&msgbuf[1];
4709         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4710         int i;
4711
4712         /* salt away the number of multicast addresses assigned
4713          * to this VF for later use to restore when the PF multicast
4714          * list changes
4715          */
4716         vf_data->num_vf_mc_hashes = n;
4717
4718         /* only up to 30 hash values supported */
4719         if (n > 30)
4720                 n = 30;
4721
4722         /* store the hashes for later use */
4723         for (i = 0; i < n; i++)
4724                 vf_data->vf_mc_hashes[i] = hash_list[i];
4725
4726         /* Flush and reset the mta with the new values */
4727         igb_set_rx_mode(adapter->netdev);
4728
4729         return 0;
4730 }
4731
4732 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4733 {
4734         struct e1000_hw *hw = &adapter->hw;
4735         struct vf_data_storage *vf_data;
4736         int i, j;
4737
4738         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4739                 u32 vmolr = rd32(E1000_VMOLR(i));
4740                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4741
4742                 vf_data = &adapter->vf_data[i];
4743
4744                 if ((vf_data->num_vf_mc_hashes > 30) ||
4745                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4746                         vmolr |= E1000_VMOLR_MPME;
4747                 } else if (vf_data->num_vf_mc_hashes) {
4748                         vmolr |= E1000_VMOLR_ROMPE;
4749                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4750                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4751                 }
4752                 wr32(E1000_VMOLR(i), vmolr);
4753         }
4754 }
4755
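/**
 * igb_clear_vf_vfta - remove a VF from every VLAN pool it belongs to
 * @adapter: board private structure
 * @vf: VF index
 *
 * Also clears the VFTA entry for any VLVF filter whose pool becomes empty.
 **/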
4756 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4757 {
4758         struct e1000_hw *hw = &adapter->hw;
4759         u32 pool_mask, reg, vid;
4760         int i;
4761
4762         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4763
4764         /* Find the vlan filter for this id */
4765         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4766                 reg = rd32(E1000_VLVF(i));
4767
4768                 /* remove the vf from the pool */
4769                 reg &= ~pool_mask;
4770
4771                 /* if pool is empty then remove entry from vfta */
4772                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4773                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4774                         vid = reg & E1000_VLVF_VLANID_MASK;
4775                         reg = 0;
4776                         igb_vfta_set(hw, vid, false);
4777                 }
4778
4779                 wr32(E1000_VLVF(i), reg);
4780         }
4781
4782         adapter->vf_data[vf].vlans_enabled = 0;
4783 }
4784
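/**
 * igb_vlvf_set - add or remove a pool from a VLVF VLAN filter entry
 * @adapter: board private structure
 * @vid: VLAN id
 * @add: true to add the pool to the filter, false to remove it
 * @vf: VF (or PF pool) index
 *
 * Updates the VFTA as needed and adjusts the VF's RLPML to account for
 * the VLAN tag when its first VLAN is added or last VLAN is removed.
 **/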
4785 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4786 {
4787         struct e1000_hw *hw = &adapter->hw;
4788         u32 reg, i;
4789
4790         /* The vlvf table only exists on 82576 hardware and newer */
4791         if (hw->mac.type < e1000_82576)
4792                 return -1;
4793
4794         /* we only need to do this if VMDq is enabled */
4795         if (!adapter->vfs_allocated_count)
4796                 return -1;
4797
4798         /* Find the vlan filter for this id */
4799         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4800                 reg = rd32(E1000_VLVF(i));
4801                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4802                     vid == (reg & E1000_VLVF_VLANID_MASK))
4803                         break;
4804         }
4805
4806         if (add) {
4807                 if (i == E1000_VLVF_ARRAY_SIZE) {
4808                         /* Did not find a matching VLAN ID entry that was
4809                          * enabled.  Search for a free filter entry, i.e.
4810                          * one without the enable bit set
4811                          */
4812                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4813                                 reg = rd32(E1000_VLVF(i));
4814                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4815                                         break;
4816                         }
4817                 }
4818                 if (i < E1000_VLVF_ARRAY_SIZE) {
4819                         /* Found an enabled/available entry */
4820                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4821
4822                         /* if !enabled we need to set this up in vfta */
4823                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4824                                 /* add VID to filter table */
4825                                 igb_vfta_set(hw, vid, true);
4826                                 reg |= E1000_VLVF_VLANID_ENABLE;
4827                         }
4828                         reg &= ~E1000_VLVF_VLANID_MASK;
4829                         reg |= vid;
4830                         wr32(E1000_VLVF(i), reg);
4831
4832                         /* do not modify RLPML for PF devices */
4833                         if (vf >= adapter->vfs_allocated_count)
4834                                 return 0;
4835
4836                         if (!adapter->vf_data[vf].vlans_enabled) {
4837                                 u32 size;
4838                                 reg = rd32(E1000_VMOLR(vf));
4839                                 size = reg & E1000_VMOLR_RLPML_MASK;
4840                                 size += 4;
4841                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4842                                 reg |= size;
4843                                 wr32(E1000_VMOLR(vf), reg);
4844                         }
4845
4846                         adapter->vf_data[vf].vlans_enabled++;
4847                         return 0;
4848                 }
4849         } else {
4850                 if (i < E1000_VLVF_ARRAY_SIZE) {
4851                         /* remove vf from the pool */
4852                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4853                         /* if pool is empty then remove entry from vfta */
4854                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4855                                 reg = 0;
4856                                 igb_vfta_set(hw, vid, false);
4857                         }
4858                         wr32(E1000_VLVF(i), reg);
4859
4860                         /* do not modify RLPML for PF devices */
4861                         if (vf >= adapter->vfs_allocated_count)
4862                                 return 0;
4863
4864                         adapter->vf_data[vf].vlans_enabled--;
4865                         if (!adapter->vf_data[vf].vlans_enabled) {
4866                                 u32 size;
4867                                 reg = rd32(E1000_VMOLR(vf));
4868                                 size = reg & E1000_VMOLR_RLPML_MASK;
4869                                 size -= 4;
4870                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4871                                 reg |= size;
4872                                 wr32(E1000_VMOLR(vf), reg);
4873                         }
4874                 }
4875         }
4876         return 0;
4877 }
4878
4879 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4880 {
4881         struct e1000_hw *hw = &adapter->hw;
4882
4883         if (vid)
4884                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4885         else
4886                 wr32(E1000_VMVIR(vf), 0);
4887 }
4888
4889 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4890                                int vf, u16 vlan, u8 qos)
4891 {
4892         int err = 0;
4893         struct igb_adapter *adapter = netdev_priv(netdev);
4894
4895         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4896                 return -EINVAL;
4897         if (vlan || qos) {
4898                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4899                 if (err)
4900                         goto out;
4901                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4902                 igb_set_vmolr(adapter, vf, !vlan);
4903                 adapter->vf_data[vf].pf_vlan = vlan;
4904                 adapter->vf_data[vf].pf_qos = qos;
4905                 dev_info(&adapter->pdev->dev,
4906                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4907                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4908                         dev_warn(&adapter->pdev->dev,
4909                                  "The VF VLAN has been set,"
4910                                  " but the PF device is not up.\n");
4911                         dev_warn(&adapter->pdev->dev,
4912                                  "Bring the PF device up before"
4913                                  " attempting to use the VF device.\n");
4914                 }
4915         } else {
4916                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4917                                    false, vf);
4918                 igb_set_vmvir(adapter, vlan, vf);
4919                 igb_set_vmolr(adapter, vf, true);
4920                 adapter->vf_data[vf].pf_vlan = 0;
4921                 adapter->vf_data[vf].pf_qos = 0;
4922         }
4923 out:
4924         return err;
4925 }
4926
4927 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4928 {
4929         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4930         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4931
4932         return igb_vlvf_set(adapter, vid, add, vf);
4933 }
4934
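/**
 * igb_vf_reset - restore a VF's offloads, VLAN and multicast state to defaults
 * @adapter: board private structure
 * @vf: VF index
 **/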
4935 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4936 {
4937         /* clear flags */
4938         adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4939         adapter->vf_data[vf].last_nack = jiffies;
4940
4941         /* reset offloads to defaults */
4942         igb_set_vmolr(adapter, vf, true);
4943
4944         /* reset vlans for device */
4945         igb_clear_vf_vfta(adapter, vf);
4946         if (adapter->vf_data[vf].pf_vlan)
4947                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4948                                     adapter->vf_data[vf].pf_vlan,
4949                                     adapter->vf_data[vf].pf_qos);
4950         else
4951                 igb_clear_vf_vfta(adapter, vf);
4952
4953         /* reset multicast table array for vf */
4954         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4955
4956         /* Flush and reset the mta with the new values */
4957         igb_set_rx_mode(adapter->netdev);
4958 }
4959
4960 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4961 {
4962         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4963
4964         /* generate a new mac address as we were hotplug removed/added */
4965         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4966                 random_ether_addr(vf_mac);
4967
4968         /* process remaining reset events */
4969         igb_vf_reset(adapter, vf);
4970 }
4971
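/**
 * igb_vf_reset_msg - handle a reset request from a VF
 * @adapter: board private structure
 * @vf: VF index
 *
 * Resets the VF state, programs the VF MAC into a RAR entry, re-enables
 * Tx/Rx for the VF and replies with an ACK carrying the MAC address.
 **/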
4972 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4973 {
4974         struct e1000_hw *hw = &adapter->hw;
4975         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4976         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4977         u32 reg, msgbuf[3];
4978         u8 *addr = (u8 *)(&msgbuf[1]);
4979
4980         /* process all the same items cleared in a function level reset */
4981         igb_vf_reset(adapter, vf);
4982
4983         /* set vf mac address */
4984         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4985
4986         /* enable transmit and receive for vf */
4987         reg = rd32(E1000_VFTE);
4988         wr32(E1000_VFTE, reg | (1 << vf));
4989         reg = rd32(E1000_VFRE);
4990         wr32(E1000_VFRE, reg | (1 << vf));
4991
4992         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4993
4994         /* reply to reset with ack and vf mac address */
4995         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4996         memcpy(addr, vf_mac, 6);
4997         igb_write_mbx(hw, msgbuf, 3, vf);
4998 }
4999
5000 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5001 {
5002         /*
5003          * The VF MAC Address is stored in a packed array of bytes
5004          * starting at the second 32 bit word of the msg array
5005          */
5006         unsigned char *addr = (unsigned char *)&msg[1];
5007         int err = -1;
5008
5009         if (is_valid_ether_addr(addr))
5010                 err = igb_set_vf_mac(adapter, vf, addr);
5011
5012         return err;
5013 }
5014
5015 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5016 {
5017         struct e1000_hw *hw = &adapter->hw;
5018         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5019         u32 msg = E1000_VT_MSGTYPE_NACK;
5020
5021         /* if device isn't clear to send it shouldn't be reading either */
5022         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5023             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5024                 igb_write_mbx(hw, &msg, 1, vf);
5025                 vf_data->last_nack = jiffies;
5026         }
5027 }
5028
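/**
 * igb_rcv_msg_from_vf - read and dispatch a mailbox message from a VF
 * @adapter: board private structure
 * @vf: VF index
 *
 * Handles reset, MAC, promiscuous, multicast, LPE and VLAN requests and
 * replies with an ACK or NACK depending on the result.
 **/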
5029 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5030 {
5031         struct pci_dev *pdev = adapter->pdev;
5032         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5033         struct e1000_hw *hw = &adapter->hw;
5034         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5035         s32 retval;
5036
5037         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5038
5039         if (retval) {
5040                 /* if receive failed revoke VF CTS stats and restart init */
5041                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5042                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5043                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5044                         return;
5045                 goto out;
5046         }
5047
5048         /* this is a message we already processed, do nothing */
5049         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5050                 return;
5051
5052         /*
5053          * until the vf completes a reset it should not be
5054          * allowed to start any configuration.
5055          */
5056
5057         if (msgbuf[0] == E1000_VF_RESET) {
5058                 igb_vf_reset_msg(adapter, vf);
5059                 return;
5060         }
5061
5062         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5063                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5064                         return;
5065                 retval = -1;
5066                 goto out;
5067         }
5068
5069         switch ((msgbuf[0] & 0xFFFF)) {
5070         case E1000_VF_SET_MAC_ADDR:
5071                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5072                 break;
5073         case E1000_VF_SET_PROMISC:
5074                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5075                 break;
5076         case E1000_VF_SET_MULTICAST:
5077                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5078                 break;
5079         case E1000_VF_SET_LPE:
5080                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5081                 break;
5082         case E1000_VF_SET_VLAN:
5083                 if (adapter->vf_data[vf].pf_vlan)
5084                         retval = -1;
5085                 else
5086                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5087                 break;
5088         default:
5089                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5090                 retval = -1;
5091                 break;
5092         }
5093
5094         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5095 out:
5096         /* notify the VF of the results of what it sent us */
5097         if (retval)
5098                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5099         else
5100                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5101
5102         igb_write_mbx(hw, msgbuf, 1, vf);
5103 }
5104
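/**
 * igb_msg_task - service pending mailbox events for all VFs
 * @adapter: board private structure
 **/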
5105 static void igb_msg_task(struct igb_adapter *adapter)
5106 {
5107         struct e1000_hw *hw = &adapter->hw;
5108         u32 vf;
5109
5110         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5111                 /* process any reset requests */
5112                 if (!igb_check_for_rst(hw, vf))
5113                         igb_vf_reset_event(adapter, vf);
5114
5115                 /* process any messages pending */
5116                 if (!igb_check_for_msg(hw, vf))
5117                         igb_rcv_msg_from_vf(adapter, vf);
5118
5119                 /* process any acks */
5120                 if (!igb_check_for_ack(hw, vf))
5121                         igb_rcv_ack_from_vf(adapter, vf);
5122         }
5123 }
5124
5125 /**
5126  *  igb_set_uta - Set unicast filter table address
5127  *  @adapter: board private structure
5128  *
5129  *  The unicast table address is a register array of 32-bit registers.
5130  *  The table is meant to be used in a way similar to how the MTA is used
5131  *  however due to certain limitations in the hardware it is necessary to
5132  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5133  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5134  **/
5135 static void igb_set_uta(struct igb_adapter *adapter)
5136 {
5137         struct e1000_hw *hw = &adapter->hw;
5138         int i;
5139
5140         /* The UTA table only exists on 82576 hardware and newer */
5141         if (hw->mac.type < e1000_82576)
5142                 return;
5143
5144         /* we only need to do this if VMDq is enabled */
5145         if (!adapter->vfs_allocated_count)
5146                 return;
5147
5148         for (i = 0; i < hw->mac.uta_reg_count; i++)
5149                 array_wr32(E1000_UTA, i, ~0);
5150 }
5151
5152 /**
5153  * igb_intr_msi - Interrupt Handler
5154  * @irq: interrupt number
5155  * @data: pointer to a network interface device structure
5156  **/
5157 static irqreturn_t igb_intr_msi(int irq, void *data)
5158 {
5159         struct igb_adapter *adapter = data;
5160         struct igb_q_vector *q_vector = adapter->q_vector[0];
5161         struct e1000_hw *hw = &adapter->hw;
5162         /* read ICR disables interrupts using IAM */
5163         u32 icr = rd32(E1000_ICR);
5164
5165         igb_write_itr(q_vector);
5166
5167         if (icr & E1000_ICR_DRSTA)
5168                 schedule_work(&adapter->reset_task);
5169
5170         if (icr & E1000_ICR_DOUTSYNC) {
5171                 /* HW is reporting DMA is out of sync */
5172                 adapter->stats.doosync++;
5173         }
5174
5175         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5176                 hw->mac.get_link_status = 1;
5177                 if (!test_bit(__IGB_DOWN, &adapter->state))
5178                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5179         }
5180
5181         napi_schedule(&q_vector->napi);
5182
5183         return IRQ_HANDLED;
5184 }
5185
5186 /**
5187  * igb_intr - Legacy Interrupt Handler
5188  * @irq: interrupt number
5189  * @data: pointer to a network interface device structure
5190  **/
5191 static irqreturn_t igb_intr(int irq, void *data)
5192 {
5193         struct igb_adapter *adapter = data;
5194         struct igb_q_vector *q_vector = adapter->q_vector[0];
5195         struct e1000_hw *hw = &adapter->hw;
5196         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5197          * need for the IMC write */
5198         u32 icr = rd32(E1000_ICR);
5199         if (!icr)
5200                 return IRQ_NONE;  /* Not our interrupt */
5201
5202         igb_write_itr(q_vector);
5203
5204         /* IMS will not auto-mask if INT_ASSERTED is not set; if INT_ASSERTED
5205          * is not set, the interrupt was not generated by this adapter */
5206         if (!(icr & E1000_ICR_INT_ASSERTED))
5207                 return IRQ_NONE;
5208
5209         if (icr & E1000_ICR_DRSTA)
5210                 schedule_work(&adapter->reset_task);
5211
5212         if (icr & E1000_ICR_DOUTSYNC) {
5213                 /* HW is reporting DMA is out of sync */
5214                 adapter->stats.doosync++;
5215         }
5216
5217         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5218                 hw->mac.get_link_status = 1;
5219                 /* guard against interrupt when we're going down */
5220                 if (!test_bit(__IGB_DOWN, &adapter->state))
5221                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5222         }
5223
5224         napi_schedule(&q_vector->napi);
5225
5226         return IRQ_HANDLED;
5227 }
5228
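/**
 * igb_ring_irq_enable - update ITR and re-enable interrupts for a vector
 * @q_vector: vector to re-arm after NAPI polling completes
 **/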
5229 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5230 {
5231         struct igb_adapter *adapter = q_vector->adapter;
5232         struct e1000_hw *hw = &adapter->hw;
5233
5234         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5235             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5236                 if (!adapter->msix_entries)
5237                         igb_set_itr(adapter);
5238                 else
5239                         igb_update_ring_itr(q_vector);
5240         }
5241
5242         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5243                 if (adapter->msix_entries)
5244                         wr32(E1000_EIMS, q_vector->eims_value);
5245                 else
5246                         igb_irq_enable(adapter);
5247         }
5248 }
5249
5250 /**
5251  * igb_poll - NAPI Rx polling callback
5252  * @napi: napi polling structure
5253  * @budget: count of how many packets we should handle
5254  **/
5255 static int igb_poll(struct napi_struct *napi, int budget)
5256 {
5257         struct igb_q_vector *q_vector = container_of(napi,
5258                                                      struct igb_q_vector,
5259                                                      napi);
5260         int tx_clean_complete = 1, work_done = 0;
5261
5262 #ifdef CONFIG_IGB_DCA
5263         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5264                 igb_update_dca(q_vector);
5265 #endif
5266         if (q_vector->tx_ring)
5267                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5268
5269         if (q_vector->rx_ring)
5270                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5271
5272         if (!tx_clean_complete)
5273                 work_done = budget;
5274
5275         /* If not enough Rx work done, exit the polling mode */
5276         if (work_done < budget) {
5277                 napi_complete(napi);
5278                 igb_ring_irq_enable(q_vector);
5279         }
5280
5281         return work_done;
5282 }
5283
5284 /**
5285  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5286  * @adapter: board private structure
5287  * @shhwtstamps: timestamp structure to update
5288  * @regval: unsigned 64bit system time value.
5289  *
5290  * We need to convert the system time value stored in the RX/TXSTMP registers
5291  * into a hwtstamp which can be used by the upper level timestamping functions
5292  */
5293 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5294                                    struct skb_shared_hwtstamps *shhwtstamps,
5295                                    u64 regval)
5296 {
5297         u64 ns;
5298
5299         /*
5300          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL; shift this up to
5301          * bit 24 to match the clock shift we set up earlier.
5302          */
5303         if (adapter->hw.mac.type == e1000_82580)
5304                 regval <<= IGB_82580_TSYNC_SHIFT;
5305
5306         ns = timecounter_cyc2time(&adapter->clock, regval);
5307         timecompare_update(&adapter->compare, ns);
5308         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5309         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5310         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5311 }
5312
5313 /**
5314  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5315  * @q_vector: pointer to q_vector containing needed info
5316  * @buffer: pointer to igb_buffer structure
5317  *
5318  * If we were asked to do hardware stamping and such a time stamp is
5319  * available, then it must have been for this skb here because we
5320  * allow only one such packet into the queue.
5321  */
5322 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5323 {
5324         struct igb_adapter *adapter = q_vector->adapter;
5325         struct e1000_hw *hw = &adapter->hw;
5326         struct skb_shared_hwtstamps shhwtstamps;
5327         u64 regval;
5328
5329         /* exit if the packet was not flagged for hw timestamping or the TX stamp is not valid */
5330         if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5331             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5332                 return;
5333
5334         regval = rd32(E1000_TXSTMPL);
5335         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5336
5337         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5338         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5339 }
5340
5341 /**
5342  * igb_clean_tx_irq - Reclaim resources after transmit completes
5343  * @q_vector: pointer to q_vector containing needed info
5344  * returns true if ring is completely cleaned
5345  **/
5346 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5347 {
5348         struct igb_adapter *adapter = q_vector->adapter;
5349         struct igb_ring *tx_ring = q_vector->tx_ring;
5350         struct net_device *netdev = tx_ring->netdev;
5351         struct e1000_hw *hw = &adapter->hw;
5352         struct igb_buffer *buffer_info;
5353         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5354         unsigned int total_bytes = 0, total_packets = 0;
5355         unsigned int i, eop, count = 0;
5356         bool cleaned = false;
5357
5358         i = tx_ring->next_to_clean;
5359         eop = tx_ring->buffer_info[i].next_to_watch;
5360         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5361
5362         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5363                (count < tx_ring->count)) {
5364                 rmb();  /* read buffer_info after eop_desc status */
5365                 for (cleaned = false; !cleaned; count++) {
5366                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5367                         buffer_info = &tx_ring->buffer_info[i];
5368                         cleaned = (i == eop);
5369
5370                         if (buffer_info->skb) {
5371                                 total_bytes += buffer_info->bytecount;
5372                                 /* gso_segs is currently only valid for tcp */
5373                                 total_packets += buffer_info->gso_segs;
5374                                 igb_tx_hwtstamp(q_vector, buffer_info);
5375                         }
5376
5377                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5378                         tx_desc->wb.status = 0;
5379
5380                         i++;
5381                         if (i == tx_ring->count)
5382                                 i = 0;
5383                 }
5384                 eop = tx_ring->buffer_info[i].next_to_watch;
5385                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5386         }
5387
5388         tx_ring->next_to_clean = i;
5389
5390         if (unlikely(count &&
5391                      netif_carrier_ok(netdev) &&
5392                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5393                 /* Make sure that anybody stopping the queue after this
5394                  * sees the new next_to_clean.
5395                  */
5396                 smp_mb();
5397                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5398                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5399                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5400                         tx_ring->tx_stats.restart_queue++;
5401                 }
5402         }
5403
5404         if (tx_ring->detect_tx_hung) {
5405                 /* Detect a transmit hang in hardware; this serializes the
5406                  * check with the clearing of time_stamp and movement of i */
5407                 tx_ring->detect_tx_hung = false;
5408                 if (tx_ring->buffer_info[i].time_stamp &&
5409                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5410                                (adapter->tx_timeout_factor * HZ)) &&
5411                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5412
5413                         /* detected Tx unit hang */
5414                         dev_err(tx_ring->dev,
5415                                 "Detected Tx Unit Hang\n"
5416                                 "  Tx Queue             <%d>\n"
5417                                 "  TDH                  <%x>\n"
5418                                 "  TDT                  <%x>\n"
5419                                 "  next_to_use          <%x>\n"
5420                                 "  next_to_clean        <%x>\n"
5421                                 "buffer_info[next_to_clean]\n"
5422                                 "  time_stamp           <%lx>\n"
5423                                 "  next_to_watch        <%x>\n"
5424                                 "  jiffies              <%lx>\n"
5425                                 "  desc.status          <%x>\n",
5426                                 tx_ring->queue_index,
5427                                 readl(tx_ring->head),
5428                                 readl(tx_ring->tail),
5429                                 tx_ring->next_to_use,
5430                                 tx_ring->next_to_clean,
5431                                 tx_ring->buffer_info[eop].time_stamp,
5432                                 eop,
5433                                 jiffies,
5434                                 eop_desc->wb.status);
5435                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5436                 }
5437         }
5438         tx_ring->total_bytes += total_bytes;
5439         tx_ring->total_packets += total_packets;
5440         tx_ring->tx_stats.bytes += total_bytes;
5441         tx_ring->tx_stats.packets += total_packets;
5442         return count < tx_ring->count;
5443 }
5444
5445 /**
5446  * igb_receive_skb - helper function to handle rx indications
5447  * @q_vector: structure containing interrupt and ring information
5448  * @skb: packet to send up
5449  * @vlan_tag: vlan tag for packet
5450  **/
5451 static void igb_receive_skb(struct igb_q_vector *q_vector,
5452                             struct sk_buff *skb,
5453                             u16 vlan_tag)
5454 {
5455         struct igb_adapter *adapter = q_vector->adapter;
5456
5457         if (vlan_tag && adapter->vlgrp)
5458                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5459                                  vlan_tag, skb);
5460         else
5461                 napi_gro_receive(&q_vector->napi, skb);
5462 }
5463
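/**
 * igb_rx_checksum_adv - set skb checksum state from Rx descriptor status
 * @ring: ring the packet arrived on
 * @status_err: descriptor status/error bits
 * @skb: packet being processed
 **/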
5464 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5465                                        u32 status_err, struct sk_buff *skb)
5466 {
5467         skb_checksum_none_assert(skb);
5468
5469         /* return if the Ignore Checksum bit is set or checksum is disabled through ethtool */
5470         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5471              (status_err & E1000_RXD_STAT_IXSM))
5472                 return;
5473
5474         /* TCP/UDP checksum error bit is set */
5475         if (status_err &
5476             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5477                 /*
5478                  * work around errata with SCTP packets where the TCPE (aka
5479                  * L4E) bit is set incorrectly on 64 byte (60 byte w/o crc)
5480                  * packets, i.e. let the stack check the crc32c
5481                  */
5482                 if ((skb->len == 60) &&
5483                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5484                         ring->rx_stats.csum_err++;
5485
5486                 /* let the stack verify checksum errors */
5487                 return;
5488         }
5489         /* It must be a TCP or UDP packet with a valid checksum */
5490         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5491                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5492
5493         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5494 }
5495
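/**
 * igb_rx_hwtstamp - retrieve an Rx hardware timestamp for a packet
 * @q_vector: vector that received the packet
 * @staterr: descriptor status bits
 * @skb: packet to attach the timestamp to
 *
 * Reads the timestamp either from the packet prefix (TSIP) or from the
 * RXSTMPL/RXSTMPH registers and fills in the skb's shared hwtstamps.
 **/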
5496 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5497                                    struct sk_buff *skb)
5498 {
5499         struct igb_adapter *adapter = q_vector->adapter;
5500         struct e1000_hw *hw = &adapter->hw;
5501         u64 regval;
5502
5503         /*
5504          * If this bit is set, then the RX registers contain the time stamp. No
5505          * other packet will be time stamped until we read these registers, so
5506          * read the registers to make them available again. Because only one
5507          * packet can be time stamped at a time, we know that the register
5508          * values must belong to this packet and therefore we don't need to
5509          * compare any of the additional attributes stored for it.
5510          *
5511          * If nothing went wrong, then it should have a shared tx_flags that we
5512          * can turn into a skb_shared_hwtstamps.
5513          */
5514         if (staterr & E1000_RXDADV_STAT_TSIP) {
5515                 u32 *stamp = (u32 *)skb->data;
5516                 regval = le32_to_cpu(*(stamp + 2));
5517                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5518                 skb_pull(skb, IGB_TS_HDR_LEN);
5519         } else {
5520                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5521                         return;
5522
5523                 regval = rd32(E1000_RXSTMPL);
5524                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5525         }
5526
5527         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5528 }
5529 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5530                                union e1000_adv_rx_desc *rx_desc)
5531 {
5532         /* HW will not DMA in data larger than the given buffer, even if it
5533          * parses the header (NFS, for example) to be larger.  In that case, it
5534          * fills the header buffer and spills the rest into the page.
5535          */
5536         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5537                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5538         if (hlen > rx_ring->rx_buffer_len)
5539                 hlen = rx_ring->rx_buffer_len;
5540         return hlen;
5541 }
5542
5543 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5544                                  int *work_done, int budget)
5545 {
5546         struct igb_ring *rx_ring = q_vector->rx_ring;
5547         struct net_device *netdev = rx_ring->netdev;
5548         struct device *dev = rx_ring->dev;
5549         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5550         struct igb_buffer *buffer_info, *next_buffer;
5551         struct sk_buff *skb;
5552         bool cleaned = false;
5553         int cleaned_count = 0;
5554         int current_node = numa_node_id();
5555         unsigned int total_bytes = 0, total_packets = 0;
5556         unsigned int i;
5557         u32 staterr;
5558         u16 length;
5559         u16 vlan_tag;
5560
5561         i = rx_ring->next_to_clean;
5562         buffer_info = &rx_ring->buffer_info[i];
5563         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5564         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5565
5566         while (staterr & E1000_RXD_STAT_DD) {
5567                 if (*work_done >= budget)
5568                         break;
5569                 (*work_done)++;
5570                 rmb(); /* read descriptor and rx_buffer_info after status DD */
5571
5572                 skb = buffer_info->skb;
5573                 prefetch(skb->data - NET_IP_ALIGN);
5574                 buffer_info->skb = NULL;
5575
5576                 i++;
5577                 if (i == rx_ring->count)
5578                         i = 0;
5579
5580                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5581                 prefetch(next_rxd);
5582                 next_buffer = &rx_ring->buffer_info[i];
5583
5584                 length = le16_to_cpu(rx_desc->wb.upper.length);
5585                 cleaned = true;
5586                 cleaned_count++;
5587
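                     /*
                      * unmap the skb data buffer; when packet split is not in
                      * use (buffer length >= 1K) the whole frame is in it,
                      * otherwise only the parsed header is and the payload is
                      * added from the half-page fragment below
                      */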
5588                 if (buffer_info->dma) {
5589                         dma_unmap_single(dev, buffer_info->dma,
5590                                          rx_ring->rx_buffer_len,
5591                                          DMA_FROM_DEVICE);
5592                         buffer_info->dma = 0;
5593                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5594                                 skb_put(skb, length);
5595                                 goto send_up;
5596                         }
5597                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5598                 }
5599
5600                 if (length) {
5601                         dma_unmap_page(dev, buffer_info->page_dma,
5602                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5603                         buffer_info->page_dma = 0;
5604
5605                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5606                                                 buffer_info->page,
5607                                                 buffer_info->page_offset,
5608                                                 length);
5609
5610                         if ((page_count(buffer_info->page) != 1) ||
5611                             (page_to_nid(buffer_info->page) != current_node))
5612                                 buffer_info->page = NULL;
5613                         else
5614                                 get_page(buffer_info->page);
5615
5616                         skb->len += length;
5617                         skb->data_len += length;
5618                         skb->truesize += length;
5619                 }
5620
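                     /*
                      * not the last descriptor of the frame: carry the
                      * in-progress skb over to the next buffer_info and keep
                      * cleaning
                      */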
5621                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5622                         buffer_info->skb = next_buffer->skb;
5623                         buffer_info->dma = next_buffer->dma;
5624                         next_buffer->skb = skb;
5625                         next_buffer->dma = 0;
5626                         goto next_desc;
5627                 }
5628 send_up:
5629                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5630                         dev_kfree_skb_irq(skb);
5631                         goto next_desc;
5632                 }
5633
5634                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5635                         igb_rx_hwtstamp(q_vector, staterr, skb);
5636                 total_bytes += skb->len;
5637                 total_packets++;
5638
5639                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5640
5641                 skb->protocol = eth_type_trans(skb, netdev);
5642                 skb_record_rx_queue(skb, rx_ring->queue_index);
5643
5644                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5645                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5646
5647                 igb_receive_skb(q_vector, skb, vlan_tag);
5648
5649 next_desc:
5650                 rx_desc->wb.upper.status_error = 0;
5651
5652                 /* return some buffers to hardware, one at a time is too slow */
5653                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5654                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5655                         cleaned_count = 0;
5656                 }
5657
5658                 /* use prefetched values */
5659                 rx_desc = next_rxd;
5660                 buffer_info = next_buffer;
5661                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5662         }
5663
5664         rx_ring->next_to_clean = i;
5665         cleaned_count = igb_desc_unused(rx_ring);
5666
5667         if (cleaned_count)
5668                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5669
5670         rx_ring->total_packets += total_packets;
5671         rx_ring->total_bytes += total_bytes;
5672         rx_ring->rx_stats.packets += total_packets;
5673         rx_ring->rx_stats.bytes += total_bytes;
5674         return cleaned;
5675 }
5676
5677 /**
5678  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5679  * @rx_ring: pointer to the ring to place buffers on
 * @cleaned_count: number of buffers to replace
5680  **/
5681 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5682 {
5683         struct net_device *netdev = rx_ring->netdev;
5684         union e1000_adv_rx_desc *rx_desc;
5685         struct igb_buffer *buffer_info;
5686         struct sk_buff *skb;
5687         unsigned int i;
5688         int bufsz;
5689
5690         i = rx_ring->next_to_use;
5691         buffer_info = &rx_ring->buffer_info[i];
5692
5693         bufsz = rx_ring->rx_buffer_len;
5694
5695         while (cleaned_count--) {
5696                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5697
5698                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5699                         if (!buffer_info->page) {
5700                                 buffer_info->page = netdev_alloc_page(netdev);
5701                                 if (!buffer_info->page) {
5702                                         rx_ring->rx_stats.alloc_failed++;
5703                                         goto no_buffers;
5704                                 }
5705                                 buffer_info->page_offset = 0;
5706                         } else {
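                                     /* reuse the existing page: switch to its other half */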
5707                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5708                         }
5709                         buffer_info->page_dma =
5710                                 dma_map_page(rx_ring->dev, buffer_info->page,
5711                                              buffer_info->page_offset,
5712                                              PAGE_SIZE / 2,
5713                                              DMA_FROM_DEVICE);
5714                         if (dma_mapping_error(rx_ring->dev,
5715                                               buffer_info->page_dma)) {
5716                                 buffer_info->page_dma = 0;
5717                                 rx_ring->rx_stats.alloc_failed++;
5718                                 goto no_buffers;
5719                         }
5720                 }
5721
5722                 skb = buffer_info->skb;
5723                 if (!skb) {
5724                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5725                         if (!skb) {
5726                                 rx_ring->rx_stats.alloc_failed++;
5727                                 goto no_buffers;
5728                         }
5729
5730                         buffer_info->skb = skb;
5731                 }
5732                 if (!buffer_info->dma) {
5733                         buffer_info->dma = dma_map_single(rx_ring->dev,
5734                                                           skb->data,
5735                                                           bufsz,
5736                                                           DMA_FROM_DEVICE);
5737                         if (dma_mapping_error(rx_ring->dev,
5738                                               buffer_info->dma)) {
5739                                 buffer_info->dma = 0;
5740                                 rx_ring->rx_stats.alloc_failed++;
5741                                 goto no_buffers;
5742                         }
5743                 }
5744                 /* Refresh the desc even if buffer_addrs didn't change because
5745                  * each write-back erases this info. */
5746                 if (bufsz < IGB_RXBUFFER_1024) {
5747                         rx_desc->read.pkt_addr =
5748                              cpu_to_le64(buffer_info->page_dma);
5749                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5750                 } else {
5751                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5752                         rx_desc->read.hdr_addr = 0;
5753                 }
5754
5755                 i++;
5756                 if (i == rx_ring->count)
5757                         i = 0;
5758                 buffer_info = &rx_ring->buffer_info[i];
5759         }
5760
5761 no_buffers:
5762         if (rx_ring->next_to_use != i) {
5763                 rx_ring->next_to_use = i;
5764                 if (i == 0)
5765                         i = (rx_ring->count - 1);
5766                 else
5767                         i--;
5768
5769                 /* Force memory writes to complete before letting h/w
5770                  * know there are new descriptors to fetch.  (Only
5771                  * applicable for weak-ordered memory model archs,
5772                  * such as IA-64). */
5773                 wmb();
5774                 writel(i, rx_ring->tail);
5775         }
5776 }
5777
5778 /**
5779  * igb_mii_ioctl - handle MII ioctls for reading PHY registers
5780  * @netdev: network interface device structure
5781  * @ifr: interface request structure holding the MII data
5782  * @cmd: MII ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
5783  **/
5784 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5785 {
5786         struct igb_adapter *adapter = netdev_priv(netdev);
5787         struct mii_ioctl_data *data = if_mii(ifr);
5788
5789         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5790                 return -EOPNOTSUPP;
5791
5792         switch (cmd) {
5793         case SIOCGMIIPHY:
5794                 data->phy_id = adapter->hw.phy.addr;
5795                 break;
5796         case SIOCGMIIREG:
5797                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5798                                      &data->val_out))
5799                         return -EIO;
5800                 break;
5801         case SIOCSMIIREG:
5802         default:
5803                 return -EOPNOTSUPP;
5804         }
5805         return 0;
5806 }
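
/*
 * Example (hypothetical userspace usage, not part of this driver): reading
 * the PHY's basic mode status register through the MII ioctls handled above.
 * "fd" is assumed to be any open socket descriptor and "eth0" the igb port.
 *
 *	struct ifreq ifr;
 *	struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	ioctl(fd, SIOCGMIIPHY, &ifr);
 *	mii->reg_num = MII_BMSR;
 *	ioctl(fd, SIOCGMIIREG, &ifr);
 *
 * SIOCGMIIPHY fills mii->phy_id; after SIOCGMIIREG the register value is in
 * mii->val_out.  SIOCSMIIREG is rejected above, so register writes are not
 * possible through this path.
 */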
5807
5808 /**
5809  * igb_hwtstamp_ioctl - control hardware time stamping
5810  * @netdev: network interface device structure
5811  * @ifr: interface request structure holding the hwtstamp_config
5812  * @cmd: ioctl command (SIOCSHWTSTAMP)
5813  *
5814  * Outgoing time stamping can be enabled and disabled. Play nice and
5815  * disable it when requested, although it shouldn't cause any overhead
5816  * when no packet needs it. At most one packet in the queue may be
5817  * marked for time stamping, otherwise it would be impossible to tell
5818  * for sure to which packet the hardware time stamp belongs.
5819  *
5820  * Incoming time stamping has to be configured via the hardware
5821  * filters. Not all combinations are supported, in particular event
5822  * type has to be specified. Matching the kind of event packet is
5823  * not supported, with the exception of "all V2 events regardless of
5824  * layer 2 or 4".
5825  *
5826  **/
5827 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5828                               struct ifreq *ifr, int cmd)
5829 {
5830         struct igb_adapter *adapter = netdev_priv(netdev);
5831         struct e1000_hw *hw = &adapter->hw;
5832         struct hwtstamp_config config;
5833         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5834         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5835         u32 tsync_rx_cfg = 0;
5836         bool is_l4 = false;
5837         bool is_l2 = false;
5838         u32 regval;
5839
5840         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5841                 return -EFAULT;
5842
5843         /* reserved for future extensions */
5844         if (config.flags)
5845                 return -EINVAL;
5846
5847         switch (config.tx_type) {
5848         case HWTSTAMP_TX_OFF:
5849                 tsync_tx_ctl = 0;
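                 /* fall through - OFF only clears the enable bit */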
5850         case HWTSTAMP_TX_ON:
5851                 break;
5852         default:
5853                 return -ERANGE;
5854         }
5855
5856         switch (config.rx_filter) {
5857         case HWTSTAMP_FILTER_NONE:
5858                 tsync_rx_ctl = 0;
5859                 break;
5860         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5861         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5862         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5863         case HWTSTAMP_FILTER_ALL:
5864                 /*
5865                  * the TSYNCRXCFG register can only select one message type,
5866                  * so it is not possible to time stamp both Sync and Delay_Req
5867                  * messages => fall back to time stamping all packets
5868                  */
5869                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5870                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5871                 break;
5872         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5873                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5874                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5875                 is_l4 = true;
5876                 break;
5877         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5878                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5879                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5880                 is_l4 = true;
5881                 break;
5882         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5883         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5884                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5885                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5886                 is_l2 = true;
5887                 is_l4 = true;
5888                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5889                 break;
5890         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5891         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5892                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5893                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5894                 is_l2 = true;
5895                 is_l4 = true;
5896                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5897                 break;
5898         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5899         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5900         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5901                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5902                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5903                 is_l2 = true;
5904                 break;
5905         default:
5906                 return -ERANGE;
5907         }
5908
5909         if (hw->mac.type == e1000_82575) {
5910                 if (tsync_rx_ctl | tsync_tx_ctl)
5911                         return -EINVAL;
5912                 return 0;
5913         }
5914
5915         /*
5916          * Per-packet timestamping only works if all packets are
5917          * timestamped, so enable timestamping in all packets as
5918          * long as one rx filter was configured.
5919          */
5920         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
5921                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5922                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5923         }
5924
5925         /* enable/disable TX */
5926         regval = rd32(E1000_TSYNCTXCTL);
5927         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5928         regval |= tsync_tx_ctl;
5929         wr32(E1000_TSYNCTXCTL, regval);
5930
5931         /* enable/disable RX */
5932         regval = rd32(E1000_TSYNCRXCTL);
5933         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5934         regval |= tsync_rx_ctl;
5935         wr32(E1000_TSYNCRXCTL, regval);
5936
5937         /* define which PTP packets are time stamped */
5938         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5939
5940         /* define ethertype filter for timestamped packets */
5941         if (is_l2)
5942                 wr32(E1000_ETQF(3),
5943                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5944                                  E1000_ETQF_1588 | /* enable timestamping */
5945                                  ETH_P_1588));     /* 1588 eth protocol type */
5946         else
5947                 wr32(E1000_ETQF(3), 0);
5948
5949 #define PTP_PORT 319
5950         /* L4 Queue Filter[3]: filter by destination port and protocol */
5951         if (is_l4) {
5952                 u32 ftqf = (IPPROTO_UDP /* UDP */
5953                         | E1000_FTQF_VF_BP /* VF not compared */
5954                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5955                         | E1000_FTQF_MASK); /* mask all inputs */
5956                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5957
5958                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5959                 wr32(E1000_IMIREXT(3),
5960                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5961                 if (hw->mac.type == e1000_82576) {
5962                         /* enable source port check */
5963                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5964                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5965                 }
5966                 wr32(E1000_FTQF(3), ftqf);
5967         } else {
5968                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5969         }
5970         wrfl();
5971
5972         adapter->hwtstamp_config = config;
5973
5974         /* clear TX/RX time stamp registers, just to be sure */
5975         regval = rd32(E1000_TXSTMPH);
5976         regval = rd32(E1000_RXSTMPH);
5977
5978         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5979                 -EFAULT : 0;
5980 }
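
/*
 * Example (hypothetical userspace usage, not part of this driver): enabling
 * transmit time stamping and a PTPv2 event receive filter via SIOCSHWTSTAMP.
 * "fd" is assumed to be any open socket descriptor and "eth0" the igb port.
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *	};
 *	struct ifreq ifr;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *
 * On return cfg.rx_filter holds the filter that was actually programmed,
 * which may be broader than the one requested (see the fallbacks above).
 */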
5981
5982 /**
5983  * igb_ioctl - dispatch device-specific ioctls
5984  * @netdev: network interface device structure
5985  * @ifr: interface request structure
5986  * @cmd: ioctl command
5987  **/
5988 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5989 {
5990         switch (cmd) {
5991         case SIOCGMIIPHY:
5992         case SIOCGMIIREG:
5993         case SIOCSMIIREG:
5994                 return igb_mii_ioctl(netdev, ifr, cmd);
5995         case SIOCSHWTSTAMP:
5996                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5997         default:
5998                 return -EOPNOTSUPP;
5999         }
6000 }
6001
6002 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6003 {
6004         struct igb_adapter *adapter = hw->back;
6005         u16 cap_offset;
6006
6007         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6008         if (!cap_offset)
6009                 return -E1000_ERR_CONFIG;
6010
6011         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6012
6013         return 0;
6014 }
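
/*
 * Illustrative use only (assumes the PCI_EXP_* defines from pci_regs.h):
 * reading the Link Status word, which sits at offset PCI_EXP_LNKSTA (0x12)
 * from the PCIe capability located above.
 *
 *	u16 lnksta, cls;
 *
 *	if (!igb_read_pcie_cap_reg(hw, PCI_EXP_LNKSTA, &lnksta))
 *		cls = lnksta & PCI_EXP_LNKSTA_CLS;
 */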
6015
6016 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6017 {
6018         struct igb_adapter *adapter = hw->back;
6019         u16 cap_offset;
6020
6021         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6022         if (!cap_offset)
6023                 return -E1000_ERR_CONFIG;
6024
6025         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6026
6027         return 0;
6028 }
6029
6030 static void igb_vlan_rx_register(struct net_device *netdev,
6031                                  struct vlan_group *grp)
6032 {
6033         struct igb_adapter *adapter = netdev_priv(netdev);
6034         struct e1000_hw *hw = &adapter->hw;
6035         u32 ctrl, rctl;
6036
6037         igb_irq_disable(adapter);
6038         adapter->vlgrp = grp;
6039
6040         if (grp) {
6041                 /* enable VLAN tag insert/strip */
6042                 ctrl = rd32(E1000_CTRL);
6043                 ctrl |= E1000_CTRL_VME;
6044                 wr32(E1000_CTRL, ctrl);
6045
6046                 /* Disable CFI check */
6047                 rctl = rd32(E1000_RCTL);
6048                 rctl &= ~E1000_RCTL_CFIEN;
6049                 wr32(E1000_RCTL, rctl);
6050         } else {
6051                 /* disable VLAN tag insert/strip */
6052                 ctrl = rd32(E1000_CTRL);
6053                 ctrl &= ~E1000_CTRL_VME;
6054                 wr32(E1000_CTRL, ctrl);
6055         }
6056
6057         igb_rlpml_set(adapter);
6058
6059         if (!test_bit(__IGB_DOWN, &adapter->state))
6060                 igb_irq_enable(adapter);
6061 }
6062
6063 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6064 {
6065         struct igb_adapter *adapter = netdev_priv(netdev);
6066         struct e1000_hw *hw = &adapter->hw;
6067         int pf_id = adapter->vfs_allocated_count;
6068
6069         /* attempt to add filter to vlvf array */
6070         igb_vlvf_set(adapter, vid, true, pf_id);
6071
6072         /* add the filter since PF can receive vlans w/o entry in vlvf */
6073         igb_vfta_set(hw, vid, true);
6074 }
6075
6076 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6077 {
6078         struct igb_adapter *adapter = netdev_priv(netdev);
6079         struct e1000_hw *hw = &adapter->hw;
6080         int pf_id = adapter->vfs_allocated_count;
6081         s32 err;
6082
6083         igb_irq_disable(adapter);
6084         vlan_group_set_device(adapter->vlgrp, vid, NULL);
6085
6086         if (!test_bit(__IGB_DOWN, &adapter->state))
6087                 igb_irq_enable(adapter);
6088
6089         /* remove vlan from VLVF table array */
6090         err = igb_vlvf_set(adapter, vid, false, pf_id);
6091
6092         /* if vid was not present in VLVF just remove it from table */
6093         if (err)
6094                 igb_vfta_set(hw, vid, false);
6095 }
6096
6097 static void igb_restore_vlan(struct igb_adapter *adapter)
6098 {
6099         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6100
6101         if (adapter->vlgrp) {
6102                 u16 vid;
6103                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
6104                         if (!vlan_group_get_device(adapter->vlgrp, vid))
6105                                 continue;
6106                         igb_vlan_rx_add_vid(adapter->netdev, vid);
6107                 }
6108         }
6109 }
6110
6111 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
6112 {
6113         struct pci_dev *pdev = adapter->pdev;
6114         struct e1000_mac_info *mac = &adapter->hw.mac;
6115
6116         mac->autoneg = 0;
6117
6118         /* Fiber NICs only allow 1000 Mbps full duplex */
6119         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6120                 spddplx != (SPEED_1000 + DUPLEX_FULL)) {
6121                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6122                 return -EINVAL;
6123         }
6124
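         /*
          * spddplx arrives as the sum SPEED_* + DUPLEX_*; for example a
          * forced 100 Mbps full duplex request is SPEED_100 + DUPLEX_FULL
          * (100 + 1 = 101) and maps to ADVERTISE_100_FULL below.
          */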
6125         switch (spddplx) {
6126         case SPEED_10 + DUPLEX_HALF:
6127                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6128                 break;
6129         case SPEED_10 + DUPLEX_FULL:
6130                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6131                 break;
6132         case SPEED_100 + DUPLEX_HALF:
6133                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6134                 break;
6135         case SPEED_100 + DUPLEX_FULL:
6136                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6137                 break;
6138         case SPEED_1000 + DUPLEX_FULL:
6139                 mac->autoneg = 1;
6140                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6141                 break;
6142         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6143         default:
6144                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6145                 return -EINVAL;
6146         }
6147         return 0;
6148 }
6149
6150 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6151 {
6152         struct net_device *netdev = pci_get_drvdata(pdev);
6153         struct igb_adapter *adapter = netdev_priv(netdev);
6154         struct e1000_hw *hw = &adapter->hw;
6155         u32 ctrl, rctl, status;
6156         u32 wufc = adapter->wol;
6157 #ifdef CONFIG_PM
6158         int retval = 0;
6159 #endif
6160
6161         netif_device_detach(netdev);
6162
6163         if (netif_running(netdev))
6164                 igb_close(netdev);
6165
6166         igb_clear_interrupt_scheme(adapter);
6167
6168 #ifdef CONFIG_PM
6169         retval = pci_save_state(pdev);
6170         if (retval)
6171                 return retval;
6172 #endif
6173
6174         status = rd32(E1000_STATUS);
6175         if (status & E1000_STATUS_LU)
6176                 wufc &= ~E1000_WUFC_LNKC;
6177
6178         if (wufc) {
6179                 igb_setup_rctl(adapter);
6180                 igb_set_rx_mode(netdev);
6181
6182                 /* turn on all-multi mode if wake on multicast is enabled */
6183                 if (wufc & E1000_WUFC_MC) {
6184                         rctl = rd32(E1000_RCTL);
6185                         rctl |= E1000_RCTL_MPE;
6186                         wr32(E1000_RCTL, rctl);
6187                 }
6188
6189                 ctrl = rd32(E1000_CTRL);
6190                 /* advertise wake from D3Cold */
6191                 #define E1000_CTRL_ADVD3WUC 0x00100000
6192                 /* phy power management enable */
6193                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6194                 ctrl |= E1000_CTRL_ADVD3WUC;
6195                 wr32(E1000_CTRL, ctrl);
6196
6197                 /* Allow time for pending master requests to run */
6198                 igb_disable_pcie_master(hw);
6199
6200                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6201                 wr32(E1000_WUFC, wufc);
6202         } else {
6203                 wr32(E1000_WUC, 0);
6204                 wr32(E1000_WUFC, 0);
6205         }
6206
6207         *enable_wake = wufc || adapter->en_mng_pt;
6208         if (!*enable_wake)
6209                 igb_power_down_link(adapter);
6210         else
6211                 igb_power_up_link(adapter);
6212
6213         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6214          * would have already happened in close and is redundant. */
6215         igb_release_hw_control(adapter);
6216
6217         pci_disable_device(pdev);
6218
6219         return 0;
6220 }
6221
6222 #ifdef CONFIG_PM
6223 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6224 {
6225         int retval;
6226         bool wake;
6227
6228         retval = __igb_shutdown(pdev, &wake);
6229         if (retval)
6230                 return retval;
6231
6232         if (wake) {
6233                 pci_prepare_to_sleep(pdev);
6234         } else {
6235                 pci_wake_from_d3(pdev, false);
6236                 pci_set_power_state(pdev, PCI_D3hot);
6237         }
6238
6239         return 0;
6240 }
6241
6242 static int igb_resume(struct pci_dev *pdev)
6243 {
6244         struct net_device *netdev = pci_get_drvdata(pdev);
6245         struct igb_adapter *adapter = netdev_priv(netdev);
6246         struct e1000_hw *hw = &adapter->hw;
6247         int err;
6248
6249         pci_set_power_state(pdev, PCI_D0);
6250         pci_restore_state(pdev);
6251         pci_save_state(pdev);
6252
6253         err = pci_enable_device_mem(pdev);
6254         if (err) {
6255                 dev_err(&pdev->dev,
6256                         "igb: Cannot enable PCI device from suspend\n");
6257                 return err;
6258         }
6259         pci_set_master(pdev);
6260
6261         pci_enable_wake(pdev, PCI_D3hot, 0);
6262         pci_enable_wake(pdev, PCI_D3cold, 0);
6263
6264         if (igb_init_interrupt_scheme(adapter)) {
6265                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6266                 return -ENOMEM;
6267         }
6268
6269         igb_reset(adapter);
6270
6271         /* let the f/w know that the h/w is now under the control of the
6272          * driver. */
6273         igb_get_hw_control(adapter);
6274
6275         wr32(E1000_WUS, ~0);
6276
6277         if (netif_running(netdev)) {
6278                 err = igb_open(netdev);
6279                 if (err)
6280                         return err;
6281         }
6282
6283         netif_device_attach(netdev);
6284
6285         return 0;
6286 }
6287 #endif
6288
6289 static void igb_shutdown(struct pci_dev *pdev)
6290 {
6291         bool wake;
6292
6293         __igb_shutdown(pdev, &wake);
6294
6295         if (system_state == SYSTEM_POWER_OFF) {
6296                 pci_wake_from_d3(pdev, wake);
6297                 pci_set_power_state(pdev, PCI_D3hot);
6298         }
6299 }
6300
6301 #ifdef CONFIG_NET_POLL_CONTROLLER
6302 /*
6303  * Polling 'interrupt' - used by things like netconsole to send skbs
6304  * without having to re-enable interrupts. It's not called while
6305  * the interrupt routine is executing.
6306  */
6307 static void igb_netpoll(struct net_device *netdev)
6308 {
6309         struct igb_adapter *adapter = netdev_priv(netdev);
6310         struct e1000_hw *hw = &adapter->hw;
6311         int i;
6312
6313         if (!adapter->msix_entries) {
6314                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6315                 igb_irq_disable(adapter);
6316                 napi_schedule(&q_vector->napi);
6317                 return;
6318         }
6319
6320         for (i = 0; i < adapter->num_q_vectors; i++) {
6321                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6322                 wr32(E1000_EIMC, q_vector->eims_value);
6323                 napi_schedule(&q_vector->napi);
6324         }
6325 }
6326 #endif /* CONFIG_NET_POLL_CONTROLLER */
6327
6328 /**
6329  * igb_io_error_detected - called when PCI error is detected
6330  * @pdev: Pointer to PCI device
6331  * @state: The current pci connection state
6332  *
6333  * This function is called after a PCI bus error affecting
6334  * this device has been detected.
6335  */
6336 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6337                                               pci_channel_state_t state)
6338 {
6339         struct net_device *netdev = pci_get_drvdata(pdev);
6340         struct igb_adapter *adapter = netdev_priv(netdev);
6341
6342         netif_device_detach(netdev);
6343
6344         if (state == pci_channel_io_perm_failure)
6345                 return PCI_ERS_RESULT_DISCONNECT;
6346
6347         if (netif_running(netdev))
6348                 igb_down(adapter);
6349         pci_disable_device(pdev);
6350
6351         /* Request a slot reset. */
6352         return PCI_ERS_RESULT_NEED_RESET;
6353 }
6354
6355 /**
6356  * igb_io_slot_reset - called after the pci bus has been reset.
6357  * @pdev: Pointer to PCI device
6358  *
6359  * Restart the card from scratch, as if from a cold-boot. Implementation
6360  * resembles the first-half of the igb_resume routine.
6361  */
6362 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6363 {
6364         struct net_device *netdev = pci_get_drvdata(pdev);
6365         struct igb_adapter *adapter = netdev_priv(netdev);
6366         struct e1000_hw *hw = &adapter->hw;
6367         pci_ers_result_t result;
6368         int err;
6369
6370         if (pci_enable_device_mem(pdev)) {
6371                 dev_err(&pdev->dev,
6372                         "Cannot re-enable PCI device after reset.\n");
6373                 result = PCI_ERS_RESULT_DISCONNECT;
6374         } else {
6375                 pci_set_master(pdev);
6376                 pci_restore_state(pdev);
6377                 pci_save_state(pdev);
6378
6379                 pci_enable_wake(pdev, PCI_D3hot, 0);
6380                 pci_enable_wake(pdev, PCI_D3cold, 0);
6381
6382                 igb_reset(adapter);
6383                 wr32(E1000_WUS, ~0);
6384                 result = PCI_ERS_RESULT_RECOVERED;
6385         }
6386
6387         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6388         if (err) {
6389                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6390                         "failed 0x%0x\n", err);
6391                 /* non-fatal, continue */
6392         }
6393
6394         return result;
6395 }
6396
6397 /**
6398  * igb_io_resume - called when traffic can start flowing again.
6399  * @pdev: Pointer to PCI device
6400  *
6401  * This callback is called when the error recovery driver tells us that
6402  * it's OK to resume normal operation. Implementation resembles the
6403  * second-half of the igb_resume routine.
6404  */
6405 static void igb_io_resume(struct pci_dev *pdev)
6406 {
6407         struct net_device *netdev = pci_get_drvdata(pdev);
6408         struct igb_adapter *adapter = netdev_priv(netdev);
6409
6410         if (netif_running(netdev)) {
6411                 if (igb_up(adapter)) {
6412                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6413                         return;
6414                 }
6415         }
6416
6417         netif_device_attach(netdev);
6418
6419         /* let the f/w know that the h/w is now under the control of the
6420          * driver. */
6421         igb_get_hw_control(adapter);
6422 }
6423
6424 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6425                              u8 qsel)
6426 {
6427         u32 rar_low, rar_high;
6428         struct e1000_hw *hw = &adapter->hw;
6429
6430         /* HW expects these in little endian so we reverse the byte order
6431          * from network order (big endian) to little endian
6432          */
6433         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6434                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6435         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
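         /*
          * e.g. the (hypothetical) address 00:1b:21:aa:bb:cc becomes
          * rar_low = 0xaa211b00 and rar_high = 0x0000ccbb before the
          * valid and pool-select bits are added below.
          */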
6436
6437         /* Indicate to hardware the Address is Valid. */
6438         rar_high |= E1000_RAH_AV;
6439
6440         if (hw->mac.type == e1000_82575)
6441                 rar_high |= E1000_RAH_POOL_1 * qsel;
6442         else
6443                 rar_high |= E1000_RAH_POOL_1 << qsel;
6444
6445         wr32(E1000_RAL(index), rar_low);
6446         wrfl();
6447         wr32(E1000_RAH(index), rar_high);
6448         wrfl();
6449 }
6450
6451 static int igb_set_vf_mac(struct igb_adapter *adapter,
6452                           int vf, unsigned char *mac_addr)
6453 {
6454         struct e1000_hw *hw = &adapter->hw;
6455         /* VF MAC addresses start at the end of the receive address registers
6456          * and move towards the first, so a collision should not be possible */
6457         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6458
6459         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6460
6461         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6462
6463         return 0;
6464 }
6465
6466 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6467 {
6468         struct igb_adapter *adapter = netdev_priv(netdev);
6469         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6470                 return -EINVAL;
6471         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6472         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6473         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6474                                       " change effective.\n");
6475         if (test_bit(__IGB_DOWN, &adapter->state)) {
6476                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6477                          " but the PF device is not up.\n");
6478                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6479                          " attempting to use the VF device.\n");
6480         }
6481         return igb_set_vf_mac(adapter, vf, mac);
6482 }
6483
6484 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6485 {
6486         return -EOPNOTSUPP;
6487 }
6488
6489 static int igb_ndo_get_vf_config(struct net_device *netdev,
6490                                  int vf, struct ifla_vf_info *ivi)
6491 {
6492         struct igb_adapter *adapter = netdev_priv(netdev);
6493         if (vf >= adapter->vfs_allocated_count)
6494                 return -EINVAL;
6495         ivi->vf = vf;
6496         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6497         ivi->tx_rate = 0;
6498         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6499         ivi->qos = adapter->vf_data[vf].pf_qos;
6500         return 0;
6501 }
6502
6503 static void igb_vmm_control(struct igb_adapter *adapter)
6504 {
6505         struct e1000_hw *hw = &adapter->hw;
6506         u32 reg;
6507
6508         switch (hw->mac.type) {
6509         case e1000_82575:
6510         default:
6511                 /* replication is not supported for 82575 */
6512                 return;
6513         case e1000_82576:
6514                 /* notify HW that the MAC is adding vlan tags */
6515                 reg = rd32(E1000_DTXCTL);
6516                 reg |= E1000_DTXCTL_VLAN_ADDED;
6517                 wr32(E1000_DTXCTL, reg);
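                 /* fall through - 82576 also programs RPLOLR below */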
6518         case e1000_82580:
6519                 /* enable replication vlan tag stripping */
6520                 reg = rd32(E1000_RPLOLR);
6521                 reg |= E1000_RPLOLR_STRVLAN;
6522                 wr32(E1000_RPLOLR, reg);
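                 /* fall through */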
6523         case e1000_i350:
6524                 /* none of the above registers are supported by i350 */
6525                 break;
6526         }
6527
6528         if (adapter->vfs_allocated_count) {
6529                 igb_vmdq_set_loopback_pf(hw, true);
6530                 igb_vmdq_set_replication_pf(hw, true);
6531         } else {
6532                 igb_vmdq_set_loopback_pf(hw, false);
6533                 igb_vmdq_set_replication_pf(hw, false);
6534         }
6535 }
6536
6537 /* igb_main.c */