1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2011 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #include <linux/prefetch.h>
49 #ifdef CONFIG_IGB_DCA
50 #include <linux/dca.h>
51 #endif
52 #include "igb.h"
53
54 #define MAJ 3
55 #define MIN 0
56 #define BUILD 6
57 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
58 __stringify(BUILD) "-k"
59 char igb_driver_name[] = "igb";
60 char igb_driver_version[] = DRV_VERSION;
61 static const char igb_driver_string[] =
62                                 "Intel(R) Gigabit Ethernet Network Driver";
63 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
64
65 static const struct e1000_info *igb_info_tbl[] = {
66         [board_82575] = &e1000_82575_info,
67 };
68
69 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
87         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
88         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
89         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
90         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
91         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
92         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
93         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
94         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
95         /* required last entry */
96         {0, }
97 };
98
99 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
100
101 void igb_reset(struct igb_adapter *);
102 static int igb_setup_all_tx_resources(struct igb_adapter *);
103 static int igb_setup_all_rx_resources(struct igb_adapter *);
104 static void igb_free_all_tx_resources(struct igb_adapter *);
105 static void igb_free_all_rx_resources(struct igb_adapter *);
106 static void igb_setup_mrqc(struct igb_adapter *);
107 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
108 static void __devexit igb_remove(struct pci_dev *pdev);
109 static void igb_init_hw_timer(struct igb_adapter *adapter);
110 static int igb_sw_init(struct igb_adapter *);
111 static int igb_open(struct net_device *);
112 static int igb_close(struct net_device *);
113 static void igb_configure_tx(struct igb_adapter *);
114 static void igb_configure_rx(struct igb_adapter *);
115 static void igb_clean_all_tx_rings(struct igb_adapter *);
116 static void igb_clean_all_rx_rings(struct igb_adapter *);
117 static void igb_clean_tx_ring(struct igb_ring *);
118 static void igb_clean_rx_ring(struct igb_ring *);
119 static void igb_set_rx_mode(struct net_device *);
120 static void igb_update_phy_info(unsigned long);
121 static void igb_watchdog(unsigned long);
122 static void igb_watchdog_task(struct work_struct *);
123 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
124 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
125                                                  struct rtnl_link_stats64 *stats);
126 static int igb_change_mtu(struct net_device *, int);
127 static int igb_set_mac(struct net_device *, void *);
128 static void igb_set_uta(struct igb_adapter *adapter);
129 static irqreturn_t igb_intr(int irq, void *);
130 static irqreturn_t igb_intr_msi(int irq, void *);
131 static irqreturn_t igb_msix_other(int irq, void *);
132 static irqreturn_t igb_msix_ring(int irq, void *);
133 #ifdef CONFIG_IGB_DCA
134 static void igb_update_dca(struct igb_q_vector *);
135 static void igb_setup_dca(struct igb_adapter *);
136 #endif /* CONFIG_IGB_DCA */
137 static bool igb_clean_tx_irq(struct igb_q_vector *);
138 static int igb_poll(struct napi_struct *, int);
139 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
140 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
141 static void igb_tx_timeout(struct net_device *);
142 static void igb_reset_task(struct work_struct *);
143 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
144 static void igb_vlan_rx_add_vid(struct net_device *, u16);
145 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
146 static void igb_restore_vlan(struct igb_adapter *);
147 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
148 static void igb_ping_all_vfs(struct igb_adapter *);
149 static void igb_msg_task(struct igb_adapter *);
150 static void igb_vmm_control(struct igb_adapter *);
151 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
152 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
153 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
154 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
155                                int vf, u16 vlan, u8 qos);
156 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
157 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
158                                  struct ifla_vf_info *ivi);
159 static void igb_check_vf_rate_limit(struct igb_adapter *);
160
161 #ifdef CONFIG_PM
162 static int igb_suspend(struct pci_dev *, pm_message_t);
163 static int igb_resume(struct pci_dev *);
164 #endif
165 static void igb_shutdown(struct pci_dev *);
166 #ifdef CONFIG_IGB_DCA
167 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
168 static struct notifier_block dca_notifier = {
169         .notifier_call  = igb_notify_dca,
170         .next           = NULL,
171         .priority       = 0
172 };
173 #endif
174 #ifdef CONFIG_NET_POLL_CONTROLLER
175 /* for netdump / net console */
176 static void igb_netpoll(struct net_device *);
177 #endif
178 #ifdef CONFIG_PCI_IOV
179 static unsigned int max_vfs = 0;
180 module_param(max_vfs, uint, 0);
181 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
182                  "per physical function");
183 #endif /* CONFIG_PCI_IOV */
184
185 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
186                      pci_channel_state_t);
187 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
188 static void igb_io_resume(struct pci_dev *);
189
190 static struct pci_error_handlers igb_err_handler = {
191         .error_detected = igb_io_error_detected,
192         .slot_reset = igb_io_slot_reset,
193         .resume = igb_io_resume,
194 };
195
196
197 static struct pci_driver igb_driver = {
198         .name     = igb_driver_name,
199         .id_table = igb_pci_tbl,
200         .probe    = igb_probe,
201         .remove   = __devexit_p(igb_remove),
202 #ifdef CONFIG_PM
203         /* Power Management Hooks */
204         .suspend  = igb_suspend,
205         .resume   = igb_resume,
206 #endif
207         .shutdown = igb_shutdown,
208         .err_handler = &igb_err_handler
209 };
210
211 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
212 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
213 MODULE_LICENSE("GPL");
214 MODULE_VERSION(DRV_VERSION);
215
216 struct igb_reg_info {
217         u32 ofs;
218         char *name;
219 };
220
221 static const struct igb_reg_info igb_reg_info_tbl[] = {
222
223         /* General Registers */
224         {E1000_CTRL, "CTRL"},
225         {E1000_STATUS, "STATUS"},
226         {E1000_CTRL_EXT, "CTRL_EXT"},
227
228         /* Interrupt Registers */
229         {E1000_ICR, "ICR"},
230
231         /* RX Registers */
232         {E1000_RCTL, "RCTL"},
233         {E1000_RDLEN(0), "RDLEN"},
234         {E1000_RDH(0), "RDH"},
235         {E1000_RDT(0), "RDT"},
236         {E1000_RXDCTL(0), "RXDCTL"},
237         {E1000_RDBAL(0), "RDBAL"},
238         {E1000_RDBAH(0), "RDBAH"},
239
240         /* TX Registers */
241         {E1000_TCTL, "TCTL"},
242         {E1000_TDBAL(0), "TDBAL"},
243         {E1000_TDBAH(0), "TDBAH"},
244         {E1000_TDLEN(0), "TDLEN"},
245         {E1000_TDH(0), "TDH"},
246         {E1000_TDT(0), "TDT"},
247         {E1000_TXDCTL(0), "TXDCTL"},
248         {E1000_TDFH, "TDFH"},
249         {E1000_TDFT, "TDFT"},
250         {E1000_TDFHS, "TDFHS"},
251         {E1000_TDFPC, "TDFPC"},
252
253         /* List Terminator */
254         {}
255 };
256
257 /*
258  * igb_regdump - register printout routine
259  */
260 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
261 {
262         int n = 0;
263         char rname[16];
264         u32 regs[8];
265
266         switch (reginfo->ofs) {
267         case E1000_RDLEN(0):
268                 for (n = 0; n < 4; n++)
269                         regs[n] = rd32(E1000_RDLEN(n));
270                 break;
271         case E1000_RDH(0):
272                 for (n = 0; n < 4; n++)
273                         regs[n] = rd32(E1000_RDH(n));
274                 break;
275         case E1000_RDT(0):
276                 for (n = 0; n < 4; n++)
277                         regs[n] = rd32(E1000_RDT(n));
278                 break;
279         case E1000_RXDCTL(0):
280                 for (n = 0; n < 4; n++)
281                         regs[n] = rd32(E1000_RXDCTL(n));
282                 break;
283         case E1000_RDBAL(0):
284                 for (n = 0; n < 4; n++)
285                         regs[n] = rd32(E1000_RDBAL(n));
286                 break;
287         case E1000_RDBAH(0):
288                 for (n = 0; n < 4; n++)
289                         regs[n] = rd32(E1000_RDBAH(n));
290                 break;
291         case E1000_TDBAL(0):
292                 for (n = 0; n < 4; n++)
293                         regs[n] = rd32(E1000_TDBAL(n));
294                 break;
295         case E1000_TDBAH(0):
296                 for (n = 0; n < 4; n++)
297                         regs[n] = rd32(E1000_TDBAH(n));
298                 break;
299         case E1000_TDLEN(0):
300                 for (n = 0; n < 4; n++)
301                         regs[n] = rd32(E1000_TDLEN(n));
302                 break;
303         case E1000_TDH(0):
304                 for (n = 0; n < 4; n++)
305                         regs[n] = rd32(E1000_TDH(n));
306                 break;
307         case E1000_TDT(0):
308                 for (n = 0; n < 4; n++)
309                         regs[n] = rd32(E1000_TDT(n));
310                 break;
311         case E1000_TXDCTL(0):
312                 for (n = 0; n < 4; n++)
313                         regs[n] = rd32(E1000_TXDCTL(n));
314                 break;
315         default:
316                 printk(KERN_INFO "%-15s %08x\n",
317                         reginfo->name, rd32(reginfo->ofs));
318                 return;
319         }
320
321         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
322         printk(KERN_INFO "%-15s ", rname);
323         for (n = 0; n < 4; n++)
324                 printk(KERN_CONT "%08x ", regs[n]);
325         printk(KERN_CONT "\n");
326 }
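/*
 * Example output shape (illustrative only): for a per-queue register such
 * as RDH the routine prints a single line, "RDH[0-3]" followed by the four
 * per-queue values read above; for any register not in the switch it prints
 * just the name and a single value.
 */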
327
328 /*
329  * igb_dump - Print registers, tx-rings and rx-rings
330  */
331 static void igb_dump(struct igb_adapter *adapter)
332 {
333         struct net_device *netdev = adapter->netdev;
334         struct e1000_hw *hw = &adapter->hw;
335         struct igb_reg_info *reginfo;
336         int n = 0;
337         struct igb_ring *tx_ring;
338         union e1000_adv_tx_desc *tx_desc;
339         struct my_u0 { u64 a; u64 b; } *u0;
340         struct igb_buffer *buffer_info;
341         struct igb_ring *rx_ring;
342         union e1000_adv_rx_desc *rx_desc;
343         u32 staterr;
344         int i = 0;
345
346         if (!netif_msg_hw(adapter))
347                 return;
348
349         /* Print netdevice Info */
350         if (netdev) {
351                 dev_info(&adapter->pdev->dev, "Net device Info\n");
352                 printk(KERN_INFO "Device Name     state            "
353                         "trans_start      last_rx\n");
354                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
355                 netdev->name,
356                 netdev->state,
357                 netdev->trans_start,
358                 netdev->last_rx);
359         }
360
361         /* Print Registers */
362         dev_info(&adapter->pdev->dev, "Register Dump\n");
363         printk(KERN_INFO " Register Name   Value\n");
364         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
365              reginfo->name; reginfo++) {
366                 igb_regdump(hw, reginfo);
367         }
368
369         /* Print TX Ring Summary */
370         if (!netdev || !netif_running(netdev))
371                 goto exit;
372
373         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
374         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
375                 " leng ntw timestamp\n");
376         for (n = 0; n < adapter->num_tx_queues; n++) {
377                 tx_ring = adapter->tx_ring[n];
378                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
379                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
380                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
381                            (u64)buffer_info->dma,
382                            buffer_info->length,
383                            buffer_info->next_to_watch,
384                            (u64)buffer_info->time_stamp);
385         }
386
387         /* Print TX Rings */
388         if (!netif_msg_tx_done(adapter))
389                 goto rx_ring_summary;
390
391         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
392
393         /* Transmit Descriptor Formats
394          *
395          * Advanced Transmit Descriptor
396          *   +--------------------------------------------------------------+
397          * 0 |         Buffer Address [63:0]                                |
398          *   +--------------------------------------------------------------+
399          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
400          *   +--------------------------------------------------------------+
401          *   63      46 45    40 39 38 36 35 32 31   24             15       0
402          */
403
404         for (n = 0; n < adapter->num_tx_queues; n++) {
405                 tx_ring = adapter->tx_ring[n];
406                 printk(KERN_INFO "------------------------------------\n");
407                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
408                 printk(KERN_INFO "------------------------------------\n");
409                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
410                         "[PlPOCIStDDM Ln] [bi->dma       ] "
411                         "leng  ntw timestamp        bi->skb\n");
412
413                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
414                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
415                         buffer_info = &tx_ring->buffer_info[i];
416                         u0 = (struct my_u0 *)tx_desc;
417                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
418                                 " %04X  %3X %016llX %p", i,
419                                 le64_to_cpu(u0->a),
420                                 le64_to_cpu(u0->b),
421                                 (u64)buffer_info->dma,
422                                 buffer_info->length,
423                                 buffer_info->next_to_watch,
424                                 (u64)buffer_info->time_stamp,
425                                 buffer_info->skb);
426                         if (i == tx_ring->next_to_use &&
427                                 i == tx_ring->next_to_clean)
428                                 printk(KERN_CONT " NTC/U\n");
429                         else if (i == tx_ring->next_to_use)
430                                 printk(KERN_CONT " NTU\n");
431                         else if (i == tx_ring->next_to_clean)
432                                 printk(KERN_CONT " NTC\n");
433                         else
434                                 printk(KERN_CONT "\n");
435
436                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
437                                 print_hex_dump(KERN_INFO, "",
438                                         DUMP_PREFIX_ADDRESS,
439                                         16, 1, phys_to_virt(buffer_info->dma),
440                                         buffer_info->length, true);
441                 }
442         }
443
444         /* Print RX Rings Summary */
445 rx_ring_summary:
446         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
447         printk(KERN_INFO "Queue [NTU] [NTC]\n");
448         for (n = 0; n < adapter->num_rx_queues; n++) {
449                 rx_ring = adapter->rx_ring[n];
450                 printk(KERN_INFO " %5d %5X %5X\n", n,
451                            rx_ring->next_to_use, rx_ring->next_to_clean);
452         }
453
454         /* Print RX Rings */
455         if (!netif_msg_rx_status(adapter))
456                 goto exit;
457
458         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
459
460         /* Advanced Receive Descriptor (Read) Format
461          *    63                                           1        0
462          *    +-----------------------------------------------------+
463          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
464          *    +----------------------------------------------+------+
465          *  8 |       Header Buffer Address [63:1]           |  DD  |
466          *    +-----------------------------------------------------+
467          *
468          *
469          * Advanced Receive Descriptor (Write-Back) Format
470          *
471          *   63       48 47    32 31  30      21 20 17 16   4 3     0
472          *   +------------------------------------------------------+
473          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
474          *   | Checksum   Ident  |   |           |    | Type | Type |
475          *   +------------------------------------------------------+
476          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
477          *   +------------------------------------------------------+
478          *   63       48 47    32 31            20 19               0
479          */
480
481         for (n = 0; n < adapter->num_rx_queues; n++) {
482                 rx_ring = adapter->rx_ring[n];
483                 printk(KERN_INFO "------------------------------------\n");
484                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
485                 printk(KERN_INFO "------------------------------------\n");
486                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
487                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
488                         "<-- Adv Rx Read format\n");
489                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
490                         "[vl er S cks ln] ---------------- [bi->skb] "
491                         "<-- Adv Rx Write-Back format\n");
492
493                 for (i = 0; i < rx_ring->count; i++) {
494                         buffer_info = &rx_ring->buffer_info[i];
495                         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
496                         u0 = (struct my_u0 *)rx_desc;
497                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
498                         if (staterr & E1000_RXD_STAT_DD) {
499                                 /* Descriptor Done */
500                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
501                                         "%016llX ---------------- %p", i,
502                                         le64_to_cpu(u0->a),
503                                         le64_to_cpu(u0->b),
504                                         buffer_info->skb);
505                         } else {
506                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
507                                         "%016llX %016llX %p", i,
508                                         le64_to_cpu(u0->a),
509                                         le64_to_cpu(u0->b),
510                                         (u64)buffer_info->dma,
511                                         buffer_info->skb);
512
513                                 if (netif_msg_pktdata(adapter)) {
514                                         print_hex_dump(KERN_INFO, "",
515                                                 DUMP_PREFIX_ADDRESS,
516                                                 16, 1,
517                                                 phys_to_virt(buffer_info->dma),
518                                                 rx_ring->rx_buffer_len, true);
519                                         if (rx_ring->rx_buffer_len
520                                                 < IGB_RXBUFFER_1024)
521                                                 print_hex_dump(KERN_INFO, "",
522                                                   DUMP_PREFIX_ADDRESS,
523                                                   16, 1,
524                                                   phys_to_virt(
525                                                     buffer_info->page_dma +
526                                                     buffer_info->page_offset),
527                                                   PAGE_SIZE/2, true);
528                                 }
529                         }
530
531                         if (i == rx_ring->next_to_use)
532                                 printk(KERN_CONT " NTU\n");
533                         else if (i == rx_ring->next_to_clean)
534                                 printk(KERN_CONT " NTC\n");
535                         else
536                                 printk(KERN_CONT "\n");
537
538                 }
539         }
540
541 exit:
542         return;
543 }
544
545
546 /**
547  * igb_read_clock - read raw cycle counter (to be used by time counter)
548  */
549 static cycle_t igb_read_clock(const struct cyclecounter *tc)
550 {
551         struct igb_adapter *adapter =
552                 container_of(tc, struct igb_adapter, cycles);
553         struct e1000_hw *hw = &adapter->hw;
554         u64 stamp = 0;
555         int shift = 0;
556
557         /*
558          * The timestamp latches on lowest register read. For the 82580
559          * the lowest register is SYSTIMR instead of SYSTIML.  However we never
560          * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
561          */
562         if (hw->mac.type == e1000_82580) {
563                 stamp = rd32(E1000_SYSTIMR) >> 8;
564                 shift = IGB_82580_TSYNC_SHIFT;
565         }
566
567         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
568         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
569         return stamp;
570 }
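/*
 * Sketch of the value assembled above (describing the code, not the
 * hardware spec): for parts other than the 82580, shift stays 0 and the
 * returned cycle count is simply SYSTIMH:SYSTIML.  On the 82580 the
 * SYSTIMR residue (shifted down by 8) occupies the low bits and
 * SYSTIML/SYSTIMH are shifted up by IGB_82580_TSYNC_SHIFT before being
 * OR'd in.
 */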
571
572 /**
573  * igb_get_hw_dev - return device
574  * used by hardware layer to print debugging information
575  **/
576 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
577 {
578         struct igb_adapter *adapter = hw->back;
579         return adapter->netdev;
580 }
581
582 /**
583  * igb_init_module - Driver Registration Routine
584  *
585  * igb_init_module is the first routine called when the driver is
586  * loaded. All it does is register with the PCI subsystem.
587  **/
588 static int __init igb_init_module(void)
589 {
590         int ret;
591         printk(KERN_INFO "%s - version %s\n",
592                igb_driver_string, igb_driver_version);
593
594         printk(KERN_INFO "%s\n", igb_copyright);
595
596 #ifdef CONFIG_IGB_DCA
597         dca_register_notify(&dca_notifier);
598 #endif
599         ret = pci_register_driver(&igb_driver);
600         return ret;
601 }
602
603 module_init(igb_init_module);
604
605 /**
606  * igb_exit_module - Driver Exit Cleanup Routine
607  *
608  * igb_exit_module is called just before the driver is removed
609  * from memory.
610  **/
611 static void __exit igb_exit_module(void)
612 {
613 #ifdef CONFIG_IGB_DCA
614         dca_unregister_notify(&dca_notifier);
615 #endif
616         pci_unregister_driver(&igb_driver);
617 }
618
619 module_exit(igb_exit_module);
620
621 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
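/*
 * Worked examples of the interleaving above (illustrative only):
 * Q_IDX_82576(0) = 0, Q_IDX_82576(1) = 8, Q_IDX_82576(2) = 1,
 * Q_IDX_82576(3) = 9 - so software ring i lands on hardware queue i/2
 * for even i and 8 + i/2 (integer division) for odd i.
 */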
622 /**
623  * igb_cache_ring_register - Descriptor ring to register mapping
624  * @adapter: board private structure to initialize
625  *
626  * Once we know the feature-set enabled for the device, we'll cache
627  * the register offset the descriptor ring is assigned to.
628  **/
629 static void igb_cache_ring_register(struct igb_adapter *adapter)
630 {
631         int i = 0, j = 0;
632         u32 rbase_offset = adapter->vfs_allocated_count;
633
634         switch (adapter->hw.mac.type) {
635         case e1000_82576:
636                 /* The queues are allocated for virtualization such that VF 0
637                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
638                  * In order to avoid collision we start at the first free queue
639                  * and continue consuming queues in the same sequence
640                  */
641                 if (adapter->vfs_allocated_count) {
642                         for (; i < adapter->rss_queues; i++)
643                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
644                                                                Q_IDX_82576(i);
645                 }
646         case e1000_82575:
647         case e1000_82580:
648         case e1000_i350:
649         default:
650                 for (; i < adapter->num_rx_queues; i++)
651                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
652                 for (; j < adapter->num_tx_queues; j++)
653                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
654                 break;
655         }
656 }
657
658 static void igb_free_queues(struct igb_adapter *adapter)
659 {
660         int i;
661
662         for (i = 0; i < adapter->num_tx_queues; i++) {
663                 kfree(adapter->tx_ring[i]);
664                 adapter->tx_ring[i] = NULL;
665         }
666         for (i = 0; i < adapter->num_rx_queues; i++) {
667                 kfree(adapter->rx_ring[i]);
668                 adapter->rx_ring[i] = NULL;
669         }
670         adapter->num_rx_queues = 0;
671         adapter->num_tx_queues = 0;
672 }
673
674 /**
675  * igb_alloc_queues - Allocate memory for all rings
676  * @adapter: board private structure to initialize
677  *
678  * We allocate one ring per queue at run-time since we don't know the
679  * number of queues at compile-time.
680  **/
681 static int igb_alloc_queues(struct igb_adapter *adapter)
682 {
683         struct igb_ring *ring;
684         int i;
685
686         for (i = 0; i < adapter->num_tx_queues; i++) {
687                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
688                 if (!ring)
689                         goto err;
690                 ring->count = adapter->tx_ring_count;
691                 ring->queue_index = i;
692                 ring->dev = &adapter->pdev->dev;
693                 ring->netdev = adapter->netdev;
694                 /* For 82575, context index must be unique per ring. */
695                 if (adapter->hw.mac.type == e1000_82575)
696                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
697                 adapter->tx_ring[i] = ring;
698         }
699
700         for (i = 0; i < adapter->num_rx_queues; i++) {
701                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
702                 if (!ring)
703                         goto err;
704                 ring->count = adapter->rx_ring_count;
705                 ring->queue_index = i;
706                 ring->dev = &adapter->pdev->dev;
707                 ring->netdev = adapter->netdev;
708                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
709                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
710                 /* set flag indicating ring supports SCTP checksum offload */
711                 if (adapter->hw.mac.type >= e1000_82576)
712                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
713                 adapter->rx_ring[i] = ring;
714         }
715
716         igb_cache_ring_register(adapter);
717
718         return 0;
719
720 err:
721         igb_free_queues(adapter);
722
723         return -ENOMEM;
724 }
725
726 #define IGB_N0_QUEUE -1
727 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
728 {
729         u32 msixbm = 0;
730         struct igb_adapter *adapter = q_vector->adapter;
731         struct e1000_hw *hw = &adapter->hw;
732         u32 ivar, index;
733         int rx_queue = IGB_N0_QUEUE;
734         int tx_queue = IGB_N0_QUEUE;
735
736         if (q_vector->rx_ring)
737                 rx_queue = q_vector->rx_ring->reg_idx;
738         if (q_vector->tx_ring)
739                 tx_queue = q_vector->tx_ring->reg_idx;
740
741         switch (hw->mac.type) {
742         case e1000_82575:
743                 /* The 82575 assigns vectors using a bitmask, which matches the
744                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
745                    or more queues to a vector, we write the appropriate bits
746                    into the MSIXBM register for that vector. */
747                 if (rx_queue > IGB_N0_QUEUE)
748                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
749                 if (tx_queue > IGB_N0_QUEUE)
750                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
751                 if (!adapter->msix_entries && msix_vector == 0)
752                         msixbm |= E1000_EIMS_OTHER;
753                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
754                 q_vector->eims_value = msixbm;
755                 break;
756         case e1000_82576:
757                 /* 82576 uses a table-based method for assigning vectors.
758                    Each queue has a single entry in the table to which we write
759                    a vector number along with a "valid" bit.  Sadly, the layout
760                    of the table is somewhat counterintuitive. */
761                 if (rx_queue > IGB_N0_QUEUE) {
762                         index = (rx_queue & 0x7);
763                         ivar = array_rd32(E1000_IVAR0, index);
764                         if (rx_queue < 8) {
765                                 /* vector goes into low byte of register */
766                                 ivar = ivar & 0xFFFFFF00;
767                                 ivar |= msix_vector | E1000_IVAR_VALID;
768                         } else {
769                                 /* vector goes into third byte of register */
770                                 ivar = ivar & 0xFF00FFFF;
771                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
772                         }
773                         array_wr32(E1000_IVAR0, index, ivar);
774                 }
775                 if (tx_queue > IGB_N0_QUEUE) {
776                         index = (tx_queue & 0x7);
777                         ivar = array_rd32(E1000_IVAR0, index);
778                         if (tx_queue < 8) {
779                                 /* vector goes into second byte of register */
780                                 ivar = ivar & 0xFFFF00FF;
781                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
782                         } else {
783                                 /* vector goes into high byte of register */
784                                 ivar = ivar & 0x00FFFFFF;
785                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
786                         }
787                         array_wr32(E1000_IVAR0, index, ivar);
788                 }
789                 q_vector->eims_value = 1 << msix_vector;
790                 break;
791         case e1000_82580:
792         case e1000_i350:
793                 /* The 82580 uses the same table-based approach as the 82576, but
794                    has fewer entries; as a result we carry over for queues greater than 4. */
795                 if (rx_queue > IGB_N0_QUEUE) {
796                         index = (rx_queue >> 1);
797                         ivar = array_rd32(E1000_IVAR0, index);
798                         if (rx_queue & 0x1) {
799                                 /* vector goes into third byte of register */
800                                 ivar = ivar & 0xFF00FFFF;
801                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
802                         } else {
803                                 /* vector goes into low byte of register */
804                                 ivar = ivar & 0xFFFFFF00;
805                                 ivar |= msix_vector | E1000_IVAR_VALID;
806                         }
807                         array_wr32(E1000_IVAR0, index, ivar);
808                 }
809                 if (tx_queue > IGB_N0_QUEUE) {
810                         index = (tx_queue >> 1);
811                         ivar = array_rd32(E1000_IVAR0, index);
812                         if (tx_queue & 0x1) {
813                                 /* vector goes into high byte of register */
814                                 ivar = ivar & 0x00FFFFFF;
815                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
816                         } else {
817                                 /* vector goes into second byte of register */
818                                 ivar = ivar & 0xFFFF00FF;
819                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
820                         }
821                         array_wr32(E1000_IVAR0, index, ivar);
822                 }
823                 q_vector->eims_value = 1 << msix_vector;
824                 break;
825         default:
826                 BUG();
827                 break;
828         }
829
830         /* add q_vector eims value to global eims_enable_mask */
831         adapter->eims_enable_mask |= q_vector->eims_value;
832
833         /* configure q_vector to set itr on first interrupt */
834         q_vector->set_itr = 1;
835 }
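/*
 * IVAR layout sketch for the 82576 branch above (illustrative, not a
 * register specification): each IVAR0[index] register holds four 8-bit
 * entries of the form (msix_vector | E1000_IVAR_VALID).  With
 * index = queue & 0x7, an RX queue uses byte 0 (queues 0-7) or byte 2
 * (queues 8-15) and a TX queue uses byte 1 or byte 3, so RX queues 3 and
 * 11 share IVAR0[3] in bytes 0 and 2 respectively.
 */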
836
837 /**
838  * igb_configure_msix - Configure MSI-X hardware
839  *
840  * igb_configure_msix sets up the hardware to properly
841  * generate MSI-X interrupts.
842  **/
843 static void igb_configure_msix(struct igb_adapter *adapter)
844 {
845         u32 tmp;
846         int i, vector = 0;
847         struct e1000_hw *hw = &adapter->hw;
848
849         adapter->eims_enable_mask = 0;
850
851         /* set vector for other causes, i.e. link changes */
852         switch (hw->mac.type) {
853         case e1000_82575:
854                 tmp = rd32(E1000_CTRL_EXT);
855                 /* enable MSI-X PBA support*/
856                 tmp |= E1000_CTRL_EXT_PBA_CLR;
857
858                 /* Auto-Mask interrupts upon ICR read. */
859                 tmp |= E1000_CTRL_EXT_EIAME;
860                 tmp |= E1000_CTRL_EXT_IRCA;
861
862                 wr32(E1000_CTRL_EXT, tmp);
863
864                 /* enable msix_other interrupt */
865                 array_wr32(E1000_MSIXBM(0), vector++,
866                                       E1000_EIMS_OTHER);
867                 adapter->eims_other = E1000_EIMS_OTHER;
868
869                 break;
870
871         case e1000_82576:
872         case e1000_82580:
873         case e1000_i350:
874                 /* Turn on MSI-X capability first, or our settings
875                  * won't stick.  And it will take days to debug. */
876                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
877                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
878                                 E1000_GPIE_NSICR);
879
880                 /* enable msix_other interrupt */
881                 adapter->eims_other = 1 << vector;
882                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
883
884                 wr32(E1000_IVAR_MISC, tmp);
885                 break;
886         default:
887                 /* do nothing, since nothing else supports MSI-X */
888                 break;
889         } /* switch (hw->mac.type) */
890
891         adapter->eims_enable_mask |= adapter->eims_other;
892
893         for (i = 0; i < adapter->num_q_vectors; i++)
894                 igb_assign_vector(adapter->q_vector[i], vector++);
895
896         wrfl();
897 }
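/*
 * Note on vector ordering: after igb_configure_msix() runs, vector 0
 * carries the "other" causes (link status changes etc.) and the queue
 * vectors follow from vector 1 onward, which matches the order in which
 * igb_request_msix() below requests the corresponding IRQs.
 */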
898
899 /**
900  * igb_request_msix - Initialize MSI-X interrupts
901  *
902  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
903  * kernel.
904  **/
905 static int igb_request_msix(struct igb_adapter *adapter)
906 {
907         struct net_device *netdev = adapter->netdev;
908         struct e1000_hw *hw = &adapter->hw;
909         int i, err = 0, vector = 0;
910
911         err = request_irq(adapter->msix_entries[vector].vector,
912                           igb_msix_other, 0, netdev->name, adapter);
913         if (err)
914                 goto out;
915         vector++;
916
917         for (i = 0; i < adapter->num_q_vectors; i++) {
918                 struct igb_q_vector *q_vector = adapter->q_vector[i];
919
920                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
921
922                 if (q_vector->rx_ring && q_vector->tx_ring)
923                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
924                                 q_vector->rx_ring->queue_index);
925                 else if (q_vector->tx_ring)
926                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
927                                 q_vector->tx_ring->queue_index);
928                 else if (q_vector->rx_ring)
929                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
930                                 q_vector->rx_ring->queue_index);
931                 else
932                         sprintf(q_vector->name, "%s-unused", netdev->name);
933
934                 err = request_irq(adapter->msix_entries[vector].vector,
935                                   igb_msix_ring, 0, q_vector->name,
936                                   q_vector);
937                 if (err)
938                         goto out;
939                 vector++;
940         }
941
942         igb_configure_msix(adapter);
943         return 0;
944 out:
945         return err;
946 }
947
948 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
949 {
950         if (adapter->msix_entries) {
951                 pci_disable_msix(adapter->pdev);
952                 kfree(adapter->msix_entries);
953                 adapter->msix_entries = NULL;
954         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
955                 pci_disable_msi(adapter->pdev);
956         }
957 }
958
959 /**
960  * igb_free_q_vectors - Free memory allocated for interrupt vectors
961  * @adapter: board private structure to initialize
962  *
963  * This function frees the memory allocated to the q_vectors.  In addition if
964  * NAPI is enabled it will delete any references to the NAPI struct prior
965  * to freeing the q_vector.
966  **/
967 static void igb_free_q_vectors(struct igb_adapter *adapter)
968 {
969         int v_idx;
970
971         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
972                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
973                 adapter->q_vector[v_idx] = NULL;
974                 if (!q_vector)
975                         continue;
976                 netif_napi_del(&q_vector->napi);
977                 kfree(q_vector);
978         }
979         adapter->num_q_vectors = 0;
980 }
981
982 /**
983  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
984  *
985  * This function resets the device so that it has 0 rx queues, tx queues, and
986  * MSI-X interrupts allocated.
987  */
988 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
989 {
990         igb_free_queues(adapter);
991         igb_free_q_vectors(adapter);
992         igb_reset_interrupt_capability(adapter);
993 }
994
995 /**
996  * igb_set_interrupt_capability - set MSI or MSI-X if supported
997  *
998  * Attempt to configure interrupts using the best available
999  * capabilities of the hardware and kernel.
1000  **/
1001 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1002 {
1003         int err;
1004         int numvecs, i;
1005
1006         /* Number of supported queues. */
1007         adapter->num_rx_queues = adapter->rss_queues;
1008         if (adapter->vfs_allocated_count)
1009                 adapter->num_tx_queues = 1;
1010         else
1011                 adapter->num_tx_queues = adapter->rss_queues;
1012
1013         /* start with one vector for every rx queue */
1014         numvecs = adapter->num_rx_queues;
1015
1016         /* if tx handler is separate add 1 for every tx queue */
1017         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1018                 numvecs += adapter->num_tx_queues;
1019
1020         /* store the number of vectors reserved for queues */
1021         adapter->num_q_vectors = numvecs;
1022
1023         /* add 1 vector for link status interrupts */
1024         numvecs++;
1025         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1026                                         GFP_KERNEL);
1027         if (!adapter->msix_entries)
1028                 goto msi_only;
1029
1030         for (i = 0; i < numvecs; i++)
1031                 adapter->msix_entries[i].entry = i;
1032
1033         err = pci_enable_msix(adapter->pdev,
1034                               adapter->msix_entries,
1035                               numvecs);
1036         if (err == 0)
1037                 goto out;
1038
1039         igb_reset_interrupt_capability(adapter);
1040
1041         /* If we can't do MSI-X, try MSI */
1042 msi_only:
1043 #ifdef CONFIG_PCI_IOV
1044         /* disable SR-IOV for non MSI-X configurations */
1045         if (adapter->vf_data) {
1046                 struct e1000_hw *hw = &adapter->hw;
1047                 /* disable iov and allow time for transactions to clear */
1048                 pci_disable_sriov(adapter->pdev);
1049                 msleep(500);
1050
1051                 kfree(adapter->vf_data);
1052                 adapter->vf_data = NULL;
1053                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1054                 msleep(100);
1055                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1056         }
1057 #endif
1058         adapter->vfs_allocated_count = 0;
1059         adapter->rss_queues = 1;
1060         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1061         adapter->num_rx_queues = 1;
1062         adapter->num_tx_queues = 1;
1063         adapter->num_q_vectors = 1;
1064         if (!pci_enable_msi(adapter->pdev))
1065                 adapter->flags |= IGB_FLAG_HAS_MSI;
1066 out:
1067         /* Notify the stack of the (possibly) reduced queue counts. */
1068         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1069         return netif_set_real_num_rx_queues(adapter->netdev,
1070                                             adapter->num_rx_queues);
1071 }
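/*
 * Vector accounting example for the function above (illustrative):
 * with rss_queues = 4 and IGB_FLAG_QUEUE_PAIRS set, numvecs is
 * 4 (paired rx/tx) + 1 (link/other) = 5; without queue pairing it is
 * 4 rx + 4 tx + 1 = 9.  If pci_enable_msix() fails, the driver falls back
 * to a single MSI (or legacy) vector with one rx and one tx queue.
 */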
1072
1073 /**
1074  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1075  * @adapter: board private structure to initialize
1076  *
1077  * We allocate one q_vector per queue interrupt.  If allocation fails we
1078  * return -ENOMEM.
1079  **/
1080 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1081 {
1082         struct igb_q_vector *q_vector;
1083         struct e1000_hw *hw = &adapter->hw;
1084         int v_idx;
1085
1086         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1087                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1088                 if (!q_vector)
1089                         goto err_out;
1090                 q_vector->adapter = adapter;
1091                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1092                 q_vector->itr_val = IGB_START_ITR;
1093                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1094                 adapter->q_vector[v_idx] = q_vector;
1095         }
1096         return 0;
1097
1098 err_out:
1099         igb_free_q_vectors(adapter);
1100         return -ENOMEM;
1101 }
1102
1103 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1104                                       int ring_idx, int v_idx)
1105 {
1106         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1107
1108         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1109         q_vector->rx_ring->q_vector = q_vector;
1110         q_vector->itr_val = adapter->rx_itr_setting;
1111         if (q_vector->itr_val && q_vector->itr_val <= 3)
1112                 q_vector->itr_val = IGB_START_ITR;
1113 }
1114
1115 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1116                                       int ring_idx, int v_idx)
1117 {
1118         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1119
1120         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1121         q_vector->tx_ring->q_vector = q_vector;
1122         q_vector->itr_val = adapter->tx_itr_setting;
1123         if (q_vector->itr_val && q_vector->itr_val <= 3)
1124                 q_vector->itr_val = IGB_START_ITR;
1125 }
1126
1127 /**
1128  * igb_map_ring_to_vector - maps allocated queues to vectors
1129  *
1130  * This function maps the recently allocated queues to vectors.
1131  **/
1132 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1133 {
1134         int i;
1135         int v_idx = 0;
1136
1137         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1138             (adapter->num_q_vectors < adapter->num_tx_queues))
1139                 return -ENOMEM;
1140
1141         if (adapter->num_q_vectors >=
1142             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1143                 for (i = 0; i < adapter->num_rx_queues; i++)
1144                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1145                 for (i = 0; i < adapter->num_tx_queues; i++)
1146                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1147         } else {
1148                 for (i = 0; i < adapter->num_rx_queues; i++) {
1149                         if (i < adapter->num_tx_queues)
1150                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1151                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1152                 }
1153                 for (; i < adapter->num_tx_queues; i++)
1154                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1155         }
1156         return 0;
1157 }
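/*
 * Mapping example (illustrative): with 4 rx and 4 tx queues and 8
 * q_vectors available, rx rings take vectors 0-3 and tx rings take
 * vectors 4-7; with only 4 q_vectors, rx ring i and tx ring i are paired
 * on vector i via the else branch above.
 */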
1158
1159 /**
1160  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1161  *
1162  * This function initializes the interrupts and allocates all of the queues.
1163  **/
1164 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1165 {
1166         struct pci_dev *pdev = adapter->pdev;
1167         int err;
1168
1169         err = igb_set_interrupt_capability(adapter);
1170         if (err)
1171                 return err;
1172
1173         err = igb_alloc_q_vectors(adapter);
1174         if (err) {
1175                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1176                 goto err_alloc_q_vectors;
1177         }
1178
1179         err = igb_alloc_queues(adapter);
1180         if (err) {
1181                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1182                 goto err_alloc_queues;
1183         }
1184
1185         err = igb_map_ring_to_vector(adapter);
1186         if (err) {
1187                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1188                 goto err_map_queues;
1189         }
1190
1191
1192         return 0;
1193 err_map_queues:
1194         igb_free_queues(adapter);
1195 err_alloc_queues:
1196         igb_free_q_vectors(adapter);
1197 err_alloc_q_vectors:
1198         igb_reset_interrupt_capability(adapter);
1199         return err;
1200 }
1201
1202 /**
1203  * igb_request_irq - initialize interrupts
1204  *
1205  * Attempts to configure interrupts using the best available
1206  * capabilities of the hardware and kernel.
1207  **/
1208 static int igb_request_irq(struct igb_adapter *adapter)
1209 {
1210         struct net_device *netdev = adapter->netdev;
1211         struct pci_dev *pdev = adapter->pdev;
1212         int err = 0;
1213
1214         if (adapter->msix_entries) {
1215                 err = igb_request_msix(adapter);
1216                 if (!err)
1217                         goto request_done;
1218                 /* fall back to MSI */
1219                 igb_clear_interrupt_scheme(adapter);
1220                 if (!pci_enable_msi(adapter->pdev))
1221                         adapter->flags |= IGB_FLAG_HAS_MSI;
1222                 igb_free_all_tx_resources(adapter);
1223                 igb_free_all_rx_resources(adapter);
1224                 adapter->num_tx_queues = 1;
1225                 adapter->num_rx_queues = 1;
1226                 adapter->num_q_vectors = 1;
1227                 err = igb_alloc_q_vectors(adapter);
1228                 if (err) {
1229                         dev_err(&pdev->dev,
1230                                 "Unable to allocate memory for vectors\n");
1231                         goto request_done;
1232                 }
1233                 err = igb_alloc_queues(adapter);
1234                 if (err) {
1235                         dev_err(&pdev->dev,
1236                                 "Unable to allocate memory for queues\n");
1237                         igb_free_q_vectors(adapter);
1238                         goto request_done;
1239                 }
1240                 igb_setup_all_tx_resources(adapter);
1241                 igb_setup_all_rx_resources(adapter);
1242         } else {
1243                 igb_assign_vector(adapter->q_vector[0], 0);
1244         }
1245
1246         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1247                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1248                                   netdev->name, adapter);
1249                 if (!err)
1250                         goto request_done;
1251
1252                 /* fall back to legacy interrupts */
1253                 igb_reset_interrupt_capability(adapter);
1254                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1255         }
1256
1257         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1258                           netdev->name, adapter);
1259
1260         if (err)
1261                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1262                         err);
1263
1264 request_done:
1265         return err;
1266 }
1267
1268 static void igb_free_irq(struct igb_adapter *adapter)
1269 {
1270         if (adapter->msix_entries) {
1271                 int vector = 0, i;
1272
1273                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1274
1275                 for (i = 0; i < adapter->num_q_vectors; i++) {
1276                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1277                         free_irq(adapter->msix_entries[vector++].vector,
1278                                  q_vector);
1279                 }
1280         } else {
1281                 free_irq(adapter->pdev->irq, adapter);
1282         }
1283 }
1284
1285 /**
1286  * igb_irq_disable - Mask off interrupt generation on the NIC
1287  * @adapter: board private structure
1288  **/
1289 static void igb_irq_disable(struct igb_adapter *adapter)
1290 {
1291         struct e1000_hw *hw = &adapter->hw;
1292
1293         /*
1294          * we need to be careful when disabling interrupts.  The VFs are also
1295          * mapped into these registers and so clearing the bits can cause
1296          * issues on the VF drivers so we only need to clear what we set
1297          */
1298         if (adapter->msix_entries) {
1299                 u32 regval = rd32(E1000_EIAM);
1300                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1301                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1302                 regval = rd32(E1000_EIAC);
1303                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1304         }
1305
1306         wr32(E1000_IAM, 0);
1307         wr32(E1000_IMC, ~0);
1308         wrfl();
1309         if (adapter->msix_entries) {
1310                 int i;
1311                 for (i = 0; i < adapter->num_q_vectors; i++)
1312                         synchronize_irq(adapter->msix_entries[i].vector);
1313         } else {
1314                 synchronize_irq(adapter->pdev->irq);
1315         }
1316 }
1317
1318 /**
1319  * igb_irq_enable - Enable default interrupt generation settings
1320  * @adapter: board private structure
1321  **/
1322 static void igb_irq_enable(struct igb_adapter *adapter)
1323 {
1324         struct e1000_hw *hw = &adapter->hw;
1325
1326         if (adapter->msix_entries) {
1327                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1328                 u32 regval = rd32(E1000_EIAC);
1329                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1330                 regval = rd32(E1000_EIAM);
1331                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1332                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1333                 if (adapter->vfs_allocated_count) {
1334                         wr32(E1000_MBVFIMR, 0xFF);
1335                         ims |= E1000_IMS_VMMB;
1336                 }
1337                 if (adapter->hw.mac.type == e1000_82580)
1338                         ims |= E1000_IMS_DRSTA;
1339
1340                 wr32(E1000_IMS, ims);
1341         } else {
1342                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1343                                 E1000_IMS_DRSTA);
1344                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1345                                 E1000_IMS_DRSTA);
1346         }
1347 }
1348
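/**
 * igb_update_mng_vlan - update the manageability VLAN filter
 * @adapter: board private structure
 *
 * Adds the VLAN id from the DHCP management cookie to the VLAN filter table
 * and removes the previously tracked id if it is no longer in use.
 **/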
1349 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1350 {
1351         struct e1000_hw *hw = &adapter->hw;
1352         u16 vid = adapter->hw.mng_cookie.vlan_id;
1353         u16 old_vid = adapter->mng_vlan_id;
1354
1355         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1356                 /* add VID to filter table */
1357                 igb_vfta_set(hw, vid, true);
1358                 adapter->mng_vlan_id = vid;
1359         } else {
1360                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1361         }
1362
1363         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1364             (vid != old_vid) &&
1365             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1366                 /* remove VID from filter table */
1367                 igb_vfta_set(hw, old_vid, false);
1368         }
1369 }
1370
1371 /**
1372  * igb_release_hw_control - release control of the h/w to f/w
1373  * @adapter: address of board private structure
1374  *
1375  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1376  * For ASF and Pass Through versions of f/w this means that the
1377  * driver is no longer loaded.
1378  *
1379  **/
1380 static void igb_release_hw_control(struct igb_adapter *adapter)
1381 {
1382         struct e1000_hw *hw = &adapter->hw;
1383         u32 ctrl_ext;
1384
1385         /* Let firmware take over control of h/w */
1386         ctrl_ext = rd32(E1000_CTRL_EXT);
1387         wr32(E1000_CTRL_EXT,
1388                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1389 }
1390
1391 /**
1392  * igb_get_hw_control - get control of the h/w from f/w
1393  * @adapter: address of board private structure
1394  *
1395  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1396  * For ASF and Pass Through versions of f/w this means that
1397  * the driver is loaded.
1398  *
1399  **/
1400 static void igb_get_hw_control(struct igb_adapter *adapter)
1401 {
1402         struct e1000_hw *hw = &adapter->hw;
1403         u32 ctrl_ext;
1404
1405         /* Let firmware know the driver has taken over */
1406         ctrl_ext = rd32(E1000_CTRL_EXT);
1407         wr32(E1000_CTRL_EXT,
1408                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1409 }
1410
1411 /**
1412  * igb_configure - configure the hardware for RX and TX
1413  * @adapter: private board structure
1414  **/
1415 static void igb_configure(struct igb_adapter *adapter)
1416 {
1417         struct net_device *netdev = adapter->netdev;
1418         int i;
1419
1420         igb_get_hw_control(adapter);
1421         igb_set_rx_mode(netdev);
1422
1423         igb_restore_vlan(adapter);
1424
1425         igb_setup_tctl(adapter);
1426         igb_setup_mrqc(adapter);
1427         igb_setup_rctl(adapter);
1428
1429         igb_configure_tx(adapter);
1430         igb_configure_rx(adapter);
1431
1432         igb_rx_fifo_flush_82575(&adapter->hw);
1433
1434         /* call igb_desc_unused which always leaves
1435          * at least 1 descriptor unused to make sure
1436          * next_to_use != next_to_clean */
1437         for (i = 0; i < adapter->num_rx_queues; i++) {
1438                 struct igb_ring *ring = adapter->rx_ring[i];
1439                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1440         }
1441 }
1442
1443 /**
1444  * igb_power_up_link - Power up the phy/serdes link
1445  * @adapter: address of board private structure
1446  **/
1447 void igb_power_up_link(struct igb_adapter *adapter)
1448 {
1449         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1450                 igb_power_up_phy_copper(&adapter->hw);
1451         else
1452                 igb_power_up_serdes_link_82575(&adapter->hw);
1453 }
1454
1455 /**
1456  * igb_power_down_link - Power down the phy/serdes link
1457  * @adapter: address of board private structure
1458  */
1459 static void igb_power_down_link(struct igb_adapter *adapter)
1460 {
1461         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1462                 igb_power_down_phy_copper_82575(&adapter->hw);
1463         else
1464                 igb_shutdown_serdes_link_82575(&adapter->hw);
1465 }
1466
1467 /**
1468  * igb_up - Open the interface and prepare it to handle traffic
1469  * @adapter: board private structure
1470  **/
1471 int igb_up(struct igb_adapter *adapter)
1472 {
1473         struct e1000_hw *hw = &adapter->hw;
1474         int i;
1475
1476         /* hardware has been reset, we need to reload some things */
1477         igb_configure(adapter);
1478
1479         clear_bit(__IGB_DOWN, &adapter->state);
1480
1481         for (i = 0; i < adapter->num_q_vectors; i++) {
1482                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1483                 napi_enable(&q_vector->napi);
1484         }
1485         if (adapter->msix_entries)
1486                 igb_configure_msix(adapter);
1487         else
1488                 igb_assign_vector(adapter->q_vector[0], 0);
1489
1490         /* Clear any pending interrupts. */
1491         rd32(E1000_ICR);
1492         igb_irq_enable(adapter);
1493
1494         /* notify VFs that reset has been completed */
1495         if (adapter->vfs_allocated_count) {
1496                 u32 reg_data = rd32(E1000_CTRL_EXT);
1497                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1498                 wr32(E1000_CTRL_EXT, reg_data);
1499         }
1500
1501         netif_tx_start_all_queues(adapter->netdev);
1502
1503         /* start the watchdog. */
1504         hw->mac.get_link_status = 1;
1505         schedule_work(&adapter->watchdog_task);
1506
1507         return 0;
1508 }
1509
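/**
 * igb_down - Close the interface and stop all traffic
 * @adapter: board private structure
 **/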
1510 void igb_down(struct igb_adapter *adapter)
1511 {
1512         struct net_device *netdev = adapter->netdev;
1513         struct e1000_hw *hw = &adapter->hw;
1514         u32 tctl, rctl;
1515         int i;
1516
1517         /* signal that we're down so the interrupt handler does not
1518          * reschedule our watchdog timer */
1519         set_bit(__IGB_DOWN, &adapter->state);
1520
1521         /* disable receives in the hardware */
1522         rctl = rd32(E1000_RCTL);
1523         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1524         /* flush and sleep below */
1525
1526         netif_tx_stop_all_queues(netdev);
1527
1528         /* disable transmits in the hardware */
1529         tctl = rd32(E1000_TCTL);
1530         tctl &= ~E1000_TCTL_EN;
1531         wr32(E1000_TCTL, tctl);
1532         /* flush both disables and wait for them to finish */
1533         wrfl();
1534         msleep(10);
1535
1536         for (i = 0; i < adapter->num_q_vectors; i++) {
1537                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1538                 napi_disable(&q_vector->napi);
1539         }
1540
1541         igb_irq_disable(adapter);
1542
1543         del_timer_sync(&adapter->watchdog_timer);
1544         del_timer_sync(&adapter->phy_info_timer);
1545
1546         netif_carrier_off(netdev);
1547
1548         /* record the stats before reset*/
1549         spin_lock(&adapter->stats64_lock);
1550         igb_update_stats(adapter, &adapter->stats64);
1551         spin_unlock(&adapter->stats64_lock);
1552
1553         adapter->link_speed = 0;
1554         adapter->link_duplex = 0;
1555
1556         if (!pci_channel_offline(adapter->pdev))
1557                 igb_reset(adapter);
1558         igb_clean_all_tx_rings(adapter);
1559         igb_clean_all_rx_rings(adapter);
1560 #ifdef CONFIG_IGB_DCA
1561
1562         /* since we reset the hardware DCA settings were cleared */
1563         igb_setup_dca(adapter);
1564 #endif
1565 }
1566
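/**
 * igb_reinit_locked - bring the interface down and back up
 * @adapter: board private structure
 *
 * Holds the __IGB_RESETTING bit for the duration so that concurrent
 * resets do not race with each other.
 **/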
1567 void igb_reinit_locked(struct igb_adapter *adapter)
1568 {
1569         WARN_ON(in_interrupt());
1570         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1571                 msleep(1);
1572         igb_down(adapter);
1573         igb_up(adapter);
1574         clear_bit(__IGB_RESETTING, &adapter->state);
1575 }
1576
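/**
 * igb_reset - bring the hardware into a known good state
 * @adapter: board private structure
 *
 * Repartitions the packet buffer, reprograms flow control, resets the MAC
 * and re-initializes the hardware.
 **/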
1577 void igb_reset(struct igb_adapter *adapter)
1578 {
1579         struct pci_dev *pdev = adapter->pdev;
1580         struct e1000_hw *hw = &adapter->hw;
1581         struct e1000_mac_info *mac = &hw->mac;
1582         struct e1000_fc_info *fc = &hw->fc;
1583         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1584         u16 hwm;
1585
1586         /* Repartition the packet buffer (PBA) for MTUs greater than 9k.
1587          * A CTRL.RST is required for the change to take effect.
1588          */
1589         switch (mac->type) {
1590         case e1000_i350:
1591         case e1000_82580:
1592                 pba = rd32(E1000_RXPBS);
1593                 pba = igb_rxpbs_adjust_82580(pba);
1594                 break;
1595         case e1000_82576:
1596                 pba = rd32(E1000_RXPBS);
1597                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1598                 break;
1599         case e1000_82575:
1600         default:
1601                 pba = E1000_PBA_34K;
1602                 break;
1603         }
1604
1605         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1606             (mac->type < e1000_82576)) {
1607                 /* adjust PBA for jumbo frames */
1608                 wr32(E1000_PBA, pba);
1609
1610                 /* To maintain wire speed transmits, the Tx FIFO should be
1611                  * large enough to accommodate two full transmit packets,
1612                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1613                  * the Rx FIFO should be large enough to accommodate at least
1614                  * one full receive packet and is similarly rounded up and
1615                  * expressed in KB. */
1616                 pba = rd32(E1000_PBA);
1617                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1618                 tx_space = pba >> 16;
1619                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1620                 pba &= 0xffff;
1621                 /* the Tx FIFO also stores 16 bytes of information about the
1622                  * Tx packet, but doesn't include the Ethernet FCS because hardware appends it */
1623                 min_tx_space = (adapter->max_frame_size +
1624                                 sizeof(union e1000_adv_tx_desc) -
1625                                 ETH_FCS_LEN) * 2;
1626                 min_tx_space = ALIGN(min_tx_space, 1024);
1627                 min_tx_space >>= 10;
1628                 /* software strips receive CRC, so leave room for it */
1629                 min_rx_space = adapter->max_frame_size;
1630                 min_rx_space = ALIGN(min_rx_space, 1024);
1631                 min_rx_space >>= 10;
1632
1633                 /* If current Tx allocation is less than the min Tx FIFO size,
1634                  * and the min Tx FIFO size is less than the current Rx FIFO
1635                  * allocation, take space away from current Rx allocation */
1636                 if (tx_space < min_tx_space &&
1637                     ((min_tx_space - tx_space) < pba)) {
1638                         pba = pba - (min_tx_space - tx_space);
1639
1640                         /* if short on rx space, rx wins and must trump tx
1641                          * adjustment */
1642                         if (pba < min_rx_space)
1643                                 pba = min_rx_space;
1644                 }
1645                 wr32(E1000_PBA, pba);
1646         }
1647
1648         /* flow control settings */
1649         /* The high water mark must be low enough to fit one full frame
1650          * (or the size used for early receive) above it in the Rx FIFO.
1651          * Set it to the lower of:
1652          * - 90% of the Rx FIFO size, or
1653          * - the full Rx FIFO size minus one full frame */
1654         hwm = min(((pba << 10) * 9 / 10),
1655                         ((pba << 10) - 2 * adapter->max_frame_size));
1656
1657         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1658         fc->low_water = fc->high_water - 16;
1659         fc->pause_time = 0xFFFF;
1660         fc->send_xon = 1;
1661         fc->current_mode = fc->requested_mode;
1662
1663         /* disable receive for all VFs and wait one second */
1664         if (adapter->vfs_allocated_count) {
1665                 int i;
1666                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1667                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1668
1669                 /* ping all the active vfs to let them know we are going down */
1670                 igb_ping_all_vfs(adapter);
1671
1672                 /* disable transmits and receives */
1673                 wr32(E1000_VFRE, 0);
1674                 wr32(E1000_VFTE, 0);
1675         }
1676
1677         /* Allow time for pending master requests to run */
1678         hw->mac.ops.reset_hw(hw);
1679         wr32(E1000_WUC, 0);
1680
1681         if (hw->mac.ops.init_hw(hw))
1682                 dev_err(&pdev->dev, "Hardware Error\n");
1683         if (hw->mac.type > e1000_82580) {
1684                 if (adapter->flags & IGB_FLAG_DMAC) {
1685                         u32 reg;
1686
1687                         /*
1688                          * DMA Coalescing high water mark needs to be higher
1689                          * than the Rx threshold.  The Rx threshold is
1690                          * currently pba - 6, so we should use a high water
1691                          * mark of pba - 4. */
1692                         hwm = (pba - 4) << 10;
1693
1694                         reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1695                                & E1000_DMACR_DMACTHR_MASK);
1696
1697                         /* transition to L0s or L1 if available */
1698                         reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1699
1700                         /* watchdog timer = ~1000 usec in 32 usec intervals */
1701                         reg |= (1000 >> 5);
1702                         wr32(E1000_DMACR, reg);
1703
1704                         /* no lower threshold to disable coalescing (smart fifo)
1705                          * - UTRESH=0 */
1706                         wr32(E1000_DMCRTRH, 0);
1707
1708                         /* set hwm to PBA -  2 * max frame size */
1709                         wr32(E1000_FCRTC, hwm);
1710
1711                         /*
1712                          * This sets the time to wait before requesting a transition
1713                          * to a low power state to the number of usecs needed to
1714                          * receive one 512 byte frame at gigabit line rate.
1715                          */
1716                         reg = rd32(E1000_DMCTLX);
1717                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
1718
1719                         /* Delay 255 usec before entering Lx state. */
1720                         reg |= 0xFF;
1721                         wr32(E1000_DMCTLX, reg);
1722
1723                         /* free space in Tx packet buffer to wake from DMAC */
1724                         wr32(E1000_DMCTXTH,
1725                              (IGB_MIN_TXPBSIZE -
1726                              (IGB_TX_BUF_4096 + adapter->max_frame_size))
1727                              >> 6);
1728
1729                         /* make low power state decision controlled by DMAC */
1730                         reg = rd32(E1000_PCIEMISC);
1731                         reg |= E1000_PCIEMISC_LX_DECISION;
1732                         wr32(E1000_PCIEMISC, reg);
1733                 } /* end if IGB_FLAG_DMAC set */
1734         }
1735         if (hw->mac.type == e1000_82580) {
1736                 u32 reg = rd32(E1000_PCIEMISC);
1737                 wr32(E1000_PCIEMISC,
1738                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1739         }
1740         if (!netif_running(adapter->netdev))
1741                 igb_power_down_link(adapter);
1742
1743         igb_update_mng_vlan(adapter);
1744
1745         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1746         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1747
1748         igb_get_phy_info(hw);
1749 }
1750
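/**
 * igb_set_features - update the Rx checksum flag on each Rx ring
 * @netdev: network interface device structure
 * @features: the requested netdev feature set
 **/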
1751 static int igb_set_features(struct net_device *netdev, u32 features)
1752 {
1753         struct igb_adapter *adapter = netdev_priv(netdev);
1754         int i;
1755
1756         for (i = 0; i < adapter->num_rx_queues; i++) {
1757                 if (features & NETIF_F_RXCSUM)
1758                         adapter->rx_ring[i]->flags |= IGB_RING_FLAG_RX_CSUM;
1759                 else
1760                         adapter->rx_ring[i]->flags &= ~IGB_RING_FLAG_RX_CSUM;
1761         }
1762
1763         return 0;
1764 }
1765
1766 static const struct net_device_ops igb_netdev_ops = {
1767         .ndo_open               = igb_open,
1768         .ndo_stop               = igb_close,
1769         .ndo_start_xmit         = igb_xmit_frame_adv,
1770         .ndo_get_stats64        = igb_get_stats64,
1771         .ndo_set_rx_mode        = igb_set_rx_mode,
1772         .ndo_set_multicast_list = igb_set_rx_mode,
1773         .ndo_set_mac_address    = igb_set_mac,
1774         .ndo_change_mtu         = igb_change_mtu,
1775         .ndo_do_ioctl           = igb_ioctl,
1776         .ndo_tx_timeout         = igb_tx_timeout,
1777         .ndo_validate_addr      = eth_validate_addr,
1778         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1779         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1780         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1781         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1782         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1783         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1784         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1785 #ifdef CONFIG_NET_POLL_CONTROLLER
1786         .ndo_poll_controller    = igb_netpoll,
1787 #endif
1788         .ndo_set_features       = igb_set_features,
1789 };
1790
1791 /**
1792  * igb_probe - Device Initialization Routine
1793  * @pdev: PCI device information struct
1794  * @ent: entry in igb_pci_tbl
1795  *
1796  * Returns 0 on success, negative on failure
1797  *
1798  * igb_probe initializes an adapter identified by a pci_dev structure.
1799  * The OS initialization, configuring of the adapter private structure,
1800  * and a hardware reset occur.
1801  **/
1802 static int __devinit igb_probe(struct pci_dev *pdev,
1803                                const struct pci_device_id *ent)
1804 {
1805         struct net_device *netdev;
1806         struct igb_adapter *adapter;
1807         struct e1000_hw *hw;
1808         u16 eeprom_data = 0;
1809         s32 ret_val;
1810         static int global_quad_port_a; /* global quad port a indication */
1811         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1812         unsigned long mmio_start, mmio_len;
1813         int err, pci_using_dac;
1814         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1815         u8 part_str[E1000_PBANUM_LENGTH];
1816
1817         /* Catch broken hardware that put the wrong VF device ID in
1818          * the PCIe SR-IOV capability.
1819          */
1820         if (pdev->is_virtfn) {
1821                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1822                      pci_name(pdev), pdev->vendor, pdev->device);
1823                 return -EINVAL;
1824         }
1825
1826         err = pci_enable_device_mem(pdev);
1827         if (err)
1828                 return err;
1829
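        /* attempt a 64-bit DMA mask first and fall back to 32-bit on failure */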
1830         pci_using_dac = 0;
1831         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1832         if (!err) {
1833                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1834                 if (!err)
1835                         pci_using_dac = 1;
1836         } else {
1837                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1838                 if (err) {
1839                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1840                         if (err) {
1841                                 dev_err(&pdev->dev, "No usable DMA "
1842                                         "configuration, aborting\n");
1843                                 goto err_dma;
1844                         }
1845                 }
1846         }
1847
1848         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1849                                            IORESOURCE_MEM),
1850                                            igb_driver_name);
1851         if (err)
1852                 goto err_pci_reg;
1853
1854         pci_enable_pcie_error_reporting(pdev);
1855
1856         pci_set_master(pdev);
1857         pci_save_state(pdev);
1858
1859         err = -ENOMEM;
1860         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1861                                    IGB_ABS_MAX_TX_QUEUES);
1862         if (!netdev)
1863                 goto err_alloc_etherdev;
1864
1865         SET_NETDEV_DEV(netdev, &pdev->dev);
1866
1867         pci_set_drvdata(pdev, netdev);
1868         adapter = netdev_priv(netdev);
1869         adapter->netdev = netdev;
1870         adapter->pdev = pdev;
1871         hw = &adapter->hw;
1872         hw->back = adapter;
1873         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1874
1875         mmio_start = pci_resource_start(pdev, 0);
1876         mmio_len = pci_resource_len(pdev, 0);
1877
1878         err = -EIO;
1879         hw->hw_addr = ioremap(mmio_start, mmio_len);
1880         if (!hw->hw_addr)
1881                 goto err_ioremap;
1882
1883         netdev->netdev_ops = &igb_netdev_ops;
1884         igb_set_ethtool_ops(netdev);
1885         netdev->watchdog_timeo = 5 * HZ;
1886
1887         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1888
1889         netdev->mem_start = mmio_start;
1890         netdev->mem_end = mmio_start + mmio_len;
1891
1892         /* PCI config space info */
1893         hw->vendor_id = pdev->vendor;
1894         hw->device_id = pdev->device;
1895         hw->revision_id = pdev->revision;
1896         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1897         hw->subsystem_device_id = pdev->subsystem_device;
1898
1899         /* Copy the default MAC, PHY and NVM function pointers */
1900         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1901         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1902         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1903         /* Initialize skew-specific constants */
1904         err = ei->get_invariants(hw);
1905         if (err)
1906                 goto err_sw_init;
1907
1908         /* setup the private structure */
1909         err = igb_sw_init(adapter);
1910         if (err)
1911                 goto err_sw_init;
1912
1913         igb_get_bus_info_pcie(hw);
1914
1915         hw->phy.autoneg_wait_to_complete = false;
1916
1917         /* Copper options */
1918         if (hw->phy.media_type == e1000_media_type_copper) {
1919                 hw->phy.mdix = AUTO_ALL_MODES;
1920                 hw->phy.disable_polarity_correction = false;
1921                 hw->phy.ms_type = e1000_ms_hw_default;
1922         }
1923
1924         if (igb_check_reset_block(hw))
1925                 dev_info(&pdev->dev,
1926                         "PHY reset is blocked due to SOL/IDER session.\n");
1927
1928         netdev->hw_features = NETIF_F_SG |
1929                            NETIF_F_IP_CSUM |
1930                            NETIF_F_IPV6_CSUM |
1931                            NETIF_F_TSO |
1932                            NETIF_F_TSO6 |
1933                            NETIF_F_RXCSUM;
1934
1935         netdev->features = netdev->hw_features |
1936                            NETIF_F_HW_VLAN_TX |
1937                            NETIF_F_HW_VLAN_RX |
1938                            NETIF_F_HW_VLAN_FILTER;
1939
1940         netdev->vlan_features |= NETIF_F_TSO;
1941         netdev->vlan_features |= NETIF_F_TSO6;
1942         netdev->vlan_features |= NETIF_F_IP_CSUM;
1943         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1944         netdev->vlan_features |= NETIF_F_SG;
1945
1946         if (pci_using_dac) {
1947                 netdev->features |= NETIF_F_HIGHDMA;
1948                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1949         }
1950
1951         if (hw->mac.type >= e1000_82576) {
1952                 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1953                 netdev->features |= NETIF_F_SCTP_CSUM;
1954         }
1955
1956         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1957
1958         /* before reading the NVM, reset the controller to put the device in a
1959          * known good starting state */
1960         hw->mac.ops.reset_hw(hw);
1961
1962         /* make sure the NVM is good */
1963         if (hw->nvm.ops.validate(hw) < 0) {
1964                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1965                 err = -EIO;
1966                 goto err_eeprom;
1967         }
1968
1969         /* copy the MAC address out of the NVM */
1970         if (hw->mac.ops.read_mac_addr(hw))
1971                 dev_err(&pdev->dev, "NVM Read Error\n");
1972
1973         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1974         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1975
1976         if (!is_valid_ether_addr(netdev->perm_addr)) {
1977                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1978                 err = -EIO;
1979                 goto err_eeprom;
1980         }
1981
1982         setup_timer(&adapter->watchdog_timer, igb_watchdog,
1983                     (unsigned long) adapter);
1984         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1985                     (unsigned long) adapter);
1986
1987         INIT_WORK(&adapter->reset_task, igb_reset_task);
1988         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1989
1990         /* Initialize link properties that are user-changeable */
1991         adapter->fc_autoneg = true;
1992         hw->mac.autoneg = true;
1993         hw->phy.autoneg_advertised = 0x2f;
1994
1995         hw->fc.requested_mode = e1000_fc_default;
1996         hw->fc.current_mode = e1000_fc_default;
1997
1998         igb_validate_mdi_setting(hw);
1999
2000         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2001          * enable the ACPI Magic Packet filter
2002          */
2003
2004         if (hw->bus.func == 0)
2005                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2006         else if (hw->mac.type == e1000_82580)
2007                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2008                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2009                                  &eeprom_data);
2010         else if (hw->bus.func == 1)
2011                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2012
2013         if (eeprom_data & eeprom_apme_mask)
2014                 adapter->eeprom_wol |= E1000_WUFC_MAG;
2015
2016         /* now that we have the eeprom settings, apply the special cases where
2017          * the eeprom may be wrong or the board simply won't support wake on
2018          * lan on a particular port */
2019         switch (pdev->device) {
2020         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2021                 adapter->eeprom_wol = 0;
2022                 break;
2023         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2024         case E1000_DEV_ID_82576_FIBER:
2025         case E1000_DEV_ID_82576_SERDES:
2026                 /* Wake events only supported on port A for dual fiber
2027                  * regardless of eeprom setting */
2028                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2029                         adapter->eeprom_wol = 0;
2030                 break;
2031         case E1000_DEV_ID_82576_QUAD_COPPER:
2032         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2033                 /* if quad port adapter, disable WoL on all but port A */
2034                 if (global_quad_port_a != 0)
2035                         adapter->eeprom_wol = 0;
2036                 else
2037                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2038                 /* Reset for multiple quad port adapters */
2039                 if (++global_quad_port_a == 4)
2040                         global_quad_port_a = 0;
2041                 break;
2042         }
2043
2044         /* initialize the wol settings based on the eeprom settings */
2045         adapter->wol = adapter->eeprom_wol;
2046         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2047
2048         /* reset the hardware with the new settings */
2049         igb_reset(adapter);
2050
2051         /* let the f/w know that the h/w is now under the control of the
2052          * driver. */
2053         igb_get_hw_control(adapter);
2054
2055         strcpy(netdev->name, "eth%d");
2056         err = register_netdev(netdev);
2057         if (err)
2058                 goto err_register;
2059
2060         /* carrier off reporting is important to ethtool even BEFORE open */
2061         netif_carrier_off(netdev);
2062
2063 #ifdef CONFIG_IGB_DCA
2064         if (dca_add_requester(&pdev->dev) == 0) {
2065                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2066                 dev_info(&pdev->dev, "DCA enabled\n");
2067                 igb_setup_dca(adapter);
2068         }
2069
2070 #endif
2071         /* do hw tstamp init after resetting */
2072         igb_init_hw_timer(adapter);
2073
2074         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2075         /* print bus type/speed/width info */
2076         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2077                  netdev->name,
2078                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2079                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2080                                                             "unknown"),
2081                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2082                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2083                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2084                    "unknown"),
2085                  netdev->dev_addr);
2086
2087         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2088         if (ret_val)
2089                 strcpy(part_str, "Unknown");
2090         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2091         dev_info(&pdev->dev,
2092                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2093                 adapter->msix_entries ? "MSI-X" :
2094                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2095                 adapter->num_rx_queues, adapter->num_tx_queues);
2096         switch (hw->mac.type) {
2097         case e1000_i350:
2098                 igb_set_eee_i350(hw);
2099                 break;
2100         default:
2101                 break;
2102         }
2103         return 0;
2104
2105 err_register:
2106         igb_release_hw_control(adapter);
2107 err_eeprom:
2108         if (!igb_check_reset_block(hw))
2109                 igb_reset_phy(hw);
2110
2111         if (hw->flash_address)
2112                 iounmap(hw->flash_address);
2113 err_sw_init:
2114         igb_clear_interrupt_scheme(adapter);
2115         iounmap(hw->hw_addr);
2116 err_ioremap:
2117         free_netdev(netdev);
2118 err_alloc_etherdev:
2119         pci_release_selected_regions(pdev,
2120                                      pci_select_bars(pdev, IORESOURCE_MEM));
2121 err_pci_reg:
2122 err_dma:
2123         pci_disable_device(pdev);
2124         return err;
2125 }
2126
2127 /**
2128  * igb_remove - Device Removal Routine
2129  * @pdev: PCI device information struct
2130  *
2131  * igb_remove is called by the PCI subsystem to alert the driver
2132  * that it should release a PCI device.  This could be caused by a
2133  * Hot-Plug event, or because the driver is going to be removed from
2134  * memory.
2135  **/
2136 static void __devexit igb_remove(struct pci_dev *pdev)
2137 {
2138         struct net_device *netdev = pci_get_drvdata(pdev);
2139         struct igb_adapter *adapter = netdev_priv(netdev);
2140         struct e1000_hw *hw = &adapter->hw;
2141
2142         /*
2143          * The watchdog timer may be rescheduled, so explicitly
2144          * disable watchdog from being rescheduled.
2145          */
2146         set_bit(__IGB_DOWN, &adapter->state);
2147         del_timer_sync(&adapter->watchdog_timer);
2148         del_timer_sync(&adapter->phy_info_timer);
2149
2150         cancel_work_sync(&adapter->reset_task);
2151         cancel_work_sync(&adapter->watchdog_task);
2152
2153 #ifdef CONFIG_IGB_DCA
2154         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2155                 dev_info(&pdev->dev, "DCA disabled\n");
2156                 dca_remove_requester(&pdev->dev);
2157                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2158                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2159         }
2160 #endif
2161
2162         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2163          * would have already happened in close and is redundant. */
2164         igb_release_hw_control(adapter);
2165
2166         unregister_netdev(netdev);
2167
2168         igb_clear_interrupt_scheme(adapter);
2169
2170 #ifdef CONFIG_PCI_IOV
2171         /* reclaim resources allocated to VFs */
2172         if (adapter->vf_data) {
2173                 /* disable iov and allow time for transactions to clear */
2174                 pci_disable_sriov(pdev);
2175                 msleep(500);
2176
2177                 kfree(adapter->vf_data);
2178                 adapter->vf_data = NULL;
2179                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2180                 msleep(100);
2181                 dev_info(&pdev->dev, "IOV Disabled\n");
2182         }
2183 #endif
2184
2185         iounmap(hw->hw_addr);
2186         if (hw->flash_address)
2187                 iounmap(hw->flash_address);
2188         pci_release_selected_regions(pdev,
2189                                      pci_select_bars(pdev, IORESOURCE_MEM));
2190
2191         free_netdev(netdev);
2192
2193         pci_disable_pcie_error_reporting(pdev);
2194
2195         pci_disable_device(pdev);
2196 }
2197
2198 /**
2199  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2200  * @adapter: board private structure to initialize
2201  *
2202  * This function initializes the vf specific data storage and then attempts to
2203  * allocate the VFs.  The reason for ordering it this way is because it is much
2204  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2205  * the memory for the VFs.
2206  **/
2207 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2208 {
2209 #ifdef CONFIG_PCI_IOV
2210         struct pci_dev *pdev = adapter->pdev;
2211
2212         if (adapter->vfs_allocated_count) {
2213                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2214                                            sizeof(struct vf_data_storage),
2215                                            GFP_KERNEL);
2216                 /* if allocation failed then we do not support SR-IOV */
2217                 if (!adapter->vf_data) {
2218                         adapter->vfs_allocated_count = 0;
2219                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2220                                 "Data Storage\n");
2221                 }
2222         }
2223
2224         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2225                 kfree(adapter->vf_data);
2226                 adapter->vf_data = NULL;
2227 #endif /* CONFIG_PCI_IOV */
2228                 adapter->vfs_allocated_count = 0;
2229 #ifdef CONFIG_PCI_IOV
2230         } else {
2231                 unsigned char mac_addr[ETH_ALEN];
2232                 int i;
2233                 dev_info(&pdev->dev, "%d vfs allocated\n",
2234                          adapter->vfs_allocated_count);
2235                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2236                         random_ether_addr(mac_addr);
2237                         igb_set_vf_mac(adapter, i, mac_addr);
2238                 }
2239                 /* DMA Coalescing is not supported in IOV mode. */
2240                 if (adapter->flags & IGB_FLAG_DMAC)
2241                         adapter->flags &= ~IGB_FLAG_DMAC;
2242         }
2243 #endif /* CONFIG_PCI_IOV */
2244 }
2245
2246
2247 /**
2248  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2249  * @adapter: board private structure to initialize
2250  *
2251  * igb_init_hw_timer initializes the function pointer and values for the hw
2252  * timer found in hardware.
2253  **/
2254 static void igb_init_hw_timer(struct igb_adapter *adapter)
2255 {
2256         struct e1000_hw *hw = &adapter->hw;
2257
2258         switch (hw->mac.type) {
2259         case e1000_i350:
2260         case e1000_82580:
2261                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2262                 adapter->cycles.read = igb_read_clock;
2263                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2264                 adapter->cycles.mult = 1;
2265                 /*
2266                  * The 82580 timesync updates the system timer every 8ns by 8ns
2267                  * and the value cannot be shifted.  Instead we need to shift
2268                  * the registers to generate a 64bit timer value.  As a result
2269                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2270                  * 24 in order to generate a larger value for synchronization.
2271                  */
2272                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2273                 /* disable system timer temporarily by setting bit 31 */
2274                 wr32(E1000_TSAUXC, 0x80000000);
2275                 wrfl();
2276
2277                 /* Set registers so that rollover occurs soon to test this. */
2278                 wr32(E1000_SYSTIMR, 0x00000000);
2279                 wr32(E1000_SYSTIML, 0x80000000);
2280                 wr32(E1000_SYSTIMH, 0x000000FF);
2281                 wrfl();
2282
2283                 /* enable system timer by clearing bit 31 */
2284                 wr32(E1000_TSAUXC, 0x0);
2285                 wrfl();
2286
2287                 timecounter_init(&adapter->clock,
2288                                  &adapter->cycles,
2289                                  ktime_to_ns(ktime_get_real()));
2290                 /*
2291                  * Synchronize our NIC clock against system wall clock. NIC
2292                  * time stamp reading requires ~3us per sample, each sample
2293                  * was pretty stable even under load => only require 10
2294                  * samples for each offset comparison.
2295                  */
2296                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2297                 adapter->compare.source = &adapter->clock;
2298                 adapter->compare.target = ktime_get_real;
2299                 adapter->compare.num_samples = 10;
2300                 timecompare_update(&adapter->compare, 0);
2301                 break;
2302         case e1000_82576:
2303                 /*
2304                  * Initialize hardware timer: we keep it running just in case
2305                  * that some program needs it later on.
2306                  */
2307                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2308                 adapter->cycles.read = igb_read_clock;
2309                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2310                 adapter->cycles.mult = 1;
2311                 /*
2312                  * Scale the NIC clock cycle by a large factor so that
2313                  * relatively small clock corrections can be added or
2314                  * subtracted at each clock tick. The drawbacks of a large
2315                  * factor are a) that the clock register overflows more quickly
2316                  * (not such a big deal) and b) that the increment per tick has
2317                  * to fit into 24 bits.  As a result we need to use a shift of
2318                  * 19 so we can fit a value of 16 into the TIMINCA register.
2319                  */
2320                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2321                 wr32(E1000_TIMINCA,
2322                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2323                                 (16 << IGB_82576_TSYNC_SHIFT));
2324
2325                 /* Set registers so that rollover occurs soon to test this. */
2326                 wr32(E1000_SYSTIML, 0x00000000);
2327                 wr32(E1000_SYSTIMH, 0xFF800000);
2328                 wrfl();
2329
2330                 timecounter_init(&adapter->clock,
2331                                  &adapter->cycles,
2332                                  ktime_to_ns(ktime_get_real()));
2333                 /*
2334                  * Synchronize our NIC clock against system wall clock. NIC
2335                  * time stamp reading requires ~3us per sample, each sample
2336                  * was pretty stable even under load => only require 10
2337                  * samples for each offset comparison.
2338                  */
2339                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2340                 adapter->compare.source = &adapter->clock;
2341                 adapter->compare.target = ktime_get_real;
2342                 adapter->compare.num_samples = 10;
2343                 timecompare_update(&adapter->compare, 0);
2344                 break;
2345         case e1000_82575:
2346                 /* 82575 does not support timesync */
2347         default:
2348                 break;
2349         }
2350
2351 }
2352
2353 /**
2354  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2355  * @adapter: board private structure to initialize
2356  *
2357  * igb_sw_init initializes the Adapter private data structure.
2358  * Fields are initialized based on PCI device information and
2359  * OS network device settings (MTU size).
2360  **/
2361 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2362 {
2363         struct e1000_hw *hw = &adapter->hw;
2364         struct net_device *netdev = adapter->netdev;
2365         struct pci_dev *pdev = adapter->pdev;
2366
2367         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2368
2369         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2370         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2371         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2372         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2373
2374         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2375         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2376
2377         spin_lock_init(&adapter->stats64_lock);
2378 #ifdef CONFIG_PCI_IOV
2379         switch (hw->mac.type) {
2380         case e1000_82576:
2381         case e1000_i350:
2382                 if (max_vfs > 7) {
2383                         dev_warn(&pdev->dev,
2384                                  "Maximum of 7 VFs per PF, using max\n");
2385                         adapter->vfs_allocated_count = 7;
2386                 } else
2387                         adapter->vfs_allocated_count = max_vfs;
2388                 break;
2389         default:
2390                 break;
2391         }
2392 #endif /* CONFIG_PCI_IOV */
2393         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2394         /* i350 cannot do RSS and SR-IOV at the same time */
2395         if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2396                 adapter->rss_queues = 1;
2397
2398         /*
2399          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2400          * then we should combine the queues into a queue pair in order to
2401          * conserve interrupts due to limited supply
2402          */
2403         if ((adapter->rss_queues > 4) ||
2404             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2405                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2406
2407         /* This call may decrease the number of queues */
2408         if (igb_init_interrupt_scheme(adapter)) {
2409                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2410                 return -ENOMEM;
2411         }
2412
2413         igb_probe_vfs(adapter);
2414
2415         /* Explicitly disable IRQ since the NIC can be in any state. */
2416         igb_irq_disable(adapter);
2417
2418         if (hw->mac.type == e1000_i350)
2419                 adapter->flags &= ~IGB_FLAG_DMAC;
2420
2421         set_bit(__IGB_DOWN, &adapter->state);
2422         return 0;
2423 }
2424
2425 /**
2426  * igb_open - Called when a network interface is made active
2427  * @netdev: network interface device structure
2428  *
2429  * Returns 0 on success, negative value on failure
2430  *
2431  * The open entry point is called when a network interface is made
2432  * active by the system (IFF_UP).  At this point all resources needed
2433  * for transmit and receive operations are allocated, the interrupt
2434  * handler is registered with the OS, the watchdog timer is started,
2435  * and the stack is notified that the interface is ready.
2436  **/
2437 static int igb_open(struct net_device *netdev)
2438 {
2439         struct igb_adapter *adapter = netdev_priv(netdev);
2440         struct e1000_hw *hw = &adapter->hw;
2441         int err;
2442         int i;
2443
2444         /* disallow open during test */
2445         if (test_bit(__IGB_TESTING, &adapter->state))
2446                 return -EBUSY;
2447
2448         netif_carrier_off(netdev);
2449
2450         /* allocate transmit descriptors */
2451         err = igb_setup_all_tx_resources(adapter);
2452         if (err)
2453                 goto err_setup_tx;
2454
2455         /* allocate receive descriptors */
2456         err = igb_setup_all_rx_resources(adapter);
2457         if (err)
2458                 goto err_setup_rx;
2459
2460         igb_power_up_link(adapter);
2461
2462         /* before we allocate an interrupt, we must be ready to handle it.
2463          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2464          * as soon as we call pci_request_irq, so we have to setup our
2465          * clean_rx handler before we do so.  */
2466         igb_configure(adapter);
2467
2468         err = igb_request_irq(adapter);
2469         if (err)
2470                 goto err_req_irq;
2471
2472         /* From here on the code is the same as igb_up() */
2473         clear_bit(__IGB_DOWN, &adapter->state);
2474
2475         for (i = 0; i < adapter->num_q_vectors; i++) {
2476                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2477                 napi_enable(&q_vector->napi);
2478         }
2479
2480         /* Clear any pending interrupts. */
2481         rd32(E1000_ICR);
2482
2483         igb_irq_enable(adapter);
2484
2485         /* notify VFs that reset has been completed */
2486         if (adapter->vfs_allocated_count) {
2487                 u32 reg_data = rd32(E1000_CTRL_EXT);
2488                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2489                 wr32(E1000_CTRL_EXT, reg_data);
2490         }
2491
2492         netif_tx_start_all_queues(netdev);
2493
2494         /* start the watchdog. */
2495         hw->mac.get_link_status = 1;
2496         schedule_work(&adapter->watchdog_task);
2497
2498         return 0;
2499
2500 err_req_irq:
2501         igb_release_hw_control(adapter);
2502         igb_power_down_link(adapter);
2503         igb_free_all_rx_resources(adapter);
2504 err_setup_rx:
2505         igb_free_all_tx_resources(adapter);
2506 err_setup_tx:
2507         igb_reset(adapter);
2508
2509         return err;
2510 }
2511
2512 /**
2513  * igb_close - Disables a network interface
2514  * @netdev: network interface device structure
2515  *
2516  * Returns 0, this is not allowed to fail
2517  *
2518  * The close entry point is called when an interface is de-activated
2519  * by the OS.  The hardware is still under the driver's control, but
2520  * needs to be disabled.  A global MAC reset is issued to stop the
2521  * hardware, and all transmit and receive resources are freed.
2522  **/
2523 static int igb_close(struct net_device *netdev)
2524 {
2525         struct igb_adapter *adapter = netdev_priv(netdev);
2526
2527         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2528         igb_down(adapter);
2529
2530         igb_free_irq(adapter);
2531
2532         igb_free_all_tx_resources(adapter);
2533         igb_free_all_rx_resources(adapter);
2534
2535         return 0;
2536 }
2537
2538 /**
2539  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2540  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2541  *
2542  * Return 0 on success, negative on failure
2543  **/
2544 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2545 {
2546         struct device *dev = tx_ring->dev;
2547         int size;
2548
2549         size = sizeof(struct igb_buffer) * tx_ring->count;
2550         tx_ring->buffer_info = vzalloc(size);
2551         if (!tx_ring->buffer_info)
2552                 goto err;
2553
2554         /* round up to nearest 4K */
2555         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2556         tx_ring->size = ALIGN(tx_ring->size, 4096);
2557
2558         tx_ring->desc = dma_alloc_coherent(dev,
2559                                            tx_ring->size,
2560                                            &tx_ring->dma,
2561                                            GFP_KERNEL);
2562
2563         if (!tx_ring->desc)
2564                 goto err;
2565
2566         tx_ring->next_to_use = 0;
2567         tx_ring->next_to_clean = 0;
2568         return 0;
2569
2570 err:
2571         vfree(tx_ring->buffer_info);
2572         dev_err(dev,
2573                 "Unable to allocate memory for the transmit descriptor ring\n");
2574         return -ENOMEM;
2575 }
2576
2577 /**
2578  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2579  *                                (Descriptors) for all queues
2580  * @adapter: board private structure
2581  *
2582  * Return 0 on success, negative on failure
2583  **/
2584 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2585 {
2586         struct pci_dev *pdev = adapter->pdev;
2587         int i, err = 0;
2588
2589         for (i = 0; i < adapter->num_tx_queues; i++) {
2590                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2591                 if (err) {
2592                         dev_err(&pdev->dev,
2593                                 "Allocation for Tx Queue %u failed\n", i);
2594                         for (i--; i >= 0; i--)
2595                                 igb_free_tx_resources(adapter->tx_ring[i]);
2596                         break;
2597                 }
2598         }
2599
2600         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2601                 int r_idx = i % adapter->num_tx_queues;
2602                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2603         }
2604         return err;
2605 }
2606
2607 /**
2608  * igb_setup_tctl - configure the transmit control registers
2609  * @adapter: Board private structure
2610  **/
2611 void igb_setup_tctl(struct igb_adapter *adapter)
2612 {
2613         struct e1000_hw *hw = &adapter->hw;
2614         u32 tctl;
2615
2616         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2617         wr32(E1000_TXDCTL(0), 0);
2618
2619         /* Program the Transmit Control Register */
2620         tctl = rd32(E1000_TCTL);
2621         tctl &= ~E1000_TCTL_CT;
2622         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2623                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2624
2625         igb_config_collision_dist(hw);
2626
2627         /* Enable transmits */
2628         tctl |= E1000_TCTL_EN;
2629
2630         wr32(E1000_TCTL, tctl);
2631 }
2632
2633 /**
2634  * igb_configure_tx_ring - Configure transmit ring after Reset
2635  * @adapter: board private structure
2636  * @ring: tx ring to configure
2637  *
2638  * Configure a transmit ring after a reset.
2639  **/
2640 void igb_configure_tx_ring(struct igb_adapter *adapter,
2641                            struct igb_ring *ring)
2642 {
2643         struct e1000_hw *hw = &adapter->hw;
2644         u32 txdctl;
2645         u64 tdba = ring->dma;
2646         int reg_idx = ring->reg_idx;
2647
2648         /* disable the queue */
2649         txdctl = rd32(E1000_TXDCTL(reg_idx));
2650         wr32(E1000_TXDCTL(reg_idx),
2651                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2652         wrfl();
2653         mdelay(10);
2654
2655         wr32(E1000_TDLEN(reg_idx),
2656                         ring->count * sizeof(union e1000_adv_tx_desc));
2657         wr32(E1000_TDBAL(reg_idx),
2658                         tdba & 0x00000000ffffffffULL);
2659         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2660
2661         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2662         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2663         writel(0, ring->head);
2664         writel(0, ring->tail);
2665
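        /* set the Tx descriptor prefetch, host, and write-back thresholds */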
2666         txdctl |= IGB_TX_PTHRESH;
2667         txdctl |= IGB_TX_HTHRESH << 8;
2668         txdctl |= IGB_TX_WTHRESH << 16;
2669
2670         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2671         wr32(E1000_TXDCTL(reg_idx), txdctl);
2672 }
2673
2674 /**
2675  * igb_configure_tx - Configure transmit Unit after Reset
2676  * @adapter: board private structure
2677  *
2678  * Configure the Tx unit of the MAC after a reset.
2679  **/
2680 static void igb_configure_tx(struct igb_adapter *adapter)
2681 {
2682         int i;
2683
2684         for (i = 0; i < adapter->num_tx_queues; i++)
2685                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2686 }
2687
2688 /**
2689  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2690  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2691  *
2692  * Returns 0 on success, negative on failure
2693  **/
2694 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2695 {
2696         struct device *dev = rx_ring->dev;
2697         int size, desc_len;
2698
2699         size = sizeof(struct igb_buffer) * rx_ring->count;
2700         rx_ring->buffer_info = vzalloc(size);
2701         if (!rx_ring->buffer_info)
2702                 goto err;
2703
2704         desc_len = sizeof(union e1000_adv_rx_desc);
2705
2706         /* Round up to nearest 4K */
2707         rx_ring->size = rx_ring->count * desc_len;
2708         rx_ring->size = ALIGN(rx_ring->size, 4096);
2709
2710         rx_ring->desc = dma_alloc_coherent(dev,
2711                                            rx_ring->size,
2712                                            &rx_ring->dma,
2713                                            GFP_KERNEL);
2714
2715         if (!rx_ring->desc)
2716                 goto err;
2717
2718         rx_ring->next_to_clean = 0;
2719         rx_ring->next_to_use = 0;
2720
2721         return 0;
2722
2723 err:
2724         vfree(rx_ring->buffer_info);
2725         rx_ring->buffer_info = NULL;
2726         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2727                 " ring\n");
2728         return -ENOMEM;
2729 }
2730
2731 /**
2732  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2733  *                                (Descriptors) for all queues
2734  * @adapter: board private structure
2735  *
2736  * Return 0 on success, negative on failure
2737  **/
2738 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2739 {
2740         struct pci_dev *pdev = adapter->pdev;
2741         int i, err = 0;
2742
2743         for (i = 0; i < adapter->num_rx_queues; i++) {
2744                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2745                 if (err) {
2746                         dev_err(&pdev->dev,
2747                                 "Allocation for Rx Queue %u failed\n", i);
2748                         for (i--; i >= 0; i--)
2749                                 igb_free_rx_resources(adapter->rx_ring[i]);
2750                         break;
2751                 }
2752         }
2753
2754         return err;
2755 }
2756
2757 /**
2758  * igb_setup_mrqc - configure the multiple receive queue control registers
2759  * @adapter: Board private structure
2760  **/
2761 static void igb_setup_mrqc(struct igb_adapter *adapter)
2762 {
2763         struct e1000_hw *hw = &adapter->hw;
2764         u32 mrqc, rxcsum;
2765         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2766         union e1000_reta {
2767                 u32 dword;
2768                 u8  bytes[4];
2769         } reta;
2770         static const u8 rsshash[40] = {
2771                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2772                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2773                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2774                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2775
2776         /* Fill out hash function seeds */
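             /* the 40 byte RSS key is written as ten 32-bit RSSRK words, each
              * assembled least-significant-byte first from four key bytes */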
2777         for (j = 0; j < 10; j++) {
2778                 u32 rsskey = rsshash[(j * 4)];
2779                 rsskey |= rsshash[(j * 4) + 1] << 8;
2780                 rsskey |= rsshash[(j * 4) + 2] << 16;
2781                 rsskey |= rsshash[(j * 4) + 3] << 24;
2782                 array_wr32(E1000_RSSRK(0), j, rsskey);
2783         }
2784
2785         num_rx_queues = adapter->rss_queues;
2786
2787         if (adapter->vfs_allocated_count) {
2788                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2789                 switch (hw->mac.type) {
2790                 case e1000_i350:
2791                 case e1000_82580:
2792                         num_rx_queues = 1;
2793                         shift = 0;
2794                         break;
2795                 case e1000_82576:
2796                         shift = 3;
2797                         num_rx_queues = 2;
2798                         break;
2799                 case e1000_82575:
2800                         shift = 2;
2801                         shift2 = 6;
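                             /* fall through */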
2802                 default:
2803                         break;
2804                 }
2805         } else {
2806                 if (hw->mac.type == e1000_82575)
2807                         shift = 6;
2808         }
2809
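             /* Populate the 128-entry redirection table, one byte per entry and
              * one dword (four entries) per register write; each entry picks a
              * queue round-robin via (j % num_rx_queues), shifted as the MAC
              * type requires */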
2810         for (j = 0; j < (32 * 4); j++) {
2811                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2812                 if (shift2)
2813                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2814                 if ((j & 3) == 3)
2815                         wr32(E1000_RETA(j >> 2), reta.dword);
2816         }
2817
2818         /*
2819          * Disable raw packet checksumming so that RSS hash is placed in
2820          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2821          * offloads as they are enabled by default
2822          */
2823         rxcsum = rd32(E1000_RXCSUM);
2824         rxcsum |= E1000_RXCSUM_PCSD;
2825
2826         if (adapter->hw.mac.type >= e1000_82576)
2827                 /* Enable Receive Checksum Offload for SCTP */
2828                 rxcsum |= E1000_RXCSUM_CRCOFL;
2829
2830         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2831         wr32(E1000_RXCSUM, rxcsum);
2832
2833         /* If VMDq is enabled then we set the appropriate mode for that, else
2834          * we default to RSS so that an RSS hash is calculated per packet even
2835          * if we are only using one queue */
2836         if (adapter->vfs_allocated_count) {
2837                 if (hw->mac.type > e1000_82575) {
2838                         /* Set the default pool for the PF's first queue */
2839                         u32 vtctl = rd32(E1000_VT_CTL);
2840                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2841                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2842                         vtctl |= adapter->vfs_allocated_count <<
2843                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2844                         wr32(E1000_VT_CTL, vtctl);
2845                 }
2846                 if (adapter->rss_queues > 1)
2847                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2848                 else
2849                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2850         } else {
2851                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2852         }
2853         igb_vmm_control(adapter);
2854
2855         /*
2856          * Generate RSS hash based on TCP port numbers and/or
2857          * IPv4/v6 src and dst addresses since UDP cannot be
2858          * hashed reliably due to IP fragmentation
2859          */
2860         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2861                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2862                 E1000_MRQC_RSS_FIELD_IPV6 |
2863                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2864                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2865
2866         wr32(E1000_MRQC, mrqc);
2867 }
2868
2869 /**
2870  * igb_setup_rctl - configure the receive control registers
2871  * @adapter: Board private structure
2872  **/
2873 void igb_setup_rctl(struct igb_adapter *adapter)
2874 {
2875         struct e1000_hw *hw = &adapter->hw;
2876         u32 rctl;
2877
2878         rctl = rd32(E1000_RCTL);
2879
2880         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2881         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2882
2883         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2884                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2885
2886         /*
2887          * enable stripping of CRC. It's unlikely this will break BMC
2888          * redirection as it did with e1000. Newer features require
2889          * that the HW strips the CRC.
2890          */
2891         rctl |= E1000_RCTL_SECRC;
2892
2893         /* disable store bad packets and clear size bits. */
2894         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2895
2896         /* enable LPE to prevent packets larger than max_frame_size */
2897         rctl |= E1000_RCTL_LPE;
2898
2899         /* disable queue 0 to prevent tail write w/o re-config */
2900         wr32(E1000_RXDCTL(0), 0);
2901
2902         /* Attention!!!  For SR-IOV PF driver operations you must enable
2903          * queue drop for all VF and PF queues to prevent head of line blocking
2904          * if an un-trusted VF does not provide descriptors to hardware.
2905          */
2906         if (adapter->vfs_allocated_count) {
2907                 /* set all queue drop enable bits */
2908                 wr32(E1000_QDE, ALL_QUEUES);
2909         }
2910
2911         wr32(E1000_RCTL, rctl);
2912 }
2913
2914 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2915                                    int vfn)
2916 {
2917         struct e1000_hw *hw = &adapter->hw;
2918         u32 vmolr;
2919
2920         /* if this is a VF (not the PF) and it has VLANs enabled,
2921          * increase the size to allow for the VLAN tag */
2922         if (vfn < adapter->vfs_allocated_count &&
2923             adapter->vf_data[vfn].vlans_enabled)
2924                 size += VLAN_TAG_SIZE;
2925
2926         vmolr = rd32(E1000_VMOLR(vfn));
2927         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2928         vmolr |= size | E1000_VMOLR_LPE;
2929         wr32(E1000_VMOLR(vfn), vmolr);
2930
2931         return 0;
2932 }
2933
2934 /**
2935  * igb_rlpml_set - set maximum receive packet size
2936  * @adapter: board private structure
2937  *
2938  * Configure maximum receivable packet size.
2939  **/
2940 static void igb_rlpml_set(struct igb_adapter *adapter)
2941 {
2942         u32 max_frame_size = adapter->max_frame_size;
2943         struct e1000_hw *hw = &adapter->hw;
2944         u16 pf_id = adapter->vfs_allocated_count;
2945
2946         if (adapter->vlgrp)
2947                 max_frame_size += VLAN_TAG_SIZE;
2948
2949         /* if vfs are enabled we set RLPML to the largest possible request
2950          * size and set the VMOLR RLPML to the size we need */
2951         if (pf_id) {
2952                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2953                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2954         }
2955
2956         wr32(E1000_RLPML, max_frame_size);
2957 }
2958
2959 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2960                                  int vfn, bool aupe)
2961 {
2962         struct e1000_hw *hw = &adapter->hw;
2963         u32 vmolr;
2964
2965         /*
2966          * This register exists only on 82576 and newer, so on older
2967          * parts simply exit and do nothing
2968          */
2969         if (hw->mac.type < e1000_82576)
2970                 return;
2971
2972         vmolr = rd32(E1000_VMOLR(vfn));
2973         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2974         if (aupe)
2975                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2976         else
2977                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2978
2979         /* clear all bits that might not be set */
2980         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2981
2982         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2983                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2984         /*
2985          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2986          * multicast packets
2987          */
2988         if (vfn <= adapter->vfs_allocated_count)
2989                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2990
2991         wr32(E1000_VMOLR(vfn), vmolr);
2992 }
2993
2994 /**
2995  * igb_configure_rx_ring - Configure a receive ring after Reset
2996  * @adapter: board private structure
2997  * @ring: receive ring to be configured
2998  *
2999  * Configure the Rx unit of the MAC after a reset.
3000  **/
3001 void igb_configure_rx_ring(struct igb_adapter *adapter,
3002                            struct igb_ring *ring)
3003 {
3004         struct e1000_hw *hw = &adapter->hw;
3005         u64 rdba = ring->dma;
3006         int reg_idx = ring->reg_idx;
3007         u32 srrctl, rxdctl;
3008
3009         /* disable the queue */
3010         rxdctl = rd32(E1000_RXDCTL(reg_idx));
3011         wr32(E1000_RXDCTL(reg_idx),
3012                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
3013
3014         /* Set DMA base address registers */
3015         wr32(E1000_RDBAL(reg_idx),
3016              rdba & 0x00000000ffffffffULL);
3017         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3018         wr32(E1000_RDLEN(reg_idx),
3019                        ring->count * sizeof(union e1000_adv_rx_desc));
3020
3021         /* initialize head and tail */
3022         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
3023         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3024         writel(0, ring->head);
3025         writel(0, ring->tail);
3026
3027         /* set descriptor configuration */
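             /* the header buffer size is programmed in 64 byte units and the
              * packet buffer size in 1 KB units (hence the shift constants);
              * small buffers use header-split with the packet buffer capped at
              * half a page or 16 KB, whichever is smaller */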
3028         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
3029                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
3030                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3031 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3032                 srrctl |= IGB_RXBUFFER_16384 >>
3033                           E1000_SRRCTL_BSIZEPKT_SHIFT;
3034 #else
3035                 srrctl |= (PAGE_SIZE / 2) >>
3036                           E1000_SRRCTL_BSIZEPKT_SHIFT;
3037 #endif
3038                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3039         } else {
3040                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
3041                          E1000_SRRCTL_BSIZEPKT_SHIFT;
3042                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3043         }
3044         if (hw->mac.type == e1000_82580)
3045                 srrctl |= E1000_SRRCTL_TIMESTAMP;
3046         /* Only set Drop Enable if we are supporting multiple queues */
3047         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3048                 srrctl |= E1000_SRRCTL_DROP_EN;
3049
3050         wr32(E1000_SRRCTL(reg_idx), srrctl);
3051
3052         /* set filtering for VMDQ pools */
3053         igb_set_vmolr(adapter, reg_idx & 0x7, true);
3054
3055         /* enable receive descriptor fetching */
3056         rxdctl = rd32(E1000_RXDCTL(reg_idx));
3057         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3058         rxdctl &= 0xFFF00000;
3059         rxdctl |= IGB_RX_PTHRESH;
3060         rxdctl |= IGB_RX_HTHRESH << 8;
3061         rxdctl |= IGB_RX_WTHRESH << 16;
3062         wr32(E1000_RXDCTL(reg_idx), rxdctl);
3063 }
3064
3065 /**
3066  * igb_configure_rx - Configure receive Unit after Reset
3067  * @adapter: board private structure
3068  *
3069  * Configure the Rx unit of the MAC after a reset.
3070  **/
3071 static void igb_configure_rx(struct igb_adapter *adapter)
3072 {
3073         int i;
3074
3075         /* set UTA to appropriate mode */
3076         igb_set_uta(adapter);
3077
3078         /* set the correct pool for the PF default MAC address in entry 0 */
3079         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3080                          adapter->vfs_allocated_count);
3081
3082         /* Setup the HW Rx Head and Tail Descriptor Pointers and
3083          * the Base and Length of the Rx Descriptor Ring */
3084         for (i = 0; i < adapter->num_rx_queues; i++)
3085                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3086 }
3087
3088 /**
3089  * igb_free_tx_resources - Free Tx Resources per Queue
3090  * @tx_ring: Tx descriptor ring for a specific queue
3091  *
3092  * Free all transmit software resources
3093  **/
3094 void igb_free_tx_resources(struct igb_ring *tx_ring)
3095 {
3096         igb_clean_tx_ring(tx_ring);
3097
3098         vfree(tx_ring->buffer_info);
3099         tx_ring->buffer_info = NULL;
3100
3101         /* if not set, then don't free */
3102         if (!tx_ring->desc)
3103                 return;
3104
3105         dma_free_coherent(tx_ring->dev, tx_ring->size,
3106                           tx_ring->desc, tx_ring->dma);
3107
3108         tx_ring->desc = NULL;
3109 }
3110
3111 /**
3112  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3113  * @adapter: board private structure
3114  *
3115  * Free all transmit software resources
3116  **/
3117 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3118 {
3119         int i;
3120
3121         for (i = 0; i < adapter->num_tx_queues; i++)
3122                 igb_free_tx_resources(adapter->tx_ring[i]);
3123 }
3124
3125 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3126                                     struct igb_buffer *buffer_info)
3127 {
3128         if (buffer_info->dma) {
3129                 if (buffer_info->mapped_as_page)
3130                         dma_unmap_page(tx_ring->dev,
3131                                         buffer_info->dma,
3132                                         buffer_info->length,
3133                                         DMA_TO_DEVICE);
3134                 else
3135                         dma_unmap_single(tx_ring->dev,
3136                                         buffer_info->dma,
3137                                         buffer_info->length,
3138                                         DMA_TO_DEVICE);
3139                 buffer_info->dma = 0;
3140         }
3141         if (buffer_info->skb) {
3142                 dev_kfree_skb_any(buffer_info->skb);
3143                 buffer_info->skb = NULL;
3144         }
3145         buffer_info->time_stamp = 0;
3146         buffer_info->length = 0;
3147         buffer_info->next_to_watch = 0;
3148         buffer_info->mapped_as_page = false;
3149 }
3150
3151 /**
3152  * igb_clean_tx_ring - Free Tx Buffers
3153  * @tx_ring: ring to be cleaned
3154  **/
3155 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3156 {
3157         struct igb_buffer *buffer_info;
3158         unsigned long size;
3159         unsigned int i;
3160
3161         if (!tx_ring->buffer_info)
3162                 return;
3163         /* Free all the Tx ring sk_buffs */
3164
3165         for (i = 0; i < tx_ring->count; i++) {
3166                 buffer_info = &tx_ring->buffer_info[i];
3167                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3168         }
3169
3170         size = sizeof(struct igb_buffer) * tx_ring->count;
3171         memset(tx_ring->buffer_info, 0, size);
3172
3173         /* Zero out the descriptor ring */
3174         memset(tx_ring->desc, 0, tx_ring->size);
3175
3176         tx_ring->next_to_use = 0;
3177         tx_ring->next_to_clean = 0;
3178 }
3179
3180 /**
3181  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3182  * @adapter: board private structure
3183  **/
3184 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3185 {
3186         int i;
3187
3188         for (i = 0; i < adapter->num_tx_queues; i++)
3189                 igb_clean_tx_ring(adapter->tx_ring[i]);
3190 }
3191
3192 /**
3193  * igb_free_rx_resources - Free Rx Resources
3194  * @rx_ring: ring to clean the resources from
3195  *
3196  * Free all receive software resources
3197  **/
3198 void igb_free_rx_resources(struct igb_ring *rx_ring)
3199 {
3200         igb_clean_rx_ring(rx_ring);
3201
3202         vfree(rx_ring->buffer_info);
3203         rx_ring->buffer_info = NULL;
3204
3205         /* if not set, then don't free */
3206         if (!rx_ring->desc)
3207                 return;
3208
3209         dma_free_coherent(rx_ring->dev, rx_ring->size,
3210                           rx_ring->desc, rx_ring->dma);
3211
3212         rx_ring->desc = NULL;
3213 }
3214
3215 /**
3216  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3217  * @adapter: board private structure
3218  *
3219  * Free all receive software resources
3220  **/
3221 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3222 {
3223         int i;
3224
3225         for (i = 0; i < adapter->num_rx_queues; i++)
3226                 igb_free_rx_resources(adapter->rx_ring[i]);
3227 }
3228
3229 /**
3230  * igb_clean_rx_ring - Free Rx Buffers per Queue
3231  * @rx_ring: ring to free buffers from
3232  **/
3233 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3234 {
3235         struct igb_buffer *buffer_info;
3236         unsigned long size;
3237         unsigned int i;
3238
3239         if (!rx_ring->buffer_info)
3240                 return;
3241
3242         /* Free all the Rx ring sk_buffs */
3243         for (i = 0; i < rx_ring->count; i++) {
3244                 buffer_info = &rx_ring->buffer_info[i];
3245                 if (buffer_info->dma) {
3246                         dma_unmap_single(rx_ring->dev,
3247                                          buffer_info->dma,
3248                                          rx_ring->rx_buffer_len,
3249                                          DMA_FROM_DEVICE);
3250                         buffer_info->dma = 0;
3251                 }
3252
3253                 if (buffer_info->skb) {
3254                         dev_kfree_skb(buffer_info->skb);
3255                         buffer_info->skb = NULL;
3256                 }
3257                 if (buffer_info->page_dma) {
3258                         dma_unmap_page(rx_ring->dev,
3259                                        buffer_info->page_dma,
3260                                        PAGE_SIZE / 2,
3261                                        DMA_FROM_DEVICE);
3262                         buffer_info->page_dma = 0;
3263                 }
3264                 if (buffer_info->page) {
3265                         put_page(buffer_info->page);
3266                         buffer_info->page = NULL;
3267                         buffer_info->page_offset = 0;
3268                 }
3269         }
3270
3271         size = sizeof(struct igb_buffer) * rx_ring->count;
3272         memset(rx_ring->buffer_info, 0, size);
3273
3274         /* Zero out the descriptor ring */
3275         memset(rx_ring->desc, 0, rx_ring->size);
3276
3277         rx_ring->next_to_clean = 0;
3278         rx_ring->next_to_use = 0;
3279 }
3280
3281 /**
3282  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3283  * @adapter: board private structure
3284  **/
3285 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3286 {
3287         int i;
3288
3289         for (i = 0; i < adapter->num_rx_queues; i++)
3290                 igb_clean_rx_ring(adapter->rx_ring[i]);
3291 }
3292
3293 /**
3294  * igb_set_mac - Change the Ethernet Address of the NIC
3295  * @netdev: network interface device structure
3296  * @p: pointer to an address structure
3297  *
3298  * Returns 0 on success, negative on failure
3299  **/
3300 static int igb_set_mac(struct net_device *netdev, void *p)
3301 {
3302         struct igb_adapter *adapter = netdev_priv(netdev);
3303         struct e1000_hw *hw = &adapter->hw;
3304         struct sockaddr *addr = p;
3305
3306         if (!is_valid_ether_addr(addr->sa_data))
3307                 return -EADDRNOTAVAIL;
3308
3309         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3310         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3311
3312         /* set the correct pool for the new PF MAC address in entry 0 */
3313         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3314                          adapter->vfs_allocated_count);
3315
3316         return 0;
3317 }
3318
3319 /**
3320  * igb_write_mc_addr_list - write multicast addresses to MTA
3321  * @netdev: network interface device structure
3322  *
3323  * Writes multicast address list to the MTA hash table.
3324  * Returns: -ENOMEM on failure
3325  *                0 on no addresses written
3326  *                X on writing X addresses to MTA
3327  **/
3328 static int igb_write_mc_addr_list(struct net_device *netdev)
3329 {
3330         struct igb_adapter *adapter = netdev_priv(netdev);
3331         struct e1000_hw *hw = &adapter->hw;
3332         struct netdev_hw_addr *ha;
3333         u8  *mta_list;
3334         int i;
3335
3336         if (netdev_mc_empty(netdev)) {
3337                 /* nothing to program, so clear mc list */
3338                 igb_update_mc_addr_list(hw, NULL, 0);
3339                 igb_restore_vf_multicasts(adapter);
3340                 return 0;
3341         }
3342
3343         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3344         if (!mta_list)
3345                 return -ENOMEM;
3346
3347         /* The shared function expects a packed array of only addresses. */
3348         i = 0;
3349         netdev_for_each_mc_addr(ha, netdev)
3350                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3351
3352         igb_update_mc_addr_list(hw, mta_list, i);
3353         kfree(mta_list);
3354
3355         return netdev_mc_count(netdev);
3356 }
3357
3358 /**
3359  * igb_write_uc_addr_list - write unicast addresses to RAR table
3360  * @netdev: network interface device structure
3361  *
3362  * Writes unicast address list to the RAR table.
3363  * Returns: -ENOMEM on failure/insufficient address space
3364  *                0 on no addresses written
3365  *                X on writing X addresses to the RAR table
3366  **/
3367 static int igb_write_uc_addr_list(struct net_device *netdev)
3368 {
3369         struct igb_adapter *adapter = netdev_priv(netdev);
3370         struct e1000_hw *hw = &adapter->hw;
3371         unsigned int vfn = adapter->vfs_allocated_count;
3372         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
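             /* RAR entry 0 holds the PF MAC and the next vfn entries are
              * reserved for VF MAC addresses, so only the remainder is
              * available for the unicast filters written below */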
3373         int count = 0;
3374
3375         /* return ENOMEM indicating insufficient memory for addresses */
3376         if (netdev_uc_count(netdev) > rar_entries)
3377                 return -ENOMEM;
3378
3379         if (!netdev_uc_empty(netdev) && rar_entries) {
3380                 struct netdev_hw_addr *ha;
3381
3382                 netdev_for_each_uc_addr(ha, netdev) {
3383                         if (!rar_entries)
3384                                 break;
3385                         igb_rar_set_qsel(adapter, ha->addr,
3386                                          rar_entries--,
3387                                          vfn);
3388                         count++;
3389                 }
3390         }
3391         /* clear unused RAR entries in reverse order to avoid write combining */
3392         for (; rar_entries > 0 ; rar_entries--) {
3393                 wr32(E1000_RAH(rar_entries), 0);
3394                 wr32(E1000_RAL(rar_entries), 0);
3395         }
3396         wrfl();
3397
3398         return count;
3399 }
3400
3401 /**
3402  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3403  * @netdev: network interface device structure
3404  *
3405  * The set_rx_mode entry point is called whenever the unicast or multicast
3406  * address lists or the network interface flags are updated.  This routine is
3407  * responsible for configuring the hardware for proper unicast, multicast,
3408  * promiscuous mode, and all-multi behavior.
3409  **/
3410 static void igb_set_rx_mode(struct net_device *netdev)
3411 {
3412         struct igb_adapter *adapter = netdev_priv(netdev);
3413         struct e1000_hw *hw = &adapter->hw;
3414         unsigned int vfn = adapter->vfs_allocated_count;
3415         u32 rctl, vmolr = 0;
3416         int count;
3417
3418         /* Check for Promiscuous and All Multicast modes */
3419         rctl = rd32(E1000_RCTL);
3420
3421         /* clear the affected bits */
3422         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3423
3424         if (netdev->flags & IFF_PROMISC) {
3425                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3426                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3427         } else {
3428                 if (netdev->flags & IFF_ALLMULTI) {
3429                         rctl |= E1000_RCTL_MPE;
3430                         vmolr |= E1000_VMOLR_MPME;
3431                 } else {
3432                         /*
3433                          * Write addresses to the MTA, if the attempt fails
3434                          * then we should just turn on promiscuous mode so
3435                          * that we can at least receive multicast traffic
3436                          */
3437                         count = igb_write_mc_addr_list(netdev);
3438                         if (count < 0) {
3439                                 rctl |= E1000_RCTL_MPE;
3440                                 vmolr |= E1000_VMOLR_MPME;
3441                         } else if (count) {
3442                                 vmolr |= E1000_VMOLR_ROMPE;
3443                         }
3444                 }
3445                 /*
3446                  * Write addresses to available RAR registers, if there is not
3447                  * sufficient space to store all the addresses then enable
3448                  * unicast promiscuous mode
3449                  */
3450                 count = igb_write_uc_addr_list(netdev);
3451                 if (count < 0) {
3452                         rctl |= E1000_RCTL_UPE;
3453                         vmolr |= E1000_VMOLR_ROPE;
3454                 }
3455                 rctl |= E1000_RCTL_VFE;
3456         }
3457         wr32(E1000_RCTL, rctl);
3458
3459         /*
3460          * In order to support SR-IOV and eventually VMDq it is necessary to set
3461          * the VMOLR to enable the appropriate modes.  Without this workaround
3462          * we will have issues with VLAN tag stripping not being done for frames
3463          * that are only arriving because we are the default pool
3464          */
3465         if (hw->mac.type < e1000_82576)
3466                 return;
3467
3468         vmolr |= rd32(E1000_VMOLR(vfn)) &
3469                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3470         wr32(E1000_VMOLR(vfn), vmolr);
3471         igb_restore_vf_multicasts(adapter);
3472 }
3473
3474 static void igb_check_wvbr(struct igb_adapter *adapter)
3475 {
3476         struct e1000_hw *hw = &adapter->hw;
3477         u32 wvbr = 0;
3478
3479         switch (hw->mac.type) {
3480         case e1000_82576:
3481         case e1000_i350:
3482                 if (!(wvbr = rd32(E1000_WVBR)))
3483                         return;
3484                 break;
3485         default:
3486                 break;
3487         }
3488
3489         adapter->wvbr |= wvbr;
3490 }
3491
3492 #define IGB_STAGGERED_QUEUE_OFFSET 8
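     /* Each VF owns two bits in the spoof bitmap checked below: bit j for its
      * first queue and bit (j + IGB_STAGGERED_QUEUE_OFFSET) for its second */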
3493
3494 static void igb_spoof_check(struct igb_adapter *adapter)
3495 {
3496         int j;
3497
3498         if (!adapter->wvbr)
3499                 return;
3500
3501         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3502                 if (adapter->wvbr & (1 << j) ||
3503                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3504                         dev_warn(&adapter->pdev->dev,
3505                                 "Spoof event(s) detected on VF %d\n", j);
3506                         adapter->wvbr &=
3507                                 ~((1 << j) |
3508                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3509                 }
3510         }
3511 }
3512
3513 /* Need to wait a few seconds after link up to get diagnostic information from
3514  * the phy */
3515 static void igb_update_phy_info(unsigned long data)
3516 {
3517         struct igb_adapter *adapter = (struct igb_adapter *) data;
3518         igb_get_phy_info(&adapter->hw);
3519 }
3520
3521 /**
3522  * igb_has_link - check shared code for link and determine up/down
3523  * @adapter: pointer to driver private info
3524  **/
3525 bool igb_has_link(struct igb_adapter *adapter)
3526 {
3527         struct e1000_hw *hw = &adapter->hw;
3528         bool link_active = false;
3529         s32 ret_val = 0;
3530
3531         /* get_link_status is set on LSC (link status) interrupt or
3532          * rx sequence error interrupt.  get_link_status will stay
3533          * true until the e1000_check_for_link establishes link
3534          * for copper adapters ONLY
3535          */
3536         switch (hw->phy.media_type) {
3537         case e1000_media_type_copper:
3538                 if (hw->mac.get_link_status) {
3539                         ret_val = hw->mac.ops.check_for_link(hw);
3540                         link_active = !hw->mac.get_link_status;
3541                 } else {
3542                         link_active = true;
3543                 }
3544                 break;
3545         case e1000_media_type_internal_serdes:
3546                 ret_val = hw->mac.ops.check_for_link(hw);
3547                 link_active = hw->mac.serdes_has_link;
3548                 break;
3549         default:
3550         case e1000_media_type_unknown:
3551                 break;
3552         }
3553
3554         return link_active;
3555 }
3556
3557 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3558 {
3559         bool ret = false;
3560         u32 ctrl_ext, thstat;
3561
3562         /* check for thermal sensor event on i350, copper only */
3563         if (hw->mac.type == e1000_i350) {
3564                 thstat = rd32(E1000_THSTAT);
3565                 ctrl_ext = rd32(E1000_CTRL_EXT);
3566
3567                 if ((hw->phy.media_type == e1000_media_type_copper) &&
3568                     !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3569                         ret = !!(thstat & event);
3570                 }
3571         }
3572
3573         return ret;
3574 }
3575
3576 /**
3577  * igb_watchdog - Timer Call-back
3578  * @data: pointer to adapter cast into an unsigned long
3579  **/
3580 static void igb_watchdog(unsigned long data)
3581 {
3582         struct igb_adapter *adapter = (struct igb_adapter *)data;
3583         /* Do the rest outside of interrupt context */
3584         schedule_work(&adapter->watchdog_task);
3585 }
3586
3587 static void igb_watchdog_task(struct work_struct *work)
3588 {
3589         struct igb_adapter *adapter = container_of(work,
3590                                                    struct igb_adapter,
3591                                                    watchdog_task);
3592         struct e1000_hw *hw = &adapter->hw;
3593         struct net_device *netdev = adapter->netdev;
3594         u32 link;
3595         int i;
3596
3597         link = igb_has_link(adapter);
3598         if (link) {
3599                 if (!netif_carrier_ok(netdev)) {
3600                         u32 ctrl;
3601                         hw->mac.ops.get_speed_and_duplex(hw,
3602                                                          &adapter->link_speed,
3603                                                          &adapter->link_duplex);
3604
3605                         ctrl = rd32(E1000_CTRL);
3606                         /* Link status message must follow this format */
3607                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3608                                  "Flow Control: %s\n",
3609                                netdev->name,
3610                                adapter->link_speed,
3611                                adapter->link_duplex == FULL_DUPLEX ?
3612                                  "Full Duplex" : "Half Duplex",
3613                                ((ctrl & E1000_CTRL_TFCE) &&
3614                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3615                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3616                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3617
3618                         /* check for thermal sensor event */
3619                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3620                                 printk(KERN_INFO "igb: %s The network adapter "
3621                                                  "link speed was downshifted "
3622                                                  "because it overheated.\n",
3623                                                  netdev->name);
3624                         }
3625
3626                         /* adjust timeout factor according to speed/duplex */
3627                         adapter->tx_timeout_factor = 1;
3628                         switch (adapter->link_speed) {
3629                         case SPEED_10:
3630                                 adapter->tx_timeout_factor = 14;
3631                                 break;
3632                         case SPEED_100:
3633                                 /* maybe add some timeout factor ? */
3634                                 break;
3635                         }
3636
3637                         netif_carrier_on(netdev);
3638
3639                         igb_ping_all_vfs(adapter);
3640                         igb_check_vf_rate_limit(adapter);
3641
3642                         /* link state has changed, schedule phy info update */
3643                         if (!test_bit(__IGB_DOWN, &adapter->state))
3644                                 mod_timer(&adapter->phy_info_timer,
3645                                           round_jiffies(jiffies + 2 * HZ));
3646                 }
3647         } else {
3648                 if (netif_carrier_ok(netdev)) {
3649                         adapter->link_speed = 0;
3650                         adapter->link_duplex = 0;
3651
3652                         /* check for thermal sensor event */
3653                         if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3654                                 printk(KERN_ERR "igb: %s The network adapter "
3655                                                 "was stopped because it "
3656                                                 "overheated.\n",
3657                                                 netdev->name);
3658                         }
3659
3660                         /* Link status message must follow this format */
3661                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3662                                netdev->name);
3663                         netif_carrier_off(netdev);
3664
3665                         igb_ping_all_vfs(adapter);
3666
3667                         /* link state has changed, schedule phy info update */
3668                         if (!test_bit(__IGB_DOWN, &adapter->state))
3669                                 mod_timer(&adapter->phy_info_timer,
3670                                           round_jiffies(jiffies + 2 * HZ));
3671                 }
3672         }
3673
3674         spin_lock(&adapter->stats64_lock);
3675         igb_update_stats(adapter, &adapter->stats64);
3676         spin_unlock(&adapter->stats64_lock);
3677
3678         for (i = 0; i < adapter->num_tx_queues; i++) {
3679                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3680                 if (!netif_carrier_ok(netdev)) {
3681                         /* We've lost link, so the controller stops DMA,
3682                          * but we've got queued Tx work that's never going
3683                          * to get done, so reset controller to flush Tx.
3684                          * (Do the reset outside of interrupt context). */
3685                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3686                                 adapter->tx_timeout_count++;
3687                                 schedule_work(&adapter->reset_task);
3688                                 /* return immediately since reset is imminent */
3689                                 return;
3690                         }
3691                 }
3692
3693                 /* Force detection of hung controller every watchdog period */
3694                 tx_ring->detect_tx_hung = true;
3695         }
3696
3697         /* Cause software interrupt to ensure rx ring is cleaned */
3698         if (adapter->msix_entries) {
3699                 u32 eics = 0;
3700                 for (i = 0; i < adapter->num_q_vectors; i++) {
3701                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3702                         eics |= q_vector->eims_value;
3703                 }
3704                 wr32(E1000_EICS, eics);
3705         } else {
3706                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3707         }
3708
3709         igb_spoof_check(adapter);
3710
3711         /* Reset the timer */
3712         if (!test_bit(__IGB_DOWN, &adapter->state))
3713                 mod_timer(&adapter->watchdog_timer,
3714                           round_jiffies(jiffies + 2 * HZ));
3715 }
3716
3717 enum latency_range {
3718         lowest_latency = 0,
3719         low_latency = 1,
3720         bulk_latency = 2,
3721         latency_invalid = 255
3722 };
3723
3724 /**
3725  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3726  *
3727  *      Stores a new ITR value based strictly on packet size.  This
3728  *      algorithm is less sophisticated than that used in igb_update_itr,
3729  *      due to the difficulty of synchronizing statistics across multiple
3730  *      receive rings.  The divisors and thresholds used by this function
3731  *      were determined based on theoretical maximum wire speed and testing
3732  *      data, in order to minimize response time while increasing bulk
3733  *      throughput.
3734  *      This functionality is controlled by the InterruptThrottleRate module
3735  *      parameter (see igb_param.c)
3736  *      NOTE:  This function is called only when operating in a multiqueue
3737  *             receive environment.
3738  * @q_vector: pointer to q_vector
3739  **/
3740 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3741 {
3742         int new_val = q_vector->itr_val;
3743         int avg_wire_size = 0;
3744         struct igb_adapter *adapter = q_vector->adapter;
3745         struct igb_ring *ring;
3746         unsigned int packets;
3747
3748         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3749          * ints/sec - an ITR value of 976 (~250 usec).
3750          */
3751         if (adapter->link_speed != SPEED_1000) {
3752                 new_val = 976;
3753                 goto set_itr_val;
3754         }
3755
3756         ring = q_vector->rx_ring;
3757         if (ring) {
3758                 packets = ACCESS_ONCE(ring->total_packets);
3759
3760                 if (packets)
3761                         avg_wire_size = ring->total_bytes / packets;
3762         }
3763
3764         ring = q_vector->tx_ring;
3765         if (ring) {
3766                 packets = ACCESS_ONCE(ring->total_packets);
3767
3768                 if (packets)
3769                         avg_wire_size = max_t(u32, avg_wire_size,
3770                                               ring->total_bytes / packets);
3771         }
3772
3773         /* if avg_wire_size isn't set no work was done */
3774         if (!avg_wire_size)
3775                 goto clear_counts;
3776
3777         /* Add 24 bytes to size to account for CRC, preamble, and gap */
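             /* (4 byte CRC + 8 byte preamble/SFD + 12 byte inter-frame gap) */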
3778         avg_wire_size += 24;
3779
3780         /* Don't starve jumbo frames */
3781         avg_wire_size = min(avg_wire_size, 3000);
3782
3783         /* Give a little boost to mid-size frames */
3784         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3785                 new_val = avg_wire_size / 3;
3786         else
3787                 new_val = avg_wire_size / 2;
3788
3789         /* when in itr mode 3 do not exceed 20K ints/sec */
3790         if (adapter->rx_itr_setting == 3 && new_val < 196)
3791                 new_val = 196;
3792
3793 set_itr_val:
3794         if (new_val != q_vector->itr_val) {
3795                 q_vector->itr_val = new_val;
3796                 q_vector->set_itr = 1;
3797         }
3798 clear_counts:
3799         if (q_vector->rx_ring) {
3800                 q_vector->rx_ring->total_bytes = 0;
3801                 q_vector->rx_ring->total_packets = 0;
3802         }
3803         if (q_vector->tx_ring) {
3804                 q_vector->tx_ring->total_bytes = 0;
3805                 q_vector->tx_ring->total_packets = 0;
3806         }
3807 }
3808
3809 /**
3810  * igb_update_itr - update the dynamic ITR value based on statistics
3811  *      Stores a new ITR value based on packets and byte
3812  *      counts during the last interrupt.  The advantage of per interrupt
3813  *      computation is faster updates and more accurate ITR for the current
3814  *      traffic pattern.  Constants in this function were computed
3815  *      based on theoretical maximum wire speed and thresholds were set based
3816  *      on testing data as well as attempting to minimize response time
3817  *      while increasing bulk throughput.
3818  *      This functionality is controlled by the InterruptThrottleRate module
3819  *      parameter (see igb_param.c)
3820  *      NOTE:  These calculations are only valid when operating in a single-
3821  *             queue environment.
3822  * @adapter: pointer to adapter
3823  * @itr_setting: current q_vector->itr_val
3824  * @packets: the number of packets during this measurement interval
3825  * @bytes: the number of bytes during this measurement interval
3826  **/
3827 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3828                                    int packets, int bytes)
3829 {
3830         unsigned int retval = itr_setting;
3831
3832         if (packets == 0)
3833                 goto update_itr_done;
3834
3835         switch (itr_setting) {
3836         case lowest_latency:
3837                 /* handle TSO and jumbo frames */
3838                 if (bytes/packets > 8000)
3839                         retval = bulk_latency;
3840                 else if ((packets < 5) && (bytes > 512))
3841                         retval = low_latency;
3842                 break;
3843         case low_latency:  /* 50 usec aka 20000 ints/s */
3844                 if (bytes > 10000) {
3845                         /* this if handles the TSO accounting */
3846                         if (bytes/packets > 8000) {
3847                                 retval = bulk_latency;
3848                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3849                                 retval = bulk_latency;
3850                         } else if (packets > 35) {
3851                                 retval = lowest_latency;
3852                         }
3853                 } else if (bytes/packets > 2000) {
3854                         retval = bulk_latency;
3855                 } else if (packets <= 2 && bytes < 512) {
3856                         retval = lowest_latency;
3857                 }
3858                 break;
3859         case bulk_latency: /* 250 usec aka 4000 ints/s */
3860                 if (bytes > 25000) {
3861                         if (packets > 35)
3862                                 retval = low_latency;
3863                 } else if (bytes < 1500) {
3864                         retval = low_latency;
3865                 }
3866                 break;
3867         }
3868
3869 update_itr_done:
3870         return retval;
3871 }
3872
3873 static void igb_set_itr(struct igb_adapter *adapter)
3874 {
3875         struct igb_q_vector *q_vector = adapter->q_vector[0];
3876         u16 current_itr;
3877         u32 new_itr = q_vector->itr_val;
3878
3879         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3880         if (adapter->link_speed != SPEED_1000) {
3881                 current_itr = 0;
3882                 new_itr = 4000;
3883                 goto set_itr_now;
3884         }
3885
3886         adapter->rx_itr = igb_update_itr(adapter,
3887                                     adapter->rx_itr,
3888                                     q_vector->rx_ring->total_packets,
3889                                     q_vector->rx_ring->total_bytes);
3890
3891         adapter->tx_itr = igb_update_itr(adapter,
3892                                     adapter->tx_itr,
3893                                     q_vector->tx_ring->total_packets,
3894                                     q_vector->tx_ring->total_bytes);
3895         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3896
3897         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3898         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3899                 current_itr = low_latency;
3900
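             /* each ITR count here is roughly 256 ns, so the values below
              * correspond to roughly 14, 50 and 250 usec intervals */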
3901         switch (current_itr) {
3902         /* counts and packets in update_itr are dependent on these numbers */
3903         case lowest_latency:
3904                 new_itr = 56;  /* aka 70,000 ints/sec */
3905                 break;
3906         case low_latency:
3907                 new_itr = 196; /* aka 20,000 ints/sec */
3908                 break;
3909         case bulk_latency:
3910                 new_itr = 980; /* aka 4,000 ints/sec */
3911                 break;
3912         default:
3913                 break;
3914         }
3915
3916 set_itr_now:
3917         q_vector->rx_ring->total_bytes = 0;
3918         q_vector->rx_ring->total_packets = 0;
3919         q_vector->tx_ring->total_bytes = 0;
3920         q_vector->tx_ring->total_packets = 0;
3921
3922         if (new_itr != q_vector->itr_val) {
3923                 /* this attempts to bias the interrupt rate towards Bulk
3924                  * by adding intermediate steps when interrupt rate is
3925                  * increasing */
3926                 new_itr = new_itr > q_vector->itr_val ?
3927                              max((new_itr * q_vector->itr_val) /
3928                                  (new_itr + (q_vector->itr_val >> 2)),
3929                                  new_itr) :
3930                              new_itr;
3931                 /* Don't write the value here; it resets the adapter's
3932                  * internal timer, and causes us to delay far longer than
3933                  * we should between interrupts.  Instead, we write the ITR
3934                  * value at the beginning of the next interrupt so the timing
3935                  * ends up being correct.
3936                  */
3937                 q_vector->itr_val = new_itr;
3938                 q_vector->set_itr = 1;
3939         }
3940 }
3941
3942 #define IGB_TX_FLAGS_CSUM               0x00000001
3943 #define IGB_TX_FLAGS_VLAN               0x00000002
3944 #define IGB_TX_FLAGS_TSO                0x00000004
3945 #define IGB_TX_FLAGS_IPV4               0x00000008
3946 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3947 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3948 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3949
3950 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3951                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3952 {
3953         struct e1000_adv_tx_context_desc *context_desc;
3954         unsigned int i;
3955         int err;
3956         struct igb_buffer *buffer_info;
3957         u32 info = 0, tu_cmd = 0;
3958         u32 mss_l4len_idx;
3959         u8 l4len;
3960
3961         if (skb_header_cloned(skb)) {
3962                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3963                 if (err)
3964                         return err;
3965         }
3966
3967         l4len = tcp_hdrlen(skb);
3968         *hdr_len += l4len;
3969
3970         if (skb->protocol == htons(ETH_P_IP)) {
3971                 struct iphdr *iph = ip_hdr(skb);
3972                 iph->tot_len = 0;
3973                 iph->check = 0;
3974                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3975                                                          iph->daddr, 0,
3976                                                          IPPROTO_TCP,
3977                                                          0);
3978         } else if (skb_is_gso_v6(skb)) {
3979                 ipv6_hdr(skb)->payload_len = 0;
3980                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3981                                                        &ipv6_hdr(skb)->daddr,
3982                                                        0, IPPROTO_TCP, 0);
3983         }
3984
3985         i = tx_ring->next_to_use;
3986
3987         buffer_info = &tx_ring->buffer_info[i];
3988         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3989         /* VLAN MACLEN IPLEN */
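             /* vlan_macip_lens packs the VLAN tag into the upper 16 bits, the
              * MAC header length at E1000_ADVTXD_MACLEN_SHIFT and the IP
              * header length in the low bits */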
3990         if (tx_flags & IGB_TX_FLAGS_VLAN)
3991                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3992         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3993         *hdr_len += skb_network_offset(skb);
3994         info |= skb_network_header_len(skb);
3995         *hdr_len += skb_network_header_len(skb);
3996         context_desc->vlan_macip_lens = cpu_to_le32(info);
3997
3998         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3999         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4000
4001         if (skb->protocol == htons(ETH_P_IP))
4002                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4003         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4004
4005         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4006
4007         /* MSS L4LEN IDX */
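             /* MSS and L4 (TCP) header length are packed at their shifts; the
              * optional per-ring context index is added just below for parts
              * that require it */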
4008         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
4009         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
4010
4011         /* For 82575, context index must be unique per ring. */
4012         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4013                 mss_l4len_idx |= tx_ring->reg_idx << 4;
4014
4015         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4016         context_desc->seqnum_seed = 0;
4017
4018         buffer_info->time_stamp = jiffies;
4019         buffer_info->next_to_watch = i;
4020         buffer_info->dma = 0;
4021         i++;
4022         if (i == tx_ring->count)
4023                 i = 0;
4024
4025         tx_ring->next_to_use = i;
4026
4027         return true;
4028 }
4029
4030 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
4031                                    struct sk_buff *skb, u32 tx_flags)
4032 {
4033         struct e1000_adv_tx_context_desc *context_desc;
4034         struct device *dev = tx_ring->dev;
4035         struct igb_buffer *buffer_info;
4036         u32 info = 0, tu_cmd = 0;
4037         unsigned int i;
4038
4039         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4040             (tx_flags & IGB_TX_FLAGS_VLAN)) {
4041                 i = tx_ring->next_to_use;
4042                 buffer_info = &tx_ring->buffer_info[i];
4043                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4044
4045                 if (tx_flags & IGB_TX_FLAGS_VLAN)
4046                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4047
4048                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4049                 if (skb->ip_summed == CHECKSUM_PARTIAL)
4050                         info |= skb_network_header_len(skb);
4051
4052                 context_desc->vlan_macip_lens = cpu_to_le32(info);
4053
4054                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4055
4056                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
4057                         __be16 protocol;
4058
4059                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4060                                 const struct vlan_ethhdr *vhdr =
4061                                           (const struct vlan_ethhdr*)skb->data;
4062
4063                                 protocol = vhdr->h_vlan_encapsulated_proto;
4064                         } else {
4065                                 protocol = skb->protocol;
4066                         }
4067
4068                         switch (protocol) {
4069                         case cpu_to_be16(ETH_P_IP):
4070                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4071                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4072                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4073                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4074                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4075                                 break;
4076                         case cpu_to_be16(ETH_P_IPV6):
4077                                 /* XXX what about other V6 headers?? */
4078                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4079                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4080                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4081                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4082                                 break;
4083                         default:
4084                                 if (unlikely(net_ratelimit()))
4085                                         dev_warn(dev,
4086                                             "partial checksum but proto=%x!\n",
4087                                             skb->protocol);
4088                                 break;
4089                         }
4090                 }
4091
4092                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4093                 context_desc->seqnum_seed = 0;
4094                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4095                         context_desc->mss_l4len_idx =
4096                                 cpu_to_le32(tx_ring->reg_idx << 4);
4097
4098                 buffer_info->time_stamp = jiffies;
4099                 buffer_info->next_to_watch = i;
4100                 buffer_info->dma = 0;
4101
4102                 i++;
4103                 if (i == tx_ring->count)
4104                         i = 0;
4105                 tx_ring->next_to_use = i;
4106
4107                 return true;
4108         }
4109         return false;
4110 }
4111
4112 #define IGB_MAX_TXD_PWR 16
4113 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
4114
4115 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
4116                                  unsigned int first)
4117 {
4118         struct igb_buffer *buffer_info;
4119         struct device *dev = tx_ring->dev;
4120         unsigned int hlen = skb_headlen(skb);
4121         unsigned int count = 0, i;
4122         unsigned int f;
4123         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4124
4125         i = tx_ring->next_to_use;
4126
4127         buffer_info = &tx_ring->buffer_info[i];
4128         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4129         buffer_info->length = hlen;
4130         /* set time_stamp *before* dma to help avoid a possible race */
4131         buffer_info->time_stamp = jiffies;
4132         buffer_info->next_to_watch = i;
4133         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4134                                           DMA_TO_DEVICE);
4135         if (dma_mapping_error(dev, buffer_info->dma))
4136                 goto dma_error;
4137
4138         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4139                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4140                 unsigned int len = frag->size;
4141
4142                 count++;
4143                 i++;
4144                 if (i == tx_ring->count)
4145                         i = 0;
4146
4147                 buffer_info = &tx_ring->buffer_info[i];
4148                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4149                 buffer_info->length = len;
4150                 buffer_info->time_stamp = jiffies;
4151                 buffer_info->next_to_watch = i;
4152                 buffer_info->mapped_as_page = true;
4153                 buffer_info->dma = dma_map_page(dev,
4154                                                 frag->page,
4155                                                 frag->page_offset,
4156                                                 len,
4157                                                 DMA_TO_DEVICE);
4158                 if (dma_mapping_error(dev, buffer_info->dma))
4159                         goto dma_error;
4160
4161         }
4162
4163         tx_ring->buffer_info[i].skb = skb;
4164         tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4165         /* multiply data chunks by size of headers */
4166         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4167         tx_ring->buffer_info[i].gso_segs = gso_segs;
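        /*
         * Illustrative arithmetic for bytecount above: with gso_segs = 4 and
         * hlen = 66, the headers go on the wire three extra times, giving
         * bytecount = skb->len + 3 * 66; for a non-TSO skb gso_segs is 1 and
         * bytecount is simply skb->len.
         */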
4168         tx_ring->buffer_info[first].next_to_watch = i;
4169
4170         return ++count;
4171
4172 dma_error:
4173         dev_err(dev, "TX DMA map failed\n");
4174
4175         /* clear timestamp and dma mappings for failed buffer_info mapping */
4176         buffer_info->dma = 0;
4177         buffer_info->time_stamp = 0;
4178         buffer_info->length = 0;
4179         buffer_info->next_to_watch = 0;
4180         buffer_info->mapped_as_page = false;
4181
4182         /* clear timestamp and dma mappings for remaining portion of packet */
4183         while (count--) {
4184                 if (i == 0)
4185                         i = tx_ring->count;
4186                 i--;
4187                 buffer_info = &tx_ring->buffer_info[i];
4188                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4189         }
4190
4191         return 0;
4192 }
4193
4194 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4195                                     u32 tx_flags, int count, u32 paylen,
4196                                     u8 hdr_len)
4197 {
4198         union e1000_adv_tx_desc *tx_desc;
4199         struct igb_buffer *buffer_info;
4200         u32 olinfo_status = 0, cmd_type_len;
4201         unsigned int i = tx_ring->next_to_use;
4202
4203         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4204                         E1000_ADVTXD_DCMD_DEXT);
4205
4206         if (tx_flags & IGB_TX_FLAGS_VLAN)
4207                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4208
4209         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4210                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4211
4212         if (tx_flags & IGB_TX_FLAGS_TSO) {
4213                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4214
4215                 /* insert tcp checksum */
4216                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4217
4218                 /* insert ip checksum */
4219                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4220                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4221
4222         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4223                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4224         }
4225
4226         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4227             (tx_flags & (IGB_TX_FLAGS_CSUM |
4228                          IGB_TX_FLAGS_TSO |
4229                          IGB_TX_FLAGS_VLAN)))
4230                 olinfo_status |= tx_ring->reg_idx << 4;
4231
4232         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4233
4234         do {
4235                 buffer_info = &tx_ring->buffer_info[i];
4236                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4237                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4238                 tx_desc->read.cmd_type_len =
4239                         cpu_to_le32(cmd_type_len | buffer_info->length);
4240                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4241                 count--;
4242                 i++;
4243                 if (i == tx_ring->count)
4244                         i = 0;
4245         } while (count > 0);
4246
4247         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
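        /*
         * Only the final descriptor of the chain gets IGB_ADVTXD_DCMD ORed in
         * above; that mask is expected to add the end-of-packet and
         * report-status bits (see its definition in igb.h; stated here as an
         * interpretation, not verified from this file).
         */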
4248         /* Force memory writes to complete before letting h/w
4249          * know there are new descriptors to fetch.  (Only
4250          * applicable for weak-ordered memory model archs,
4251          * such as IA-64). */
4252         wmb();
4253
4254         tx_ring->next_to_use = i;
4255         writel(i, tx_ring->tail);
4256         /* we need this if more than one processor can write to our tail
4257          * at a time, it synchronizes IO on IA64/Altix systems */
4258         mmiowb();
4259 }
4260
4261 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4262 {
4263         struct net_device *netdev = tx_ring->netdev;
4264
4265         netif_stop_subqueue(netdev, tx_ring->queue_index);
4266
4267         /* Herbert's original patch had:
4268          *  smp_mb__after_netif_stop_queue();
4269          * but since that doesn't exist yet, just open code it. */
4270         smp_mb();
4271
4272         /* We need to check again in case another CPU has just
4273          * made room available. */
4274         if (igb_desc_unused(tx_ring) < size)
4275                 return -EBUSY;
4276
4277         /* A reprieve! */
4278         netif_wake_subqueue(netdev, tx_ring->queue_index);
4279
4280         u64_stats_update_begin(&tx_ring->tx_syncp2);
4281         tx_ring->tx_stats.restart_queue2++;
4282         u64_stats_update_end(&tx_ring->tx_syncp2);
4283
4284         return 0;
4285 }
4286
4287 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4288 {
4289         if (igb_desc_unused(tx_ring) >= size)
4290                 return 0;
4291         return __igb_maybe_stop_tx(tx_ring, size);
4292 }
4293
4294 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4295                                     struct igb_ring *tx_ring)
4296 {
4297         int tso = 0, count;
4298         u32 tx_flags = 0;
4299         u16 first;
4300         u8 hdr_len = 0;
4301
4302         /* need: 1 descriptor per page,
4303          *       + 2 desc gap to keep tail from touching head,
4304          *       + 1 desc for skb->data,
4305          *       + 1 desc for context descriptor,
4306          * otherwise try next time */
4307         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4308                 /* this is a hard error */
4309                 return NETDEV_TX_BUSY;
4310         }
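        /*
         * Illustrative descriptor budget, derived from the comment above: a
         * packet with 3 page fragments may need 3 + 1 (skb->data) + 1
         * (context descriptor) = 5 descriptors, and the extra 2-descriptor
         * gap keeps the tail from touching the head, hence nr_frags + 4.
         */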
4311
4312         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4313                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4314                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4315         }
4316
4317         if (vlan_tx_tag_present(skb)) {
4318                 tx_flags |= IGB_TX_FLAGS_VLAN;
4319                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4320         }
4321
4322         if (skb->protocol == htons(ETH_P_IP))
4323                 tx_flags |= IGB_TX_FLAGS_IPV4;
4324
4325         first = tx_ring->next_to_use;
4326         if (skb_is_gso(skb)) {
4327                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4328
4329                 if (tso < 0) {
4330                         dev_kfree_skb_any(skb);
4331                         return NETDEV_TX_OK;
4332                 }
4333         }
4334
4335         if (tso)
4336                 tx_flags |= IGB_TX_FLAGS_TSO;
4337         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4338                  (skb->ip_summed == CHECKSUM_PARTIAL))
4339                 tx_flags |= IGB_TX_FLAGS_CSUM;
4340
4341         /*
4342          * count reflects the number of descriptors mapped; if it is 0, a
4343          * mapping error has occurred and we need to rewind the descriptor queue
4344          */
4345         count = igb_tx_map_adv(tx_ring, skb, first);
4346         if (!count) {
4347                 dev_kfree_skb_any(skb);
4348                 tx_ring->buffer_info[first].time_stamp = 0;
4349                 tx_ring->next_to_use = first;
4350                 return NETDEV_TX_OK;
4351         }
4352
4353         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4354
4355         /* Make sure there is space in the ring for the next send. */
4356         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4357
4358         return NETDEV_TX_OK;
4359 }
4360
4361 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4362                                       struct net_device *netdev)
4363 {
4364         struct igb_adapter *adapter = netdev_priv(netdev);
4365         struct igb_ring *tx_ring;
4366         int r_idx = 0;
4367
4368         if (test_bit(__IGB_DOWN, &adapter->state)) {
4369                 dev_kfree_skb_any(skb);
4370                 return NETDEV_TX_OK;
4371         }
4372
4373         if (skb->len <= 0) {
4374                 dev_kfree_skb_any(skb);
4375                 return NETDEV_TX_OK;
4376         }
4377
4378         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4379         tx_ring = adapter->multi_tx_table[r_idx];
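        /*
         * The mask above assumes IGB_ABS_MAX_TX_QUEUES is a power of two, so
         * the AND acts as a cheap modulo; e.g. with 8 absolute queues a
         * queue_mapping of 10 selects ring 2 (illustrative example).
         */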
4380
4381         /* This goes back to the question of how to logically map a tx queue
4382          * to a flow.  Right now, performance is impacted slightly negatively
4383          * if using multiple tx queues.  If the stack breaks away from a
4384          * single qdisc implementation, we can look at this again. */
4385         return igb_xmit_frame_ring_adv(skb, tx_ring);
4386 }
4387
4388 /**
4389  * igb_tx_timeout - Respond to a Tx Hang
4390  * @netdev: network interface device structure
4391  **/
4392 static void igb_tx_timeout(struct net_device *netdev)
4393 {
4394         struct igb_adapter *adapter = netdev_priv(netdev);
4395         struct e1000_hw *hw = &adapter->hw;
4396
4397         /* Do the reset outside of interrupt context */
4398         adapter->tx_timeout_count++;
4399
4400         if (hw->mac.type == e1000_82580)
4401                 hw->dev_spec._82575.global_device_reset = true;
4402
4403         schedule_work(&adapter->reset_task);
4404         wr32(E1000_EICS,
4405              (adapter->eims_enable_mask & ~adapter->eims_other));
4406 }
4407
4408 static void igb_reset_task(struct work_struct *work)
4409 {
4410         struct igb_adapter *adapter;
4411         adapter = container_of(work, struct igb_adapter, reset_task);
4412
4413         igb_dump(adapter);
4414         netdev_err(adapter->netdev, "Reset adapter\n");
4415         igb_reinit_locked(adapter);
4416 }
4417
4418 /**
4419  * igb_get_stats64 - Get System Network Statistics
4420  * @netdev: network interface device structure
4421  * @stats: rtnl_link_stats64 pointer
4422  *
4423  **/
4424 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4425                                                  struct rtnl_link_stats64 *stats)
4426 {
4427         struct igb_adapter *adapter = netdev_priv(netdev);
4428
4429         spin_lock(&adapter->stats64_lock);
4430         igb_update_stats(adapter, &adapter->stats64);
4431         memcpy(stats, &adapter->stats64, sizeof(*stats));
4432         spin_unlock(&adapter->stats64_lock);
4433
4434         return stats;
4435 }
4436
4437 /**
4438  * igb_change_mtu - Change the Maximum Transfer Unit
4439  * @netdev: network interface device structure
4440  * @new_mtu: new value for maximum frame size
4441  *
4442  * Returns 0 on success, negative on failure
4443  **/
4444 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4445 {
4446         struct igb_adapter *adapter = netdev_priv(netdev);
4447         struct pci_dev *pdev = adapter->pdev;
4448         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4449         u32 rx_buffer_len, i;
4450
4451         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4452                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4453                 return -EINVAL;
4454         }
4455
4456         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4457                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4458                 return -EINVAL;
4459         }
4460
4461         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4462                 msleep(1);
4463
4464         /* igb_down has a dependency on max_frame_size */
4465         adapter->max_frame_size = max_frame;
4466
4467         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4468          * means we reserve 2 more, this pushes us to allocate from the next
4469          * larger slab size.
4470          * i.e. RXBUFFER_2048 --> size-4096 slab
4471          */
4472
4473         if (adapter->hw.mac.type == e1000_82580)
4474                 max_frame += IGB_TS_HDR_LEN;
4475
4476         if (max_frame <= IGB_RXBUFFER_1024)
4477                 rx_buffer_len = IGB_RXBUFFER_1024;
4478         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4479                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4480         else
4481                 rx_buffer_len = IGB_RXBUFFER_128;
4482
4483         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4484              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4485                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4486
4487         if ((adapter->hw.mac.type == e1000_82580) &&
4488             (rx_buffer_len == IGB_RXBUFFER_128))
4489                 rx_buffer_len += IGB_RXBUFFER_64;
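        /*
         * Illustrative example of the selection above: a standard 1500-byte
         * MTU gives max_frame = 1518, which exceeds IGB_RXBUFFER_1024, so
         * rx_buffer_len becomes MAXIMUM_ETHERNET_VLAN_SIZE; anything larger
         * than that falls back to the small IGB_RXBUFFER_128 header buffer
         * (assuming the receive path then places packet data in page
         * buffers, which is not shown in this function).
         */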
4490
4491         if (netif_running(netdev))
4492                 igb_down(adapter);
4493
4494         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4495                  netdev->mtu, new_mtu);
4496         netdev->mtu = new_mtu;
4497
4498         for (i = 0; i < adapter->num_rx_queues; i++)
4499                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4500
4501         if (netif_running(netdev))
4502                 igb_up(adapter);
4503         else
4504                 igb_reset(adapter);
4505
4506         clear_bit(__IGB_RESETTING, &adapter->state);
4507
4508         return 0;
4509 }
4510
4511 /**
4512  * igb_update_stats - Update the board statistics counters
4513  * @adapter: board private structure
4514  **/
4515
4516 void igb_update_stats(struct igb_adapter *adapter,
4517                       struct rtnl_link_stats64 *net_stats)
4518 {
4519         struct e1000_hw *hw = &adapter->hw;
4520         struct pci_dev *pdev = adapter->pdev;
4521         u32 reg, mpc;
4522         u16 phy_tmp;
4523         int i;
4524         u64 bytes, packets;
4525         unsigned int start;
4526         u64 _bytes, _packets;
4527
4528 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4529
4530         /*
4531          * Prevent stats update while adapter is being reset, or if the pci
4532          * connection is down.
4533          */
4534         if (adapter->link_speed == 0)
4535                 return;
4536         if (pci_channel_offline(pdev))
4537                 return;
4538
4539         bytes = 0;
4540         packets = 0;
4541         for (i = 0; i < adapter->num_rx_queues; i++) {
4542                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4543                 struct igb_ring *ring = adapter->rx_ring[i];
4544
4545                 ring->rx_stats.drops += rqdpc_tmp;
4546                 net_stats->rx_fifo_errors += rqdpc_tmp;
4547
4548                 do {
4549                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4550                         _bytes = ring->rx_stats.bytes;
4551                         _packets = ring->rx_stats.packets;
4552                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4553                 bytes += _bytes;
4554                 packets += _packets;
4555         }
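        /*
         * The u64_stats_fetch_begin_bh()/u64_stats_fetch_retry_bh() pair
         * above is the usual seqcount-style read loop: the 64-bit byte and
         * packet counters are re-read until no writer updated them in the
         * meantime, which matters on 32-bit systems where such reads are not
         * atomic.
         */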
4556
4557         net_stats->rx_bytes = bytes;
4558         net_stats->rx_packets = packets;
4559
4560         bytes = 0;
4561         packets = 0;
4562         for (i = 0; i < adapter->num_tx_queues; i++) {
4563                 struct igb_ring *ring = adapter->tx_ring[i];
4564                 do {
4565                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4566                         _bytes = ring->tx_stats.bytes;
4567                         _packets = ring->tx_stats.packets;
4568                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4569                 bytes += _bytes;
4570                 packets += _packets;
4571         }
4572         net_stats->tx_bytes = bytes;
4573         net_stats->tx_packets = packets;
4574
4575         /* read stats registers */
4576         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4577         adapter->stats.gprc += rd32(E1000_GPRC);
4578         adapter->stats.gorc += rd32(E1000_GORCL);
4579         rd32(E1000_GORCH); /* clear GORCL */
4580         adapter->stats.bprc += rd32(E1000_BPRC);
4581         adapter->stats.mprc += rd32(E1000_MPRC);
4582         adapter->stats.roc += rd32(E1000_ROC);
4583
4584         adapter->stats.prc64 += rd32(E1000_PRC64);
4585         adapter->stats.prc127 += rd32(E1000_PRC127);
4586         adapter->stats.prc255 += rd32(E1000_PRC255);
4587         adapter->stats.prc511 += rd32(E1000_PRC511);
4588         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4589         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4590         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4591         adapter->stats.sec += rd32(E1000_SEC);
4592
4593         mpc = rd32(E1000_MPC);
4594         adapter->stats.mpc += mpc;
4595         net_stats->rx_fifo_errors += mpc;
4596         adapter->stats.scc += rd32(E1000_SCC);
4597         adapter->stats.ecol += rd32(E1000_ECOL);
4598         adapter->stats.mcc += rd32(E1000_MCC);
4599         adapter->stats.latecol += rd32(E1000_LATECOL);
4600         adapter->stats.dc += rd32(E1000_DC);
4601         adapter->stats.rlec += rd32(E1000_RLEC);
4602         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4603         adapter->stats.xontxc += rd32(E1000_XONTXC);
4604         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4605         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4606         adapter->stats.fcruc += rd32(E1000_FCRUC);
4607         adapter->stats.gptc += rd32(E1000_GPTC);
4608         adapter->stats.gotc += rd32(E1000_GOTCL);
4609         rd32(E1000_GOTCH); /* clear GOTCL */
4610         adapter->stats.rnbc += rd32(E1000_RNBC);
4611         adapter->stats.ruc += rd32(E1000_RUC);
4612         adapter->stats.rfc += rd32(E1000_RFC);
4613         adapter->stats.rjc += rd32(E1000_RJC);
4614         adapter->stats.tor += rd32(E1000_TORH);
4615         adapter->stats.tot += rd32(E1000_TOTH);
4616         adapter->stats.tpr += rd32(E1000_TPR);
4617
4618         adapter->stats.ptc64 += rd32(E1000_PTC64);
4619         adapter->stats.ptc127 += rd32(E1000_PTC127);
4620         adapter->stats.ptc255 += rd32(E1000_PTC255);
4621         adapter->stats.ptc511 += rd32(E1000_PTC511);
4622         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4623         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4624
4625         adapter->stats.mptc += rd32(E1000_MPTC);
4626         adapter->stats.bptc += rd32(E1000_BPTC);
4627
4628         adapter->stats.tpt += rd32(E1000_TPT);
4629         adapter->stats.colc += rd32(E1000_COLC);
4630
4631         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4632         /* read internal phy specific stats */
4633         reg = rd32(E1000_CTRL_EXT);
4634         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4635                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4636                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4637         }
4638
4639         adapter->stats.tsctc += rd32(E1000_TSCTC);
4640         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4641
4642         adapter->stats.iac += rd32(E1000_IAC);
4643         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4644         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4645         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4646         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4647         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4648         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4649         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4650         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4651
4652         /* Fill out the OS statistics structure */
4653         net_stats->multicast = adapter->stats.mprc;
4654         net_stats->collisions = adapter->stats.colc;
4655
4656         /* Rx Errors */
4657
4658         /* RLEC on some newer hardware can be incorrect so build
4659          * our own version based on RUC and ROC */
4660         net_stats->rx_errors = adapter->stats.rxerrc +
4661                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4662                 adapter->stats.ruc + adapter->stats.roc +
4663                 adapter->stats.cexterr;
4664         net_stats->rx_length_errors = adapter->stats.ruc +
4665                                       adapter->stats.roc;
4666         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4667         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4668         net_stats->rx_missed_errors = adapter->stats.mpc;
4669
4670         /* Tx Errors */
4671         net_stats->tx_errors = adapter->stats.ecol +
4672                                adapter->stats.latecol;
4673         net_stats->tx_aborted_errors = adapter->stats.ecol;
4674         net_stats->tx_window_errors = adapter->stats.latecol;
4675         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4676
4677         /* Tx Dropped needs to be maintained elsewhere */
4678
4679         /* Phy Stats */
4680         if (hw->phy.media_type == e1000_media_type_copper) {
4681                 if ((adapter->link_speed == SPEED_1000) &&
4682                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4683                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4684                         adapter->phy_stats.idle_errors += phy_tmp;
4685                 }
4686         }
4687
4688         /* Management Stats */
4689         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4690         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4691         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4692
4693         /* OS2BMC Stats */
4694         reg = rd32(E1000_MANC);
4695         if (reg & E1000_MANC_EN_BMC2OS) {
4696                 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4697                 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4698                 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4699                 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4700         }
4701 }
4702
4703 static irqreturn_t igb_msix_other(int irq, void *data)
4704 {
4705         struct igb_adapter *adapter = data;
4706         struct e1000_hw *hw = &adapter->hw;
4707         u32 icr = rd32(E1000_ICR);
4708         /* reading ICR causes bit 31 of EICR to be cleared */
4709
4710         if (icr & E1000_ICR_DRSTA)
4711                 schedule_work(&adapter->reset_task);
4712
4713         if (icr & E1000_ICR_DOUTSYNC) {
4714                 /* HW is reporting DMA is out of sync */
4715                 adapter->stats.doosync++;
4716                 /* The DMA Out of Sync is also an indication of a spoof event
4717                  * in IOV mode. Check the Wrong VM Behavior register to
4718                  * see if it is really a spoof event. */
4719                 igb_check_wvbr(adapter);
4720         }
4721
4722         /* Check for a mailbox event */
4723         if (icr & E1000_ICR_VMMB)
4724                 igb_msg_task(adapter);
4725
4726         if (icr & E1000_ICR_LSC) {
4727                 hw->mac.get_link_status = 1;
4728                 /* guard against interrupt when we're going down */
4729                 if (!test_bit(__IGB_DOWN, &adapter->state))
4730                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4731         }
4732
4733         if (adapter->vfs_allocated_count)
4734                 wr32(E1000_IMS, E1000_IMS_LSC |
4735                                 E1000_IMS_VMMB |
4736                                 E1000_IMS_DOUTSYNC);
4737         else
4738                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4739         wr32(E1000_EIMS, adapter->eims_other);
4740
4741         return IRQ_HANDLED;
4742 }
4743
4744 static void igb_write_itr(struct igb_q_vector *q_vector)
4745 {
4746         struct igb_adapter *adapter = q_vector->adapter;
4747         u32 itr_val = q_vector->itr_val & 0x7FFC;
4748
4749         if (!q_vector->set_itr)
4750                 return;
4751
4752         if (!itr_val)
4753                 itr_val = 0x4;
4754
4755         if (adapter->hw.mac.type == e1000_82575)
4756                 itr_val |= itr_val << 16;
4757         else
4758                 itr_val |= 0x8000000;
4759
4760         writel(itr_val, q_vector->itr_register);
4761         q_vector->set_itr = 0;
4762 }
4763
4764 static irqreturn_t igb_msix_ring(int irq, void *data)
4765 {
4766         struct igb_q_vector *q_vector = data;
4767
4768         /* Write the ITR value calculated from the previous interrupt. */
4769         igb_write_itr(q_vector);
4770
4771         napi_schedule(&q_vector->napi);
4772
4773         return IRQ_HANDLED;
4774 }
4775
4776 #ifdef CONFIG_IGB_DCA
4777 static void igb_update_dca(struct igb_q_vector *q_vector)
4778 {
4779         struct igb_adapter *adapter = q_vector->adapter;
4780         struct e1000_hw *hw = &adapter->hw;
4781         int cpu = get_cpu();
4782
4783         if (q_vector->cpu == cpu)
4784                 goto out_no_update;
4785
4786         if (q_vector->tx_ring) {
4787                 int q = q_vector->tx_ring->reg_idx;
4788                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4789                 if (hw->mac.type == e1000_82575) {
4790                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4791                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4792                 } else {
4793                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4794                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4795                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4796                 }
4797                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4798                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4799         }
4800         if (q_vector->rx_ring) {
4801                 int q = q_vector->rx_ring->reg_idx;
4802                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4803                 if (hw->mac.type == e1000_82575) {
4804                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4805                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4806                 } else {
4807                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4808                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4809                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4810                 }
4811                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4812                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4813                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4814                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4815         }
4816         q_vector->cpu = cpu;
4817 out_no_update:
4818         put_cpu();
4819 }
4820
4821 static void igb_setup_dca(struct igb_adapter *adapter)
4822 {
4823         struct e1000_hw *hw = &adapter->hw;
4824         int i;
4825
4826         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4827                 return;
4828
4829         /* Always use CB2 mode, difference is masked in the CB driver. */
4830         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4831
4832         for (i = 0; i < adapter->num_q_vectors; i++) {
4833                 adapter->q_vector[i]->cpu = -1;
4834                 igb_update_dca(adapter->q_vector[i]);
4835         }
4836 }
4837
4838 static int __igb_notify_dca(struct device *dev, void *data)
4839 {
4840         struct net_device *netdev = dev_get_drvdata(dev);
4841         struct igb_adapter *adapter = netdev_priv(netdev);
4842         struct pci_dev *pdev = adapter->pdev;
4843         struct e1000_hw *hw = &adapter->hw;
4844         unsigned long event = *(unsigned long *)data;
4845
4846         switch (event) {
4847         case DCA_PROVIDER_ADD:
4848                 /* if already enabled, don't do it again */
4849                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4850                         break;
4851                 if (dca_add_requester(dev) == 0) {
4852                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4853                         dev_info(&pdev->dev, "DCA enabled\n");
4854                         igb_setup_dca(adapter);
4855                         break;
4856                 }
4857                 /* Fall Through since DCA is disabled. */
4858         case DCA_PROVIDER_REMOVE:
4859                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4860                         /* without this a class_device is left
4861                          * hanging around in the sysfs model */
4862                         dca_remove_requester(dev);
4863                         dev_info(&pdev->dev, "DCA disabled\n");
4864                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4865                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4866                 }
4867                 break;
4868         }
4869
4870         return 0;
4871 }
4872
4873 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4874                           void *p)
4875 {
4876         int ret_val;
4877
4878         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4879                                          __igb_notify_dca);
4880
4881         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4882 }
4883 #endif /* CONFIG_IGB_DCA */
4884
4885 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4886 {
4887         struct e1000_hw *hw = &adapter->hw;
4888         u32 ping;
4889         int i;
4890
4891         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4892                 ping = E1000_PF_CONTROL_MSG;
4893                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4894                         ping |= E1000_VT_MSGTYPE_CTS;
4895                 igb_write_mbx(hw, &ping, 1, i);
4896         }
4897 }
4898
4899 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4900 {
4901         struct e1000_hw *hw = &adapter->hw;
4902         u32 vmolr = rd32(E1000_VMOLR(vf));
4903         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4904
4905         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4906                             IGB_VF_FLAG_MULTI_PROMISC);
4907         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4908
4909         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4910                 vmolr |= E1000_VMOLR_MPME;
4911                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4912                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4913         } else {
4914                 /*
4915                  * if we have hashes and we are clearing a multicast promisc
4916                  * flag, we need to write the hashes to the MTA, as this step
4917                  * was previously skipped
4918                  */
4919                 if (vf_data->num_vf_mc_hashes > 30) {
4920                         vmolr |= E1000_VMOLR_MPME;
4921                 } else if (vf_data->num_vf_mc_hashes) {
4922                         int j;
4923                         vmolr |= E1000_VMOLR_ROMPE;
4924                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4925                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4926                 }
4927         }
4928
4929         wr32(E1000_VMOLR(vf), vmolr);
4930
4931         /* there are flags left unprocessed, likely not supported */
4932         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4933                 return -EINVAL;
4934
4935         return 0;
4936
4937 }
4938
4939 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4940                                   u32 *msgbuf, u32 vf)
4941 {
4942         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4943         u16 *hash_list = (u16 *)&msgbuf[1];
4944         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4945         int i;
4946
4947         /* salt away the number of multicast addresses assigned
4948          * to this VF for later use to restore when the PF multicast
4949          * list changes
4950          */
4951         vf_data->num_vf_mc_hashes = n;
4952
4953         /* only up to 30 hash values supported */
4954         if (n > 30)
4955                 n = 30;
4956
4957         /* store the hashes for later use */
4958         for (i = 0; i < n; i++)
4959                 vf_data->vf_mc_hashes[i] = hash_list[i];
4960
4961         /* Flush and reset the mta with the new values */
4962         igb_set_rx_mode(adapter->netdev);
4963
4964         return 0;
4965 }
4966
4967 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4968 {
4969         struct e1000_hw *hw = &adapter->hw;
4970         struct vf_data_storage *vf_data;
4971         int i, j;
4972
4973         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4974                 u32 vmolr = rd32(E1000_VMOLR(i));
4975                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4976
4977                 vf_data = &adapter->vf_data[i];
4978
4979                 if ((vf_data->num_vf_mc_hashes > 30) ||
4980                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4981                         vmolr |= E1000_VMOLR_MPME;
4982                 } else if (vf_data->num_vf_mc_hashes) {
4983                         vmolr |= E1000_VMOLR_ROMPE;
4984                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4985                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4986                 }
4987                 wr32(E1000_VMOLR(i), vmolr);
4988         }
4989 }
4990
4991 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4992 {
4993         struct e1000_hw *hw = &adapter->hw;
4994         u32 pool_mask, reg, vid;
4995         int i;
4996
4997         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
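        /*
         * Illustrative bit layout derived from the line above: each VLVF
         * entry keeps one pool-select bit per pool starting at
         * E1000_VLVF_POOLSEL_SHIFT, so for vf = 2 pool_mask selects only that
         * VF's membership bit and clearing it below removes this VF from the
         * pool without disturbing the others.
         */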
4998
4999         /* Find the vlan filter for this id */
5000         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5001                 reg = rd32(E1000_VLVF(i));
5002
5003                 /* remove the vf from the pool */
5004                 reg &= ~pool_mask;
5005
5006                 /* if pool is empty then remove entry from vfta */
5007                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5008                     (reg & E1000_VLVF_VLANID_ENABLE)) {
5009                         vid = reg & E1000_VLVF_VLANID_MASK;
5010                         reg = 0;
5011                         igb_vfta_set(hw, vid, false);
5012                 }
5013
5014                 wr32(E1000_VLVF(i), reg);
5015         }
5016
5017         adapter->vf_data[vf].vlans_enabled = 0;
5018 }
5019
5020 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5021 {
5022         struct e1000_hw *hw = &adapter->hw;
5023         u32 reg, i;
5024
5025         /* The vlvf table only exists on 82576 hardware and newer */
5026         if (hw->mac.type < e1000_82576)
5027                 return -1;
5028
5029         /* we only need to do this if VMDq is enabled */
5030         if (!adapter->vfs_allocated_count)
5031                 return -1;
5032
5033         /* Find the vlan filter for this id */
5034         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5035                 reg = rd32(E1000_VLVF(i));
5036                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5037                     vid == (reg & E1000_VLVF_VLANID_MASK))
5038                         break;
5039         }
5040
5041         if (add) {
5042                 if (i == E1000_VLVF_ARRAY_SIZE) {
5043                         /* Did not find a matching VLAN ID entry that was
5044                          * enabled.  Search for a free filter entry, i.e.
5045                          * one without the enable bit set
5046                          */
5047                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5048                                 reg = rd32(E1000_VLVF(i));
5049                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5050                                         break;
5051                         }
5052                 }
5053                 if (i < E1000_VLVF_ARRAY_SIZE) {
5054                         /* Found an enabled/available entry */
5055                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5056
5057                         /* if !enabled we need to set this up in vfta */
5058                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5059                                 /* add VID to filter table */
5060                                 igb_vfta_set(hw, vid, true);
5061                                 reg |= E1000_VLVF_VLANID_ENABLE;
5062                         }
5063                         reg &= ~E1000_VLVF_VLANID_MASK;
5064                         reg |= vid;
5065                         wr32(E1000_VLVF(i), reg);
5066
5067                         /* do not modify RLPML for PF devices */
5068                         if (vf >= adapter->vfs_allocated_count)
5069                                 return 0;
5070
5071                         if (!adapter->vf_data[vf].vlans_enabled) {
5072                                 u32 size;
5073                                 reg = rd32(E1000_VMOLR(vf));
5074                                 size = reg & E1000_VMOLR_RLPML_MASK;
5075                                 size += 4;
5076                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5077                                 reg |= size;
5078                                 wr32(E1000_VMOLR(vf), reg);
5079                         }
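                        /*
                         * The "size += 4" above grows the VF's long packet
                         * limit (RLPML) by the 4 bytes of a VLAN tag when its
                         * first VLAN is enabled; the removal path below
                         * shrinks it again once the last VLAN is gone (an
                         * interpretation of the visible code).
                         */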
5080
5081                         adapter->vf_data[vf].vlans_enabled++;
5082                         return 0;
5083                 }
5084         } else {
5085                 if (i < E1000_VLVF_ARRAY_SIZE) {
5086                         /* remove vf from the pool */
5087                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5088                         /* if pool is empty then remove entry from vfta */
5089                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5090                                 reg = 0;
5091                                 igb_vfta_set(hw, vid, false);
5092                         }
5093                         wr32(E1000_VLVF(i), reg);
5094
5095                         /* do not modify RLPML for PF devices */
5096                         if (vf >= adapter->vfs_allocated_count)
5097                                 return 0;
5098
5099                         adapter->vf_data[vf].vlans_enabled--;
5100                         if (!adapter->vf_data[vf].vlans_enabled) {
5101                                 u32 size;
5102                                 reg = rd32(E1000_VMOLR(vf));
5103                                 size = reg & E1000_VMOLR_RLPML_MASK;
5104                                 size -= 4;
5105                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5106                                 reg |= size;
5107                                 wr32(E1000_VMOLR(vf), reg);
5108                         }
5109                 }
5110         }
5111         return 0;
5112 }
5113
5114 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5115 {
5116         struct e1000_hw *hw = &adapter->hw;
5117
5118         if (vid)
5119                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5120         else
5121                 wr32(E1000_VMVIR(vf), 0);
5122 }
5123
5124 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5125                                int vf, u16 vlan, u8 qos)
5126 {
5127         int err = 0;
5128         struct igb_adapter *adapter = netdev_priv(netdev);
5129
5130         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5131                 return -EINVAL;
5132         if (vlan || qos) {
5133                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5134                 if (err)
5135                         goto out;
5136                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5137                 igb_set_vmolr(adapter, vf, !vlan);
5138                 adapter->vf_data[vf].pf_vlan = vlan;
5139                 adapter->vf_data[vf].pf_qos = qos;
5140                 dev_info(&adapter->pdev->dev,
5141                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5142                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5143                         dev_warn(&adapter->pdev->dev,
5144                                  "The VF VLAN has been set,"
5145                                  " but the PF device is not up.\n");
5146                         dev_warn(&adapter->pdev->dev,
5147                                  "Bring the PF device up before"
5148                                  " attempting to use the VF device.\n");
5149                 }
5150         } else {
5151                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5152                                    false, vf);
5153                 igb_set_vmvir(adapter, vlan, vf);
5154                 igb_set_vmolr(adapter, vf, true);
5155                 adapter->vf_data[vf].pf_vlan = 0;
5156                 adapter->vf_data[vf].pf_qos = 0;
5157         }
5158 out:
5159         return err;
5160 }
5161
5162 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5163 {
5164         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5165         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5166
5167         return igb_vlvf_set(adapter, vid, add, vf);
5168 }
5169
5170 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5171 {
5172         /* clear flags - except flag that indicates PF has set the MAC */
5173         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5174         adapter->vf_data[vf].last_nack = jiffies;
5175
5176         /* reset offloads to defaults */
5177         igb_set_vmolr(adapter, vf, true);
5178
5179         /* reset vlans for device */
5180         igb_clear_vf_vfta(adapter, vf);
5181         if (adapter->vf_data[vf].pf_vlan)
5182                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5183                                     adapter->vf_data[vf].pf_vlan,
5184                                     adapter->vf_data[vf].pf_qos);
5185         else
5186                 igb_clear_vf_vfta(adapter, vf);
5187
5188         /* reset multicast table array for vf */
5189         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5190
5191         /* Flush and reset the mta with the new values */
5192         igb_set_rx_mode(adapter->netdev);
5193 }
5194
5195 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5196 {
5197         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5198
5199         /* generate a new mac address as we were hotplug removed/added */
5200         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5201                 random_ether_addr(vf_mac);
5202
5203         /* process remaining reset events */
5204         igb_vf_reset(adapter, vf);
5205 }
5206
5207 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5208 {
5209         struct e1000_hw *hw = &adapter->hw;
5210         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5211         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5212         u32 reg, msgbuf[3];
5213         u8 *addr = (u8 *)(&msgbuf[1]);
5214
5215         /* process all the same items cleared in a function level reset */
5216         igb_vf_reset(adapter, vf);
5217
5218         /* set vf mac address */
5219         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5220
5221         /* enable transmit and receive for vf */
5222         reg = rd32(E1000_VFTE);
5223         wr32(E1000_VFTE, reg | (1 << vf));
5224         reg = rd32(E1000_VFRE);
5225         wr32(E1000_VFRE, reg | (1 << vf));
5226
5227         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5228
5229         /* reply to reset with ack and vf mac address */
5230         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5231         memcpy(addr, vf_mac, 6);
5232         igb_write_mbx(hw, msgbuf, 3, vf);
5233 }
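
/*
 * Mailbox reply layout used just above (read directly from the code):
 * msgbuf[0] carries the E1000_VF_RESET opcode with the ACK bit set and
 * msgbuf[1..2] hold the 6-byte VF MAC address, so the reply fits in the
 * three 32-bit words passed to igb_write_mbx().
 */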
5234
5235 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5236 {
5237         /*
5238          * The VF MAC Address is stored in a packed array of bytes
5239          * starting at the second 32 bit word of the msg array
5240          */
5241         unsigned char *addr = (char *)&msg[1];
5242         int err = -1;
5243
5244         if (is_valid_ether_addr(addr))
5245                 err = igb_set_vf_mac(adapter, vf, addr);
5246
5247         return err;
5248 }
5249
5250 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5251 {
5252         struct e1000_hw *hw = &adapter->hw;
5253         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5254         u32 msg = E1000_VT_MSGTYPE_NACK;
5255
5256         /* if device isn't clear to send it shouldn't be reading either */
5257         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5258             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5259                 igb_write_mbx(hw, &msg, 1, vf);
5260                 vf_data->last_nack = jiffies;
5261         }
5262 }
5263
5264 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5265 {
5266         struct pci_dev *pdev = adapter->pdev;
5267         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5268         struct e1000_hw *hw = &adapter->hw;
5269         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5270         s32 retval;
5271
5272         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5273
5274         if (retval) {
5275                 /* if receive failed revoke VF CTS stats and restart init */
5276                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5277                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5278                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5279                         return;
5280                 goto out;
5281         }
5282
5283         /* this is a message we already processed, do nothing */
5284         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5285                 return;
5286
5287         /*
5288          * until the vf completes a reset it should not be
5289          * allowed to start any configuration.
5290          */
5291
5292         if (msgbuf[0] == E1000_VF_RESET) {
5293                 igb_vf_reset_msg(adapter, vf);
5294                 return;
5295         }
5296
5297         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5298                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5299                         return;
5300                 retval = -1;
5301                 goto out;
5302         }
5303
5304         switch ((msgbuf[0] & 0xFFFF)) {
5305         case E1000_VF_SET_MAC_ADDR:
5306                 retval = -EINVAL;
5307                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5308                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5309                 else
5310                         dev_warn(&pdev->dev,
5311                                  "VF %d attempted to override administratively "
5312                                  "set MAC address\nReload the VF driver to "
5313                                  "resume operations\n", vf);
5314                 break;
5315         case E1000_VF_SET_PROMISC:
5316                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5317                 break;
5318         case E1000_VF_SET_MULTICAST:
5319                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5320                 break;
5321         case E1000_VF_SET_LPE:
5322                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5323                 break;
5324         case E1000_VF_SET_VLAN:
5325                 retval = -1;
5326                 if (vf_data->pf_vlan)
5327                         dev_warn(&pdev->dev,
5328                                  "VF %d attempted to override administratively "
5329                                  "set VLAN tag\nReload the VF driver to "
5330                                  "resume operations\n", vf);
5331                 else
5332                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5333                 break;
5334         default:
5335                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5336                 retval = -1;
5337                 break;
5338         }
5339
5340         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5341 out:
5342         /* notify the VF of the results of what it sent us */
5343         if (retval)
5344                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5345         else
5346                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5347
5348         igb_write_mbx(hw, msgbuf, 1, vf);
5349 }
5350
5351 static void igb_msg_task(struct igb_adapter *adapter)
5352 {
5353         struct e1000_hw *hw = &adapter->hw;
5354         u32 vf;
5355
5356         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5357                 /* process any reset requests */
5358                 if (!igb_check_for_rst(hw, vf))
5359                         igb_vf_reset_event(adapter, vf);
5360
5361                 /* process any messages pending */
5362                 if (!igb_check_for_msg(hw, vf))
5363                         igb_rcv_msg_from_vf(adapter, vf);
5364
5365                 /* process any acks */
5366                 if (!igb_check_for_ack(hw, vf))
5367                         igb_rcv_ack_from_vf(adapter, vf);
5368         }
5369 }
5370
5371 /**
5372  *  igb_set_uta - Set unicast filter table address
5373  *  @adapter: board private structure
5374  *
5375  *  The unicast table address is a register array of 32-bit registers.
5376  *  The table is meant to be used in a way similar to how the MTA is used;
5377  *  however, due to certain limitations in the hardware it is necessary to
5378  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5379  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled.
5380  **/
5381 static void igb_set_uta(struct igb_adapter *adapter)
5382 {
5383         struct e1000_hw *hw = &adapter->hw;
5384         int i;
5385
5386         /* The UTA table only exists on 82576 hardware and newer */
5387         if (hw->mac.type < e1000_82576)
5388                 return;
5389
5390         /* we only need to do this if VMDq is enabled */
5391         if (!adapter->vfs_allocated_count)
5392                 return;
5393
5394         for (i = 0; i < hw->mac.uta_reg_count; i++)
5395                 array_wr32(E1000_UTA, i, ~0);
5396 }
5397
5398 /**
5399  * igb_intr_msi - Interrupt Handler
5400  * @irq: interrupt number
5401  * @data: pointer to a network interface device structure
5402  **/
5403 static irqreturn_t igb_intr_msi(int irq, void *data)
5404 {
5405         struct igb_adapter *adapter = data;
5406         struct igb_q_vector *q_vector = adapter->q_vector[0];
5407         struct e1000_hw *hw = &adapter->hw;
5408         /* read ICR disables interrupts using IAM */
5409         u32 icr = rd32(E1000_ICR);
5410
5411         igb_write_itr(q_vector);
5412
5413         if (icr & E1000_ICR_DRSTA)
5414                 schedule_work(&adapter->reset_task);
5415
5416         if (icr & E1000_ICR_DOUTSYNC) {
5417                 /* HW is reporting DMA is out of sync */
5418                 adapter->stats.doosync++;
5419         }
5420
5421         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5422                 hw->mac.get_link_status = 1;
5423                 if (!test_bit(__IGB_DOWN, &adapter->state))
5424                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5425         }
5426
5427         napi_schedule(&q_vector->napi);
5428
5429         return IRQ_HANDLED;
5430 }
5431
5432 /**
5433  * igb_intr - Legacy Interrupt Handler
5434  * @irq: interrupt number
5435  * @data: pointer to a network interface device structure
5436  **/
5437 static irqreturn_t igb_intr(int irq, void *data)
5438 {
5439         struct igb_adapter *adapter = data;
5440         struct igb_q_vector *q_vector = adapter->q_vector[0];
5441         struct e1000_hw *hw = &adapter->hw;
5442         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5443          * need for the IMC write */
5444         u32 icr = rd32(E1000_ICR);
5445         if (!icr)
5446                 return IRQ_NONE;  /* Not our interrupt */
5447
5448         igb_write_itr(q_vector);
5449
5450         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5451          * not set, then the adapter didn't send an interrupt */
5452         if (!(icr & E1000_ICR_INT_ASSERTED))
5453                 return IRQ_NONE;
5454
5455         if (icr & E1000_ICR_DRSTA)
5456                 schedule_work(&adapter->reset_task);
5457
5458         if (icr & E1000_ICR_DOUTSYNC) {
5459                 /* HW is reporting DMA is out of sync */
5460                 adapter->stats.doosync++;
5461         }
5462
5463         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5464                 hw->mac.get_link_status = 1;
5465                 /* guard against interrupt when we're going down */
5466                 if (!test_bit(__IGB_DOWN, &adapter->state))
5467                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5468         }
5469
5470         napi_schedule(&q_vector->napi);
5471
5472         return IRQ_HANDLED;
5473 }
5474
5475 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5476 {
5477         struct igb_adapter *adapter = q_vector->adapter;
5478         struct e1000_hw *hw = &adapter->hw;
5479
5480         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5481             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5482                 if (!adapter->msix_entries)
5483                         igb_set_itr(adapter);
5484                 else
5485                         igb_update_ring_itr(q_vector);
5486         }
5487
5488         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5489                 if (adapter->msix_entries)
5490                         wr32(E1000_EIMS, q_vector->eims_value);
5491                 else
5492                         igb_irq_enable(adapter);
5493         }
5494 }
5495
5496 /**
5497  * igb_poll - NAPI Rx polling callback
5498  * @napi: napi polling structure
5499  * @budget: count of how many packets we should handle
5500  **/
5501 static int igb_poll(struct napi_struct *napi, int budget)
5502 {
5503         struct igb_q_vector *q_vector = container_of(napi,
5504                                                      struct igb_q_vector,
5505                                                      napi);
5506         int tx_clean_complete = 1, work_done = 0;
5507
5508 #ifdef CONFIG_IGB_DCA
5509         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5510                 igb_update_dca(q_vector);
5511 #endif
5512         if (q_vector->tx_ring)
5513                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5514
5515         if (q_vector->rx_ring)
5516                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5517
5518         if (!tx_clean_complete)
5519                 work_done = budget;
5520
5521         /* If not enough Rx work done, exit the polling mode */
5522         if (work_done < budget) {
5523                 napi_complete(napi);
5524                 igb_ring_irq_enable(q_vector);
5525         }
5526
5527         return work_done;
5528 }
5529
5530 /**
5531  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5532  * @adapter: board private structure
5533  * @shhwtstamps: timestamp structure to update
5534  * @regval: unsigned 64bit system time value.
5535  *
5536  * We need to convert the system time value stored in the RX/TXSTMP registers
5537  * into a hwtstamp which can be used by the upper level timestamping functions
5538  */
5539 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5540                                    struct skb_shared_hwtstamps *shhwtstamps,
5541                                    u64 regval)
5542 {
5543         u64 ns;
5544
5545         /*
5546          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL; shift it up by
5547          * 24 bits to match the clock shift we set up earlier.
5548          */
5549         if (adapter->hw.mac.type == e1000_82580)
5550                 regval <<= IGB_82580_TSYNC_SHIFT;
5551
5552         ns = timecounter_cyc2time(&adapter->clock, regval);
5553         timecompare_update(&adapter->compare, ns);
5554         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5555         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5556         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5557 }
5558
5559 /**
5560  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5561  * @q_vector: pointer to q_vector containing needed info
5562  * @buffer_info: pointer to igb_buffer structure
5563  *
5564  * If we were asked to do hardware stamping and such a time stamp is
5565  * available, then it must have been for this skb here because we only
5566  * allow one such packet into the queue.
5567  */
5568 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5569 {
5570         struct igb_adapter *adapter = q_vector->adapter;
5571         struct e1000_hw *hw = &adapter->hw;
5572         struct skb_shared_hwtstamps shhwtstamps;
5573         u64 regval;
5574
5575         /* if skb does not support hw timestamp or TX stamp not valid exit */
5576         if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5577             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5578                 return;
5579
5580         regval = rd32(E1000_TXSTMPL);
5581         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5582
5583         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5584         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5585 }
5586
5587 /**
5588  * igb_clean_tx_irq - Reclaim resources after transmit completes
5589  * @q_vector: pointer to q_vector containing needed info
5590  * returns true if ring is completely cleaned
5591  **/
5592 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5593 {
5594         struct igb_adapter *adapter = q_vector->adapter;
5595         struct igb_ring *tx_ring = q_vector->tx_ring;
5596         struct net_device *netdev = tx_ring->netdev;
5597         struct e1000_hw *hw = &adapter->hw;
5598         struct igb_buffer *buffer_info;
5599         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5600         unsigned int total_bytes = 0, total_packets = 0;
5601         unsigned int i, eop, count = 0;
5602         bool cleaned = false;
5603
5604         i = tx_ring->next_to_clean;
5605         eop = tx_ring->buffer_info[i].next_to_watch;
5606         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5607
5608         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5609                (count < tx_ring->count)) {
5610                 rmb();  /* read buffer_info after eop_desc status */
5611                 for (cleaned = false; !cleaned; count++) {
5612                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5613                         buffer_info = &tx_ring->buffer_info[i];
5614                         cleaned = (i == eop);
5615
5616                         if (buffer_info->skb) {
5617                                 total_bytes += buffer_info->bytecount;
5618                                 /* gso_segs is currently only valid for tcp */
5619                                 total_packets += buffer_info->gso_segs;
5620                                 igb_tx_hwtstamp(q_vector, buffer_info);
5621                         }
5622
5623                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5624                         tx_desc->wb.status = 0;
5625
5626                         i++;
5627                         if (i == tx_ring->count)
5628                                 i = 0;
5629                 }
5630                 eop = tx_ring->buffer_info[i].next_to_watch;
5631                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5632         }
5633
5634         tx_ring->next_to_clean = i;
5635
5636         if (unlikely(count &&
5637                      netif_carrier_ok(netdev) &&
5638                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5639                 /* Make sure that anybody stopping the queue after this
5640                  * sees the new next_to_clean.
5641                  */
5642                 smp_mb();
5643                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5644                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5645                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5646
5647                         u64_stats_update_begin(&tx_ring->tx_syncp);
5648                         tx_ring->tx_stats.restart_queue++;
5649                         u64_stats_update_end(&tx_ring->tx_syncp);
5650                 }
5651         }
5652
5653         if (tx_ring->detect_tx_hung) {
5654                 /* Detect a transmit hang in hardware; this serializes the
5655                  * check with the clearing of time_stamp and movement of i */
5656                 tx_ring->detect_tx_hung = false;
5657                 if (tx_ring->buffer_info[i].time_stamp &&
5658                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5659                                (adapter->tx_timeout_factor * HZ)) &&
5660                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5661
5662                         /* detected Tx unit hang */
5663                         dev_err(tx_ring->dev,
5664                                 "Detected Tx Unit Hang\n"
5665                                 "  Tx Queue             <%d>\n"
5666                                 "  TDH                  <%x>\n"
5667                                 "  TDT                  <%x>\n"
5668                                 "  next_to_use          <%x>\n"
5669                                 "  next_to_clean        <%x>\n"
5670                                 "buffer_info[next_to_clean]\n"
5671                                 "  time_stamp           <%lx>\n"
5672                                 "  next_to_watch        <%x>\n"
5673                                 "  jiffies              <%lx>\n"
5674                                 "  desc.status          <%x>\n",
5675                                 tx_ring->queue_index,
5676                                 readl(tx_ring->head),
5677                                 readl(tx_ring->tail),
5678                                 tx_ring->next_to_use,
5679                                 tx_ring->next_to_clean,
5680                                 tx_ring->buffer_info[eop].time_stamp,
5681                                 eop,
5682                                 jiffies,
5683                                 eop_desc->wb.status);
5684                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5685                 }
5686         }
5687         tx_ring->total_bytes += total_bytes;
5688         tx_ring->total_packets += total_packets;
5689         u64_stats_update_begin(&tx_ring->tx_syncp);
5690         tx_ring->tx_stats.bytes += total_bytes;
5691         tx_ring->tx_stats.packets += total_packets;
5692         u64_stats_update_end(&tx_ring->tx_syncp);
5693         return count < tx_ring->count;
5694 }
5695
5696 /**
5697  * igb_receive_skb - helper function to handle rx indications
5698  * @q_vector: structure containing interrupt and ring information
5699  * @skb: packet to send up
5700  * @vlan_tag: vlan tag for packet
5701  **/
5702 static void igb_receive_skb(struct igb_q_vector *q_vector,
5703                             struct sk_buff *skb,
5704                             u16 vlan_tag)
5705 {
5706         struct igb_adapter *adapter = q_vector->adapter;
5707
5708         if (vlan_tag && adapter->vlgrp)
5709                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5710                                  vlan_tag, skb);
5711         else
5712                 napi_gro_receive(&q_vector->napi, skb);
5713 }
5714
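/**
 * igb_rx_checksum_adv - indicate in skb if hw indicated a good cksum
 * @ring: ring on which the packet was received
 * @status_err: status and error bits from the Rx descriptor
 * @skb: packet currently being handed up the stack
 *
 * Marks the skb CHECKSUM_UNNECESSARY when hardware reports a valid TCP/UDP
 * checksum, counts checksum errors, and otherwise leaves verification to the
 * stack.
 **/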
5715 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5716                                        u32 status_err, struct sk_buff *skb)
5717 {
5718         skb_checksum_none_assert(skb);
5719
5720         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5721         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5722              (status_err & E1000_RXD_STAT_IXSM))
5723                 return;
5724
5725         /* TCP/UDP checksum error bit is set */
5726         if (status_err &
5727             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5728                 /*
5729                  * work around errata with sctp packets where the TCPE aka
5730                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5731                  * packets; just let the stack verify the crc32c
5732                  */
5733                 if (!((skb->len == 60) &&
5734                       (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))) {
5735                         u64_stats_update_begin(&ring->rx_syncp);
5736                         ring->rx_stats.csum_err++;
5737                         u64_stats_update_end(&ring->rx_syncp);
5738                 }
5739                 /* let the stack verify checksum errors */
5740                 return;
5741         }
5742         /* It must be a TCP or UDP packet with a valid checksum */
5743         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5744                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5745
5746         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5747 }
5748
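/**
 * igb_rx_hwtstamp - retrieve a hardware Rx time stamp for a packet
 * @q_vector: pointer to q_vector containing needed info
 * @staterr: status and error bits from the Rx descriptor
 * @skb: packet to attach the time stamp to
 *
 * Reads the time stamp either from the packet buffer (TSIP) or from the
 * RXSTMPL/RXSTMPH registers and converts it into skb_hwtstamps.
 **/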
5749 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5750                                    struct sk_buff *skb)
5751 {
5752         struct igb_adapter *adapter = q_vector->adapter;
5753         struct e1000_hw *hw = &adapter->hw;
5754         u64 regval;
5755
5756         /*
5757          * If this bit is set, then the RX registers contain the time stamp. No
5758          * other packet will be time stamped until we read these registers, so
5759          * read the registers to make them available again. Because only one
5760          * packet can be time stamped at a time, we know that the register
5761          * values must belong to this one here and therefore we don't need to
5762          * compare any of the additional attributes stored for it.
5763          *
5764          * If nothing went wrong, then it should have a shared tx_flags that we
5765          * can turn into a skb_shared_hwtstamps.
5766          */
5767         if (staterr & E1000_RXDADV_STAT_TSIP) {
5768                 u32 *stamp = (u32 *)skb->data;
5769                 regval = le32_to_cpu(*(stamp + 2));
5770                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5771                 skb_pull(skb, IGB_TS_HDR_LEN);
5772         } else {
5773                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5774                         return;
5775
5776                 regval = rd32(E1000_RXSTMPL);
5777                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5778         }
5779
5780         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5781 }
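
/**
 * igb_get_hlen - extract the header length written back in an Rx descriptor
 * @rx_ring: ring the descriptor belongs to
 * @rx_desc: advanced Rx descriptor written back by hardware
 **/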
5782 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5783                                union e1000_adv_rx_desc *rx_desc)
5784 {
5785         /* HW will not DMA in data larger than the given buffer, even if it
5786          * parses the (NFS, of course) header to be larger.  In that case, it
5787          * fills the header buffer and spills the rest into the page.
5788          */
5789         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5790                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5791         if (hlen > rx_ring->rx_buffer_len)
5792                 hlen = rx_ring->rx_buffer_len;
5793         return hlen;
5794 }
5795
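/**
 * igb_clean_rx_irq_adv - process completed descriptors from the Rx ring
 * @q_vector: structure containing interrupt and ring information
 * @work_done: incremented once per packet handed up the stack
 * @budget: NAPI budget, the maximum number of packets to process
 **/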
5796 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5797                                  int *work_done, int budget)
5798 {
5799         struct igb_ring *rx_ring = q_vector->rx_ring;
5800         struct net_device *netdev = rx_ring->netdev;
5801         struct device *dev = rx_ring->dev;
5802         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5803         struct igb_buffer *buffer_info, *next_buffer;
5804         struct sk_buff *skb;
5805         bool cleaned = false;
5806         int cleaned_count = 0;
5807         int current_node = numa_node_id();
5808         unsigned int total_bytes = 0, total_packets = 0;
5809         unsigned int i;
5810         u32 staterr;
5811         u16 length;
5812         u16 vlan_tag;
5813
5814         i = rx_ring->next_to_clean;
5815         buffer_info = &rx_ring->buffer_info[i];
5816         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5817         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5818
5819         while (staterr & E1000_RXD_STAT_DD) {
5820                 if (*work_done >= budget)
5821                         break;
5822                 (*work_done)++;
5823                 rmb(); /* read descriptor and rx_buffer_info after status DD */
5824
5825                 skb = buffer_info->skb;
5826                 prefetch(skb->data - NET_IP_ALIGN);
5827                 buffer_info->skb = NULL;
5828
5829                 i++;
5830                 if (i == rx_ring->count)
5831                         i = 0;
5832
5833                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5834                 prefetch(next_rxd);
5835                 next_buffer = &rx_ring->buffer_info[i];
5836
5837                 length = le16_to_cpu(rx_desc->wb.upper.length);
5838                 cleaned = true;
5839                 cleaned_count++;
5840
5841                 if (buffer_info->dma) {
5842                         dma_unmap_single(dev, buffer_info->dma,
5843                                          rx_ring->rx_buffer_len,
5844                                          DMA_FROM_DEVICE);
5845                         buffer_info->dma = 0;
5846                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5847                                 skb_put(skb, length);
5848                                 goto send_up;
5849                         }
5850                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5851                 }
5852
5853                 if (length) {
5854                         dma_unmap_page(dev, buffer_info->page_dma,
5855                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5856                         buffer_info->page_dma = 0;
5857
5858                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5859                                                 buffer_info->page,
5860                                                 buffer_info->page_offset,
5861                                                 length);
5862
5863                         if ((page_count(buffer_info->page) != 1) ||
5864                             (page_to_nid(buffer_info->page) != current_node))
5865                                 buffer_info->page = NULL;
5866                         else
5867                                 get_page(buffer_info->page);
5868
5869                         skb->len += length;
5870                         skb->data_len += length;
5871                         skb->truesize += length;
5872                 }
5873
5874                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5875                         buffer_info->skb = next_buffer->skb;
5876                         buffer_info->dma = next_buffer->dma;
5877                         next_buffer->skb = skb;
5878                         next_buffer->dma = 0;
5879                         goto next_desc;
5880                 }
5881 send_up:
5882                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5883                         dev_kfree_skb_irq(skb);
5884                         goto next_desc;
5885                 }
5886
5887                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5888                         igb_rx_hwtstamp(q_vector, staterr, skb);
5889                 total_bytes += skb->len;
5890                 total_packets++;
5891
5892                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5893
5894                 skb->protocol = eth_type_trans(skb, netdev);
5895                 skb_record_rx_queue(skb, rx_ring->queue_index);
5896
5897                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5898                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5899
5900                 igb_receive_skb(q_vector, skb, vlan_tag);
5901
5902 next_desc:
5903                 rx_desc->wb.upper.status_error = 0;
5904
5905                 /* return some buffers to hardware, one at a time is too slow */
5906                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5907                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5908                         cleaned_count = 0;
5909                 }
5910
5911                 /* use prefetched values */
5912                 rx_desc = next_rxd;
5913                 buffer_info = next_buffer;
5914                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5915         }
5916
5917         rx_ring->next_to_clean = i;
5918         cleaned_count = igb_desc_unused(rx_ring);
5919
5920         if (cleaned_count)
5921                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5922
5923         rx_ring->total_packets += total_packets;
5924         rx_ring->total_bytes += total_bytes;
5925         u64_stats_update_begin(&rx_ring->rx_syncp);
5926         rx_ring->rx_stats.packets += total_packets;
5927         rx_ring->rx_stats.bytes += total_bytes;
5928         u64_stats_update_end(&rx_ring->rx_syncp);
5929         return cleaned;
5930 }
5931
5932 /**
5933  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5934  * @rx_ring: address of the ring structure to repopulate
 * @cleaned_count: number of buffers to allocate and map
5935  **/
5936 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5937 {
5938         struct net_device *netdev = rx_ring->netdev;
5939         union e1000_adv_rx_desc *rx_desc;
5940         struct igb_buffer *buffer_info;
5941         struct sk_buff *skb;
5942         unsigned int i;
5943         int bufsz;
5944
5945         i = rx_ring->next_to_use;
5946         buffer_info = &rx_ring->buffer_info[i];
5947
5948         bufsz = rx_ring->rx_buffer_len;
5949
5950         while (cleaned_count--) {
5951                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5952
5953                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5954                         if (!buffer_info->page) {
5955                                 buffer_info->page = netdev_alloc_page(netdev);
5956                                 if (unlikely(!buffer_info->page)) {
5957                                         u64_stats_update_begin(&rx_ring->rx_syncp);
5958                                         rx_ring->rx_stats.alloc_failed++;
5959                                         u64_stats_update_end(&rx_ring->rx_syncp);
5960                                         goto no_buffers;
5961                                 }
5962                                 buffer_info->page_offset = 0;
5963                         } else {
5964                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5965                         }
5966                         buffer_info->page_dma =
5967                                 dma_map_page(rx_ring->dev, buffer_info->page,
5968                                              buffer_info->page_offset,
5969                                              PAGE_SIZE / 2,
5970                                              DMA_FROM_DEVICE);
5971                         if (dma_mapping_error(rx_ring->dev,
5972                                               buffer_info->page_dma)) {
5973                                 buffer_info->page_dma = 0;
5974                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5975                                 rx_ring->rx_stats.alloc_failed++;
5976                                 u64_stats_update_end(&rx_ring->rx_syncp);
5977                                 goto no_buffers;
5978                         }
5979                 }
5980
5981                 skb = buffer_info->skb;
5982                 if (!skb) {
5983                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5984                         if (unlikely(!skb)) {
5985                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5986                                 rx_ring->rx_stats.alloc_failed++;
5987                                 u64_stats_update_end(&rx_ring->rx_syncp);
5988                                 goto no_buffers;
5989                         }
5990
5991                         buffer_info->skb = skb;
5992                 }
5993                 if (!buffer_info->dma) {
5994                         buffer_info->dma = dma_map_single(rx_ring->dev,
5995                                                           skb->data,
5996                                                           bufsz,
5997                                                           DMA_FROM_DEVICE);
5998                         if (dma_mapping_error(rx_ring->dev,
5999                                               buffer_info->dma)) {
6000                                 buffer_info->dma = 0;
6001                                 u64_stats_update_begin(&rx_ring->rx_syncp);
6002                                 rx_ring->rx_stats.alloc_failed++;
6003                                 u64_stats_update_end(&rx_ring->rx_syncp);
6004                                 goto no_buffers;
6005                         }
6006                 }
6007                 /* Refresh the desc even if buffer_addrs didn't change because
6008                  * each write-back erases this info. */
6009                 if (bufsz < IGB_RXBUFFER_1024) {
6010                         rx_desc->read.pkt_addr =
6011                              cpu_to_le64(buffer_info->page_dma);
6012                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
6013                 } else {
6014                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
6015                         rx_desc->read.hdr_addr = 0;
6016                 }
6017
6018                 i++;
6019                 if (i == rx_ring->count)
6020                         i = 0;
6021                 buffer_info = &rx_ring->buffer_info[i];
6022         }
6023
6024 no_buffers:
6025         if (rx_ring->next_to_use != i) {
6026                 rx_ring->next_to_use = i;
6027                 if (i == 0)
6028                         i = (rx_ring->count - 1);
6029                 else
6030                         i--;
6031
6032                 /* Force memory writes to complete before letting h/w
6033                  * know there are new descriptors to fetch.  (Only
6034                  * applicable for weak-ordered memory model archs,
6035                  * such as IA-64). */
6036                 wmb();
6037                 writel(i, rx_ring->tail);
6038         }
6039 }
6040
6041 /**
6042  * igb_mii_ioctl - handle MII ioctl requests (PHY id and register reads)
6043  * @netdev: network interface device structure
6044  * @ifr: pointer to the interface request carrying the MII data
6045  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6046  **/
6047 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6048 {
6049         struct igb_adapter *adapter = netdev_priv(netdev);
6050         struct mii_ioctl_data *data = if_mii(ifr);
6051
6052         if (adapter->hw.phy.media_type != e1000_media_type_copper)
6053                 return -EOPNOTSUPP;
6054
6055         switch (cmd) {
6056         case SIOCGMIIPHY:
6057                 data->phy_id = adapter->hw.phy.addr;
6058                 break;
6059         case SIOCGMIIREG:
6060                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6061                                      &data->val_out))
6062                         return -EIO;
6063                 break;
6064         case SIOCSMIIREG:
6065         default:
6066                 return -EOPNOTSUPP;
6067         }
6068         return 0;
6069 }
6070
6071 /**
6072  * igb_hwtstamp_ioctl - control hardware time stamping
6073  * @netdev: network interface device structure
6074  * @ifr: pointer to the interface request carrying a hwtstamp_config
6075  * @cmd: ioctl command (SIOCSHWTSTAMP)
6076  *
6077  * Outgoing time stamping can be enabled and disabled. Play nice and
6078  * disable it when requested, although it shouldn't cause any overhead
6079  * when no packet needs it. At most one packet in the queue may be
6080  * marked for time stamping, otherwise it would be impossible to tell
6081  * for sure to which packet the hardware time stamp belongs.
6082  *
6083  * Incoming time stamping has to be configured via the hardware
6084  * filters. Not all combinations are supported, in particular event
6085  * type has to be specified. Matching the kind of event packet is
6086  * not supported, with the exception of "all V2 events regardless of
6087  * layer 2 or 4".
6088  *
6089  **/
6090 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6091                               struct ifreq *ifr, int cmd)
6092 {
6093         struct igb_adapter *adapter = netdev_priv(netdev);
6094         struct e1000_hw *hw = &adapter->hw;
6095         struct hwtstamp_config config;
6096         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6097         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6098         u32 tsync_rx_cfg = 0;
6099         bool is_l4 = false;
6100         bool is_l2 = false;
6101         u32 regval;
6102
6103         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6104                 return -EFAULT;
6105
6106         /* reserved for future extensions */
6107         if (config.flags)
6108                 return -EINVAL;
6109
6110         switch (config.tx_type) {
6111         case HWTSTAMP_TX_OFF:
6112                 tsync_tx_ctl = 0;
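                /* fall through */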
6113         case HWTSTAMP_TX_ON:
6114                 break;
6115         default:
6116                 return -ERANGE;
6117         }
6118
6119         switch (config.rx_filter) {
6120         case HWTSTAMP_FILTER_NONE:
6121                 tsync_rx_ctl = 0;
6122                 break;
6123         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6124         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6125         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6126         case HWTSTAMP_FILTER_ALL:
6127                 /*
6128                  * register TSYNCRXCFG must be set, therefore it is not
6129                  * possible to time stamp both Sync and Delay_Req messages
6130                  * => fall back to time stamping all packets
6131                  */
6132                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6133                 config.rx_filter = HWTSTAMP_FILTER_ALL;
6134                 break;
6135         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6136                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6137                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6138                 is_l4 = true;
6139                 break;
6140         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6141                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6142                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6143                 is_l4 = true;
6144                 break;
6145         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6146         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6147                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6148                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6149                 is_l2 = true;
6150                 is_l4 = true;
6151                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6152                 break;
6153         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6154         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6155                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6156                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6157                 is_l2 = true;
6158                 is_l4 = true;
6159                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6160                 break;
6161         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6162         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6163         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6164                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6165                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6166                 is_l2 = true;
6167                 break;
6168         default:
6169                 return -ERANGE;
6170         }
6171
6172         if (hw->mac.type == e1000_82575) {
6173                 if (tsync_rx_ctl || tsync_tx_ctl)
6174                         return -EINVAL;
6175                 return 0;
6176         }
6177
6178         /*
6179          * Per-packet timestamping only works if all packets are
6180          * timestamped, so enable timestamping in all packets as
6181          * long as one rx filter was configured.
6182          */
6183         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6184                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6185                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6186         }
6187
6188         /* enable/disable TX */
6189         regval = rd32(E1000_TSYNCTXCTL);
6190         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6191         regval |= tsync_tx_ctl;
6192         wr32(E1000_TSYNCTXCTL, regval);
6193
6194         /* enable/disable RX */
6195         regval = rd32(E1000_TSYNCRXCTL);
6196         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6197         regval |= tsync_rx_ctl;
6198         wr32(E1000_TSYNCRXCTL, regval);
6199
6200         /* define which PTP packets are time stamped */
6201         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6202
6203         /* define ethertype filter for timestamped packets */
6204         if (is_l2)
6205                 wr32(E1000_ETQF(3),
6206                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6207                                  E1000_ETQF_1588 | /* enable timestamping */
6208                                  ETH_P_1588));     /* 1588 eth protocol type */
6209         else
6210                 wr32(E1000_ETQF(3), 0);
6211
6212 #define PTP_PORT 319
6213         /* L4 Queue Filter[3]: filter by destination port and protocol */
6214         if (is_l4) {
6215                 u32 ftqf = (IPPROTO_UDP /* UDP */
6216                         | E1000_FTQF_VF_BP /* VF not compared */
6217                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6218                         | E1000_FTQF_MASK); /* mask all inputs */
6219                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6220
6221                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6222                 wr32(E1000_IMIREXT(3),
6223                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6224                 if (hw->mac.type == e1000_82576) {
6225                         /* enable source port check */
6226                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6227                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6228                 }
6229                 wr32(E1000_FTQF(3), ftqf);
6230         } else {
6231                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6232         }
6233         wrfl();
6234
6235         adapter->hwtstamp_config = config;
6236
6237         /* clear TX/RX time stamp registers, just to be sure */
6238         regval = rd32(E1000_TXSTMPH);
6239         regval = rd32(E1000_RXSTMPH);
6240
6241         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6242                 -EFAULT : 0;
6243 }
6244
6245 /**
6246  * igb_ioctl - dispatch device-specific ioctl requests
6247  * @netdev: network interface device structure
6248  * @ifr: pointer to the interface request data
6249  * @cmd: ioctl command
6250  **/
6251 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6252 {
6253         switch (cmd) {
6254         case SIOCGMIIPHY:
6255         case SIOCGMIIREG:
6256         case SIOCSMIIREG:
6257                 return igb_mii_ioctl(netdev, ifr, cmd);
6258         case SIOCSHWTSTAMP:
6259                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6260         default:
6261                 return -EOPNOTSUPP;
6262         }
6263 }
6264
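/**
 * igb_read_pcie_cap_reg - read a word from the PCIe capability structure
 * @hw: pointer to the HW structure
 * @reg: offset of the word within the PCIe capability structure
 * @value: location to store the word that was read
 **/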
6265 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6266 {
6267         struct igb_adapter *adapter = hw->back;
6268         u16 cap_offset;
6269
6270         cap_offset = adapter->pdev->pcie_cap;
6271         if (!cap_offset)
6272                 return -E1000_ERR_CONFIG;
6273
6274         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6275
6276         return 0;
6277 }
6278
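/**
 * igb_write_pcie_cap_reg - write a word to the PCIe capability structure
 * @hw: pointer to the HW structure
 * @reg: offset of the word within the PCIe capability structure
 * @value: pointer to the word to be written
 **/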
6279 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6280 {
6281         struct igb_adapter *adapter = hw->back;
6282         u16 cap_offset;
6283
6284         cap_offset = adapter->pdev->pcie_cap;
6285         if (!cap_offset)
6286                 return -E1000_ERR_CONFIG;
6287
6288         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6289
6290         return 0;
6291 }
6292
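/**
 * igb_vlan_rx_register - enable or disable VLAN tag insert/strip
 * @netdev: network interface device structure
 * @grp: VLAN group from the 802.1q layer, or NULL to disable VLAN handling
 **/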
6293 static void igb_vlan_rx_register(struct net_device *netdev,
6294                                  struct vlan_group *grp)
6295 {
6296         struct igb_adapter *adapter = netdev_priv(netdev);
6297         struct e1000_hw *hw = &adapter->hw;
6298         u32 ctrl, rctl;
6299
6300         igb_irq_disable(adapter);
6301         adapter->vlgrp = grp;
6302
6303         if (grp) {
6304                 /* enable VLAN tag insert/strip */
6305                 ctrl = rd32(E1000_CTRL);
6306                 ctrl |= E1000_CTRL_VME;
6307                 wr32(E1000_CTRL, ctrl);
6308
6309                 /* Disable CFI check */
6310                 rctl = rd32(E1000_RCTL);
6311                 rctl &= ~E1000_RCTL_CFIEN;
6312                 wr32(E1000_RCTL, rctl);
6313         } else {
6314                 /* disable VLAN tag insert/strip */
6315                 ctrl = rd32(E1000_CTRL);
6316                 ctrl &= ~E1000_CTRL_VME;
6317                 wr32(E1000_CTRL, ctrl);
6318         }
6319
6320         igb_rlpml_set(adapter);
6321
6322         if (!test_bit(__IGB_DOWN, &adapter->state))
6323                 igb_irq_enable(adapter);
6324 }
6325
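/**
 * igb_vlan_rx_add_vid - add a VLAN id to the hardware filter tables
 * @netdev: network interface device structure
 * @vid: VLAN id to be added to the VLVF and VFTA tables
 **/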
6326 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6327 {
6328         struct igb_adapter *adapter = netdev_priv(netdev);
6329         struct e1000_hw *hw = &adapter->hw;
6330         int pf_id = adapter->vfs_allocated_count;
6331
6332         /* attempt to add filter to vlvf array */
6333         igb_vlvf_set(adapter, vid, true, pf_id);
6334
6335         /* add the filter since PF can receive vlans w/o entry in vlvf */
6336         igb_vfta_set(hw, vid, true);
6337 }
6338
6339 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6340 {
6341         struct igb_adapter *adapter = netdev_priv(netdev);
6342         struct e1000_hw *hw = &adapter->hw;
6343         int pf_id = adapter->vfs_allocated_count;
6344         s32 err;
6345
6346         igb_irq_disable(adapter);
6347         vlan_group_set_device(adapter->vlgrp, vid, NULL);
6348
6349         if (!test_bit(__IGB_DOWN, &adapter->state))
6350                 igb_irq_enable(adapter);
6351
6352         /* remove vlan from VLVF table array */
6353         err = igb_vlvf_set(adapter, vid, false, pf_id);
6354
6355         /* if vid was not present in VLVF just remove it from table */
6356         if (err)
6357                 igb_vfta_set(hw, vid, false);
6358 }
6359
6360 static void igb_restore_vlan(struct igb_adapter *adapter)
6361 {
6362         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6363
6364         if (adapter->vlgrp) {
6365                 u16 vid;
6366                 for (vid = 0; vid < VLAN_N_VID; vid++) {
6367                         if (!vlan_group_get_device(adapter->vlgrp, vid))
6368                                 continue;
6369                         igb_vlan_rx_add_vid(adapter->netdev, vid);
6370                 }
6371         }
6372 }
6373
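/**
 * igb_set_spd_dplx - force a specific speed/duplex setting
 * @adapter: board private structure
 * @spd: requested speed
 * @dplx: requested duplex
 *
 * Returns 0 on success or -EINVAL for combinations the hardware does not
 * support.
 **/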
6374 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6375 {
6376         struct pci_dev *pdev = adapter->pdev;
6377         struct e1000_mac_info *mac = &adapter->hw.mac;
6378
6379         mac->autoneg = 0;
6380
6381         /* Make sure dplx is at most 1 bit and lsb of speed is not set
6382          * for the switch() below to work */
6383         if ((spd & 1) || (dplx & ~1))
6384                 goto err_inval;
6385
6386         /* Fiber NICs only allow 1000 Mbps full duplex */
6387         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6388             (spd != SPEED_1000 ||
6389              dplx != DUPLEX_FULL))
6390                 goto err_inval;
6391
6392         switch (spd + dplx) {
6393         case SPEED_10 + DUPLEX_HALF:
6394                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6395                 break;
6396         case SPEED_10 + DUPLEX_FULL:
6397                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6398                 break;
6399         case SPEED_100 + DUPLEX_HALF:
6400                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6401                 break;
6402         case SPEED_100 + DUPLEX_FULL:
6403                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6404                 break;
6405         case SPEED_1000 + DUPLEX_FULL:
6406                 mac->autoneg = 1;
6407                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6408                 break;
6409         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6410         default:
6411                 goto err_inval;
6412         }
6413         return 0;
6414
6415 err_inval:
6416         dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6417         return -EINVAL;
6418 }
6419
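/**
 * __igb_shutdown - prepare the adapter for suspend or power-off
 * @pdev: PCI device information struct
 * @enable_wake: set true when wake-up (WoL or manageability) must stay armed
 *
 * Stops the interface, programs the wake-up filters and leaves the device
 * ready to be placed in a low-power state by the caller.
 **/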
6420 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6421 {
6422         struct net_device *netdev = pci_get_drvdata(pdev);
6423         struct igb_adapter *adapter = netdev_priv(netdev);
6424         struct e1000_hw *hw = &adapter->hw;
6425         u32 ctrl, rctl, status;
6426         u32 wufc = adapter->wol;
6427 #ifdef CONFIG_PM
6428         int retval = 0;
6429 #endif
6430
6431         netif_device_detach(netdev);
6432
6433         if (netif_running(netdev))
6434                 igb_close(netdev);
6435
6436         igb_clear_interrupt_scheme(adapter);
6437
6438 #ifdef CONFIG_PM
6439         retval = pci_save_state(pdev);
6440         if (retval)
6441                 return retval;
6442 #endif
6443
6444         status = rd32(E1000_STATUS);
6445         if (status & E1000_STATUS_LU)
6446                 wufc &= ~E1000_WUFC_LNKC;
6447
6448         if (wufc) {
6449                 igb_setup_rctl(adapter);
6450                 igb_set_rx_mode(netdev);
6451
6452                 /* turn on all-multi mode if wake on multicast is enabled */
6453                 if (wufc & E1000_WUFC_MC) {
6454                         rctl = rd32(E1000_RCTL);
6455                         rctl |= E1000_RCTL_MPE;
6456                         wr32(E1000_RCTL, rctl);
6457                 }
6458
6459                 ctrl = rd32(E1000_CTRL);
6460                 /* advertise wake from D3Cold */
6461                 #define E1000_CTRL_ADVD3WUC 0x00100000
6462                 /* phy power management enable */
6463                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6464                 ctrl |= E1000_CTRL_ADVD3WUC;
6465                 wr32(E1000_CTRL, ctrl);
6466
6467                 /* Allow time for pending master requests to run */
6468                 igb_disable_pcie_master(hw);
6469
6470                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6471                 wr32(E1000_WUFC, wufc);
6472         } else {
6473                 wr32(E1000_WUC, 0);
6474                 wr32(E1000_WUFC, 0);
6475         }
6476
6477         *enable_wake = wufc || adapter->en_mng_pt;
6478         if (!*enable_wake)
6479                 igb_power_down_link(adapter);
6480         else
6481                 igb_power_up_link(adapter);
6482
6483         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6484          * would have already happened in close and is redundant. */
6485         igb_release_hw_control(adapter);
6486
6487         pci_disable_device(pdev);
6488
6489         return 0;
6490 }
6491
6492 #ifdef CONFIG_PM
6493 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6494 {
6495         int retval;
6496         bool wake;
6497
6498         retval = __igb_shutdown(pdev, &wake);
6499         if (retval)
6500                 return retval;
6501
6502         if (wake) {
6503                 pci_prepare_to_sleep(pdev);
6504         } else {
6505                 pci_wake_from_d3(pdev, false);
6506                 pci_set_power_state(pdev, PCI_D3hot);
6507         }
6508
6509         return 0;
6510 }
6511
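/**
 * igb_resume - restore the adapter after a system suspend
 * @pdev: PCI device information struct
 *
 * Re-enables the PCI device, rebuilds the interrupt scheme, resets the
 * hardware and brings the interface back up if it was running at suspend.
 **/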
6512 static int igb_resume(struct pci_dev *pdev)
6513 {
6514         struct net_device *netdev = pci_get_drvdata(pdev);
6515         struct igb_adapter *adapter = netdev_priv(netdev);
6516         struct e1000_hw *hw = &adapter->hw;
6517         u32 err;
6518
6519         pci_set_power_state(pdev, PCI_D0);
6520         pci_restore_state(pdev);
6521         pci_save_state(pdev);
6522
6523         err = pci_enable_device_mem(pdev);
6524         if (err) {
6525                 dev_err(&pdev->dev,
6526                         "igb: Cannot enable PCI device from suspend\n");
6527                 return err;
6528         }
6529         pci_set_master(pdev);
6530
6531         pci_enable_wake(pdev, PCI_D3hot, 0);
6532         pci_enable_wake(pdev, PCI_D3cold, 0);
6533
6534         if (igb_init_interrupt_scheme(adapter)) {
6535                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6536                 return -ENOMEM;
6537         }
6538
6539         igb_reset(adapter);
6540
6541         /* let the f/w know that the h/w is now under the control of the
6542          * driver. */
6543         igb_get_hw_control(adapter);
6544
6545         wr32(E1000_WUS, ~0);
6546
6547         if (netif_running(netdev)) {
6548                 err = igb_open(netdev);
6549                 if (err)
6550                         return err;
6551         }
6552
6553         netif_device_attach(netdev);
6554
6555         return 0;
6556 }
6557 #endif
6558
6559 static void igb_shutdown(struct pci_dev *pdev)
6560 {
6561         bool wake;
6562
6563         __igb_shutdown(pdev, &wake);
6564
6565         if (system_state == SYSTEM_POWER_OFF) {
6566                 pci_wake_from_d3(pdev, wake);
6567                 pci_set_power_state(pdev, PCI_D3hot);
6568         }
6569 }
6570
6571 #ifdef CONFIG_NET_POLL_CONTROLLER
6572 /*
6573  * Polling 'interrupt' - used by things like netconsole to send skbs
6574  * without having to re-enable interrupts. It's not called while
6575  * the interrupt routine is executing.
6576  */
6577 static void igb_netpoll(struct net_device *netdev)
6578 {
6579         struct igb_adapter *adapter = netdev_priv(netdev);
6580         struct e1000_hw *hw = &adapter->hw;
6581         int i;
6582
6583         if (!adapter->msix_entries) {
6584                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6585                 igb_irq_disable(adapter);
6586                 napi_schedule(&q_vector->napi);
6587                 return;
6588         }
6589
6590         for (i = 0; i < adapter->num_q_vectors; i++) {
6591                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6592                 wr32(E1000_EIMC, q_vector->eims_value);
6593                 napi_schedule(&q_vector->napi);
6594         }
6595 }
6596 #endif /* CONFIG_NET_POLL_CONTROLLER */
6597
6598 /**
6599  * igb_io_error_detected - called when PCI error is detected
6600  * @pdev: Pointer to PCI device
6601  * @state: The current pci connection state
6602  *
6603  * This function is called after a PCI bus error affecting
6604  * this device has been detected.
6605  */
6606 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6607                                               pci_channel_state_t state)
6608 {
6609         struct net_device *netdev = pci_get_drvdata(pdev);
6610         struct igb_adapter *adapter = netdev_priv(netdev);
6611
6612         netif_device_detach(netdev);
6613
6614         if (state == pci_channel_io_perm_failure)
6615                 return PCI_ERS_RESULT_DISCONNECT;
6616
6617         if (netif_running(netdev))
6618                 igb_down(adapter);
6619         pci_disable_device(pdev);
6620
6621         /* Request a slot reset. */
6622         return PCI_ERS_RESULT_NEED_RESET;
6623 }
6624
6625 /**
6626  * igb_io_slot_reset - called after the pci bus has been reset.
6627  * @pdev: Pointer to PCI device
6628  *
6629  * Restart the card from scratch, as if from a cold-boot. Implementation
6630  * resembles the first-half of the igb_resume routine.
6631  */
6632 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6633 {
6634         struct net_device *netdev = pci_get_drvdata(pdev);
6635         struct igb_adapter *adapter = netdev_priv(netdev);
6636         struct e1000_hw *hw = &adapter->hw;
6637         pci_ers_result_t result;
6638         int err;
6639
6640         if (pci_enable_device_mem(pdev)) {
6641                 dev_err(&pdev->dev,
6642                         "Cannot re-enable PCI device after reset.\n");
6643                 result = PCI_ERS_RESULT_DISCONNECT;
6644         } else {
6645                 pci_set_master(pdev);
6646                 pci_restore_state(pdev);
6647                 pci_save_state(pdev);
6648
6649                 pci_enable_wake(pdev, PCI_D3hot, 0);
6650                 pci_enable_wake(pdev, PCI_D3cold, 0);
6651
6652                 igb_reset(adapter);
6653                 wr32(E1000_WUS, ~0);
6654                 result = PCI_ERS_RESULT_RECOVERED;
6655         }
6656
6657         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6658         if (err) {
6659                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6660                         "failed 0x%0x\n", err);
6661                 /* non-fatal, continue */
6662         }
6663
6664         return result;
6665 }
6666
6667 /**
6668  * igb_io_resume - called when traffic can start flowing again.
6669  * @pdev: Pointer to PCI device
6670  *
6671  * This callback is called when the error recovery driver tells us that
6672  * its OK to resume normal operation. Implementation resembles the
6673  * second-half of the igb_resume routine.
6674  */
6675 static void igb_io_resume(struct pci_dev *pdev)
6676 {
6677         struct net_device *netdev = pci_get_drvdata(pdev);
6678         struct igb_adapter *adapter = netdev_priv(netdev);
6679
6680         if (netif_running(netdev)) {
6681                 if (igb_up(adapter)) {
6682                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6683                         return;
6684                 }
6685         }
6686
6687         netif_device_attach(netdev);
6688
6689         /* let the f/w know that the h/w is now under the control of the
6690          * driver. */
6691         igb_get_hw_control(adapter);
6692 }
6693
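/**
 * igb_rar_set_qsel - program a MAC address into a receive address register
 * @adapter: board private structure
 * @addr: MAC address to program, in network byte order
 * @index: receive address register (RAL/RAH pair) to use
 * @qsel: pool select value to associate with this address
 **/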
6694 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6695                              u8 qsel)
6696 {
6697         u32 rar_low, rar_high;
6698         struct e1000_hw *hw = &adapter->hw;
6699
6700         /* HW expects these in little endian so we reverse the byte order
6701          * from network order (big endian) to little endian
6702          */
6703         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6704                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6705         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6706
6707         /* Indicate to hardware the Address is Valid. */
6708         rar_high |= E1000_RAH_AV;
6709
6710         if (hw->mac.type == e1000_82575)
6711                 rar_high |= E1000_RAH_POOL_1 * qsel;
6712         else
6713                 rar_high |= E1000_RAH_POOL_1 << qsel;
6714
6715         wr32(E1000_RAL(index), rar_low);
6716         wrfl();
6717         wr32(E1000_RAH(index), rar_high);
6718         wrfl();
6719 }
6720
6721 static int igb_set_vf_mac(struct igb_adapter *adapter,
6722                           int vf, unsigned char *mac_addr)
6723 {
6724         struct e1000_hw *hw = &adapter->hw;
6725         /* VF MAC addresses start at the end of the receive addresses and
6726          * move towards the first, so a collision should not be possible */
6727         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6728
6729         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6730
6731         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6732
6733         return 0;
6734 }
6735
6736 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6737 {
6738         struct igb_adapter *adapter = netdev_priv(netdev);
6739         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6740                 return -EINVAL;
6741         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6742         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6743         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6744                                       " change effective.\n");
6745         if (test_bit(__IGB_DOWN, &adapter->state)) {
6746                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6747                          " but the PF device is not up.\n");
6748                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6749                          " attempting to use the VF device.\n");
6750         }
6751         return igb_set_vf_mac(adapter, vf, mac);
6752 }
6753
6754 static int igb_link_mbps(int internal_link_speed)
6755 {
6756         switch (internal_link_speed) {
6757         case SPEED_100:
6758                 return 100;
6759         case SPEED_1000:
6760                 return 1000;
6761         default:
6762                 return 0;
6763         }
6764 }
6765
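/**
 * igb_set_vf_rate_limit - program the per-VF transmit rate limiter
 * @hw: pointer to the HW structure
 * @vf: VF (and therefore Tx queue) to configure
 * @tx_rate: requested rate in Mbps, 0 disables the limiter
 * @link_speed: current link speed in Mbps used to derive the rate factor
 **/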
6766 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6767                                   int link_speed)
6768 {
6769         int rf_dec, rf_int;
6770         u32 bcnrc_val;
6771
6772         if (tx_rate != 0) {
6773                 /* Calculate the rate factor values to set */
6774                 rf_int = link_speed / tx_rate;
6775                 rf_dec = (link_speed - (rf_int * tx_rate));
6776                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6777
6778                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6779                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6780                                E1000_RTTBCNRC_RF_INT_MASK);
6781                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6782         } else {
6783                 bcnrc_val = 0;
6784         }
6785
6786         wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6787         wr32(E1000_RTTBCNRC, bcnrc_val);
6788 }
6789
6790 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6791 {
6792         int actual_link_speed, i;
6793         bool reset_rate = false;
6794
6795         /* VF TX rate limit was not set or not supported */
6796         if ((adapter->vf_rate_link_speed == 0) ||
6797             (adapter->hw.mac.type != e1000_82576))
6798                 return;
6799
6800         actual_link_speed = igb_link_mbps(adapter->link_speed);
6801         if (actual_link_speed != adapter->vf_rate_link_speed) {
6802                 reset_rate = true;
6803                 adapter->vf_rate_link_speed = 0;
6804                 dev_info(&adapter->pdev->dev,
6805                          "Link speed has been changed. VF Transmit "
6806                          "rate is disabled\n");
6807         }
6808
6809         for (i = 0; i < adapter->vfs_allocated_count; i++) {
6810                 if (reset_rate)
6811                         adapter->vf_data[i].tx_rate = 0;
6812
6813                 igb_set_vf_rate_limit(&adapter->hw, i,
6814                                       adapter->vf_data[i].tx_rate,
6815                                       actual_link_speed);
6816         }
6817 }
6818
6819 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6820 {
6821         struct igb_adapter *adapter = netdev_priv(netdev);
6822         struct e1000_hw *hw = &adapter->hw;
6823         int actual_link_speed;
6824
6825         if (hw->mac.type != e1000_82576)
6826                 return -EOPNOTSUPP;
6827
6828         actual_link_speed = igb_link_mbps(adapter->link_speed);
6829         if ((vf >= adapter->vfs_allocated_count) ||
6830             (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6831             (tx_rate < 0) || (tx_rate > actual_link_speed))
6832                 return -EINVAL;
6833
6834         adapter->vf_rate_link_speed = actual_link_speed;
6835         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6836         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6837
6838         return 0;
6839 }
6840
6841 static int igb_ndo_get_vf_config(struct net_device *netdev,
6842                                  int vf, struct ifla_vf_info *ivi)
6843 {
6844         struct igb_adapter *adapter = netdev_priv(netdev);
6845         if (vf >= adapter->vfs_allocated_count)
6846                 return -EINVAL;
6847         ivi->vf = vf;
6848         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6849         ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6850         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6851         ivi->qos = adapter->vf_data[vf].pf_qos;
6852         return 0;
6853 }
6854
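/**
 * igb_vmm_control - configure VMDq/SR-IOV offloads for the current MAC
 * @adapter: board private structure
 *
 * Enables VLAN tag handling where the MAC supports it and, when VFs are
 * allocated, turns on loopback, replication and anti-spoofing; otherwise
 * these offloads are disabled.
 **/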
6855 static void igb_vmm_control(struct igb_adapter *adapter)
6856 {
6857         struct e1000_hw *hw = &adapter->hw;
6858         u32 reg;
6859
6860         switch (hw->mac.type) {
6861         case e1000_82575:
6862         default:
6863                 /* replication is not supported for 82575 */
6864                 return;
6865         case e1000_82576:
6866                 /* notify HW that the MAC is adding vlan tags */
6867                 reg = rd32(E1000_DTXCTL);
6868                 reg |= E1000_DTXCTL_VLAN_ADDED;
6869                 wr32(E1000_DTXCTL, reg);
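                /* fall through */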
6870         case e1000_82580:
6871                 /* enable replication vlan tag stripping */
6872                 reg = rd32(E1000_RPLOLR);
6873                 reg |= E1000_RPLOLR_STRVLAN;
6874                 wr32(E1000_RPLOLR, reg);
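                /* fall through */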
6875         case e1000_i350:
6876                 /* none of the above registers are supported by i350 */
6877                 break;
6878         }
6879
6880         if (adapter->vfs_allocated_count) {
6881                 igb_vmdq_set_loopback_pf(hw, true);
6882                 igb_vmdq_set_replication_pf(hw, true);
6883                 igb_vmdq_set_anti_spoofing_pf(hw, true,
6884                                                 adapter->vfs_allocated_count);
6885         } else {
6886                 igb_vmdq_set_loopback_pf(hw, false);
6887                 igb_vmdq_set_replication_pf(hw, false);
6888         }
6889 }
6890
6891 /* igb_main.c */