1 /*******************************************************************************
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2009 Intel Corporation.
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
26 *******************************************************************************/
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
49 #include <linux/dca.h>
/* Driver identity. igb_driver_name and igb_driver_version are non-static
 * so other igb compilation units (e.g. ethtool support) can reference
 * them; the two static strings are the banner printed at module load. */
53 #define DRV_VERSION "2.4.13-k2"
54 char igb_driver_name[] = "igb";
55 char igb_driver_version[] = DRV_VERSION;
56 static const char igb_driver_string[] =
57 "Intel(R) Gigabit Ethernet Network Driver";
58 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
/* Per-board feature table, indexed by the board_* value stored in each
 * PCI table entry below. Every supported part maps to the 82575 info. */
60 static const struct e1000_info *igb_info_tbl[] = {
61 [board_82575] = &e1000_82575_info,
/* PCI device IDs this driver binds to: I350, 82580, DH89xxCC, 82576 and
 * 82575 families. The driver_data field selects the igb_info_tbl entry. */
64 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
65 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
66 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
67 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
68 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
69 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
70 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
71 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
72 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
73 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
74 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
75 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
76 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
77 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
90 /* required last entry */
94 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
/* Forward declarations for the driver's entry points and internal
 * helpers defined later in this file. igb_reset() is non-static so it
 * can be called from other igb compilation units. */
96 void igb_reset(struct igb_adapter *);
97 static int igb_setup_all_tx_resources(struct igb_adapter *);
98 static int igb_setup_all_rx_resources(struct igb_adapter *);
99 static void igb_free_all_tx_resources(struct igb_adapter *);
100 static void igb_free_all_rx_resources(struct igb_adapter *);
101 static void igb_setup_mrqc(struct igb_adapter *);
102 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
103 static void __devexit igb_remove(struct pci_dev *pdev);
104 static int igb_sw_init(struct igb_adapter *);
105 static int igb_open(struct net_device *);
106 static int igb_close(struct net_device *);
107 static void igb_configure_tx(struct igb_adapter *);
108 static void igb_configure_rx(struct igb_adapter *);
109 static void igb_clean_all_tx_rings(struct igb_adapter *);
110 static void igb_clean_all_rx_rings(struct igb_adapter *);
111 static void igb_clean_tx_ring(struct igb_ring *);
112 static void igb_clean_rx_ring(struct igb_ring *);
113 static void igb_set_rx_mode(struct net_device *);
114 static void igb_update_phy_info(unsigned long);
115 static void igb_watchdog(unsigned long);
116 static void igb_watchdog_task(struct work_struct *);
117 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
118 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
119 struct rtnl_link_stats64 *stats);
120 static int igb_change_mtu(struct net_device *, int);
121 static int igb_set_mac(struct net_device *, void *);
122 static void igb_set_uta(struct igb_adapter *adapter);
/* Interrupt handlers: legacy, MSI, and MSI-X (other-causes + per-ring). */
123 static irqreturn_t igb_intr(int irq, void *);
124 static irqreturn_t igb_intr_msi(int irq, void *);
125 static irqreturn_t igb_msix_other(int irq, void *);
126 static irqreturn_t igb_msix_ring(int irq, void *);
127 #ifdef CONFIG_IGB_DCA
128 static void igb_update_dca(struct igb_q_vector *);
129 static void igb_setup_dca(struct igb_adapter *);
130 #endif /* CONFIG_IGB_DCA */
131 static bool igb_clean_tx_irq(struct igb_q_vector *);
132 static int igb_poll(struct napi_struct *, int);
133 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
134 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
135 static void igb_tx_timeout(struct net_device *);
136 static void igb_reset_task(struct work_struct *);
137 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
138 static void igb_vlan_rx_add_vid(struct net_device *, u16);
139 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
140 static void igb_restore_vlan(struct igb_adapter *);
141 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
/* SR-IOV / virtual-function management helpers and ndo_* VF callbacks. */
142 static void igb_ping_all_vfs(struct igb_adapter *);
143 static void igb_msg_task(struct igb_adapter *);
144 static void igb_vmm_control(struct igb_adapter *);
145 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
146 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
147 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
148 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
149 int vf, u16 vlan, u8 qos);
150 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
151 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
152 struct ifla_vf_info *ivi);
153 static void igb_check_vf_rate_limit(struct igb_adapter *);
/* Power-management and shutdown hooks wired into igb_driver below. */
156 static int igb_suspend(struct pci_dev *, pm_message_t);
157 static int igb_resume(struct pci_dev *);
159 static void igb_shutdown(struct pci_dev *);
160 #ifdef CONFIG_IGB_DCA
161 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
162 static struct notifier_block dca_notifier = {
163 .notifier_call = igb_notify_dca,
168 #ifdef CONFIG_NET_POLL_CONTROLLER
169 /* for netdump / net console */
170 static void igb_netpoll(struct net_device *);
/* max_vfs: module parameter controlling how many SR-IOV VFs to enable
 * per physical function (0 = SR-IOV disabled). */
172 #ifdef CONFIG_PCI_IOV
173 static unsigned int max_vfs = 0;
174 module_param(max_vfs, uint, 0);
175 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
176 "per physical function");
177 #endif /* CONFIG_PCI_IOV */
/* PCI AER (Advanced Error Reporting) recovery callbacks. */
179 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
180 pci_channel_state_t);
181 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
182 static void igb_io_resume(struct pci_dev *);
184 static struct pci_error_handlers igb_err_handler = {
185 .error_detected = igb_io_error_detected,
186 .slot_reset = igb_io_slot_reset,
187 .resume = igb_io_resume,
/* The pci_driver object registered in igb_init_module(): binds the PCI
 * ID table to probe/remove, power management and error-recovery hooks. */
191 static struct pci_driver igb_driver = {
192 .name = igb_driver_name,
193 .id_table = igb_pci_tbl,
195 .remove = __devexit_p(igb_remove),
197 /* Power Management Hooks */
198 .suspend = igb_suspend,
199 .resume = igb_resume,
201 .shutdown = igb_shutdown,
202 .err_handler = &igb_err_handler
/* Standard module metadata. */
205 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
206 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
207 MODULE_LICENSE("GPL");
208 MODULE_VERSION(DRV_VERSION);
/* Register offset/name pairs consumed by igb_regdump() from igb_dump().
 * Per-queue registers are keyed by queue 0's offset; igb_regdump()
 * expands them to queues 0-3 when printing. */
210 struct igb_reg_info {
215 static const struct igb_reg_info igb_reg_info_tbl[] = {
217 /* General Registers */
218 {E1000_CTRL, "CTRL"},
219 {E1000_STATUS, "STATUS"},
220 {E1000_CTRL_EXT, "CTRL_EXT"},
222 /* Interrupt Registers */
226 {E1000_RCTL, "RCTL"},
227 {E1000_RDLEN(0), "RDLEN"},
228 {E1000_RDH(0), "RDH"},
229 {E1000_RDT(0), "RDT"},
230 {E1000_RXDCTL(0), "RXDCTL"},
231 {E1000_RDBAL(0), "RDBAL"},
232 {E1000_RDBAH(0), "RDBAH"},
235 {E1000_TCTL, "TCTL"},
236 {E1000_TDBAL(0), "TDBAL"},
237 {E1000_TDBAH(0), "TDBAH"},
238 {E1000_TDLEN(0), "TDLEN"},
239 {E1000_TDH(0), "TDH"},
240 {E1000_TDT(0), "TDT"},
241 {E1000_TXDCTL(0), "TXDCTL"},
242 {E1000_TDFH, "TDFH"},
243 {E1000_TDFT, "TDFT"},
244 {E1000_TDFHS, "TDFHS"},
245 {E1000_TDFPC, "TDFPC"},
247 /* List Terminator */
/* igb_regdump() prints one row of the debug register dump: a single
 * "NAME value" line for scalar registers, or, for per-queue registers
 * (keyed in igb_reg_info_tbl by queue 0's offset), a "NAME[0-3]" line
 * showing the values for queues 0 through 3. */
252 * igb_regdump - register printout routine
254 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
260 switch (reginfo->ofs) {
262 for (n = 0; n < 4; n++)
263 regs[n] = rd32(E1000_RDLEN(n));
266 for (n = 0; n < 4; n++)
267 regs[n] = rd32(E1000_RDH(n));
270 for (n = 0; n < 4; n++)
271 regs[n] = rd32(E1000_RDT(n));
273 case E1000_RXDCTL(0):
274 for (n = 0; n < 4; n++)
275 regs[n] = rd32(E1000_RXDCTL(n));
278 for (n = 0; n < 4; n++)
279 regs[n] = rd32(E1000_RDBAL(n));
282 for (n = 0; n < 4; n++)
283 regs[n] = rd32(E1000_RDBAH(n));
286 for (n = 0; n < 4; n++)
/* BUG FIX: this case sits in the TX register sequence (immediately
 * before the TDBAH reads below), i.e. it is the TDBAL case, but it
 * was reading the RX register E1000_RDBAL(n). Read E1000_TDBAL(n)
 * so the TX descriptor base-address-low registers are dumped. */
287 regs[n] = rd32(E1000_TDBAL(n));
290 for (n = 0; n < 4; n++)
291 regs[n] = rd32(E1000_TDBAH(n));
294 for (n = 0; n < 4; n++)
295 regs[n] = rd32(E1000_TDLEN(n));
298 for (n = 0; n < 4; n++)
299 regs[n] = rd32(E1000_TDH(n));
302 for (n = 0; n < 4; n++)
303 regs[n] = rd32(E1000_TDT(n));
305 case E1000_TXDCTL(0):
306 for (n = 0; n < 4; n++)
307 regs[n] = rd32(E1000_TXDCTL(n));
/* Scalar register: print name and single value. */
310 printk(KERN_INFO "%-15s %08x\n",
311 reginfo->name, rd32(reginfo->ofs));
/* Per-queue register: print "NAME[0-3]" followed by four values. */
315 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
316 printk(KERN_INFO "%-15s ", rname);
317 for (n = 0; n < 4; n++)
318 printk(KERN_CONT "%08x ", regs[n]);
319 printk(KERN_CONT "\n");
323 * igb_dump - Print registers, tx-rings and rx-rings
/* Debug-only dump, gated on the adapter's netif_msg_* message level:
 * logs netdev state, the full register table (via igb_regdump), a
 * per-queue TX/RX summary, and — at higher debug levels — the raw
 * descriptor rings and (optionally) buffer contents via print_hex_dump. */
325 static void igb_dump(struct igb_adapter *adapter)
327 struct net_device *netdev = adapter->netdev;
328 struct e1000_hw *hw = &adapter->hw;
329 struct igb_reg_info *reginfo;
331 struct igb_ring *tx_ring;
332 union e1000_adv_tx_desc *tx_desc;
333 struct my_u0 { u64 a; u64 b; } *u0;
334 struct igb_buffer *buffer_info;
335 struct igb_ring *rx_ring;
336 union e1000_adv_rx_desc *rx_desc;
/* Nothing to do unless hardware-level debug messages are enabled. */
340 if (!netif_msg_hw(adapter))
343 /* Print netdevice Info */
345 dev_info(&adapter->pdev->dev, "Net device Info\n");
346 printk(KERN_INFO "Device Name state "
347 "trans_start last_rx\n");
348 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
355 /* Print Registers */
356 dev_info(&adapter->pdev->dev, "Register Dump\n");
357 printk(KERN_INFO " Register Name Value\n");
358 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
359 reginfo->name; reginfo++) {
360 igb_regdump(hw, reginfo);
363 /* Print TX Ring Summary */
364 if (!netdev || !netif_running(netdev))
367 dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
368 printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma ]"
369 " leng ntw timestamp\n");
370 for (n = 0; n < adapter->num_tx_queues; n++) {
371 tx_ring = adapter->tx_ring[n];
372 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
373 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
374 n, tx_ring->next_to_use, tx_ring->next_to_clean,
375 (u64)buffer_info->dma,
377 buffer_info->next_to_watch,
378 (u64)buffer_info->time_stamp);
/* Full TX descriptor dump only at the tx_done debug level. */
382 if (!netif_msg_tx_done(adapter))
383 goto rx_ring_summary;
385 dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
387 /* Transmit Descriptor Formats
389 * Advanced Transmit Descriptor
390 * +--------------------------------------------------------------+
391 * 0 | Buffer Address [63:0] |
392 * +--------------------------------------------------------------+
393 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
394 * +--------------------------------------------------------------+
395 * 63 46 45 40 39 38 36 35 32 31 24 15 0
398 for (n = 0; n < adapter->num_tx_queues; n++) {
399 tx_ring = adapter->tx_ring[n];
400 printk(KERN_INFO "------------------------------------\n");
401 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
402 printk(KERN_INFO "------------------------------------\n");
403 printk(KERN_INFO "T [desc] [address 63:0 ] "
404 "[PlPOCIStDDM Ln] [bi->dma ] "
405 "leng ntw timestamp bi->skb\n");
407 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
408 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
409 buffer_info = &tx_ring->buffer_info[i];
410 u0 = (struct my_u0 *)tx_desc;
411 printk(KERN_INFO "T [0x%03X] %016llX %016llX %016llX"
412 " %04X %3X %016llX %p", i,
415 (u64)buffer_info->dma,
417 buffer_info->next_to_watch,
418 (u64)buffer_info->time_stamp,
/* Annotate the next-to-use / next-to-clean positions. */
420 if (i == tx_ring->next_to_use &&
421 i == tx_ring->next_to_clean)
422 printk(KERN_CONT " NTC/U\n");
423 else if (i == tx_ring->next_to_use)
424 printk(KERN_CONT " NTU\n");
425 else if (i == tx_ring->next_to_clean)
426 printk(KERN_CONT " NTC\n");
428 printk(KERN_CONT "\n");
430 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
431 print_hex_dump(KERN_INFO, "",
433 16, 1, phys_to_virt(buffer_info->dma),
434 buffer_info->length, true);
438 /* Print RX Rings Summary */
440 dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
441 printk(KERN_INFO "Queue [NTU] [NTC]\n");
442 for (n = 0; n < adapter->num_rx_queues; n++) {
443 rx_ring = adapter->rx_ring[n];
444 printk(KERN_INFO " %5d %5X %5X\n", n,
445 rx_ring->next_to_use, rx_ring->next_to_clean);
/* Full RX descriptor dump only at the rx_status debug level. */
449 if (!netif_msg_rx_status(adapter))
452 dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
454 /* Advanced Receive Descriptor (Read) Format
456 * +-----------------------------------------------------+
457 * 0 | Packet Buffer Address [63:1] |A0/NSE|
458 * +----------------------------------------------+------+
459 * 8 | Header Buffer Address [63:1] | DD |
460 * +-----------------------------------------------------+
463 * Advanced Receive Descriptor (Write-Back) Format
465 * 63 48 47 32 31 30 21 20 17 16 4 3 0
466 * +------------------------------------------------------+
467 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
468 * | Checksum Ident | | | | Type | Type |
469 * +------------------------------------------------------+
470 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
471 * +------------------------------------------------------+
472 * 63 48 47 32 31 20 19 0
475 for (n = 0; n < adapter->num_rx_queues; n++) {
476 rx_ring = adapter->rx_ring[n];
477 printk(KERN_INFO "------------------------------------\n");
478 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
479 printk(KERN_INFO "------------------------------------\n");
480 printk(KERN_INFO "R [desc] [ PktBuf A0] "
481 "[ HeadBuf DD] [bi->dma ] [bi->skb] "
482 "<-- Adv Rx Read format\n");
483 printk(KERN_INFO "RWB[desc] [PcsmIpSHl PtRs] "
484 "[vl er S cks ln] ---------------- [bi->skb] "
485 "<-- Adv Rx Write-Back format\n");
487 for (i = 0; i < rx_ring->count; i++) {
488 buffer_info = &rx_ring->buffer_info[i];
489 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
490 u0 = (struct my_u0 *)rx_desc;
491 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
/* DD set means hardware has written the descriptor back; print it
 * in write-back format, otherwise in read format. */
492 if (staterr & E1000_RXD_STAT_DD) {
493 /* Descriptor Done */
494 printk(KERN_INFO "RWB[0x%03X] %016llX "
495 "%016llX ---------------- %p", i,
500 printk(KERN_INFO "R [0x%03X] %016llX "
501 "%016llX %016llX %p", i,
504 (u64)buffer_info->dma,
507 if (netif_msg_pktdata(adapter)) {
508 print_hex_dump(KERN_INFO, "",
511 phys_to_virt(buffer_info->dma),
512 rx_ring->rx_buffer_len, true);
513 if (rx_ring->rx_buffer_len
515 print_hex_dump(KERN_INFO, "",
519 buffer_info->page_dma +
520 buffer_info->page_offset),
525 if (i == rx_ring->next_to_use)
526 printk(KERN_CONT " NTU\n");
527 else if (i == rx_ring->next_to_clean)
528 printk(KERN_CONT " NTC\n");
530 printk(KERN_CONT "\n");
541 * igb_read_clock - read raw cycle counter (to be used by time counter)
/* Returns the raw 64-bit SYSTIM value for the timecounter. On 82580 the
 * timestamp latches on the lowest register read (SYSTIMR), and the
 * result is shifted by IGB_82580_TSYNC_SHIFT; other MACs latch on
 * SYSTIML. */
543 static cycle_t igb_read_clock(const struct cyclecounter *tc)
545 struct igb_adapter *adapter =
546 container_of(tc, struct igb_adapter, cycles);
547 struct e1000_hw *hw = &adapter->hw;
552 * The timestamp latches on lowest register read. For the 82580
553 * the lowest register is SYSTIMR instead of SYSTIML. However we never
554 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
556 if (hw->mac.type == e1000_82580) {
557 stamp = rd32(E1000_SYSTIMR) >> 8;
558 shift = IGB_82580_TSYNC_SHIFT;
561 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
562 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
567 * igb_get_hw_dev - return device
568 * used by hardware layer to print debugging information
/* Maps an e1000_hw back to its net_device via the adapter back-pointer. */
570 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
572 struct igb_adapter *adapter = hw->back;
573 return adapter->netdev;
577 * igb_init_module - Driver Registration Routine
579 * igb_init_module is the first routine called when the driver is
580 * loaded. All it does is register with the PCI subsystem.
/* Also registers the DCA notifier first when CONFIG_IGB_DCA is set. */
582 static int __init igb_init_module(void)
585 printk(KERN_INFO "%s - version %s\n",
586 igb_driver_string, igb_driver_version);
588 printk(KERN_INFO "%s\n", igb_copyright);
590 #ifdef CONFIG_IGB_DCA
591 dca_register_notify(&dca_notifier);
593 ret = pci_register_driver(&igb_driver);
597 module_init(igb_init_module);
600 * igb_exit_module - Driver Exit Cleanup Routine
602 * igb_exit_module is called just before the driver is removed
/* Unregisters the DCA notifier (if configured) and the PCI driver —
 * the reverse of igb_init_module(). */
605 static void __exit igb_exit_module(void)
607 #ifdef CONFIG_IGB_DCA
608 dca_unregister_notify(&dca_notifier);
610 pci_unregister_driver(&igb_driver);
613 module_exit(igb_exit_module);
/* Q_IDX_82576(i): on 82576 with VFs active, PF queues interleave with
 * VF queues — rss queue i maps to hardware queue ((i & 1) << 3) + (i >> 1). */
615 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
617 * igb_cache_ring_register - Descriptor ring to register mapping
618 * @adapter: board private structure to initialize
620 * Once we know the feature-set enabled for the device, we'll cache
621 * the register offset the descriptor ring is assigned to.
623 static void igb_cache_ring_register(struct igb_adapter *adapter)
626 u32 rbase_offset = adapter->vfs_allocated_count;
628 switch (adapter->hw.mac.type) {
630 /* The queues are allocated for virtualization such that VF 0
631 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
632 * In order to avoid collision we start at the first free queue
633 * and continue consuming queues in the same sequence
635 if (adapter->vfs_allocated_count) {
636 for (; i < adapter->rss_queues; i++)
637 adapter->rx_ring[i]->reg_idx = rbase_offset +
/* Default mapping: ring i uses hardware queue (rbase_offset + i). */
644 for (; i < adapter->num_rx_queues; i++)
645 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
646 for (; j < adapter->num_tx_queues; j++)
647 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
/* Free all TX/RX ring structures allocated by igb_alloc_queues() and
 * reset the queue counts to zero. Ring pointers are NULLed after kfree
 * so a repeated call is harmless. */
652 static void igb_free_queues(struct igb_adapter *adapter)
656 for (i = 0; i < adapter->num_tx_queues; i++) {
657 kfree(adapter->tx_ring[i]);
658 adapter->tx_ring[i] = NULL;
660 for (i = 0; i < adapter->num_rx_queues; i++) {
661 kfree(adapter->rx_ring[i]);
662 adapter->rx_ring[i] = NULL;
664 adapter->num_rx_queues = 0;
665 adapter->num_tx_queues = 0;
669 * igb_alloc_queues - Allocate memory for all rings
670 * @adapter: board private structure to initialize
672 * We allocate one ring per queue at run-time since we don't know the
673 * number of queues at compile-time.
/* On failure the error path calls igb_free_queues() so no partial
 * allocation is leaked. */
675 static int igb_alloc_queues(struct igb_adapter *adapter)
677 struct igb_ring *ring;
680 for (i = 0; i < adapter->num_tx_queues; i++) {
681 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
684 ring->count = adapter->tx_ring_count;
685 ring->queue_index = i;
686 ring->dev = &adapter->pdev->dev;
687 ring->netdev = adapter->netdev;
688 /* For 82575, context index must be unique per ring. */
689 if (adapter->hw.mac.type == e1000_82575)
690 ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
691 adapter->tx_ring[i] = ring;
694 for (i = 0; i < adapter->num_rx_queues; i++) {
695 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
698 ring->count = adapter->rx_ring_count;
699 ring->queue_index = i;
700 ring->dev = &adapter->pdev->dev;
701 ring->netdev = adapter->netdev;
702 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
703 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
704 /* set flag indicating ring supports SCTP checksum offload */
705 if (adapter->hw.mac.type >= e1000_82576)
706 ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
707 adapter->rx_ring[i] = ring;
/* Map the freshly-allocated rings onto hardware register indices. */
710 igb_cache_ring_register(adapter);
715 igb_free_queues(adapter);
/* Sentinel meaning "this q_vector has no RX (or TX) queue". */
720 #define IGB_N0_QUEUE -1
/* Program the hardware so that the given q_vector's RX/TX queues raise
 * the given MSI-X vector, using the MAC-specific mechanism: MSIXBM
 * bitmask on 82575, IVAR table on 82576, IVAR with half-size table on
 * 82580. Also accumulates the vector's EIMS bit into eims_enable_mask. */
721 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
724 struct igb_adapter *adapter = q_vector->adapter;
725 struct e1000_hw *hw = &adapter->hw;
727 int rx_queue = IGB_N0_QUEUE;
728 int tx_queue = IGB_N0_QUEUE;
730 if (q_vector->rx_ring)
731 rx_queue = q_vector->rx_ring->reg_idx;
732 if (q_vector->tx_ring)
733 tx_queue = q_vector->tx_ring->reg_idx;
735 switch (hw->mac.type) {
737 /* The 82575 assigns vectors using a bitmask, which matches the
738 bitmask for the EICR/EIMS/EIMC registers. To assign one
739 or more queues to a vector, we write the appropriate bits
740 into the MSIXBM register for that vector. */
741 if (rx_queue > IGB_N0_QUEUE)
742 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
743 if (tx_queue > IGB_N0_QUEUE)
744 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
745 if (!adapter->msix_entries && msix_vector == 0)
746 msixbm |= E1000_EIMS_OTHER;
747 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
748 q_vector->eims_value = msixbm;
751 /* 82576 uses a table-based method for assigning vectors.
752 Each queue has a single entry in the table to which we write
753 a vector number along with a "valid" bit. Sadly, the layout
754 of the table is somewhat counterintuitive. */
755 if (rx_queue > IGB_N0_QUEUE) {
756 index = (rx_queue & 0x7);
757 ivar = array_rd32(E1000_IVAR0, index);
759 /* vector goes into low byte of register */
760 ivar = ivar & 0xFFFFFF00;
761 ivar |= msix_vector | E1000_IVAR_VALID;
763 /* vector goes into third byte of register */
764 ivar = ivar & 0xFF00FFFF;
765 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
767 array_wr32(E1000_IVAR0, index, ivar);
769 if (tx_queue > IGB_N0_QUEUE) {
770 index = (tx_queue & 0x7);
771 ivar = array_rd32(E1000_IVAR0, index);
773 /* vector goes into second byte of register */
774 ivar = ivar & 0xFFFF00FF;
775 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
777 /* vector goes into high byte of register */
778 ivar = ivar & 0x00FFFFFF;
779 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
781 array_wr32(E1000_IVAR0, index, ivar);
783 q_vector->eims_value = 1 << msix_vector;
787 /* 82580 uses the same table-based approach as 82576 but has fewer
788 entries as a result we carry over for queues greater than 4. */
789 if (rx_queue > IGB_N0_QUEUE) {
790 index = (rx_queue >> 1);
791 ivar = array_rd32(E1000_IVAR0, index);
792 if (rx_queue & 0x1) {
793 /* vector goes into third byte of register */
794 ivar = ivar & 0xFF00FFFF;
795 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
797 /* vector goes into low byte of register */
798 ivar = ivar & 0xFFFFFF00;
799 ivar |= msix_vector | E1000_IVAR_VALID;
801 array_wr32(E1000_IVAR0, index, ivar);
803 if (tx_queue > IGB_N0_QUEUE) {
804 index = (tx_queue >> 1);
805 ivar = array_rd32(E1000_IVAR0, index);
806 if (tx_queue & 0x1) {
807 /* vector goes into high byte of register */
808 ivar = ivar & 0x00FFFFFF;
809 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
811 /* vector goes into second byte of register */
812 ivar = ivar & 0xFFFF00FF;
813 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
815 array_wr32(E1000_IVAR0, index, ivar);
817 q_vector->eims_value = 1 << msix_vector;
824 /* add q_vector eims value to global eims_enable_mask */
825 adapter->eims_enable_mask |= q_vector->eims_value;
827 /* configure q_vector to set itr on first interrupt */
828 q_vector->set_itr = 1;
832 * igb_configure_msix - Configure MSI-X hardware
834 * igb_configure_msix sets up the hardware to properly
835 * generate MSI-X interrupts.
/* Programs the "other causes" (link etc.) vector per MAC type, then
 * assigns each queue vector via igb_assign_vector(). */
837 static void igb_configure_msix(struct igb_adapter *adapter)
841 struct e1000_hw *hw = &adapter->hw;
843 adapter->eims_enable_mask = 0;
845 /* set vector for other causes, i.e. link changes */
846 switch (hw->mac.type) {
848 tmp = rd32(E1000_CTRL_EXT);
849 /* enable MSI-X PBA support*/
850 tmp |= E1000_CTRL_EXT_PBA_CLR;
852 /* Auto-Mask interrupts upon ICR read. */
853 tmp |= E1000_CTRL_EXT_EIAME;
854 tmp |= E1000_CTRL_EXT_IRCA;
856 wr32(E1000_CTRL_EXT, tmp);
858 /* enable msix_other interrupt */
859 array_wr32(E1000_MSIXBM(0), vector++,
861 adapter->eims_other = E1000_EIMS_OTHER;
868 /* Turn on MSI-X capability first, or our settings
869 * won't stick. And it will take days to debug. */
870 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
871 E1000_GPIE_PBA | E1000_GPIE_EIAME |
874 /* enable msix_other interrupt */
875 adapter->eims_other = 1 << vector;
876 tmp = (vector++ | E1000_IVAR_VALID) << 8;
878 wr32(E1000_IVAR_MISC, tmp);
881 /* do nothing, since nothing else supports MSI-X */
883 } /* switch (hw->mac.type) */
885 adapter->eims_enable_mask |= adapter->eims_other;
887 for (i = 0; i < adapter->num_q_vectors; i++)
888 igb_assign_vector(adapter->q_vector[i], vector++);
894 * igb_request_msix - Initialize MSI-X interrupts
896 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
/* Requests the "other causes" IRQ first, then one IRQ per q_vector; the
 * per-vector IRQ name encodes the queue role (TxRx / tx / rx). */
899 static int igb_request_msix(struct igb_adapter *adapter)
901 struct net_device *netdev = adapter->netdev;
902 struct e1000_hw *hw = &adapter->hw;
903 int i, err = 0, vector = 0;
905 err = request_irq(adapter->msix_entries[vector].vector,
906 igb_msix_other, 0, netdev->name, adapter);
911 for (i = 0; i < adapter->num_q_vectors; i++) {
912 struct igb_q_vector *q_vector = adapter->q_vector[i];
914 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
916 if (q_vector->rx_ring && q_vector->tx_ring)
917 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
918 q_vector->rx_ring->queue_index);
919 else if (q_vector->tx_ring)
920 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
921 q_vector->tx_ring->queue_index);
922 else if (q_vector->rx_ring)
923 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
924 q_vector->rx_ring->queue_index);
926 sprintf(q_vector->name, "%s-unused", netdev->name);
928 err = request_irq(adapter->msix_entries[vector].vector,
929 igb_msix_ring, 0, q_vector->name,
936 igb_configure_msix(adapter);
/* Undo MSI-X or MSI setup: disable MSI-X and free the entry array when
 * present, otherwise disable MSI if it was enabled. */
942 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
944 if (adapter->msix_entries) {
945 pci_disable_msix(adapter->pdev);
946 kfree(adapter->msix_entries);
947 adapter->msix_entries = NULL;
948 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
949 pci_disable_msi(adapter->pdev);
954 * igb_free_q_vectors - Free memory allocated for interrupt vectors
955 * @adapter: board private structure to initialize
957 * This function frees the memory allocated to the q_vectors. In addition if
958 * NAPI is enabled it will delete any references to the NAPI struct prior
959 * to freeing the q_vector.
961 static void igb_free_q_vectors(struct igb_adapter *adapter)
965 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
966 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
/* Clear the adapter's slot before teardown of the vector itself. */
967 adapter->q_vector[v_idx] = NULL;
970 netif_napi_del(&q_vector->napi);
973 adapter->num_q_vectors = 0;
977 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
979 * This function resets the device so that it has 0 rx queues, tx queues, and
980 * MSI-X interrupts allocated.
/* Teardown is the reverse of igb_init_interrupt_scheme(): queues, then
 * q_vectors, then the MSI/MSI-X capability itself. */
982 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
984 igb_free_queues(adapter);
985 igb_free_q_vectors(adapter);
986 igb_reset_interrupt_capability(adapter);
990 * igb_set_interrupt_capability - set MSI or MSI-X if supported
992 * Attempt to configure interrupts using the best available
993 * capabilities of the hardware and kernel.
/* Tries MSI-X (one vector per RX queue, plus per-TX-queue vectors when
 * queue pairing is off, plus one link vector); on failure falls back to
 * single-queue MSI, disabling SR-IOV since it requires MSI-X. */
995 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1000 /* Number of supported queues. */
1001 adapter->num_rx_queues = adapter->rss_queues;
1002 if (adapter->vfs_allocated_count)
1003 adapter->num_tx_queues = 1;
1005 adapter->num_tx_queues = adapter->rss_queues;
1007 /* start with one vector for every rx queue */
1008 numvecs = adapter->num_rx_queues;
1010 /* if tx handler is separate add 1 for every tx queue */
1011 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1012 numvecs += adapter->num_tx_queues;
1014 /* store the number of vectors reserved for queues */
1015 adapter->num_q_vectors = numvecs;
1017 /* add 1 vector for link status interrupts */
1019 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1021 if (!adapter->msix_entries)
1024 for (i = 0; i < numvecs; i++)
1025 adapter->msix_entries[i].entry = i;
1027 err = pci_enable_msix(adapter->pdev,
1028 adapter->msix_entries,
1033 igb_reset_interrupt_capability(adapter);
1035 /* If we can't do MSI-X, try MSI */
1037 #ifdef CONFIG_PCI_IOV
1038 /* disable SR-IOV for non MSI-X configurations */
1039 if (adapter->vf_data) {
1040 struct e1000_hw *hw = &adapter->hw;
1041 /* disable iov and allow time for transactions to clear */
1042 pci_disable_sriov(adapter->pdev);
1045 kfree(adapter->vf_data);
1046 adapter->vf_data = NULL;
1047 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1049 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
/* MSI fallback: collapse to a single paired TX/RX queue and vector. */
1052 adapter->vfs_allocated_count = 0;
1053 adapter->rss_queues = 1;
1054 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1055 adapter->num_rx_queues = 1;
1056 adapter->num_tx_queues = 1;
1057 adapter->num_q_vectors = 1;
1058 if (!pci_enable_msi(adapter->pdev))
1059 adapter->flags |= IGB_FLAG_HAS_MSI;
1061 /* Notify the stack of the (possibly) reduced queue counts. */
1062 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1063 return netif_set_real_num_rx_queues(adapter->netdev,
1064 adapter->num_rx_queues);
1068 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1069 * @adapter: board private structure to initialize
1071 * We allocate one q_vector per queue interrupt. If allocation fails we
/* On failure, igb_free_q_vectors() releases everything allocated so far. */
1074 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1076 struct igb_q_vector *q_vector;
1077 struct e1000_hw *hw = &adapter->hw;
1080 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1081 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1084 q_vector->adapter = adapter;
1085 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1086 q_vector->itr_val = IGB_START_ITR;
1087 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1088 adapter->q_vector[v_idx] = q_vector;
1093 igb_free_q_vectors(adapter);
/* Attach RX ring ring_idx to q_vector v_idx (both directions) and seed
 * the vector's ITR from the adapter RX setting; setting values 1-3 are
 * mode selectors, not real intervals, so start from IGB_START_ITR. */
1097 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1098 int ring_idx, int v_idx)
1100 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1102 q_vector->rx_ring = adapter->rx_ring[ring_idx];
1103 q_vector->rx_ring->q_vector = q_vector;
1104 q_vector->itr_val = adapter->rx_itr_setting;
1105 if (q_vector->itr_val && q_vector->itr_val <= 3)
1106 q_vector->itr_val = IGB_START_ITR;
/* TX counterpart of the helper above, seeded from the TX ITR setting. */
1109 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1110 int ring_idx, int v_idx)
1112 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1114 q_vector->tx_ring = adapter->tx_ring[ring_idx];
1115 q_vector->tx_ring->q_vector = q_vector;
1116 q_vector->itr_val = adapter->tx_itr_setting;
1117 if (q_vector->itr_val && q_vector->itr_val <= 3)
1118 q_vector->itr_val = IGB_START_ITR;
1122 * igb_map_ring_to_vector - maps allocated queues to vectors
1124 * This function maps the recently allocated queues to vectors.
1126 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1131 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1132 (adapter->num_q_vectors < adapter->num_tx_queues))
1135 if (adapter->num_q_vectors >=
1136 (adapter->num_rx_queues + adapter->num_tx_queues)) {
1137 for (i = 0; i < adapter->num_rx_queues; i++)
1138 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1139 for (i = 0; i < adapter->num_tx_queues; i++)
1140 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1142 for (i = 0; i < adapter->num_rx_queues; i++) {
1143 if (i < adapter->num_tx_queues)
1144 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1145 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1147 for (; i < adapter->num_tx_queues; i++)
1148 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1154 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1156 * This function initializes the interrupts and allocates all of the queues.
1158 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1160 struct pci_dev *pdev = adapter->pdev;
1163 err = igb_set_interrupt_capability(adapter);
1167 err = igb_alloc_q_vectors(adapter);
1169 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1170 goto err_alloc_q_vectors;
1173 err = igb_alloc_queues(adapter);
1175 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1176 goto err_alloc_queues;
1179 err = igb_map_ring_to_vector(adapter);
1181 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1182 goto err_map_queues;
1188 igb_free_queues(adapter);
1190 igb_free_q_vectors(adapter);
1191 err_alloc_q_vectors:
1192 igb_reset_interrupt_capability(adapter);
1197 * igb_request_irq - initialize interrupts
1199 * Attempts to configure interrupts using the best available
1200 * capabilities of the hardware and kernel.
1202 static int igb_request_irq(struct igb_adapter *adapter)
1204 struct net_device *netdev = adapter->netdev;
1205 struct pci_dev *pdev = adapter->pdev;
1208 if (adapter->msix_entries) {
1209 err = igb_request_msix(adapter);
1212 /* fall back to MSI */
1213 igb_clear_interrupt_scheme(adapter);
1214 if (!pci_enable_msi(adapter->pdev))
1215 adapter->flags |= IGB_FLAG_HAS_MSI;
1216 igb_free_all_tx_resources(adapter);
1217 igb_free_all_rx_resources(adapter);
1218 adapter->num_tx_queues = 1;
1219 adapter->num_rx_queues = 1;
1220 adapter->num_q_vectors = 1;
1221 err = igb_alloc_q_vectors(adapter);
1224 "Unable to allocate memory for vectors\n");
1227 err = igb_alloc_queues(adapter);
1230 "Unable to allocate memory for queues\n");
1231 igb_free_q_vectors(adapter);
1234 igb_setup_all_tx_resources(adapter);
1235 igb_setup_all_rx_resources(adapter);
1237 igb_assign_vector(adapter->q_vector[0], 0);
1240 if (adapter->flags & IGB_FLAG_HAS_MSI) {
1241 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1242 netdev->name, adapter);
1246 /* fall back to legacy interrupts */
1247 igb_reset_interrupt_capability(adapter);
1248 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1251 err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1252 netdev->name, adapter);
1255 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1262 static void igb_free_irq(struct igb_adapter *adapter)
1264 if (adapter->msix_entries) {
1267 free_irq(adapter->msix_entries[vector++].vector, adapter);
1269 for (i = 0; i < adapter->num_q_vectors; i++) {
1270 struct igb_q_vector *q_vector = adapter->q_vector[i];
1271 free_irq(adapter->msix_entries[vector++].vector,
1275 free_irq(adapter->pdev->irq, adapter);
1280 * igb_irq_disable - Mask off interrupt generation on the NIC
1281 * @adapter: board private structure
1283 static void igb_irq_disable(struct igb_adapter *adapter)
1285 struct e1000_hw *hw = &adapter->hw;
1288 * we need to be careful when disabling interrupts. The VFs are also
1289 * mapped into these registers and so clearing the bits can cause
1290 * issues on the VF drivers so we only need to clear what we set
1292 if (adapter->msix_entries) {
1293 u32 regval = rd32(E1000_EIAM);
1294 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1295 wr32(E1000_EIMC, adapter->eims_enable_mask);
1296 regval = rd32(E1000_EIAC);
1297 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1301 wr32(E1000_IMC, ~0);
1303 if (adapter->msix_entries) {
1305 for (i = 0; i < adapter->num_q_vectors; i++)
1306 synchronize_irq(adapter->msix_entries[i].vector);
1308 synchronize_irq(adapter->pdev->irq);
1313 * igb_irq_enable - Enable default interrupt generation settings
1314 * @adapter: board private structure
1316 static void igb_irq_enable(struct igb_adapter *adapter)
1318 struct e1000_hw *hw = &adapter->hw;
1320 if (adapter->msix_entries) {
1321 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1322 u32 regval = rd32(E1000_EIAC);
1323 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1324 regval = rd32(E1000_EIAM);
1325 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1326 wr32(E1000_EIMS, adapter->eims_enable_mask);
1327 if (adapter->vfs_allocated_count) {
1328 wr32(E1000_MBVFIMR, 0xFF);
1329 ims |= E1000_IMS_VMMB;
1331 if (adapter->hw.mac.type == e1000_82580)
1332 ims |= E1000_IMS_DRSTA;
1334 wr32(E1000_IMS, ims);
1336 wr32(E1000_IMS, IMS_ENABLE_MASK |
1338 wr32(E1000_IAM, IMS_ENABLE_MASK |
1343 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1345 struct e1000_hw *hw = &adapter->hw;
1346 u16 vid = adapter->hw.mng_cookie.vlan_id;
1347 u16 old_vid = adapter->mng_vlan_id;
1349 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1350 /* add VID to filter table */
1351 igb_vfta_set(hw, vid, true);
1352 adapter->mng_vlan_id = vid;
1354 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1357 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1359 !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1360 /* remove VID from filter table */
1361 igb_vfta_set(hw, old_vid, false);
1366 * igb_release_hw_control - release control of the h/w to f/w
1367 * @adapter: address of board private structure
1369 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1370 * For ASF and Pass Through versions of f/w this means that the
1371 * driver is no longer loaded.
1374 static void igb_release_hw_control(struct igb_adapter *adapter)
1376 struct e1000_hw *hw = &adapter->hw;
1379 /* Let firmware take over control of h/w */
1380 ctrl_ext = rd32(E1000_CTRL_EXT);
1381 wr32(E1000_CTRL_EXT,
1382 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1386 * igb_get_hw_control - get control of the h/w from f/w
1387 * @adapter: address of board private structure
1389 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1390 * For ASF and Pass Through versions of f/w this means that
1391 * the driver is loaded.
1394 static void igb_get_hw_control(struct igb_adapter *adapter)
1396 struct e1000_hw *hw = &adapter->hw;
1399 /* Let firmware know the driver has taken over */
1400 ctrl_ext = rd32(E1000_CTRL_EXT);
1401 wr32(E1000_CTRL_EXT,
1402 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1406 * igb_configure - configure the hardware for RX and TX
1407 * @adapter: private board structure
1409 static void igb_configure(struct igb_adapter *adapter)
1411 struct net_device *netdev = adapter->netdev;
1414 igb_get_hw_control(adapter);
1415 igb_set_rx_mode(netdev);
1417 igb_restore_vlan(adapter);
1419 igb_setup_tctl(adapter);
1420 igb_setup_mrqc(adapter);
1421 igb_setup_rctl(adapter);
1423 igb_configure_tx(adapter);
1424 igb_configure_rx(adapter);
1426 igb_rx_fifo_flush_82575(&adapter->hw);
1428 /* call igb_desc_unused which always leaves
1429 * at least 1 descriptor unused to make sure
1430 * next_to_use != next_to_clean */
1431 for (i = 0; i < adapter->num_rx_queues; i++) {
1432 struct igb_ring *ring = adapter->rx_ring[i];
1433 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1438 * igb_power_up_link - Power up the phy/serdes link
1439 * @adapter: address of board private structure
1441 void igb_power_up_link(struct igb_adapter *adapter)
1443 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1444 igb_power_up_phy_copper(&adapter->hw);
1446 igb_power_up_serdes_link_82575(&adapter->hw);
1450 * igb_power_down_link - Power down the phy/serdes link
1451 * @adapter: address of board private structure
1453 static void igb_power_down_link(struct igb_adapter *adapter)
1455 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1456 igb_power_down_phy_copper_82575(&adapter->hw);
1458 igb_shutdown_serdes_link_82575(&adapter->hw);
1462 * igb_up - Open the interface and prepare it to handle traffic
1463 * @adapter: board private structure
1465 int igb_up(struct igb_adapter *adapter)
1467 struct e1000_hw *hw = &adapter->hw;
1470 /* hardware has been reset, we need to reload some things */
1471 igb_configure(adapter);
1473 clear_bit(__IGB_DOWN, &adapter->state);
1475 for (i = 0; i < adapter->num_q_vectors; i++) {
1476 struct igb_q_vector *q_vector = adapter->q_vector[i];
1477 napi_enable(&q_vector->napi);
1479 if (adapter->msix_entries)
1480 igb_configure_msix(adapter);
1482 igb_assign_vector(adapter->q_vector[0], 0);
1484 /* Clear any pending interrupts. */
1486 igb_irq_enable(adapter);
1488 /* notify VFs that reset has been completed */
1489 if (adapter->vfs_allocated_count) {
1490 u32 reg_data = rd32(E1000_CTRL_EXT);
1491 reg_data |= E1000_CTRL_EXT_PFRSTD;
1492 wr32(E1000_CTRL_EXT, reg_data);
1495 netif_tx_start_all_queues(adapter->netdev);
1497 /* start the watchdog. */
1498 hw->mac.get_link_status = 1;
1499 schedule_work(&adapter->watchdog_task);
1504 void igb_down(struct igb_adapter *adapter)
1506 struct net_device *netdev = adapter->netdev;
1507 struct e1000_hw *hw = &adapter->hw;
1511 /* signal that we're down so the interrupt handler does not
1512 * reschedule our watchdog timer */
1513 set_bit(__IGB_DOWN, &adapter->state);
1515 /* disable receives in the hardware */
1516 rctl = rd32(E1000_RCTL);
1517 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1518 /* flush and sleep below */
1520 netif_tx_stop_all_queues(netdev);
1522 /* disable transmits in the hardware */
1523 tctl = rd32(E1000_TCTL);
1524 tctl &= ~E1000_TCTL_EN;
1525 wr32(E1000_TCTL, tctl);
1526 /* flush both disables and wait for them to finish */
1530 for (i = 0; i < adapter->num_q_vectors; i++) {
1531 struct igb_q_vector *q_vector = adapter->q_vector[i];
1532 napi_disable(&q_vector->napi);
1535 igb_irq_disable(adapter);
1537 del_timer_sync(&adapter->watchdog_timer);
1538 del_timer_sync(&adapter->phy_info_timer);
1540 netif_carrier_off(netdev);
1542 /* record the stats before reset*/
1543 spin_lock(&adapter->stats64_lock);
1544 igb_update_stats(adapter, &adapter->stats64);
1545 spin_unlock(&adapter->stats64_lock);
1547 adapter->link_speed = 0;
1548 adapter->link_duplex = 0;
1550 if (!pci_channel_offline(adapter->pdev))
1552 igb_clean_all_tx_rings(adapter);
1553 igb_clean_all_rx_rings(adapter);
1554 #ifdef CONFIG_IGB_DCA
1556 /* since we reset the hardware DCA settings were cleared */
1557 igb_setup_dca(adapter);
1561 void igb_reinit_locked(struct igb_adapter *adapter)
1563 WARN_ON(in_interrupt());
1564 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1568 clear_bit(__IGB_RESETTING, &adapter->state);
1571 void igb_reset(struct igb_adapter *adapter)
1573 struct pci_dev *pdev = adapter->pdev;
1574 struct e1000_hw *hw = &adapter->hw;
1575 struct e1000_mac_info *mac = &hw->mac;
1576 struct e1000_fc_info *fc = &hw->fc;
1577 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1580 /* Repartition Pba for greater than 9k mtu
1581 * To take effect CTRL.RST is required.
1583 switch (mac->type) {
1586 pba = rd32(E1000_RXPBS);
1587 pba = igb_rxpbs_adjust_82580(pba);
1590 pba = rd32(E1000_RXPBS);
1591 pba &= E1000_RXPBS_SIZE_MASK_82576;
1595 pba = E1000_PBA_34K;
1599 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1600 (mac->type < e1000_82576)) {
1601 /* adjust PBA for jumbo frames */
1602 wr32(E1000_PBA, pba);
1604 /* To maintain wire speed transmits, the Tx FIFO should be
1605 * large enough to accommodate two full transmit packets,
1606 * rounded up to the next 1KB and expressed in KB. Likewise,
1607 * the Rx FIFO should be large enough to accommodate at least
1608 * one full receive packet and is similarly rounded up and
1609 * expressed in KB. */
1610 pba = rd32(E1000_PBA);
1611 /* upper 16 bits has Tx packet buffer allocation size in KB */
1612 tx_space = pba >> 16;
1613 /* lower 16 bits has Rx packet buffer allocation size in KB */
1615 /* the tx fifo also stores 16 bytes of information about the tx
1616 * but don't include ethernet FCS because hardware appends it */
1617 min_tx_space = (adapter->max_frame_size +
1618 sizeof(union e1000_adv_tx_desc) -
1620 min_tx_space = ALIGN(min_tx_space, 1024);
1621 min_tx_space >>= 10;
1622 /* software strips receive CRC, so leave room for it */
1623 min_rx_space = adapter->max_frame_size;
1624 min_rx_space = ALIGN(min_rx_space, 1024);
1625 min_rx_space >>= 10;
1627 /* If current Tx allocation is less than the min Tx FIFO size,
1628 * and the min Tx FIFO size is less than the current Rx FIFO
1629 * allocation, take space away from current Rx allocation */
1630 if (tx_space < min_tx_space &&
1631 ((min_tx_space - tx_space) < pba)) {
1632 pba = pba - (min_tx_space - tx_space);
1634 /* if short on rx space, rx wins and must trump tx
1636 if (pba < min_rx_space)
1639 wr32(E1000_PBA, pba);
1642 /* flow control settings */
1643 /* The high water mark must be low enough to fit one full frame
1644 * (or the size used for early receive) above it in the Rx FIFO.
1645 * Set it to the lower of:
1646 * - 90% of the Rx FIFO size, or
1647 * - the full Rx FIFO size minus one full frame */
1648 hwm = min(((pba << 10) * 9 / 10),
1649 ((pba << 10) - 2 * adapter->max_frame_size));
1651 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1652 fc->low_water = fc->high_water - 16;
1653 fc->pause_time = 0xFFFF;
1655 fc->current_mode = fc->requested_mode;
1657 /* disable receive for all VFs and wait one second */
1658 if (adapter->vfs_allocated_count) {
1660 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1661 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1663 /* ping all the active vfs to let them know we are going down */
1664 igb_ping_all_vfs(adapter);
1666 /* disable transmits and receives */
1667 wr32(E1000_VFRE, 0);
1668 wr32(E1000_VFTE, 0);
1671 /* Allow time for pending master requests to run */
1672 hw->mac.ops.reset_hw(hw);
1675 if (hw->mac.ops.init_hw(hw))
1676 dev_err(&pdev->dev, "Hardware Error\n");
1678 if (hw->mac.type == e1000_82580) {
1679 u32 reg = rd32(E1000_PCIEMISC);
1680 wr32(E1000_PCIEMISC,
1681 reg & ~E1000_PCIEMISC_LX_DECISION);
1683 if (!netif_running(adapter->netdev))
1684 igb_power_down_link(adapter);
1686 igb_update_mng_vlan(adapter);
1688 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1689 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1691 igb_get_phy_info(hw);
1694 static const struct net_device_ops igb_netdev_ops = {
1695 .ndo_open = igb_open,
1696 .ndo_stop = igb_close,
1697 .ndo_start_xmit = igb_xmit_frame_adv,
1698 .ndo_get_stats64 = igb_get_stats64,
1699 .ndo_set_rx_mode = igb_set_rx_mode,
1700 .ndo_set_multicast_list = igb_set_rx_mode,
1701 .ndo_set_mac_address = igb_set_mac,
1702 .ndo_change_mtu = igb_change_mtu,
1703 .ndo_do_ioctl = igb_ioctl,
1704 .ndo_tx_timeout = igb_tx_timeout,
1705 .ndo_validate_addr = eth_validate_addr,
1706 .ndo_vlan_rx_register = igb_vlan_rx_register,
1707 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1708 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1709 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1710 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1711 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1712 .ndo_get_vf_config = igb_ndo_get_vf_config,
1713 #ifdef CONFIG_NET_POLL_CONTROLLER
1714 .ndo_poll_controller = igb_netpoll,
/* NOTE(review): this function was extracted with embedded line-number
 * artifacts and with many lines (braces, error checks, goto labels)
 * missing.  Code is kept byte-identical; only review comments added.
 * Restore from the upstream igb driver before compiling. */
1719 * igb_probe - Device Initialization Routine
1720 * @pdev: PCI device information struct
1721 * @ent: entry in igb_pci_tbl
1723 * Returns 0 on success, negative on failure
1725 * igb_probe initializes an adapter identified by a pci_dev structure.
1726 * The OS initialization, configuring of the adapter private structure,
1727 * and a hardware reset occur.
1729 static int __devinit igb_probe(struct pci_dev *pdev,
1730 const struct pci_device_id *ent)
1732 struct net_device *netdev;
1733 struct igb_adapter *adapter;
1734 struct e1000_hw *hw;
1735 u16 eeprom_data = 0;
1737 static int global_quad_port_a; /* global quad port a indication */
1738 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1739 unsigned long mmio_start, mmio_len;
1740 int err, pci_using_dac;
1741 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1742 u8 part_str[E1000_PBANUM_LENGTH];
1744 /* Catch broken hardware that put the wrong VF device ID in
1745 * the PCIe SR-IOV capability.
1747 if (pdev->is_virtfn) {
1748 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1749 pci_name(pdev), pdev->vendor, pdev->device);
1753 err = pci_enable_device_mem(pdev);
/* DMA setup: try 64-bit mask first, fall back to 32-bit */
1758 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1760 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1764 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1766 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1768 dev_err(&pdev->dev, "No usable DMA "
1769 "configuration, aborting\n");
1775 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1781 pci_enable_pcie_error_reporting(pdev);
1783 pci_set_master(pdev);
1784 pci_save_state(pdev);
/* allocate netdev with room for the adapter private area */
1787 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1788 IGB_ABS_MAX_TX_QUEUES);
1790 goto err_alloc_etherdev;
1792 SET_NETDEV_DEV(netdev, &pdev->dev);
1794 pci_set_drvdata(pdev, netdev);
1795 adapter = netdev_priv(netdev);
1796 adapter->netdev = netdev;
1797 adapter->pdev = pdev;
1800 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
/* map BAR 0 registers; hw = &adapter->hw is assigned on a missing line */
1802 mmio_start = pci_resource_start(pdev, 0);
1803 mmio_len = pci_resource_len(pdev, 0);
1806 hw->hw_addr = ioremap(mmio_start, mmio_len);
1810 netdev->netdev_ops = &igb_netdev_ops;
1811 igb_set_ethtool_ops(netdev);
1812 netdev->watchdog_timeo = 5 * HZ;
1814 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1816 netdev->mem_start = mmio_start;
1817 netdev->mem_end = mmio_start + mmio_len;
1819 /* PCI config space info */
1820 hw->vendor_id = pdev->vendor;
1821 hw->device_id = pdev->device;
1822 hw->revision_id = pdev->revision;
1823 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1824 hw->subsystem_device_id = pdev->subsystem_device;
1826 /* Copy the default MAC, PHY and NVM function pointers */
1827 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1828 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1829 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1830 /* Initialize skew-specific constants */
1831 err = ei->get_invariants(hw);
1835 /* setup the private structure */
1836 err = igb_sw_init(adapter);
1840 igb_get_bus_info_pcie(hw);
1842 hw->phy.autoneg_wait_to_complete = false;
1844 /* Copper options */
1845 if (hw->phy.media_type == e1000_media_type_copper) {
1846 hw->phy.mdix = AUTO_ALL_MODES;
1847 hw->phy.disable_polarity_correction = false;
1848 hw->phy.ms_type = e1000_ms_hw_default;
1851 if (igb_check_reset_block(hw))
1852 dev_info(&pdev->dev,
1853 "PHY reset is blocked due to SOL/IDER session.\n");
/* advertise offload features to the stack */
1855 netdev->features = NETIF_F_SG |
1857 NETIF_F_HW_VLAN_TX |
1858 NETIF_F_HW_VLAN_RX |
1859 NETIF_F_HW_VLAN_FILTER;
1861 netdev->features |= NETIF_F_IPV6_CSUM;
1862 netdev->features |= NETIF_F_TSO;
1863 netdev->features |= NETIF_F_TSO6;
1864 netdev->features |= NETIF_F_GRO;
1866 netdev->vlan_features |= NETIF_F_TSO;
1867 netdev->vlan_features |= NETIF_F_TSO6;
1868 netdev->vlan_features |= NETIF_F_IP_CSUM;
1869 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1870 netdev->vlan_features |= NETIF_F_SG;
1872 if (pci_using_dac) {
1873 netdev->features |= NETIF_F_HIGHDMA;
1874 netdev->vlan_features |= NETIF_F_HIGHDMA;
1877 if (hw->mac.type >= e1000_82576)
1878 netdev->features |= NETIF_F_SCTP_CSUM;
1880 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1882 /* before reading the NVM, reset the controller to put the device in a
1883 * known good starting state */
1884 hw->mac.ops.reset_hw(hw);
1886 /* make sure the NVM is good */
1887 if (igb_validate_nvm_checksum(hw) < 0) {
1888 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1893 /* copy the MAC address out of the NVM */
1894 if (hw->mac.ops.read_mac_addr(hw))
1895 dev_err(&pdev->dev, "NVM Read Error\n");
1897 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1898 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1900 if (!is_valid_ether_addr(netdev->perm_addr)) {
1901 dev_err(&pdev->dev, "Invalid MAC Address\n");
1906 setup_timer(&adapter->watchdog_timer, igb_watchdog,
1907 (unsigned long) adapter);
1908 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1909 (unsigned long) adapter);
1911 INIT_WORK(&adapter->reset_task, igb_reset_task);
1912 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1914 /* Initialize link properties that are user-changeable */
1915 adapter->fc_autoneg = true;
1916 hw->mac.autoneg = true;
1917 hw->phy.autoneg_advertised = 0x2f;
1919 hw->fc.requested_mode = e1000_fc_default;
1920 hw->fc.current_mode = e1000_fc_default;
1922 igb_validate_mdi_setting(hw);
1924 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
1925 * enable the ACPI Magic Packet filter
1928 if (hw->bus.func == 0)
1929 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1930 else if (hw->mac.type == e1000_82580)
1931 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1932 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1934 else if (hw->bus.func == 1)
1935 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1937 if (eeprom_data & eeprom_apme_mask)
1938 adapter->eeprom_wol |= E1000_WUFC_MAG;
1940 /* now that we have the eeprom settings, apply the special cases where
1941 * the eeprom may be wrong or the board simply won't support wake on
1942 * lan on a particular port */
1943 switch (pdev->device) {
1944 case E1000_DEV_ID_82575GB_QUAD_COPPER:
1945 adapter->eeprom_wol = 0;
1947 case E1000_DEV_ID_82575EB_FIBER_SERDES:
1948 case E1000_DEV_ID_82576_FIBER:
1949 case E1000_DEV_ID_82576_SERDES:
1950 /* Wake events only supported on port A for dual fiber
1951 * regardless of eeprom setting */
1952 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1953 adapter->eeprom_wol = 0;
1955 case E1000_DEV_ID_82576_QUAD_COPPER:
1956 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
1957 /* if quad port adapter, disable WoL on all but port A */
1958 if (global_quad_port_a != 0)
1959 adapter->eeprom_wol = 0;
1961 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1962 /* Reset for multiple quad port adapters */
1963 if (++global_quad_port_a == 4)
1964 global_quad_port_a = 0;
1968 /* initialize the wol settings based on the eeprom settings */
1969 adapter->wol = adapter->eeprom_wol;
1970 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1972 /* reset the hardware with the new settings */
1975 /* let the f/w know that the h/w is now under the control of the
1977 igb_get_hw_control(adapter);
1979 strcpy(netdev->name, "eth%d");
1980 err = register_netdev(netdev);
1984 /* carrier off reporting is important to ethtool even BEFORE open */
1985 netif_carrier_off(netdev);
1987 #ifdef CONFIG_IGB_DCA
1988 if (dca_add_requester(&pdev->dev) == 0) {
1989 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1990 dev_info(&pdev->dev, "DCA enabled\n");
1991 igb_setup_dca(adapter);
1995 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1996 /* print bus type/speed/width info */
1997 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1999 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2000 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2002 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2003 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2004 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2008 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2010 strcpy(part_str, "Unknown");
2011 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2012 dev_info(&pdev->dev,
2013 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2014 adapter->msix_entries ? "MSI-X" :
2015 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2016 adapter->num_rx_queues, adapter->num_tx_queues);
/* error-path unwind: labels between these lines are missing from the
 * extraction (err_register, err_eeprom, err_sw_init, err_ioremap, ...) */
2021 igb_release_hw_control(adapter);
2023 if (!igb_check_reset_block(hw))
2026 if (hw->flash_address)
2027 iounmap(hw->flash_address);
2029 igb_clear_interrupt_scheme(adapter);
2030 iounmap(hw->hw_addr);
2032 free_netdev(netdev);
2034 pci_release_selected_regions(pdev,
2035 pci_select_bars(pdev, IORESOURCE_MEM));
2038 pci_disable_device(pdev);
2043 * igb_remove - Device Removal Routine
2044 * @pdev: PCI device information struct
2046 * igb_remove is called by the PCI subsystem to alert the driver
2047 * that it should release a PCI device. The could be caused by a
2048 * Hot-Plug event, or because the driver is going to be removed from
2051 static void __devexit igb_remove(struct pci_dev *pdev)
2053 struct net_device *netdev = pci_get_drvdata(pdev);
2054 struct igb_adapter *adapter = netdev_priv(netdev);
2055 struct e1000_hw *hw = &adapter->hw;
2058 * The watchdog timer may be rescheduled, so explicitly
2059 * disable watchdog from being rescheduled.
2061 set_bit(__IGB_DOWN, &adapter->state);
2062 del_timer_sync(&adapter->watchdog_timer);
2063 del_timer_sync(&adapter->phy_info_timer);
2065 cancel_work_sync(&adapter->reset_task);
2066 cancel_work_sync(&adapter->watchdog_task);
2068 #ifdef CONFIG_IGB_DCA
2069 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2070 dev_info(&pdev->dev, "DCA disabled\n");
2071 dca_remove_requester(&pdev->dev);
2072 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2073 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2077 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2078 * would have already happened in close and is redundant. */
2079 igb_release_hw_control(adapter);
2081 unregister_netdev(netdev);
2083 igb_clear_interrupt_scheme(adapter);
2085 #ifdef CONFIG_PCI_IOV
2086 /* reclaim resources allocated to VFs */
2087 if (adapter->vf_data) {
2088 /* disable iov and allow time for transactions to clear */
2089 pci_disable_sriov(pdev);
2092 kfree(adapter->vf_data);
2093 adapter->vf_data = NULL;
2094 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2096 dev_info(&pdev->dev, "IOV Disabled\n");
2100 iounmap(hw->hw_addr);
2101 if (hw->flash_address)
2102 iounmap(hw->flash_address);
2103 pci_release_selected_regions(pdev,
2104 pci_select_bars(pdev, IORESOURCE_MEM));
2106 free_netdev(netdev);
2108 pci_disable_pcie_error_reporting(pdev);
2110 pci_disable_device(pdev);
2114 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2115 * @adapter: board private structure to initialize
2117 * This function initializes the vf specific data storage and then attempts to
2118 * allocate the VFs. The reason for ordering it this way is because it is much
2119 * mor expensive time wise to disable SR-IOV than it is to allocate and free
2120 * the memory for the VFs.
2122 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2124 #ifdef CONFIG_PCI_IOV
2125 struct pci_dev *pdev = adapter->pdev;
2127 if (adapter->vfs_allocated_count) {
2128 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2129 sizeof(struct vf_data_storage),
2131 /* if allocation failed then we do not support SR-IOV */
2132 if (!adapter->vf_data) {
2133 adapter->vfs_allocated_count = 0;
2134 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2139 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2140 kfree(adapter->vf_data);
2141 adapter->vf_data = NULL;
2142 #endif /* CONFIG_PCI_IOV */
2143 adapter->vfs_allocated_count = 0;
2144 #ifdef CONFIG_PCI_IOV
2146 unsigned char mac_addr[ETH_ALEN];
2148 dev_info(&pdev->dev, "%d vfs allocated\n",
2149 adapter->vfs_allocated_count);
2150 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2151 random_ether_addr(mac_addr);
2152 igb_set_vf_mac(adapter, i, mac_addr);
2155 #endif /* CONFIG_PCI_IOV */
2160 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2161 * @adapter: board private structure to initialize
2163 * igb_init_hw_timer initializes the function pointer and values for the hw
2164 * timer found in hardware.
2166 static void igb_init_hw_timer(struct igb_adapter *adapter)
2168 struct e1000_hw *hw = &adapter->hw;
2170 switch (hw->mac.type) {
2173 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2174 adapter->cycles.read = igb_read_clock;
2175 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2176 adapter->cycles.mult = 1;
2178 * The 82580 timesync updates the system timer every 8ns by 8ns
2179 * and the value cannot be shifted. Instead we need to shift
2180 * the registers to generate a 64bit timer value. As a result
2181 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2182 * 24 in order to generate a larger value for synchronization.
2184 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2185 /* disable system timer temporarily by setting bit 31 */
2186 wr32(E1000_TSAUXC, 0x80000000);
2189 /* Set registers so that rollover occurs soon to test this. */
2190 wr32(E1000_SYSTIMR, 0x00000000);
2191 wr32(E1000_SYSTIML, 0x80000000);
2192 wr32(E1000_SYSTIMH, 0x000000FF);
2195 /* enable system timer by clearing bit 31 */
2196 wr32(E1000_TSAUXC, 0x0);
2199 timecounter_init(&adapter->clock,
2201 ktime_to_ns(ktime_get_real()));
2203 * Synchronize our NIC clock against system wall clock. NIC
2204 * time stamp reading requires ~3us per sample, each sample
2205 * was pretty stable even under load => only require 10
2206 * samples for each offset comparison.
2208 memset(&adapter->compare, 0, sizeof(adapter->compare));
2209 adapter->compare.source = &adapter->clock;
2210 adapter->compare.target = ktime_get_real;
2211 adapter->compare.num_samples = 10;
2212 timecompare_update(&adapter->compare, 0);
2216 * Initialize hardware timer: we keep it running just in case
2217 * that some program needs it later on.
2219 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2220 adapter->cycles.read = igb_read_clock;
2221 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2222 adapter->cycles.mult = 1;
2224 * Scale the NIC clock cycle by a large factor so that
2225 * relatively small clock corrections can be added or
2226 * substracted at each clock tick. The drawbacks of a large
2227 * factor are a) that the clock register overflows more quickly
2228 * (not such a big deal) and b) that the increment per tick has
2229 * to fit into 24 bits. As a result we need to use a shift of
2230 * 19 so we can fit a value of 16 into the TIMINCA register.
2232 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2234 (1 << E1000_TIMINCA_16NS_SHIFT) |
2235 (16 << IGB_82576_TSYNC_SHIFT));
2237 /* Set registers so that rollover occurs soon to test this. */
2238 wr32(E1000_SYSTIML, 0x00000000);
2239 wr32(E1000_SYSTIMH, 0xFF800000);
2242 timecounter_init(&adapter->clock,
2244 ktime_to_ns(ktime_get_real()));
2246 * Synchronize our NIC clock against system wall clock. NIC
2247 * time stamp reading requires ~3us per sample, each sample
2248 * was pretty stable even under load => only require 10
2249 * samples for each offset comparison.
2251 memset(&adapter->compare, 0, sizeof(adapter->compare));
2252 adapter->compare.source = &adapter->clock;
2253 adapter->compare.target = ktime_get_real;
2254 adapter->compare.num_samples = 10;
2255 timecompare_update(&adapter->compare, 0);
2258 /* 82575 does not support timesync */
2266 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2267 * @adapter: board private structure to initialize
2269 * igb_sw_init initializes the Adapter private data structure.
2270 * Fields are initialized based on PCI device information and
2271 * OS network device settings (MTU size).
2273 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2275 struct e1000_hw *hw = &adapter->hw;
2276 struct net_device *netdev = adapter->netdev;
2277 struct pci_dev *pdev = adapter->pdev;
/* Cache the PCI command word (presumably restored later on reset — TODO confirm against full source). */
2279 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
/* Default descriptor ring sizes and interrupt throttle rates. */
2281 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2282 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2283 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2284 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
/* Frame-size bounds derived from the current MTU plus L2 header/FCS overhead. */
2286 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2287 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2289 spin_lock_init(&adapter->stats64_lock);
2290 #ifdef CONFIG_PCI_IOV
/* SR-IOV: clamp the requested VF count to the hardware maximum of 7.
 * NOTE(review): switch cases appear omitted from this excerpt. */
2291 switch (hw->mac.type) {
2295 dev_warn(&pdev->dev,
2296 "Maximum of 7 VFs per PF, using max\n");
2297 adapter->vfs_allocated_count = 7;
2299 adapter->vfs_allocated_count = max_vfs;
2304 #endif /* CONFIG_PCI_IOV */
/* One RSS queue per online CPU, capped by hardware limits. */
2305 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2308 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2309 * then we should combine the queues into a queue pair in order to
2310 * conserve interrupts due to limited supply
2312 if ((adapter->rss_queues > 4) ||
2313 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2314 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2316 /* This call may decrease the number of queues */
2317 if (igb_init_interrupt_scheme(adapter)) {
2318 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2322 igb_init_hw_timer(adapter);
2323 igb_probe_vfs(adapter);
2325 /* Explicitly disable IRQ since the NIC can be in any state. */
2326 igb_irq_disable(adapter);
/* Interface starts administratively down until igb_open(). */
2328 set_bit(__IGB_DOWN, &adapter->state);
2333 * igb_open - Called when a network interface is made active
2334 * @netdev: network interface device structure
2336 * Returns 0 on success, negative value on failure
2338 * The open entry point is called when a network interface is made
2339 * active by the system (IFF_UP). At this point all resources needed
2340 * for transmit and receive operations are allocated, the interrupt
2341 * handler is registered with the OS, the watchdog timer is started,
2342 * and the stack is notified that the interface is ready.
2344 static int igb_open(struct net_device *netdev)
2346 struct igb_adapter *adapter = netdev_priv(netdev);
2347 struct e1000_hw *hw = &adapter->hw;
2351 /* disallow open during test */
2352 if (test_bit(__IGB_TESTING, &adapter->state))
/* Report no-carrier until the watchdog establishes link. */
2355 netif_carrier_off(netdev);
2357 /* allocate transmit descriptors */
2358 err = igb_setup_all_tx_resources(adapter);
2362 /* allocate receive descriptors */
2363 err = igb_setup_all_rx_resources(adapter);
2367 igb_power_up_link(adapter);
2369 /* before we allocate an interrupt, we must be ready to handle it.
2370 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2371 * as soon as we call pci_request_irq, so we have to setup our
2372 * clean_rx handler before we do so. */
2373 igb_configure(adapter);
2375 err = igb_request_irq(adapter);
2379 /* From here on the code is the same as igb_up() */
2380 clear_bit(__IGB_DOWN, &adapter->state);
/* Enable NAPI polling on every queue vector before unmasking IRQs. */
2382 for (i = 0; i < adapter->num_q_vectors; i++) {
2383 struct igb_q_vector *q_vector = adapter->q_vector[i];
2384 napi_enable(&q_vector->napi);
2387 /* Clear any pending interrupts. */
2390 igb_irq_enable(adapter);
2392 /* notify VFs that reset has been completed */
2393 if (adapter->vfs_allocated_count) {
2394 u32 reg_data = rd32(E1000_CTRL_EXT);
2395 reg_data |= E1000_CTRL_EXT_PFRSTD;
2396 wr32(E1000_CTRL_EXT, reg_data);
2399 netif_tx_start_all_queues(netdev);
2401 /* start the watchdog. */
2402 hw->mac.get_link_status = 1;
2403 schedule_work(&adapter->watchdog_task);
/* Error unwind path — frees resources in reverse order of acquisition.
 * NOTE(review): the goto labels are omitted from this excerpt. */
2408 igb_release_hw_control(adapter);
2409 igb_power_down_link(adapter);
2410 igb_free_all_rx_resources(adapter);
2412 igb_free_all_tx_resources(adapter);
2420 * igb_close - Disables a network interface
2421 * @netdev: network interface device structure
2423 * Returns 0, this is not allowed to fail
2425 * The close entry point is called when an interface is de-activated
2426 * by the OS. The hardware is still under the driver's control, but
2427 * needs to be disabled. A global MAC reset is issued to stop the
2428 * hardware, and all transmit and receive resources are freed.
2430 static int igb_close(struct net_device *netdev)
2432 struct igb_adapter *adapter = netdev_priv(netdev);
/* Close must not race with a reset in flight. */
2434 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2437 igb_free_irq(adapter);
/* Release descriptor rings and buffer bookkeeping for all queues. */
2439 igb_free_all_tx_resources(adapter);
2440 igb_free_all_rx_resources(adapter);
2446 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2447 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2449 * Return 0 on success, negative on failure
2451 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2453 struct device *dev = tx_ring->dev;
/* Per-descriptor software bookkeeping array, zeroed by vzalloc(). */
2456 size = sizeof(struct igb_buffer) * tx_ring->count;
2457 tx_ring->buffer_info = vzalloc(size);
2458 if (!tx_ring->buffer_info)
2461 /* round up to nearest 4K */
2462 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2463 tx_ring->size = ALIGN(tx_ring->size, 4096);
/* DMA-coherent descriptor ring shared with the NIC. */
2465 tx_ring->desc = dma_alloc_coherent(dev,
2473 tx_ring->next_to_use = 0;
2474 tx_ring->next_to_clean = 0;
/* Error path: undo the vzalloc() before reporting failure. */
2478 vfree(tx_ring->buffer_info);
2480 "Unable to allocate memory for the transmit descriptor ring\n");
2485 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2486 * (Descriptors) for all queues
2487 * @adapter: board private structure
2489 * Return 0 on success, negative on failure
2491 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2493 struct pci_dev *pdev = adapter->pdev;
2496 for (i = 0; i < adapter->num_tx_queues; i++) {
2497 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2500 "Allocation for Tx Queue %u failed\n", i);
/* On failure, roll back every ring allocated so far. */
2501 for (i--; i >= 0; i--)
2502 igb_free_tx_resources(adapter->tx_ring[i]);
/* Map every possible stack queue index onto a real ring (round-robin). */
2507 for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2508 int r_idx = i % adapter->num_tx_queues;
2509 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2515 * igb_setup_tctl - configure the transmit control registers
2516 * @adapter: Board private structure
2518 void igb_setup_tctl(struct igb_adapter *adapter)
2520 struct e1000_hw *hw = &adapter->hw;
2523 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2524 wr32(E1000_TXDCTL(0), 0);
2526 /* Program the Transmit Control Register */
2527 tctl = rd32(E1000_TCTL);
/* Clear collision-threshold field, then set pad-short-packets,
 * retransmit-on-late-collision and the collision threshold. */
2528 tctl &= ~E1000_TCTL_CT;
2529 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2530 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2532 igb_config_collision_dist(hw);
2534 /* Enable transmits */
2535 tctl |= E1000_TCTL_EN;
2537 wr32(E1000_TCTL, tctl);
2541 * igb_configure_tx_ring - Configure transmit ring after Reset
2542 * @adapter: board private structure
2543 * @ring: tx ring to configure
2545 * Configure a transmit ring after a reset.
2547 void igb_configure_tx_ring(struct igb_adapter *adapter,
2548 struct igb_ring *ring)
2550 struct e1000_hw *hw = &adapter->hw;
2552 u64 tdba = ring->dma;
2553 int reg_idx = ring->reg_idx;
2555 /* disable the queue */
2556 txdctl = rd32(E1000_TXDCTL(reg_idx));
2557 wr32(E1000_TXDCTL(reg_idx),
2558 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
/* Program ring length and the 64-bit DMA base address (low/high halves). */
2562 wr32(E1000_TDLEN(reg_idx),
2563 ring->count * sizeof(union e1000_adv_tx_desc));
2564 wr32(E1000_TDBAL(reg_idx),
2565 tdba & 0x00000000ffffffffULL);
2566 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
/* Cache MMIO head/tail pointers and reset both to descriptor 0. */
2568 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2569 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2570 writel(0, ring->head);
2571 writel(0, ring->tail);
/* Prefetch/host/writeback thresholds packed into TXDCTL bit fields. */
2573 txdctl |= IGB_TX_PTHRESH;
2574 txdctl |= IGB_TX_HTHRESH << 8;
2575 txdctl |= IGB_TX_WTHRESH << 16;
2577 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2578 wr32(E1000_TXDCTL(reg_idx), txdctl);
2582 * igb_configure_tx - Configure transmit Unit after Reset
2583 * @adapter: board private structure
2585 * Configure the Tx unit of the MAC after a reset.
2587 static void igb_configure_tx(struct igb_adapter *adapter)
/* Delegate per-ring setup for each active Tx queue. */
2591 for (i = 0; i < adapter->num_tx_queues; i++)
2592 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2596 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2597 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2599 * Returns 0 on success, negative on failure
2601 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2603 struct device *dev = rx_ring->dev;
/* Per-descriptor software bookkeeping array, zeroed by vzalloc(). */
2606 size = sizeof(struct igb_buffer) * rx_ring->count;
2607 rx_ring->buffer_info = vzalloc(size);
2608 if (!rx_ring->buffer_info)
2611 desc_len = sizeof(union e1000_adv_rx_desc);
2613 /* Round up to nearest 4K */
2614 rx_ring->size = rx_ring->count * desc_len;
2615 rx_ring->size = ALIGN(rx_ring->size, 4096);
/* DMA-coherent descriptor ring shared with the NIC. */
2617 rx_ring->desc = dma_alloc_coherent(dev,
2625 rx_ring->next_to_clean = 0;
2626 rx_ring->next_to_use = 0;
/* Error path: undo the vzalloc() before reporting failure. */
2631 vfree(rx_ring->buffer_info);
2632 rx_ring->buffer_info = NULL;
2633 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2639 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2640 * (Descriptors) for all queues
2641 * @adapter: board private structure
2643 * Return 0 on success, negative on failure
2645 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2647 struct pci_dev *pdev = adapter->pdev;
2650 for (i = 0; i < adapter->num_rx_queues; i++) {
2651 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2654 "Allocation for Rx Queue %u failed\n", i);
/* On failure, roll back every ring allocated so far. */
2655 for (i--; i >= 0; i--)
2656 igb_free_rx_resources(adapter->rx_ring[i]);
2665 * igb_setup_mrqc - configure the multiple receive queue control registers
2666 * @adapter: Board private structure
2668 static void igb_setup_mrqc(struct igb_adapter *adapter)
2670 struct e1000_hw *hw = &adapter->hw;
2672 u32 j, num_rx_queues, shift = 0, shift2 = 0;
/* Fixed 40-byte RSS hash key (Microsoft Toeplitz test key). */
2677 static const u8 rsshash[40] = {
2678 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2679 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2680 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2681 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2683 /* Fill out hash function seeds */
2684 for (j = 0; j < 10; j++) {
/* Pack four key bytes (little-endian) into each 32-bit RSSRK register. */
2685 u32 rsskey = rsshash[(j * 4)];
2686 rsskey |= rsshash[(j * 4) + 1] << 8;
2687 rsskey |= rsshash[(j * 4) + 2] << 16;
2688 rsskey |= rsshash[(j * 4) + 3] << 24;
2689 array_wr32(E1000_RSSRK(0), j, rsskey);
2692 num_rx_queues = adapter->rss_queues;
2694 if (adapter->vfs_allocated_count) {
2695 /* 82575 and 82576 supports 2 RSS queues for VMDq */
/* NOTE(review): per-MAC switch cases appear omitted from this excerpt. */
2696 switch (hw->mac.type) {
2713 if (hw->mac.type == e1000_82575)
/* Populate the 128-entry redirection table, four entries per RETA register. */
2717 for (j = 0; j < (32 * 4); j++) {
2718 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2720 reta.bytes[j & 3] |= num_rx_queues << shift2;
2722 wr32(E1000_RETA(j >> 2), reta.dword);
2726 * Disable raw packet checksumming so that RSS hash is placed in
2727 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2728 * offloads as they are enabled by default
2730 rxcsum = rd32(E1000_RXCSUM);
2731 rxcsum |= E1000_RXCSUM_PCSD;
2733 if (adapter->hw.mac.type >= e1000_82576)
2734 /* Enable Receive Checksum Offload for SCTP */
2735 rxcsum |= E1000_RXCSUM_CRCOFL;
2737 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2738 wr32(E1000_RXCSUM, rxcsum);
2740 /* If VMDq is enabled then we set the appropriate mode for that, else
2741 * we default to RSS so that an RSS hash is calculated per packet even
2742 * if we are only using one queue */
2743 if (adapter->vfs_allocated_count) {
2744 if (hw->mac.type > e1000_82575) {
2745 /* Set the default pool for the PF's first queue */
2746 u32 vtctl = rd32(E1000_VT_CTL);
2747 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2748 E1000_VT_CTL_DISABLE_DEF_POOL);
2749 vtctl |= adapter->vfs_allocated_count <<
2750 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2751 wr32(E1000_VT_CTL, vtctl);
2753 if (adapter->rss_queues > 1)
2754 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2756 mrqc = E1000_MRQC_ENABLE_VMDQ;
2758 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2760 igb_vmm_control(adapter);
2763 * Generate RSS hash based on TCP port numbers and/or
2764 * IPv4/v6 src and dst addresses since UDP cannot be
2765 * hashed reliably due to IP fragmentation
2767 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2768 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2769 E1000_MRQC_RSS_FIELD_IPV6 |
2770 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2771 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2773 wr32(E1000_MRQC, mrqc);
2777 * igb_setup_rctl - configure the receive control registers
2778 * @adapter: Board private structure
2780 void igb_setup_rctl(struct igb_adapter *adapter)
2782 struct e1000_hw *hw = &adapter->hw;
2785 rctl = rd32(E1000_RCTL);
/* Clear multicast-offset field and any loopback mode bits. */
2787 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2788 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
/* Enable receiver, accept broadcast, half-threshold descriptor minimum,
 * and program the multicast filter offset from shared code. */
2790 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2791 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2794 * enable stripping of CRC. It's unlikely this will break BMC
2795 * redirection as it did with e1000. Newer features require
2796 * that the HW strips the CRC.
2798 rctl |= E1000_RCTL_SECRC;
2800 /* disable store bad packets and clear size bits. */
2801 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2803 /* enable LPE to prevent packets larger than max_frame_size */
2804 rctl |= E1000_RCTL_LPE;
2806 /* disable queue 0 to prevent tail write w/o re-config */
2807 wr32(E1000_RXDCTL(0), 0);
2809 /* Attention!!! For SR-IOV PF driver operations you must enable
2810 * queue drop for all VF and PF queues to prevent head of line blocking
2811 * if an un-trusted VF does not provide descriptors to hardware.
2813 if (adapter->vfs_allocated_count) {
2814 /* set all queue drop enable bits */
2815 wr32(E1000_QDE, ALL_QUEUES);
2818 wr32(E1000_RCTL, rctl);
/* igb_set_vf_rlpml - set the per-pool receive long packet maximum length
 * @adapter: board private structure
 * @size: maximum frame size for this pool
 * @vfn: pool/VF index whose VMOLR is programmed
 */
2821 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2824 struct e1000_hw *hw = &adapter->hw;
2827 /* if it isn't the PF check to see if VFs are enabled and
2828 * increase the size to support vlan tags */
2829 if (vfn < adapter->vfs_allocated_count &&
2830 adapter->vf_data[vfn].vlans_enabled)
2831 size += VLAN_TAG_SIZE;
/* Replace the RLPML field in VMOLR and enable long packets for this pool. */
2833 vmolr = rd32(E1000_VMOLR(vfn));
2834 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2835 vmolr |= size | E1000_VMOLR_LPE;
2836 wr32(E1000_VMOLR(vfn), vmolr);
2842 * igb_rlpml_set - set maximum receive packet size
2843 * @adapter: board private structure
2845 * Configure maximum receivable packet size.
2847 static void igb_rlpml_set(struct igb_adapter *adapter)
2849 u32 max_frame_size = adapter->max_frame_size;
2850 struct e1000_hw *hw = &adapter->hw;
/* The PF's pool index equals the number of allocated VFs. */
2851 u16 pf_id = adapter->vfs_allocated_count;
2854 max_frame_size += VLAN_TAG_SIZE;
2856 /* if vfs are enabled we set RLPML to the largest possible request
2857 * size and set the VMOLR RLPML to the size we need */
2859 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2860 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2863 wr32(E1000_RLPML, max_frame_size);
/* igb_set_vmolr - program VM offload register (VLAN strip, broadcast/RSS
 * acceptance) for a given pool
 * @adapter: board private structure
 * @vfn: pool/VF index
 */
2866 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2869 struct e1000_hw *hw = &adapter->hw;
2873 * This register exists only on 82576 and newer so if we are older then
2874 * we should exit and do nothing
2876 if (hw->mac.type < e1000_82576)
2879 vmolr = rd32(E1000_VMOLR(vfn));
2880 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
2882 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
2884 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2886 /* clear all bits that might not be set */
2887 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
/* Only the PF pool (index == vfs_allocated_count) gets RSS routing. */
2889 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2890 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2892 * for VMDq only allow the VFs and pool 0 to accept broadcast and
2895 if (vfn <= adapter->vfs_allocated_count)
2896 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
2898 wr32(E1000_VMOLR(vfn), vmolr);
2902 * igb_configure_rx_ring - Configure a receive ring after Reset
2903 * @adapter: board private structure
2904 * @ring: receive ring to be configured
2906 * Configure the Rx unit of the MAC after a reset.
2908 void igb_configure_rx_ring(struct igb_adapter *adapter,
2909 struct igb_ring *ring)
2911 struct e1000_hw *hw = &adapter->hw;
2912 u64 rdba = ring->dma;
2913 int reg_idx = ring->reg_idx;
2916 /* disable the queue */
2917 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2918 wr32(E1000_RXDCTL(reg_idx),
2919 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2921 /* Set DMA base address registers */
2922 wr32(E1000_RDBAL(reg_idx),
2923 rdba & 0x00000000ffffffffULL);
2924 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2925 wr32(E1000_RDLEN(reg_idx),
2926 ring->count * sizeof(union e1000_adv_rx_desc));
2928 /* initialize head and tail */
2929 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2930 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2931 writel(0, ring->head);
2932 writel(0, ring->tail);
2934 /* set descriptor configuration */
/* Small buffers: header-split mode — header size in BSIZEHDRSIZE,
 * packet half-page (capped at 16K) in BSIZEPKT. */
2935 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2936 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2937 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2938 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2939 srrctl |= IGB_RXBUFFER_16384 >>
2940 E1000_SRRCTL_BSIZEPKT_SHIFT;
2942 srrctl |= (PAGE_SIZE / 2) >>
2943 E1000_SRRCTL_BSIZEPKT_SHIFT;
2945 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
/* Large buffers: single-buffer advanced descriptor mode. */
2947 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2948 E1000_SRRCTL_BSIZEPKT_SHIFT;
2949 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2951 if (hw->mac.type == e1000_82580)
2952 srrctl |= E1000_SRRCTL_TIMESTAMP;
2953 /* Only set Drop Enable if we are supporting multiple queues */
2954 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2955 srrctl |= E1000_SRRCTL_DROP_EN;
2957 wr32(E1000_SRRCTL(reg_idx), srrctl);
2959 /* set filtering for VMDQ pools */
2960 igb_set_vmolr(adapter, reg_idx & 0x7, true);
2962 /* enable receive descriptor fetching */
2963 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2964 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
/* Keep reserved high bits, then set prefetch/host/writeback thresholds. */
2965 rxdctl &= 0xFFF00000;
2966 rxdctl |= IGB_RX_PTHRESH;
2967 rxdctl |= IGB_RX_HTHRESH << 8;
2968 rxdctl |= IGB_RX_WTHRESH << 16;
2969 wr32(E1000_RXDCTL(reg_idx), rxdctl);
2973 * igb_configure_rx - Configure receive Unit after Reset
2974 * @adapter: board private structure
2976 * Configure the Rx unit of the MAC after a reset.
2978 static void igb_configure_rx(struct igb_adapter *adapter)
2982 /* set UTA to appropriate mode */
2983 igb_set_uta(adapter);
2985 /* set the correct pool for the PF default MAC address in entry 0 */
2986 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2987 adapter->vfs_allocated_count);
2989 /* Setup the HW Rx Head and Tail Descriptor Pointers and
2990 * the Base and Length of the Rx Descriptor Ring */
2991 for (i = 0; i < adapter->num_rx_queues; i++)
2992 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2996 * igb_free_tx_resources - Free Tx Resources per Queue
2997 * @tx_ring: Tx descriptor ring for a specific queue
2999 * Free all transmit software resources
3001 void igb_free_tx_resources(struct igb_ring *tx_ring)
/* Unmap and release all in-flight buffers before freeing the ring itself. */
3003 igb_clean_tx_ring(tx_ring);
3005 vfree(tx_ring->buffer_info);
3006 tx_ring->buffer_info = NULL;
3008 /* if not set, then don't free */
3012 dma_free_coherent(tx_ring->dev, tx_ring->size,
3013 tx_ring->desc, tx_ring->dma);
/* NULL the pointer so a double free is a no-op, not a crash. */
3015 tx_ring->desc = NULL;
3019 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3020 * @adapter: board private structure
3022 * Free all transmit software resources
3024 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3028 for (i = 0; i < adapter->num_tx_queues; i++)
3029 igb_free_tx_resources(adapter->tx_ring[i]);
/* igb_unmap_and_free_tx_resource - release DMA mapping and skb for one
 * Tx buffer_info slot, then reset its bookkeeping fields
 * @tx_ring: ring owning the buffer
 * @buffer_info: slot to release
 */
3032 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3033 struct igb_buffer *buffer_info)
3035 if (buffer_info->dma) {
/* Page fragments and linear skb data were mapped differently and
 * must be unmapped with the matching API. */
3036 if (buffer_info->mapped_as_page)
3037 dma_unmap_page(tx_ring->dev,
3039 buffer_info->length,
3042 dma_unmap_single(tx_ring->dev,
3044 buffer_info->length,
3046 buffer_info->dma = 0;
3048 if (buffer_info->skb) {
3049 dev_kfree_skb_any(buffer_info->skb);
3050 buffer_info->skb = NULL;
3052 buffer_info->time_stamp = 0;
3053 buffer_info->length = 0;
3054 buffer_info->next_to_watch = 0;
3055 buffer_info->mapped_as_page = false;
3059 * igb_clean_tx_ring - Free Tx Buffers
3060 * @tx_ring: ring to be cleaned
3062 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3064 struct igb_buffer *buffer_info;
/* Nothing to do if the ring was never (or already un-) allocated. */
3068 if (!tx_ring->buffer_info)
3070 /* Free all the Tx ring sk_buffs */
3072 for (i = 0; i < tx_ring->count; i++) {
3073 buffer_info = &tx_ring->buffer_info[i];
3074 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3077 size = sizeof(struct igb_buffer) * tx_ring->count;
3078 memset(tx_ring->buffer_info, 0, size);
3080 /* Zero out the descriptor ring */
3081 memset(tx_ring->desc, 0, tx_ring->size);
3083 tx_ring->next_to_use = 0;
3084 tx_ring->next_to_clean = 0;
3088 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3089 * @adapter: board private structure
3091 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3095 for (i = 0; i < adapter->num_tx_queues; i++)
3096 igb_clean_tx_ring(adapter->tx_ring[i]);
3100 * igb_free_rx_resources - Free Rx Resources
3101 * @rx_ring: ring to clean the resources from
3103 * Free all receive software resources
3105 void igb_free_rx_resources(struct igb_ring *rx_ring)
/* Unmap and release all posted buffers before freeing the ring itself. */
3107 igb_clean_rx_ring(rx_ring);
3109 vfree(rx_ring->buffer_info);
3110 rx_ring->buffer_info = NULL;
3112 /* if not set, then don't free */
3116 dma_free_coherent(rx_ring->dev, rx_ring->size,
3117 rx_ring->desc, rx_ring->dma);
/* NULL the pointer so a double free is a no-op, not a crash. */
3119 rx_ring->desc = NULL;
3123 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3124 * @adapter: board private structure
3126 * Free all receive software resources
3128 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3132 for (i = 0; i < adapter->num_rx_queues; i++)
3133 igb_free_rx_resources(adapter->rx_ring[i]);
3137 * igb_clean_rx_ring - Free Rx Buffers per Queue
3138 * @rx_ring: ring to free buffers from
3140 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3142 struct igb_buffer *buffer_info;
/* Nothing to do if the ring was never (or already un-) allocated. */
3146 if (!rx_ring->buffer_info)
3149 /* Free all the Rx ring sk_buffs */
3150 for (i = 0; i < rx_ring->count; i++) {
3151 buffer_info = &rx_ring->buffer_info[i];
/* Each slot may hold any combination of a mapped linear buffer,
 * an skb, and a mapped half-page fragment — release each in turn. */
3152 if (buffer_info->dma) {
3153 dma_unmap_single(rx_ring->dev,
3155 rx_ring->rx_buffer_len,
3157 buffer_info->dma = 0;
3160 if (buffer_info->skb) {
3161 dev_kfree_skb(buffer_info->skb);
3162 buffer_info->skb = NULL;
3164 if (buffer_info->page_dma) {
3165 dma_unmap_page(rx_ring->dev,
3166 buffer_info->page_dma,
3169 buffer_info->page_dma = 0;
3171 if (buffer_info->page) {
3172 put_page(buffer_info->page);
3173 buffer_info->page = NULL;
3174 buffer_info->page_offset = 0;
3178 size = sizeof(struct igb_buffer) * rx_ring->count;
3179 memset(rx_ring->buffer_info, 0, size);
3181 /* Zero out the descriptor ring */
3182 memset(rx_ring->desc, 0, rx_ring->size);
3184 rx_ring->next_to_clean = 0;
3185 rx_ring->next_to_use = 0;
3189 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3190 * @adapter: board private structure
3192 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3196 for (i = 0; i < adapter->num_rx_queues; i++)
3197 igb_clean_rx_ring(adapter->rx_ring[i]);
3201 * igb_set_mac - Change the Ethernet Address of the NIC
3202 * @netdev: network interface device structure
3203 * @p: pointer to an address structure
3205 * Returns 0 on success, negative on failure
3207 static int igb_set_mac(struct net_device *netdev, void *p)
3209 struct igb_adapter *adapter = netdev_priv(netdev);
3210 struct e1000_hw *hw = &adapter->hw;
3211 struct sockaddr *addr = p;
/* Reject multicast/zero addresses before touching any state. */
3213 if (!is_valid_ether_addr(addr->sa_data))
3214 return -EADDRNOTAVAIL;
/* Keep the netdev and shared-code copies of the MAC in sync. */
3216 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3217 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3219 /* set the correct pool for the new PF MAC address in entry 0 */
3220 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3221 adapter->vfs_allocated_count);
3227 * igb_write_mc_addr_list - write multicast addresses to MTA
3228 * @netdev: network interface device structure
3230 * Writes multicast address list to the MTA hash table.
3231 * Returns: -ENOMEM on failure
3232 * 0 on no addresses written
3233 * X on writing X addresses to MTA
3235 static int igb_write_mc_addr_list(struct net_device *netdev)
3237 struct igb_adapter *adapter = netdev_priv(netdev);
3238 struct e1000_hw *hw = &adapter->hw;
3239 struct netdev_hw_addr *ha;
3243 if (netdev_mc_empty(netdev)) {
3244 /* nothing to program, so clear mc list */
3245 igb_update_mc_addr_list(hw, NULL, 0);
3246 igb_restore_vf_multicasts(adapter);
/* GFP_ATOMIC: this runs under the netdev addr-list context, so no sleeping.
 * 6 == ETH_ALEN bytes per multicast address. */
3250 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3254 /* The shared function expects a packed array of only addresses. */
3256 netdev_for_each_mc_addr(ha, netdev)
3257 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3259 igb_update_mc_addr_list(hw, mta_list, i);
3262 return netdev_mc_count(netdev);
3266 * igb_write_uc_addr_list - write unicast addresses to RAR table
3267 * @netdev: network interface device structure
3269 * Writes unicast address list to the RAR table.
3270 * Returns: -ENOMEM on failure/insufficient address space
3271 * 0 on no addresses written
3272 * X on writing X addresses to the RAR table
3274 static int igb_write_uc_addr_list(struct net_device *netdev)
3276 struct igb_adapter *adapter = netdev_priv(netdev);
3277 struct e1000_hw *hw = &adapter->hw;
3278 unsigned int vfn = adapter->vfs_allocated_count;
/* RAR entries reserved for the VFs and the PF default MAC are not usable. */
3279 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3282 /* return ENOMEM indicating insufficient memory for addresses */
3283 if (netdev_uc_count(netdev) > rar_entries)
3286 if (!netdev_uc_empty(netdev) && rar_entries) {
3287 struct netdev_hw_addr *ha;
3289 netdev_for_each_uc_addr(ha, netdev) {
3292 igb_rar_set_qsel(adapter, ha->addr,
3298 /* write the addresses in reverse order to avoid write combining */
/* Zero out every leftover RAR entry so stale filters cannot match. */
3299 for (; rar_entries > 0 ; rar_entries--) {
3300 wr32(E1000_RAH(rar_entries), 0);
3301 wr32(E1000_RAL(rar_entries), 0);
3309 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3310 * @netdev: network interface device structure
3312 * The set_rx_mode entry point is called whenever the unicast or multicast
3313 * address lists or the network interface flags are updated. This routine is
3314 * responsible for configuring the hardware for proper unicast, multicast,
3315 * promiscuous mode, and all-multi behavior.
3317 static void igb_set_rx_mode(struct net_device *netdev)
3319 struct igb_adapter *adapter = netdev_priv(netdev);
3320 struct e1000_hw *hw = &adapter->hw;
3321 unsigned int vfn = adapter->vfs_allocated_count;
3322 u32 rctl, vmolr = 0;
3325 /* Check for Promiscuous and All Multicast modes */
3326 rctl = rd32(E1000_RCTL);
3328 /* clear the affected bits */
3329 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3331 if (netdev->flags & IFF_PROMISC) {
3332 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3333 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3335 if (netdev->flags & IFF_ALLMULTI) {
3336 rctl |= E1000_RCTL_MPE;
3337 vmolr |= E1000_VMOLR_MPME;
3340 * Write addresses to the MTA, if the attempt fails
3341 * then we should just turn on promiscuous mode so
3342 * that we can at least receive multicast traffic
3344 count = igb_write_mc_addr_list(netdev);
3346 rctl |= E1000_RCTL_MPE;
3347 vmolr |= E1000_VMOLR_MPME;
3349 vmolr |= E1000_VMOLR_ROMPE;
3353 * Write addresses to available RAR registers, if there is not
3354 * sufficient space to store all the addresses then enable
3355 * unicast promiscuous mode
3357 count = igb_write_uc_addr_list(netdev);
3359 rctl |= E1000_RCTL_UPE;
3360 vmolr |= E1000_VMOLR_ROPE;
3362 rctl |= E1000_RCTL_VFE;
3364 wr32(E1000_RCTL, rctl);
3367 * In order to support SR-IOV and eventually VMDq it is necessary to set
3368 * the VMOLR to enable the appropriate modes. Without this workaround
3369 * we will have issues with VLAN tag stripping not being done for frames
3370 * that are only arriving because we are the default pool
3372 if (hw->mac.type < e1000_82576)
/* Merge the mode bits computed above into the PF pool's VMOLR. */
3375 vmolr |= rd32(E1000_VMOLR(vfn)) &
3376 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3377 wr32(E1000_VMOLR(vfn), vmolr);
3378 igb_restore_vf_multicasts(adapter);
/* igb_check_wvbr - accumulate Wrong VM Behavior (anti-spoof) events
 * @adapter: board private structure
 *
 * Reads the WVBR register and ORs any new event bits into adapter->wvbr
 * for later reporting by igb_spoof_check().
 */
3381 static void igb_check_wvbr(struct igb_adapter *adapter)
3383 struct e1000_hw *hw = &adapter->hw;
/* WVBR only exists on some MAC types; cases omitted from this excerpt. */
3386 switch (hw->mac.type) {
3389 if (!(wvbr = rd32(E1000_WVBR)))
3396 adapter->wvbr |= wvbr;
/* Tx and Rx wrong-behavior bits for a VF are 8 positions apart in WVBR. */
3399 #define IGB_STAGGERED_QUEUE_OFFSET 8
/* igb_spoof_check - report accumulated MAC/VLAN spoof events per VF
 * @adapter: board private structure
 */
3401 static void igb_spoof_check(struct igb_adapter *adapter)
3408 for(j = 0; j < adapter->vfs_allocated_count; j++) {
3409 if (adapter->wvbr & (1 << j) ||
3410 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3411 dev_warn(&adapter->pdev->dev,
3412 "Spoof event(s) detected on VF %d\n", j);
/* Clear the reported bits so each event is warned about only once. */
3415 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3420 /* Need to wait a few seconds after link up to get diagnostic information from
/* Timer callback (armed from the watchdog): refresh cached PHY info. */
3422 static void igb_update_phy_info(unsigned long data)
3424 struct igb_adapter *adapter = (struct igb_adapter *) data;
3425 igb_get_phy_info(&adapter->hw);
3429 * igb_has_link - check shared code for link and determine up/down
3430 * @adapter: pointer to driver private info
3432 bool igb_has_link(struct igb_adapter *adapter)
3434 struct e1000_hw *hw = &adapter->hw;
3435 bool link_active = false;
3438 /* get_link_status is set on LSC (link status) interrupt or
3439 * rx sequence error interrupt. get_link_status will stay
3440 * false until the e1000_check_for_link establishes link
3441 * for copper adapters ONLY
3443 switch (hw->phy.media_type) {
3444 case e1000_media_type_copper:
3445 if (hw->mac.get_link_status) {
/* check_for_link() clears get_link_status once link is confirmed. */
3446 ret_val = hw->mac.ops.check_for_link(hw);
3447 link_active = !hw->mac.get_link_status;
3452 case e1000_media_type_internal_serdes:
3453 ret_val = hw->mac.ops.check_for_link(hw);
3454 link_active = hw->mac.serdes_has_link;
3457 case e1000_media_type_unknown:
3465 * igb_watchdog - Timer Call-back
3466 * @data: pointer to adapter cast into an unsigned long
3468 static void igb_watchdog(unsigned long data)
3470 struct igb_adapter *adapter = (struct igb_adapter *)data;
3471 /* Do the rest outside of interrupt context */
3472 schedule_work(&adapter->watchdog_task);
/* igb_watchdog_task - periodic link/statistics/Tx-hang maintenance work
 * @work: watchdog work item embedded in struct igb_adapter
 *
 * Runs in process context (scheduled from the watchdog timer): detects link
 * transitions, updates stats, checks for stalled Tx rings, and re-arms the
 * timer while the interface is up.
 */
3475 static void igb_watchdog_task(struct work_struct *work)
3477 struct igb_adapter *adapter = container_of(work,
3480 struct e1000_hw *hw = &adapter->hw;
3481 struct net_device *netdev = adapter->netdev;
3485 link = igb_has_link(adapter);
/* Link came up while carrier was off: report speed/duplex/flow control. */
3487 if (!netif_carrier_ok(netdev)) {
3489 hw->mac.ops.get_speed_and_duplex(hw,
3490 &adapter->link_speed,
3491 &adapter->link_duplex);
3493 ctrl = rd32(E1000_CTRL);
3494 /* Links status message must follow this format */
3495 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3496 "Flow Control: %s\n",
3498 adapter->link_speed,
3499 adapter->link_duplex == FULL_DUPLEX ?
3500 "Full Duplex" : "Half Duplex",
3501 ((ctrl & E1000_CTRL_TFCE) &&
3502 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3503 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3504 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3506 /* adjust timeout factor according to speed/duplex */
3507 adapter->tx_timeout_factor = 1;
3508 switch (adapter->link_speed) {
3510 adapter->tx_timeout_factor = 14;
3513 /* maybe add some timeout factor ? */
3517 netif_carrier_on(netdev);
3519 igb_ping_all_vfs(adapter);
3520 igb_check_vf_rate_limit(adapter);
3522 /* link state has changed, schedule phy info update */
3523 if (!test_bit(__IGB_DOWN, &adapter->state))
3524 mod_timer(&adapter->phy_info_timer,
3525 round_jiffies(jiffies + 2 * HZ));
/* Link dropped while carrier was on: clear state and notify the stack. */
3528 if (netif_carrier_ok(netdev)) {
3529 adapter->link_speed = 0;
3530 adapter->link_duplex = 0;
3531 /* Links status message must follow this format */
3532 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3534 netif_carrier_off(netdev);
3536 igb_ping_all_vfs(adapter);
3538 /* link state has changed, schedule phy info update */
3539 if (!test_bit(__IGB_DOWN, &adapter->state))
3540 mod_timer(&adapter->phy_info_timer,
3541 round_jiffies(jiffies + 2 * HZ));
3545 spin_lock(&adapter->stats64_lock);
3546 igb_update_stats(adapter, &adapter->stats64);
3547 spin_unlock(&adapter->stats64_lock);
3549 for (i = 0; i < adapter->num_tx_queues; i++) {
3550 struct igb_ring *tx_ring = adapter->tx_ring[i];
3551 if (!netif_carrier_ok(netdev)) {
3552 /* We've lost link, so the controller stops DMA,
3553 * but we've got queued Tx work that's never going
3554 * to get done, so reset controller to flush Tx.
3555 * (Do the reset outside of interrupt context). */
3556 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3557 adapter->tx_timeout_count++;
3558 schedule_work(&adapter->reset_task);
3559 /* return immediately since reset is imminent */
3564 /* Force detection of hung controller every watchdog period */
3565 tx_ring->detect_tx_hung = true;
3568 /* Cause software interrupt to ensure rx ring is cleaned */
3569 if (adapter->msix_entries) {
/* MSI-X: kick every queue vector via EICS; legacy: single ICS bit. */
3571 for (i = 0; i < adapter->num_q_vectors; i++) {
3572 struct igb_q_vector *q_vector = adapter->q_vector[i];
3573 eics |= q_vector->eims_value;
3575 wr32(E1000_EICS, eics);
3577 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3580 igb_spoof_check(adapter);
3582 /* Reset the timer */
3583 if (!test_bit(__IGB_DOWN, &adapter->state))
3584 mod_timer(&adapter->watchdog_timer,
3585 round_jiffies(jiffies + 2 * HZ));
/* ITR latency classification buckets used by the interrupt-moderation
 * logic below; the named members between the brace and the sentinel
 * (lowest_latency/low_latency/bulk_latency) are elided in this excerpt. */
3588 enum latency_range {
3592 latency_invalid = 255
3596 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3598 * Stores a new ITR value based on strictly on packet size. This
3599 * algorithm is less sophisticated than that used in igb_update_itr,
3600 * due to the difficulty of synchronizing statistics across multiple
3601 * receive rings. The divisors and thresholds used by this function
3602 * were determined based on theoretical maximum wire speed and testing
3603 * data, in order to minimize response time while increasing bulk
3605 * This functionality is controlled by the InterruptThrottleRate module
3606 * parameter (see igb_param.c)
3607 * NOTE: This function is called only when operating in a multiqueue
3608 * receive environment.
3609 * @q_vector: pointer to q_vector
3611 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3613 int new_val = q_vector->itr_val;
3614 int avg_wire_size = 0;
3615 struct igb_adapter *adapter = q_vector->adapter;
3616 struct igb_ring *ring;
3617 unsigned int packets;
3619 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3620 * ints/sec - ITR timer value of 120 ticks.
3622 if (adapter->link_speed != SPEED_1000) {
/* Average wire size of Rx packets since the last update; total_packets
 * is read once (ACCESS_ONCE) because the ring is updated concurrently. */
3627 ring = q_vector->rx_ring;
3629 packets = ACCESS_ONCE(ring->total_packets);
3632 avg_wire_size = ring->total_bytes / packets;
/* Fold in the Tx ring the same way, keeping the larger average. */
3635 ring = q_vector->tx_ring;
3637 packets = ACCESS_ONCE(ring->total_packets);
3640 avg_wire_size = max_t(u32, avg_wire_size,
3641 ring->total_bytes / packets);
3644 /* if avg_wire_size isn't set no work was done */
3648 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3649 avg_wire_size += 24;
3651 /* Don't starve jumbo frames */
3652 avg_wire_size = min(avg_wire_size, 3000);
3654 /* Give a little boost to mid-size frames */
3655 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3656 new_val = avg_wire_size / 3;
3658 new_val = avg_wire_size / 2;
3660 /* when in itr mode 3 do not exceed 20K ints/sec */
3661 if (adapter->rx_itr_setting == 3 && new_val < 196)
/* Only flag a register rewrite when the value actually changed. */
3665 if (new_val != q_vector->itr_val) {
3666 q_vector->itr_val = new_val;
3667 q_vector->set_itr = 1;
/* Reset per-interval byte/packet counters for the next measurement. */
3670 if (q_vector->rx_ring) {
3671 q_vector->rx_ring->total_bytes = 0;
3672 q_vector->rx_ring->total_packets = 0;
3674 if (q_vector->tx_ring) {
3675 q_vector->tx_ring->total_bytes = 0;
3676 q_vector->tx_ring->total_packets = 0;
3681 * igb_update_itr - update the dynamic ITR value based on statistics
3682 * Stores a new ITR value based on packets and byte
3683 * counts during the last interrupt. The advantage of per interrupt
3684 * computation is faster updates and more accurate ITR for the current
3685 * traffic pattern. Constants in this function were computed
3686 * based on theoretical maximum wire speed and thresholds were set based
3687 * on testing data as well as attempting to minimize response time
3688 * while increasing bulk throughput.
3689 * this functionality is controlled by the InterruptThrottleRate module
3690 * parameter (see igb_param.c)
3691 * NOTE: These calculations are only valid when operating in a single-
3692 * queue environment.
3693 * @adapter: pointer to adapter
3694 * @itr_setting: current q_vector->itr_val
3695 * @packets: the number of packets during this measurement interval
3696 * @bytes: the number of bytes during this measurement interval
/* Returns the new latency_range bucket; defaults to the current setting. */
3698 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3699 int packets, int bytes)
3701 unsigned int retval = itr_setting;
/* Guard (condition elided in excerpt) — bail out with no change. */
3704 goto update_itr_done;
3706 switch (itr_setting) {
3707 case lowest_latency:
3708 /* handle TSO and jumbo frames */
3709 if (bytes/packets > 8000)
3710 retval = bulk_latency;
3711 else if ((packets < 5) && (bytes > 512))
3712 retval = low_latency;
3714 case low_latency: /* 50 usec aka 20000 ints/s */
3715 if (bytes > 10000) {
3716 /* this if handles the TSO accounting */
3717 if (bytes/packets > 8000) {
3718 retval = bulk_latency;
3719 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3720 retval = bulk_latency;
3721 } else if ((packets > 35)) {
3722 retval = lowest_latency;
3724 } else if (bytes/packets > 2000) {
3725 retval = bulk_latency;
3726 } else if (packets <= 2 && bytes < 512) {
3727 retval = lowest_latency;
3730 case bulk_latency: /* 250 usec aka 4000 ints/s */
3731 if (bytes > 25000) {
/* Heavy traffic with few packets (condition elided): back toward
 * low_latency rather than staying in bulk. */
3733 retval = low_latency;
3734 } else if (bytes < 1500) {
3735 retval = low_latency;
/* igb_set_itr - single-queue ITR update.
 * Classifies the last interval's Rx and Tx traffic with igb_update_itr(),
 * picks the more latency-sensitive of the two, maps the bucket to an ITR
 * register value, smooths upward transitions, and defers the register write
 * to the next interrupt (set_itr flag) to avoid resetting the hardware timer.
 * Only q_vector[0] is used — valid in single-queue operation only. */
3744 static void igb_set_itr(struct igb_adapter *adapter)
3746 struct igb_q_vector *q_vector = adapter->q_vector[0];
3748 u32 new_itr = q_vector->itr_val;
3750 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3751 if (adapter->link_speed != SPEED_1000) {
3757 adapter->rx_itr = igb_update_itr(adapter,
3759 q_vector->rx_ring->total_packets,
3760 q_vector->rx_ring->total_bytes);
3762 adapter->tx_itr = igb_update_itr(adapter,
3764 q_vector->tx_ring->total_packets,
3765 q_vector->tx_ring->total_bytes);
/* max() works because the latency_range enum orders lowest < low < bulk. */
3766 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3768 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3769 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3770 current_itr = low_latency;
3772 switch (current_itr) {
3773 /* counts and packets in update_itr are dependent on these numbers */
3774 case lowest_latency:
3775 new_itr = 56; /* aka 70,000 ints/sec */
3778 new_itr = 196; /* aka 20,000 ints/sec */
3781 new_itr = 980; /* aka 4,000 ints/sec */
/* Clear the per-interval counters for the next measurement window. */
3788 q_vector->rx_ring->total_bytes = 0;
3789 q_vector->rx_ring->total_packets = 0;
3790 q_vector->tx_ring->total_bytes = 0;
3791 q_vector->tx_ring->total_packets = 0;
3793 if (new_itr != q_vector->itr_val) {
3794 /* this attempts to bias the interrupt rate towards Bulk
3795 * by adding intermediate steps when interrupt rate is
/* Weighted average when increasing ITR, so moderation ramps up
 * gradually instead of jumping (else-branch elided in excerpt). */
3797 new_itr = new_itr > q_vector->itr_val ?
3798 max((new_itr * q_vector->itr_val) /
3799 (new_itr + (q_vector->itr_val >> 2)),
3802 /* Don't write the value here; it resets the adapter's
3803 * internal timer, and causes us to delay far longer than
3804 * we should between interrupts. Instead, we write the ITR
3805 * value at the beginning of the next interrupt so the timing
3806 * ends up being correct.
3808 q_vector->itr_val = new_itr;
3809 q_vector->set_itr = 1;
/* Per-skb transmit flag bits; the upper 16 bits carry the VLAN tag. */
3813 #define IGB_TX_FLAGS_CSUM 0x00000001
3814 #define IGB_TX_FLAGS_VLAN 0x00000002
3815 #define IGB_TX_FLAGS_TSO 0x00000004
3816 #define IGB_TX_FLAGS_IPV4 0x00000008
3817 #define IGB_TX_FLAGS_TSTAMP 0x00000010
3818 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
3819 #define IGB_TX_FLAGS_VLAN_SHIFT 16
/* igb_tso_adv - build an advanced context descriptor for TCP segmentation.
 * Fixes up the IP/TCP pseudo-header checksums for hardware TSO, writes the
 * VLAN/MACLEN/IPLEN, TUCMD, and MSS/L4LEN fields into a context descriptor,
 * and returns the total header length through @hdr_len.
 * NOTE(review): return statements and some error paths are elided in this
 * excerpt; presumably returns nonzero when a context descriptor was queued. */
3821 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3822 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3824 struct e1000_adv_tx_context_desc *context_desc;
3827 struct igb_buffer *buffer_info;
3828 u32 info = 0, tu_cmd = 0;
/* A cloned header cannot be modified in place — unclone it first. */
3832 if (skb_header_cloned(skb)) {
3833 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3838 l4len = tcp_hdrlen(skb);
/* Zero the length fields and seed the TCP checksum with the
 * pseudo-header so hardware can finish it per segment. */
3841 if (skb->protocol == htons(ETH_P_IP)) {
3842 struct iphdr *iph = ip_hdr(skb);
3845 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3849 } else if (skb_is_gso_v6(skb)) {
3850 ipv6_hdr(skb)->payload_len = 0;
3851 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3852 &ipv6_hdr(skb)->daddr,
3856 i = tx_ring->next_to_use;
3858 buffer_info = &tx_ring->buffer_info[i];
3859 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3860 /* VLAN MACLEN IPLEN */
3861 if (tx_flags & IGB_TX_FLAGS_VLAN)
3862 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3863 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3864 *hdr_len += skb_network_offset(skb);
3865 info |= skb_network_header_len(skb);
3866 *hdr_len += skb_network_header_len(skb);
3867 context_desc->vlan_macip_lens = cpu_to_le32(info);
3869 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3870 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3872 if (skb->protocol == htons(ETH_P_IP))
3873 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3874 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3876 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
/* MSS and L4 header length share one descriptor field. */
3879 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3880 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3882 /* For 82575, context index must be unique per ring. */
3883 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3884 mss_l4len_idx |= tx_ring->reg_idx << 4;
3886 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3887 context_desc->seqnum_seed = 0;
/* Context descriptors carry no data buffer, so dma stays 0. */
3889 buffer_info->time_stamp = jiffies;
3890 buffer_info->next_to_watch = i;
3891 buffer_info->dma = 0;
/* Advance next_to_use with ring wrap (increment elided in excerpt). */
3893 if (i == tx_ring->count)
3896 tx_ring->next_to_use = i;
/* igb_tx_csum_adv - build a context descriptor for checksum offload/VLAN.
 * Queues an advanced context descriptor when the skb needs a hardware L4
 * checksum (CHECKSUM_PARTIAL) or carries a VLAN tag; selects TCP vs SCTP
 * offload by inspecting the IP/IPv6 next-protocol field.
 * NOTE(review): return statements are elided in this excerpt; presumably
 * returns true when a context descriptor was used, false otherwise. */
3901 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3902 struct sk_buff *skb, u32 tx_flags)
3904 struct e1000_adv_tx_context_desc *context_desc;
3905 struct device *dev = tx_ring->dev;
3906 struct igb_buffer *buffer_info;
3907 u32 info = 0, tu_cmd = 0;
3910 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3911 (tx_flags & IGB_TX_FLAGS_VLAN)) {
3912 i = tx_ring->next_to_use;
3913 buffer_info = &tx_ring->buffer_info[i];
3914 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3916 if (tx_flags & IGB_TX_FLAGS_VLAN)
3917 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3919 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3920 if (skb->ip_summed == CHECKSUM_PARTIAL)
3921 info |= skb_network_header_len(skb);
3923 context_desc->vlan_macip_lens = cpu_to_le32(info);
3925 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3927 if (skb->ip_summed == CHECKSUM_PARTIAL) {
/* For 802.1Q frames the real L3 protocol lives in the inner
 * encapsulated-proto field, not skb->protocol. */
3930 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3931 const struct vlan_ethhdr *vhdr =
3932 (const struct vlan_ethhdr*)skb->data;
3934 protocol = vhdr->h_vlan_encapsulated_proto;
3936 protocol = skb->protocol;
/* switch (protocol) — opening elided in excerpt. */
3940 case cpu_to_be16(ETH_P_IP):
3941 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3942 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3943 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3944 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3945 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3947 case cpu_to_be16(ETH_P_IPV6):
3948 /* XXX what about other V6 headers?? */
3949 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3950 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3951 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3952 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
/* default: unsupported protocol — rate-limited diagnostic only. */
3955 if (unlikely(net_ratelimit()))
3957 "partial checksum but proto=%x!\n",
3963 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3964 context_desc->seqnum_seed = 0;
3965 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3966 context_desc->mss_l4len_idx =
3967 cpu_to_le32(tx_ring->reg_idx << 4);
/* Context descriptors carry no data buffer, so dma stays 0. */
3969 buffer_info->time_stamp = jiffies;
3970 buffer_info->next_to_watch = i;
3971 buffer_info->dma = 0;
/* Advance next_to_use with ring wrap (increment elided in excerpt). */
3974 if (i == tx_ring->count)
3976 tx_ring->next_to_use = i;
/* Maximum data bytes one Tx descriptor can carry: 2^16 = 64 KiB. */
3983 #define IGB_MAX_TXD_PWR 16
3984 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
/* igb_tx_map_adv - DMA-map an skb (head + page frags) onto the Tx ring.
 * Fills one buffer_info per segment, records the skb and byte/gso accounting
 * on the last segment, and points the first descriptor's next_to_watch at the
 * last index. On a mapping failure, unwinds every mapping made so far.
 * NOTE(review): the return statements (mapped count on success, error value
 * on failure — see caller's "0 or less" comment) are elided in this excerpt. */
3986 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3989 struct igb_buffer *buffer_info;
3990 struct device *dev = tx_ring->dev;
3991 unsigned int hlen = skb_headlen(skb);
3992 unsigned int count = 0, i;
/* gso_segs is 0 for non-GSO skbs; treat that as one segment. */
3994 u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3996 i = tx_ring->next_to_use;
/* Map the linear head of the skb first. */
3998 buffer_info = &tx_ring->buffer_info[i];
3999 BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4000 buffer_info->length = hlen;
4001 /* set time_stamp *before* dma to help avoid a possible race */
4002 buffer_info->time_stamp = jiffies;
4003 buffer_info->next_to_watch = i;
4004 buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4006 if (dma_mapping_error(dev, buffer_info->dma))
/* Then one descriptor per paged fragment. */
4009 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4010 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4011 unsigned int len = frag->size;
4015 if (i == tx_ring->count)
4018 buffer_info = &tx_ring->buffer_info[i];
4019 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4020 buffer_info->length = len;
4021 buffer_info->time_stamp = jiffies;
4022 buffer_info->next_to_watch = i;
4023 buffer_info->mapped_as_page = true;
4024 buffer_info->dma = dma_map_page(dev,
4029 if (dma_mapping_error(dev, buffer_info->dma))
/* Last segment owns the skb and the completion accounting. */
4034 tx_ring->buffer_info[i].skb = skb;
4035 tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4036 /* multiply data chunks by size of headers */
4037 tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4038 tx_ring->buffer_info[i].gso_segs = gso_segs;
4039 tx_ring->buffer_info[first].next_to_watch = i;
/* --- DMA error unwind path --- */
4044 dev_err(dev, "TX DMA map failed\n");
4046 /* clear timestamp and dma mappings for failed buffer_info mapping */
4047 buffer_info->dma = 0;
4048 buffer_info->time_stamp = 0;
4049 buffer_info->length = 0;
4050 buffer_info->next_to_watch = 0;
4051 buffer_info->mapped_as_page = false;
4053 /* clear timestamp and dma mappings for remaining portion of packet */
/* Walk back over already-mapped segments and release each one. */
4058 buffer_info = &tx_ring->buffer_info[i];
4059 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
/* igb_tx_queue_adv - write the data descriptors for an already-mapped skb.
 * Translates tx_flags into ADVTXD command/offload bits, emits @count
 * descriptors from the buffer_info entries starting at next_to_use, marks
 * end-of-packet on the last one, then advances and writes the tail register
 * (with a write barrier so hardware sees complete descriptors first). */
4065 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4066 u32 tx_flags, int count, u32 paylen,
4069 union e1000_adv_tx_desc *tx_desc;
4070 struct igb_buffer *buffer_info;
4071 u32 olinfo_status = 0, cmd_type_len;
4072 unsigned int i = tx_ring->next_to_use;
4074 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4075 E1000_ADVTXD_DCMD_DEXT);
4077 if (tx_flags & IGB_TX_FLAGS_VLAN)
4078 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4080 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4081 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4083 if (tx_flags & IGB_TX_FLAGS_TSO) {
4084 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4086 /* insert tcp checksum */
4087 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4089 /* insert ip checksum */
4090 if (tx_flags & IGB_TX_FLAGS_IPV4)
4091 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4093 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4094 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
/* Rings needing a per-ring context index also tag data descriptors. */
4097 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4098 (tx_flags & (IGB_TX_FLAGS_CSUM |
4100 IGB_TX_FLAGS_VLAN)))
4101 olinfo_status |= tx_ring->reg_idx << 4;
/* PAYLEN is the payload excluding the offloaded headers. */
4103 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
/* Emit one descriptor per mapped segment (do-loop opening elided). */
4106 buffer_info = &tx_ring->buffer_info[i];
4107 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4108 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4109 tx_desc->read.cmd_type_len =
4110 cpu_to_le32(cmd_type_len | buffer_info->length);
4111 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4114 if (i == tx_ring->count)
4116 } while (count > 0);
/* Mark the final descriptor (EOP/RS bits via IGB_ADVTXD_DCMD). */
4118 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4119 /* Force memory writes to complete before letting h/w
4120 * know there are new descriptors to fetch. (Only
4121 * applicable for weak-ordered memory model archs,
4122 * such as IA-64). */
4125 tx_ring->next_to_use = i;
4126 writel(i, tx_ring->tail);
4127 /* we need this if more than one processor can write to our tail
4128 * at a time, it syncronizes IO on IA64/Altix systems */
/* __igb_maybe_stop_tx - slow path of igb_maybe_stop_tx.
 * Stops this subqueue, re-checks free descriptors after a memory barrier
 * (another CPU may have just completed Tx work), and if room appeared,
 * wakes the queue again and counts the restart under the u64 stats sync. */
4132 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4134 struct net_device *netdev = tx_ring->netdev;
4136 netif_stop_subqueue(netdev, tx_ring->queue_index);
4138 /* Herbert's original patch had:
4139 * smp_mb__after_netif_stop_queue();
4140 * but since that doesn't exist yet, just open code it. */
4143 /* We need to check again in a case another CPU has just
4144 * made room available. */
4145 if (igb_desc_unused(tx_ring) < size)
/* Room became available after all — undo the stop and count it. */
4149 netif_wake_subqueue(netdev, tx_ring->queue_index);
4151 u64_stats_update_begin(&tx_ring->tx_syncp2);
4152 tx_ring->tx_stats.restart_queue2++;
4153 u64_stats_update_end(&tx_ring->tx_syncp2);
/* igb_maybe_stop_tx - fast-path check that @size descriptors are free.
 * Falls through to the stop/re-check slow path only when they are not. */
4158 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4160 if (igb_desc_unused(tx_ring) >= size)
4162 return __igb_maybe_stop_tx(tx_ring, size);
/* igb_xmit_frame_ring_adv - transmit one skb on a specific Tx ring.
 * Reserves descriptor space, collects tx_flags (HW timestamp, VLAN, IPv4),
 * sets up TSO or checksum context as needed, DMA-maps the skb, queues the
 * data descriptors, and pre-checks space for the next worst-case send. */
4165 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4166 struct igb_ring *tx_ring)
4173 /* need: 1 descriptor per page,
4174 * + 2 desc gap to keep tail from touching head,
4175 * + 1 desc for skb->data,
4176 * + 1 desc for context descriptor,
4177 * otherwise try next time */
4178 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4179 /* this is a hard error */
4180 return NETDEV_TX_BUSY;
/* Caller (stack) requested a hardware Tx timestamp for this skb. */
4183 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4184 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4185 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4188 if (vlan_tx_tag_present(skb)) {
4189 tx_flags |= IGB_TX_FLAGS_VLAN;
4190 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4193 if (skb->protocol == htons(ETH_P_IP))
4194 tx_flags |= IGB_TX_FLAGS_IPV4;
/* Remember the first descriptor index so it can watch the last one. */
4196 first = tx_ring->next_to_use;
4197 if (skb_is_gso(skb)) {
4198 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
/* TSO setup failed (tso < 0 — check elided): drop the skb. */
4201 dev_kfree_skb_any(skb);
4202 return NETDEV_TX_OK;
4207 tx_flags |= IGB_TX_FLAGS_TSO;
4208 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4209 (skb->ip_summed == CHECKSUM_PARTIAL))
4210 tx_flags |= IGB_TX_FLAGS_CSUM;
4213 * count reflects descriptors mapped, if 0 or less then mapping error
4214 * has occured and we need to rewind the descriptor queue
4216 count = igb_tx_map_adv(tx_ring, skb, first);
/* Mapping failed: free the skb and rewind next_to_use to 'first'. */
4218 dev_kfree_skb_any(skb);
4219 tx_ring->buffer_info[first].time_stamp = 0;
4220 tx_ring->next_to_use = first;
4221 return NETDEV_TX_OK;
4224 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4226 /* Make sure there is space in the ring for the next send. */
4227 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4229 return NETDEV_TX_OK;
/* igb_xmit_frame_adv - ndo_start_xmit entry point.
 * Drops the skb if the adapter is going down or the skb is empty, maps the
 * skb's queue index to a Tx ring, and hands off to the ring-level xmit. */
4232 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4233 struct net_device *netdev)
4235 struct igb_adapter *adapter = netdev_priv(netdev);
4236 struct igb_ring *tx_ring;
4239 if (test_bit(__IGB_DOWN, &adapter->state)) {
4240 dev_kfree_skb_any(skb);
4241 return NETDEV_TX_OK;
4244 if (skb->len <= 0) {
4245 dev_kfree_skb_any(skb);
4246 return NETDEV_TX_OK;
/* Mask assumes IGB_ABS_MAX_TX_QUEUES is a power of two. */
4249 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4250 tx_ring = adapter->multi_tx_table[r_idx];
4252 /* This goes back to the question of how to logically map a tx queue
4253 * to a flow. Right now, performance is impacted slightly negatively
4254 * if using multiple tx queues. If the stack breaks away from a
4255 * single qdisc implementation, we can look at this again. */
4256 return igb_xmit_frame_ring_adv(skb, tx_ring);
4260 * igb_tx_timeout - Respond to a Tx Hang
4261 * @netdev: network interface device structure
4263 static void igb_tx_timeout(struct net_device *netdev)
4265 struct igb_adapter *adapter = netdev_priv(netdev);
4266 struct e1000_hw *hw = &adapter->hw;
4268 /* Do the reset outside of interrupt context */
4269 adapter->tx_timeout_count++;
/* 82580 needs a full global device reset to recover from a Tx hang. */
4271 if (hw->mac.type == e1000_82580)
4272 hw->dev_spec._82575.global_device_reset = true;
4274 schedule_work(&adapter->reset_task);
/* Re-enable queue interrupts (write target register elided in excerpt). */
4276 (adapter->eims_enable_mask & ~adapter->eims_other));
/* igb_reset_task - deferred work item that reinitializes the adapter.
 * Scheduled from Tx-timeout/watchdog paths so the reset runs in process
 * context rather than in interrupt context. */
4279 static void igb_reset_task(struct work_struct *work)
4281 struct igb_adapter *adapter;
4282 adapter = container_of(work, struct igb_adapter, reset_task);
4285 netdev_err(adapter->netdev, "Reset adapter\n");
4286 igb_reinit_locked(adapter);
4290 * igb_get_stats64 - Get System Network Statistics
4291 * @netdev: network interface device structure
4292 * @stats: rtnl_link_stats64 pointer
/* Refreshes the cached stats64 under the spinlock and copies them out. */
4295 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4296 struct rtnl_link_stats64 *stats)
4298 struct igb_adapter *adapter = netdev_priv(netdev);
4300 spin_lock(&adapter->stats64_lock);
4301 igb_update_stats(adapter, &adapter->stats64);
4302 memcpy(stats, &adapter->stats64, sizeof(*stats));
4303 spin_unlock(&adapter->stats64_lock);
4309 * igb_change_mtu - Change the Maximum Transfer Unit
4310 * @netdev: network interface device structure
4311 * @new_mtu: new value for maximum frame size
4313 * Returns 0 on success, negative on failure
4315 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4317 struct igb_adapter *adapter = netdev_priv(netdev);
4318 struct pci_dev *pdev = adapter->pdev;
4319 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4320 u32 rx_buffer_len, i;
/* Reject MTUs below the IPv4 minimum (68) or above jumbo limits. */
4322 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4323 dev_err(&pdev->dev, "Invalid MTU setting\n");
4327 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4328 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
/* Serialize against any in-progress reset. */
4332 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4335 /* igb_down has a dependency on max_frame_size */
4336 adapter->max_frame_size = max_frame;
4338 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4339 * means we reserve 2 more, this pushes us to allocate from the next
4341 * i.e. RXBUFFER_2048 --> size-4096 slab
/* 82580 prepends a timestamp header to received frames. */
4344 if (adapter->hw.mac.type == e1000_82580)
4345 max_frame += IGB_TS_HDR_LEN;
/* Pick an Rx buffer size bucket for the new frame size. */
4347 if (max_frame <= IGB_RXBUFFER_1024)
4348 rx_buffer_len = IGB_RXBUFFER_1024;
4349 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4350 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4352 rx_buffer_len = IGB_RXBUFFER_128;
4354 if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4355 (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4356 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4358 if ((adapter->hw.mac.type == e1000_82580) &&
4359 (rx_buffer_len == IGB_RXBUFFER_128))
4360 rx_buffer_len += IGB_RXBUFFER_64;
/* If the interface is up, bounce it so rings are re-allocated
 * (igb_down call elided in excerpt). */
4362 if (netif_running(netdev))
4365 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4366 netdev->mtu, new_mtu);
4367 netdev->mtu = new_mtu;
4369 for (i = 0; i < adapter->num_rx_queues; i++)
4370 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4372 if (netif_running(netdev))
4377 clear_bit(__IGB_RESETTING, &adapter->state);
4383 * igb_update_stats - Update the board statistics counters
4384 * @adapter: board private structure
/* Aggregates per-ring software counters (via u64_stats seqcounts), reads the
 * hardware statistics registers (most are clear-on-read or latched-high),
 * and derives the rtnl_link_stats64 fields from them.
 * Caller holds adapter->stats64_lock (see igb_get_stats64/watchdog). */
4387 void igb_update_stats(struct igb_adapter *adapter,
4388 struct rtnl_link_stats64 *net_stats)
4390 struct e1000_hw *hw = &adapter->hw;
4391 struct pci_dev *pdev = adapter->pdev;
4397 u64 _bytes, _packets;
4399 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4402 * Prevent stats update while adapter is being reset, or if the pci
4403 * connection is down.
4405 if (adapter->link_speed == 0)
4407 if (pci_channel_offline(pdev))
/* --- per-Rx-ring software stats --- */
4412 for (i = 0; i < adapter->num_rx_queues; i++) {
4413 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4414 struct igb_ring *ring = adapter->rx_ring[i];
4416 ring->rx_stats.drops += rqdpc_tmp;
4417 net_stats->rx_fifo_errors += rqdpc_tmp;
/* Seqcount retry loop gives a consistent bytes/packets snapshot. */
4420 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4421 _bytes = ring->rx_stats.bytes;
4422 _packets = ring->rx_stats.packets;
4423 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4425 packets += _packets;
4428 net_stats->rx_bytes = bytes;
4429 net_stats->rx_packets = packets;
/* --- per-Tx-ring software stats --- */
4433 for (i = 0; i < adapter->num_tx_queues; i++) {
4434 struct igb_ring *ring = adapter->tx_ring[i];
4436 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4437 _bytes = ring->tx_stats.bytes;
4438 _packets = ring->tx_stats.packets;
4439 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4441 packets += _packets;
4443 net_stats->tx_bytes = bytes;
4444 net_stats->tx_packets = packets;
4446 /* read stats registers */
4447 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4448 adapter->stats.gprc += rd32(E1000_GPRC);
4449 adapter->stats.gorc += rd32(E1000_GORCL);
4450 rd32(E1000_GORCH); /* clear GORCL */
4451 adapter->stats.bprc += rd32(E1000_BPRC);
4452 adapter->stats.mprc += rd32(E1000_MPRC);
4453 adapter->stats.roc += rd32(E1000_ROC);
4455 adapter->stats.prc64 += rd32(E1000_PRC64);
4456 adapter->stats.prc127 += rd32(E1000_PRC127);
4457 adapter->stats.prc255 += rd32(E1000_PRC255);
4458 adapter->stats.prc511 += rd32(E1000_PRC511);
4459 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4460 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4461 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4462 adapter->stats.sec += rd32(E1000_SEC);
4464 mpc = rd32(E1000_MPC);
4465 adapter->stats.mpc += mpc;
4466 net_stats->rx_fifo_errors += mpc;
4467 adapter->stats.scc += rd32(E1000_SCC);
4468 adapter->stats.ecol += rd32(E1000_ECOL);
4469 adapter->stats.mcc += rd32(E1000_MCC);
4470 adapter->stats.latecol += rd32(E1000_LATECOL);
4471 adapter->stats.dc += rd32(E1000_DC);
4472 adapter->stats.rlec += rd32(E1000_RLEC);
4473 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4474 adapter->stats.xontxc += rd32(E1000_XONTXC);
4475 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4476 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4477 adapter->stats.fcruc += rd32(E1000_FCRUC);
4478 adapter->stats.gptc += rd32(E1000_GPTC);
4479 adapter->stats.gotc += rd32(E1000_GOTCL);
4480 rd32(E1000_GOTCH); /* clear GOTCL */
4481 adapter->stats.rnbc += rd32(E1000_RNBC);
4482 adapter->stats.ruc += rd32(E1000_RUC);
4483 adapter->stats.rfc += rd32(E1000_RFC);
4484 adapter->stats.rjc += rd32(E1000_RJC);
4485 adapter->stats.tor += rd32(E1000_TORH);
4486 adapter->stats.tot += rd32(E1000_TOTH);
4487 adapter->stats.tpr += rd32(E1000_TPR);
4489 adapter->stats.ptc64 += rd32(E1000_PTC64);
4490 adapter->stats.ptc127 += rd32(E1000_PTC127);
4491 adapter->stats.ptc255 += rd32(E1000_PTC255);
4492 adapter->stats.ptc511 += rd32(E1000_PTC511);
4493 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4494 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4496 adapter->stats.mptc += rd32(E1000_MPTC);
4497 adapter->stats.bptc += rd32(E1000_BPTC);
4499 adapter->stats.tpt += rd32(E1000_TPT);
4500 adapter->stats.colc += rd32(E1000_COLC);
4502 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4503 /* read internal phy specific stats */
4504 reg = rd32(E1000_CTRL_EXT);
4505 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4506 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4507 adapter->stats.tncrs += rd32(E1000_TNCRS);
4510 adapter->stats.tsctc += rd32(E1000_TSCTC);
4511 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
/* Interrupt cause counters. */
4513 adapter->stats.iac += rd32(E1000_IAC);
4514 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4515 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4516 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4517 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4518 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4519 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4520 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4521 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4523 /* Fill out the OS statistics structure */
4524 net_stats->multicast = adapter->stats.mprc;
4525 net_stats->collisions = adapter->stats.colc;
4529 /* RLEC on some newer hardware can be incorrect so build
4530 * our own version based on RUC and ROC */
4531 net_stats->rx_errors = adapter->stats.rxerrc +
4532 adapter->stats.crcerrs + adapter->stats.algnerrc +
4533 adapter->stats.ruc + adapter->stats.roc +
4534 adapter->stats.cexterr;
4535 net_stats->rx_length_errors = adapter->stats.ruc +
4537 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4538 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4539 net_stats->rx_missed_errors = adapter->stats.mpc;
/* Tx error rollups. */
4542 net_stats->tx_errors = adapter->stats.ecol +
4543 adapter->stats.latecol;
4544 net_stats->tx_aborted_errors = adapter->stats.ecol;
4545 net_stats->tx_window_errors = adapter->stats.latecol;
4546 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4548 /* Tx Dropped needs to be maintained elsewhere */
/* PHY idle-error counter only readable on copper at gigabit speed. */
4551 if (hw->phy.media_type == e1000_media_type_copper) {
4552 if ((adapter->link_speed == SPEED_1000) &&
4553 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4554 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4555 adapter->phy_stats.idle_errors += phy_tmp;
4559 /* Management Stats */
4560 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4561 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4562 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
/* igb_msix_other - MSI-X handler for non-queue ("other") causes.
 * Handles device-reset-asserted, DMA-out-of-sync (possible VF spoof),
 * VF mailbox, and link-state-change causes, then re-enables the
 * relevant interrupt mask bits. */
4565 static irqreturn_t igb_msix_other(int irq, void *data)
4567 struct igb_adapter *adapter = data;
4568 struct e1000_hw *hw = &adapter->hw;
4569 u32 icr = rd32(E1000_ICR);
4570 /* reading ICR causes bit 31 of EICR to be cleared */
4572 if (icr & E1000_ICR_DRSTA)
4573 schedule_work(&adapter->reset_task);
4575 if (icr & E1000_ICR_DOUTSYNC) {
4576 /* HW is reporting DMA is out of sync */
4577 adapter->stats.doosync++;
4578 /* The DMA Out of Sync is also indication of a spoof event
4579 * in IOV mode. Check the Wrong VM Behavior register to
4580 * see if it is really a spoof event. */
4581 igb_check_wvbr(adapter);
4584 /* Check for a mailbox event */
4585 if (icr & E1000_ICR_VMMB)
4586 igb_msg_task(adapter);
4588 if (icr & E1000_ICR_LSC) {
4589 hw->mac.get_link_status = 1;
4590 /* guard against interrupt when we're going down */
4591 if (!test_bit(__IGB_DOWN, &adapter->state))
4592 mod_timer(&adapter->watchdog_timer, jiffies + 1);
/* Re-arm: include the VF mailbox cause only when VFs are allocated. */
4595 if (adapter->vfs_allocated_count)
4596 wr32(E1000_IMS, E1000_IMS_LSC |
4598 E1000_IMS_DOUTSYNC);
4600 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4601 wr32(E1000_EIMS, adapter->eims_other);
/* igb_write_itr - flush a pending ITR value to the q_vector's register.
 * No-op unless set_itr was flagged by the moderation code; 82575 encodes
 * the value in both halves of the register, later MACs set bit 0x8000000. */
4606 static void igb_write_itr(struct igb_q_vector *q_vector)
4608 struct igb_adapter *adapter = q_vector->adapter;
/* ITR field is bits [14:2]; mask keeps the 4-byte-aligned encoding. */
4609 u32 itr_val = q_vector->itr_val & 0x7FFC;
4611 if (!q_vector->set_itr)
4617 if (adapter->hw.mac.type == e1000_82575)
4618 itr_val |= itr_val << 16;
4620 itr_val |= 0x8000000;
4622 writel(itr_val, q_vector->itr_register);
4623 q_vector->set_itr = 0;
/* igb_msix_ring - MSI-X handler for one queue vector: apply the deferred
 * ITR value, then hand the real work to NAPI. */
4626 static irqreturn_t igb_msix_ring(int irq, void *data)
4628 struct igb_q_vector *q_vector = data;
4630 /* Write the ITR value calculated from the previous interrupt. */
4631 igb_write_itr(q_vector);
4633 napi_schedule(&q_vector->napi);
#ifdef CONFIG_IGB_DCA
/* igb_update_dca - retarget this q_vector's DCA (direct cache access) hints
 * at the CPU currently running it. The CPUID field layout differs between
 * the 82575 and later MACs, hence the two encodings per direction. */
4639 static void igb_update_dca(struct igb_q_vector *q_vector)
4641 struct igb_adapter *adapter = q_vector->adapter;
4642 struct e1000_hw *hw = &adapter->hw;
4643 int cpu = get_cpu();
/* Already targeting this CPU — nothing to rewrite. */
4645 if (q_vector->cpu == cpu)
4648 if (q_vector->tx_ring) {
4649 int q = q_vector->tx_ring->reg_idx;
4650 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4651 if (hw->mac.type == e1000_82575) {
4652 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4653 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4655 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4656 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4657 E1000_DCA_TXCTRL_CPUID_SHIFT;
4659 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4660 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4662 if (q_vector->rx_ring) {
4663 int q = q_vector->rx_ring->reg_idx;
4664 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4665 if (hw->mac.type == e1000_82575) {
4666 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4667 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4669 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4670 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4671 E1000_DCA_RXCTRL_CPUID_SHIFT;
/* Rx additionally enables header and payload DCA. */
4673 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4674 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4675 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4676 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4678 q_vector->cpu = cpu;
/* igb_setup_dca - enable CB2-mode DCA and force every q_vector to
 * re-program its CPU target on next update (cpu = -1). */
4683 static void igb_setup_dca(struct igb_adapter *adapter)
4685 struct e1000_hw *hw = &adapter->hw;
4688 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4691 /* Always use CB2 mode, difference is masked in the CB driver. */
4692 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4694 for (i = 0; i < adapter->num_q_vectors; i++) {
4695 adapter->q_vector[i]->cpu = -1;
4696 igb_update_dca(adapter->q_vector[i]);
/* __igb_notify_dca - per-device handler for DCA provider add/remove events,
 * invoked for each igb device via driver_for_each_device (see igb_notify_dca). */
4700 static int __igb_notify_dca(struct device *dev, void *data)
4702 struct net_device *netdev = dev_get_drvdata(dev);
4703 struct igb_adapter *adapter = netdev_priv(netdev);
4704 struct pci_dev *pdev = adapter->pdev;
4705 struct e1000_hw *hw = &adapter->hw;
4706 unsigned long event = *(unsigned long *)data;
4709 case DCA_PROVIDER_ADD:
4710 /* if already enabled, don't do it again */
4711 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4713 if (dca_add_requester(dev) == 0) {
4714 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4715 dev_info(&pdev->dev, "DCA enabled\n");
4716 igb_setup_dca(adapter);
4719 /* Fall Through since DCA is disabled. */
4720 case DCA_PROVIDER_REMOVE:
4721 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4722 /* without this a class_device is left
4723 * hanging around in the sysfs model */
4724 dca_remove_requester(dev);
4725 dev_info(&pdev->dev, "DCA disabled\n");
4726 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4727 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
/*
 * igb_notify_dca - DCA notifier callback; fans the event out to every
 * bound igb device via __igb_notify_dca().  Any non-zero result from the
 * iteration is reported as NOTIFY_BAD, otherwise NOTIFY_DONE.
 */
4735 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4740 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4743 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4745 #endif /* CONFIG_IGB_DCA */
/*
 * igb_ping_all_vfs - send a PF control message to every allocated VF
 * @adapter: board private structure
 *
 * VFs that have completed reset negotiation (IGB_VF_FLAG_CTS) also get
 * the CTS bit set so their mailbox state machine accepts the message.
 */
4747 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4749 struct e1000_hw *hw = &adapter->hw;
4753 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4754 ping = E1000_PF_CONTROL_MSG;
4755 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4756 ping |= E1000_VT_MSGTYPE_CTS;
4757 igb_write_mbx(hw, &ping, 1, i);
/*
 * igb_set_vf_promisc - apply a VF's requested promiscuous settings
 * @adapter: board private structure
 * @msgbuf: mailbox message; promisc request bits are consumed in place
 * @vf: VF index
 *
 * Clears all promiscuous state in VMOLR and the VF flags, then re-enables
 * multicast promiscuous mode if requested.  When the request is being
 * cleared, previously stored multicast hashes are rewritten to the MTA
 * (that step is skipped while MPME is on).  Returns an error if any
 * request bits remain unhandled.
 */
4761 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4763 struct e1000_hw *hw = &adapter->hw;
4764 u32 vmolr = rd32(E1000_VMOLR(vf));
4765 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4767 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4768 IGB_VF_FLAG_MULTI_PROMISC);
4769 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4771 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4772 vmolr |= E1000_VMOLR_MPME;
4773 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
/* consume the bit so the unhandled-flags check below stays accurate */
4774 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4777 * if we have hashes and we are clearing a multicast promisc
4778 * flag we need to write the hashes to the MTA as this step
4779 * was previously skipped
4781 if (vf_data->num_vf_mc_hashes > 30) {
/* too many entries for exact filtering; fall back to MPME */
4782 vmolr |= E1000_VMOLR_MPME;
4783 } else if (vf_data->num_vf_mc_hashes) {
4785 vmolr |= E1000_VMOLR_ROMPE;
4786 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4787 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4791 wr32(E1000_VMOLR(vf), vmolr);
4793 /* there are flags left unprocessed, likely not supported */
4794 if (*msgbuf & E1000_VT_MSGINFO_MASK)
/*
 * igb_set_vf_multicasts - record a VF's multicast hash list
 * @adapter: board private structure
 * @msgbuf: mailbox message; count is in word 0, 16-bit hashes follow
 * @vf: VF index
 *
 * Saves the hashes in vf_data so they can be restored after a filter
 * reset, then rewrites the MTA via igb_set_rx_mode().  Only up to 30
 * hash values are supported (the clamp line is elided in this listing).
 */
4801 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4802 u32 *msgbuf, u32 vf)
4804 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4805 u16 *hash_list = (u16 *)&msgbuf[1];
4806 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4809 /* salt away the number of multicast addresses assigned
4810 * to this VF for later use to restore when the PF multi cast
4813 vf_data->num_vf_mc_hashes = n;
4815 /* only up to 30 hash values supported */
4819 /* store the hashes for later use */
4820 for (i = 0; i < n; i++)
4821 vf_data->vf_mc_hashes[i] = hash_list[i];
4823 /* Flush and reset the mta with the new values */
4824 igb_set_rx_mode(adapter->netdev);
/*
 * igb_restore_vf_multicasts - replay each VF's saved multicast state
 * @adapter: board private structure
 *
 * Called after the MTA has been reset: for every VF either re-enables
 * multicast promiscuous mode (>30 hashes or explicit MULTI_PROMISC) or
 * rewrites its stored hashes into the MTA.
 */
4829 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4831 struct e1000_hw *hw = &adapter->hw;
4832 struct vf_data_storage *vf_data;
4835 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4836 u32 vmolr = rd32(E1000_VMOLR(i));
4837 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4839 vf_data = &adapter->vf_data[i];
4841 if ((vf_data->num_vf_mc_hashes > 30) ||
4842 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4843 vmolr |= E1000_VMOLR_MPME;
4844 } else if (vf_data->num_vf_mc_hashes) {
4845 vmolr |= E1000_VMOLR_ROMPE;
4846 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4847 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4849 wr32(E1000_VMOLR(i), vmolr);
/*
 * igb_clear_vf_vfta - drop a VF from every VLAN pool it belongs to
 * @adapter: board private structure
 * @vf: VF index
 *
 * Scans the whole VLVF array, removes this VF's pool-select bit from
 * each entry, and when a pool becomes empty also clears the VLAN from
 * the VFTA and disables the entry.  Finally zeroes the VF's VLAN count.
 * NOTE(review): the line that actually clears the pool bit from 'reg'
 * is elided in this listing — verify against the full source.
 */
4853 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4855 struct e1000_hw *hw = &adapter->hw;
4856 u32 pool_mask, reg, vid;
4859 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4861 /* Find the vlan filter for this id */
4862 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4863 reg = rd32(E1000_VLVF(i));
4865 /* remove the vf from the pool */
4868 /* if pool is empty then remove entry from vfta */
4869 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4870 (reg & E1000_VLVF_VLANID_ENABLE)) {
4872 vid = reg & E1000_VLVF_VLANID_MASK;
4873 igb_vfta_set(hw, vid, false);
4876 wr32(E1000_VLVF(i), reg);
4879 adapter->vf_data[vf].vlans_enabled = 0;
/*
 * igb_vlvf_set - add or remove a VLAN/pool membership in the VLVF table
 * @adapter: board private structure
 * @vid: VLAN id
 * @add: true to add @vf to the VLAN's pool, false to remove it
 * @vf: pool index (a VF, or the PF's pool when >= vfs_allocated_count)
 *
 * On add: finds the matching enabled VLVF entry (or a free one), sets the
 * pool bit, programs the VFTA for new entries, and grows the VF's VMOLR
 * RLPML receive-length limit to make room for the VLAN tag on the first
 * enabled VLAN.  On remove: clears the pool bit, tears down the entry
 * when its pool empties, and shrinks RLPML when the VF's last VLAN goes.
 * NOTE(review): several structural lines (the add/else split, returns,
 * size +=/-= VLAN_TAG_SIZE) are elided in this listing.
 */
4882 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4884 struct e1000_hw *hw = &adapter->hw;
4887 /* The vlvf table only exists on 82576 hardware and newer */
4888 if (hw->mac.type < e1000_82576)
4891 /* we only need to do this if VMDq is enabled */
4892 if (!adapter->vfs_allocated_count)
4895 /* Find the vlan filter for this id */
4896 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4897 reg = rd32(E1000_VLVF(i));
4898 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4899 vid == (reg & E1000_VLVF_VLANID_MASK))
4904 if (i == E1000_VLVF_ARRAY_SIZE) {
4905 /* Did not find a matching VLAN ID entry that was
4906 * enabled. Search for a free filter entry, i.e.
4907 * one without the enable bit set
4909 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4910 reg = rd32(E1000_VLVF(i));
4911 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4915 if (i < E1000_VLVF_ARRAY_SIZE) {
4916 /* Found an enabled/available entry */
4917 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4919 /* if !enabled we need to set this up in vfta */
4920 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4921 /* add VID to filter table */
4922 igb_vfta_set(hw, vid, true);
4923 reg |= E1000_VLVF_VLANID_ENABLE;
4925 reg &= ~E1000_VLVF_VLANID_MASK;
4927 wr32(E1000_VLVF(i), reg);
4929 /* do not modify RLPML for PF devices */
4930 if (vf >= adapter->vfs_allocated_count)
/* first VLAN for this VF: widen the Rx length limit for the tag */
4933 if (!adapter->vf_data[vf].vlans_enabled) {
4935 reg = rd32(E1000_VMOLR(vf));
4936 size = reg & E1000_VMOLR_RLPML_MASK;
4938 reg &= ~E1000_VMOLR_RLPML_MASK;
4940 wr32(E1000_VMOLR(vf), reg);
4943 adapter->vf_data[vf].vlans_enabled++;
4947 if (i < E1000_VLVF_ARRAY_SIZE) {
4948 /* remove vf from the pool */
4949 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4950 /* if pool is empty then remove entry from vfta */
4951 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4953 igb_vfta_set(hw, vid, false);
4955 wr32(E1000_VLVF(i), reg);
4957 /* do not modify RLPML for PF devices */
4958 if (vf >= adapter->vfs_allocated_count)
4961 adapter->vf_data[vf].vlans_enabled--;
/* last VLAN removed: shrink the Rx length limit back */
4962 if (!adapter->vf_data[vf].vlans_enabled) {
4964 reg = rd32(E1000_VMOLR(vf));
4965 size = reg & E1000_VMOLR_RLPML_MASK;
4967 reg &= ~E1000_VMOLR_RLPML_MASK;
4969 wr32(E1000_VMOLR(vf), reg);
/*
 * igb_set_vmvir - program the VF's VLAN insert register
 * @adapter: board private structure
 * @vid: VLAN id (with QoS bits already merged in by the caller); 0 clears
 * @vf: VF index
 *
 * Non-zero vid enables default VLAN tag insertion for the VF; zero
 * disables it.  (The if/else around the two writes is elided here.)
 */
4976 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4978 struct e1000_hw *hw = &adapter->hw;
4981 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4983 wr32(E1000_VMVIR(vf), 0);
/*
 * igb_ndo_set_vf_vlan - ndo callback: administratively set a VF's VLAN
 * @netdev: PF net device
 * @vf: VF index
 * @vlan: VLAN id (0 clears the administrative VLAN)
 * @qos: 802.1p priority (0-7)
 *
 * Validates the arguments, programs VLVF/VMVIR/VMOLR accordingly and
 * records pf_vlan/pf_qos so VF-originated changes can be refused later.
 * Warns if the PF is down, since the VF cannot pass traffic until then.
 * NOTE(review): the vlan==0 "clear" branch structure is elided in this
 * listing.
 */
4986 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4987 int vf, u16 vlan, u8 qos)
4990 struct igb_adapter *adapter = netdev_priv(netdev);
/* reject out-of-range vf index, VLAN id, or 802.1p priority */
4992 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4995 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4998 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4999 igb_set_vmolr(adapter, vf, !vlan);
5000 adapter->vf_data[vf].pf_vlan = vlan;
5001 adapter->vf_data[vf].pf_qos = qos;
5002 dev_info(&adapter->pdev->dev,
5003 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5004 if (test_bit(__IGB_DOWN, &adapter->state)) {
5005 dev_warn(&adapter->pdev->dev,
5006 "The VF VLAN has been set,"
5007 " but the PF device is not up.\n");
5008 dev_warn(&adapter->pdev->dev,
5009 "Bring the PF device up before"
5010 " attempting to use the VF device.\n");
/* clear path: remove the old admin VLAN and reset per-VF state */
5013 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5015 igb_set_vmvir(adapter, vlan, vf);
5016 igb_set_vmolr(adapter, vf, true);
5017 adapter->vf_data[vf].pf_vlan = 0;
5018 adapter->vf_data[vf].pf_qos = 0;
/*
 * igb_set_vf_vlan - handle a VF mailbox request to add/remove a VLAN
 * @adapter: board private structure
 * @msgbuf: word 0 carries the add/remove flag, word 1 the VLAN id
 * @vf: VF index
 */
5024 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5026 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5027 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5029 return igb_vlvf_set(adapter, vid, add, vf);
/*
 * igb_vf_reset - restore a VF's offload/filter state to defaults
 * @adapter: board private structure
 * @vf: VF index
 *
 * Keeps only the PF-set-MAC flag, re-arms the NACK timestamp, resets
 * VMOLR, clears the VF's VLAN filters (re-adding an administratively
 * set pf_vlan if one exists), drops its multicast hashes and rebuilds
 * the MTA.
 * NOTE(review): igb_clear_vf_vfta() appears twice below; in the full
 * source the second call sits on an elided 'else' branch — confirm
 * before treating this as a duplicate call.
 */
5032 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5034 /* clear flags - except flag that indicates PF has set the MAC */
5035 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5036 adapter->vf_data[vf].last_nack = jiffies;
5038 /* reset offloads to defaults */
5039 igb_set_vmolr(adapter, vf, true);
5041 /* reset vlans for device */
5042 igb_clear_vf_vfta(adapter, vf);
5043 if (adapter->vf_data[vf].pf_vlan)
5044 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5045 adapter->vf_data[vf].pf_vlan,
5046 adapter->vf_data[vf].pf_qos);
5048 igb_clear_vf_vfta(adapter, vf);
5050 /* reset multicast table array for vf */
5051 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5053 /* Flush and reset the mta with the new values */
5054 igb_set_rx_mode(adapter->netdev);
/*
 * igb_vf_reset_event - handle a hardware-detected VF reset
 * @adapter: board private structure
 * @vf: VF index
 *
 * Generates a fresh random MAC (unless the PF pinned one) and then runs
 * the common igb_vf_reset() state cleanup.
 */
5057 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5059 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5061 /* generate a new mac address as we were hotplug removed/added */
5062 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5063 random_ether_addr(vf_mac);
5065 /* process remaining reset events */
5066 igb_vf_reset(adapter, vf);
/*
 * igb_vf_reset_msg - complete a VF-initiated reset handshake
 * @adapter: board private structure
 * @vf: VF index
 *
 * Performs the common reset cleanup, installs the VF MAC into its RAR
 * slot (slots are assigned from the top of the RAR table, one per VF),
 * re-enables Tx/Rx for the VF in VFTE/VFRE, marks the VF clear-to-send,
 * and replies with ACK plus the 6-byte MAC address.
 */
5069 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5071 struct e1000_hw *hw = &adapter->hw;
5072 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5073 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5075 u8 *addr = (u8 *)(&msgbuf[1]);
5077 /* process all the same items cleared in a function level reset */
5078 igb_vf_reset(adapter, vf);
5080 /* set vf mac address */
5081 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5083 /* enable transmit and receive for vf */
5084 reg = rd32(E1000_VFTE);
5085 wr32(E1000_VFTE, reg | (1 << vf));
5086 reg = rd32(E1000_VFRE);
5087 wr32(E1000_VFRE, reg | (1 << vf));
5089 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5091 /* reply to reset with ack and vf mac address */
5092 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5093 memcpy(addr, vf_mac, 6);
5094 igb_write_mbx(hw, msgbuf, 3, vf);
/*
 * igb_set_vf_mac_addr - handle a VF mailbox request to change its MAC
 * @adapter: board private structure
 * @msg: mailbox message; MAC bytes start at word 1
 * @vf: VF index
 *
 * Only applies the address if it passes is_valid_ether_addr(); the
 * error path / return are elided in this listing.
 */
5097 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5100 * The VF MAC Address is stored in a packed array of bytes
5101 * starting at the second 32 bit word of the msg array
5103 unsigned char *addr = (char *)&msg[1];
5106 if (is_valid_ether_addr(addr))
5107 err = igb_set_vf_mac(adapter, vf, addr);
/*
 * igb_rcv_ack_from_vf - respond to an ACK from a VF that isn't CTS
 * @adapter: board private structure
 * @vf: VF index
 *
 * A VF that has not completed reset (no CTS flag) should not be talking;
 * NACK it, but rate-limit the NACKs to one every 2 seconds.
 */
5112 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5114 struct e1000_hw *hw = &adapter->hw;
5115 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5116 u32 msg = E1000_VT_MSGTYPE_NACK;
5118 /* if device isn't clear to send it shouldn't be reading either */
5119 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5120 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5121 igb_write_mbx(hw, &msg, 1, vf);
5122 vf_data->last_nack = jiffies;
/*
 * igb_rcv_msg_from_vf - read and dispatch one VF mailbox message
 * @adapter: board private structure
 * @vf: VF index
 *
 * Reads the mailbox, ignores already-processed (ACK/NACK) messages,
 * handles VF_RESET specially, refuses configuration from VFs that are
 * not clear-to-send (rate-limited NACK), then dispatches on the low
 * 16 bits of the message type.  Administratively-set MAC/VLAN override
 * attempts are refused with a warning.  The reply carries CTS plus
 * ACK on success or NACK on failure.
 * NOTE(review): several control lines (switch(msgbuf[0]), breaks,
 * returns) are elided in this listing.
 */
5126 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5128 struct pci_dev *pdev = adapter->pdev;
5129 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5130 struct e1000_hw *hw = &adapter->hw;
5131 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5134 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5137 /* if receive failed revoke VF CTS stats and restart init */
5138 dev_err(&pdev->dev, "Error receiving message from VF\n");
5139 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5140 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5145 /* this is a message we already processed, do nothing */
5146 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5150 * until the vf completes a reset it should not be
5151 * allowed to start any configuration.
5154 if (msgbuf[0] == E1000_VF_RESET) {
5155 igb_vf_reset_msg(adapter, vf);
5159 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5160 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5166 switch ((msgbuf[0] & 0xFFFF)) {
5167 case E1000_VF_SET_MAC_ADDR:
/* refuse MAC changes when the PF pinned the address */
5169 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5170 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5172 dev_warn(&pdev->dev,
5173 "VF %d attempted to override administratively "
5174 "set MAC address\nReload the VF driver to "
5175 "resume operations\n", vf);
5177 case E1000_VF_SET_PROMISC:
5178 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5180 case E1000_VF_SET_MULTICAST:
5181 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5183 case E1000_VF_SET_LPE:
5184 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5186 case E1000_VF_SET_VLAN:
/* refuse VLAN changes when the PF set an admin VLAN */
5188 if (vf_data->pf_vlan)
5189 dev_warn(&pdev->dev,
5190 "VF %d attempted to override administratively "
5191 "set VLAN tag\nReload the VF driver to "
5192 "resume operations\n", vf);
5194 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5197 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5202 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5204 /* notify the VF of the results of what it sent us */
5206 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5208 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5210 igb_write_mbx(hw, msgbuf, 1, vf);
/*
 * igb_msg_task - poll every VF's mailbox for resets, messages and acks
 * @adapter: board private structure
 *
 * The igb_check_for_* helpers return 0 when the corresponding event is
 * pending, hence the negated tests.
 */
5213 static void igb_msg_task(struct igb_adapter *adapter)
5215 struct e1000_hw *hw = &adapter->hw;
5218 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5219 /* process any reset requests */
5220 if (!igb_check_for_rst(hw, vf))
5221 igb_vf_reset_event(adapter, vf);
5223 /* process any messages pending */
5224 if (!igb_check_for_msg(hw, vf))
5225 igb_rcv_msg_from_vf(adapter, vf);
5227 /* process any acks */
5228 if (!igb_check_for_ack(hw, vf))
5229 igb_rcv_ack_from_vf(adapter, vf);
5234 * igb_set_uta - Set unicast filter table address
5235 * @adapter: board private structure
5237 * The unicast table address is a register array of 32-bit registers.
5238 * The table is meant to be used in a way similar to how the MTA is used
5239 * however due to certain limitations in the hardware it is necessary to
5240 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5241 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5243 static void igb_set_uta(struct igb_adapter *adapter)
5245 struct e1000_hw *hw = &adapter->hw;
5248 /* The UTA table only exists on 82576 hardware and newer */
5249 if (hw->mac.type < e1000_82576)
5252 /* we only need to do this if VMDq is enabled */
5253 if (!adapter->vfs_allocated_count)
/* set every hash bit: per-VF ROPE in VMOLR then acts as the gate */
5256 for (i = 0; i < hw->mac.uta_reg_count; i++)
5257 array_wr32(E1000_UTA, i, ~0);
5261 * igb_intr_msi - Interrupt Handler (MSI mode)
5262 * @irq: interrupt number
5263 * @data: pointer to a network interface device structure
5265 static irqreturn_t igb_intr_msi(int irq, void *data)
5267 struct igb_adapter *adapter = data;
5268 struct igb_q_vector *q_vector = adapter->q_vector[0];
5269 struct e1000_hw *hw = &adapter->hw;
5270 /* read ICR disables interrupts using IAM */
5271 u32 icr = rd32(E1000_ICR);
5273 igb_write_itr(q_vector);
/* device reset was requested by hardware */
5275 if (icr & E1000_ICR_DRSTA)
5276 schedule_work(&adapter->reset_task);
5278 if (icr & E1000_ICR_DOUTSYNC) {
5279 /* HW is reporting DMA is out of sync */
5280 adapter->stats.doosync++;
/* link-state change: let the watchdog re-evaluate the link */
5283 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5284 hw->mac.get_link_status = 1;
5285 if (!test_bit(__IGB_DOWN, &adapter->state))
5286 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5289 napi_schedule(&q_vector->napi);
5295 * igb_intr - Legacy Interrupt Handler
5296 * @irq: interrupt number
5297 * @data: pointer to a network interface device structure
5299 static irqreturn_t igb_intr(int irq, void *data)
5301 struct igb_adapter *adapter = data;
5302 struct igb_q_vector *q_vector = adapter->q_vector[0];
5303 struct e1000_hw *hw = &adapter->hw;
5304 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5305 * need for the IMC write */
5306 u32 icr = rd32(E1000_ICR);
/* shared line: icr == 0 means another device raised the IRQ */
5308 return IRQ_NONE; /* Not our interrupt */
5310 igb_write_itr(q_vector);
5312 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5313 * not set, then the adapter didn't send an interrupt */
5314 if (!(icr & E1000_ICR_INT_ASSERTED))
5317 if (icr & E1000_ICR_DRSTA)
5318 schedule_work(&adapter->reset_task);
5320 if (icr & E1000_ICR_DOUTSYNC) {
5321 /* HW is reporting DMA is out of sync */
5322 adapter->stats.doosync++;
5325 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5326 hw->mac.get_link_status = 1;
5327 /* guard against interrupt when we're going down */
5328 if (!test_bit(__IGB_DOWN, &adapter->state))
5329 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5332 napi_schedule(&q_vector->napi);
/*
 * igb_ring_irq_enable - update ITR and re-enable this vector's interrupt
 * @q_vector: vector whose NAPI poll just completed
 *
 * Recomputes the interrupt throttle rate when dynamic ITR is configured
 * (per-adapter for non-MSI-X, per-ring otherwise), then unmasks either
 * just this vector's EIMS bit (MSI-X) or the whole device.
 */
5337 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5339 struct igb_adapter *adapter = q_vector->adapter;
5340 struct e1000_hw *hw = &adapter->hw;
5342 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5343 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5344 if (!adapter->msix_entries)
5345 igb_set_itr(adapter);
5347 igb_update_ring_itr(q_vector);
/* only re-enable while the interface is up */
5350 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5351 if (adapter->msix_entries)
5352 wr32(E1000_EIMS, q_vector->eims_value);
5354 igb_irq_enable(adapter);
5359 * igb_poll - NAPI Rx polling callback
5360 * @napi: napi polling structure
5361 * @budget: count of how many packets we should handle
5363 static int igb_poll(struct napi_struct *napi, int budget)
5365 struct igb_q_vector *q_vector = container_of(napi,
5366 struct igb_q_vector,
5368 int tx_clean_complete = 1, work_done = 0;
5370 #ifdef CONFIG_IGB_DCA
5371 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5372 igb_update_dca(q_vector);
5374 if (q_vector->tx_ring)
5375 tx_clean_complete = igb_clean_tx_irq(q_vector);
5377 if (q_vector->rx_ring)
5378 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
/* Tx not fully cleaned: stay in polling mode (keep budget consumed) */
5380 if (!tx_clean_complete)
5383 /* If not enough Rx work done, exit the polling mode */
5384 if (work_done < budget) {
5385 napi_complete(napi);
5386 igb_ring_irq_enable(q_vector);
5393 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5394 * @adapter: board private structure
5395 * @shhwtstamps: timestamp structure to update
5396 * @regval: unsigned 64bit system time value.
5398 * We need to convert the system time value stored in the RX/TXSTMP registers
5399 * into a hwtstamp which can be used by the upper level timestamping functions
5401 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5402 struct skb_shared_hwtstamps *shhwtstamps,
5408 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5409 * 24 to match clock shift we setup earlier.
5411 if (adapter->hw.mac.type == e1000_82580)
5412 regval <<= IGB_82580_TSYNC_SHIFT;
/* cycle counter -> nanoseconds, then keep sys/hw comparison fresh */
5414 ns = timecounter_cyc2time(&adapter->clock, regval);
5415 timecompare_update(&adapter->compare, ns);
5416 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5417 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5418 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5422 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5423 * @q_vector: pointer to q_vector containing needed info
5424 * @buffer: pointer to igb_buffer structure
5426 * If we were asked to do hardware stamping and such a time stamp is
5427 * available, then it must have been for this skb here because we
5428 * allow only one such packet into the queue.
5430 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5432 struct igb_adapter *adapter = q_vector->adapter;
5433 struct e1000_hw *hw = &adapter->hw;
5434 struct skb_shared_hwtstamps shhwtstamps;
5437 /* if skb does not support hw timestamp or TX stamp not valid exit */
5438 if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5439 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
/* reading TXSTMPH releases the registers for the next packet */
5442 regval = rd32(E1000_TXSTMPL);
5443 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5445 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5446 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5450 * igb_clean_tx_irq - Reclaim resources after transmit completes
5451 * @q_vector: pointer to q_vector containing needed info
5452 * returns true if ring is completely cleaned
5454 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5456 struct igb_adapter *adapter = q_vector->adapter;
5457 struct igb_ring *tx_ring = q_vector->tx_ring;
5458 struct net_device *netdev = tx_ring->netdev;
5459 struct e1000_hw *hw = &adapter->hw;
5460 struct igb_buffer *buffer_info;
5461 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5462 unsigned int total_bytes = 0, total_packets = 0;
5463 unsigned int i, eop, count = 0;
5464 bool cleaned = false;
5466 i = tx_ring->next_to_clean;
5467 eop = tx_ring->buffer_info[i].next_to_watch;
5468 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
/* outer loop: one completed frame (up to its end-of-packet desc) per
 * iteration; count bounds the work to one full ring's worth */
5470 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5471 (count < tx_ring->count)) {
5472 rmb(); /* read buffer_info after eop_desc status */
5473 for (cleaned = false; !cleaned; count++) {
5474 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5475 buffer_info = &tx_ring->buffer_info[i];
5476 cleaned = (i == eop);
/* only the descriptor holding the skb carries the stats */
5478 if (buffer_info->skb) {
5479 total_bytes += buffer_info->bytecount;
5480 /* gso_segs is currently only valid for tcp */
5481 total_packets += buffer_info->gso_segs;
5482 igb_tx_hwtstamp(q_vector, buffer_info);
5485 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5486 tx_desc->wb.status = 0;
5489 if (i == tx_ring->count)
5492 eop = tx_ring->buffer_info[i].next_to_watch;
5493 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5496 tx_ring->next_to_clean = i;
/* wake the queue if we freed enough descriptors for a full frame */
5498 if (unlikely(count &&
5499 netif_carrier_ok(netdev) &&
5500 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5501 /* Make sure that anybody stopping the queue after this
5502 * sees the new next_to_clean.
5505 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5506 !(test_bit(__IGB_DOWN, &adapter->state))) {
5507 netif_wake_subqueue(netdev, tx_ring->queue_index);
5509 u64_stats_update_begin(&tx_ring->tx_syncp);
5510 tx_ring->tx_stats.restart_queue++;
5511 u64_stats_update_end(&tx_ring->tx_syncp);
5515 if (tx_ring->detect_tx_hung) {
5516 /* Detect a transmit hang in hardware, this serializes the
5517 * check with the clearing of time_stamp and movement of i */
5518 tx_ring->detect_tx_hung = false;
5519 if (tx_ring->buffer_info[i].time_stamp &&
5520 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5521 (adapter->tx_timeout_factor * HZ)) &&
5522 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5524 /* detected Tx unit hang */
5525 dev_err(tx_ring->dev,
5526 "Detected Tx Unit Hang\n"
5530 " next_to_use <%x>\n"
5531 " next_to_clean <%x>\n"
5532 "buffer_info[next_to_clean]\n"
5533 " time_stamp <%lx>\n"
5534 " next_to_watch <%x>\n"
5536 " desc.status <%x>\n",
5537 tx_ring->queue_index,
5538 readl(tx_ring->head),
5539 readl(tx_ring->tail),
5540 tx_ring->next_to_use,
5541 tx_ring->next_to_clean,
5542 tx_ring->buffer_info[eop].time_stamp,
5545 eop_desc->wb.status);
5546 netif_stop_subqueue(netdev, tx_ring->queue_index);
5549 tx_ring->total_bytes += total_bytes;
5550 tx_ring->total_packets += total_packets;
5551 u64_stats_update_begin(&tx_ring->tx_syncp);
5552 tx_ring->tx_stats.bytes += total_bytes;
5553 tx_ring->tx_stats.packets += total_packets;
5554 u64_stats_update_end(&tx_ring->tx_syncp);
/* false (count == ring size) means there is more Tx work pending */
5555 return count < tx_ring->count;
5559 * igb_receive_skb - helper function to handle rx indications
5560 * @q_vector: structure containing interrupt and ring information
5561 * @skb: packet to send up
5562 * @vlan_tag: vlan tag for packet
5564 static void igb_receive_skb(struct igb_q_vector *q_vector,
5565 struct sk_buff *skb,
5568 struct igb_adapter *adapter = q_vector->adapter;
/* VLAN-tagged frames go through the vlan GRO path when a group exists */
5570 if (vlan_tag && adapter->vlgrp)
5571 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5574 napi_gro_receive(&q_vector->napi, skb);
/*
 * igb_rx_checksum_adv - set skb checksum state from Rx descriptor status
 * @ring: Rx ring the packet arrived on
 * @status_err: descriptor status/error bits
 * @skb: packet being processed
 *
 * Leaves the skb as CHECKSUM_NONE when hardware checksumming is off or
 * the IXSM bit is set; otherwise counts real L3/L4 checksum errors
 * (working around an SCTP errata on 60-byte frames) or marks the skb
 * CHECKSUM_UNNECESSARY on a valid TCP/UDP checksum.
 */
5577 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5578 u32 status_err, struct sk_buff *skb)
5580 skb_checksum_none_assert(skb);
5582 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5583 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5584 (status_err & E1000_RXD_STAT_IXSM))
5587 /* TCP/UDP checksum error bit is set */
5589 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5591 * work around errata with sctp packets where the TCPE aka
5592 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5593 * packets, (aka let the stack check the crc32c)
5595 if ((skb->len == 60) &&
5596 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5597 u64_stats_update_begin(&ring->rx_syncp);
5598 ring->rx_stats.csum_err++;
5599 u64_stats_update_end(&ring->rx_syncp);
5601 /* let the stack verify checksum errors */
5604 /* It must be a TCP or UDP packet with a valid checksum */
5605 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5606 skb->ip_summed = CHECKSUM_UNNECESSARY;
5608 dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
/*
 * igb_rx_hwtstamp - attach a hardware receive timestamp to an skb
 * @q_vector: vector that received the packet
 * @staterr: Rx descriptor status bits
 * @skb: packet being processed
 *
 * Timestamps arrive either inline in the packet buffer (TSIP: two LE32
 * words ahead of the payload, which are stripped via skb_pull) or in the
 * RXSTMPL/RXSTMPH registers, which must be read to re-arm timestamping.
 */
5611 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5612 struct sk_buff *skb)
5614 struct igb_adapter *adapter = q_vector->adapter;
5615 struct e1000_hw *hw = &adapter->hw;
5619 * If this bit is set, then the RX registers contain the time stamp. No
5620 * other packet will be time stamped until we read these registers, so
5621 * read the registers to make them available again. Because only one
5622 * packet can be time stamped at a time, we know that the register
5623 * values must belong to this one here and therefore we don't need to
5624 * compare any of the additional attributes stored for it.
5626 * If nothing went wrong, then it should have a shared tx_flags that we
5627 * can turn into a skb_shared_hwtstamps.
5629 if (staterr & E1000_RXDADV_STAT_TSIP) {
5630 u32 *stamp = (u32 *)skb->data;
5631 regval = le32_to_cpu(*(stamp + 2));
5632 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
/* remove the in-band timestamp header from the packet data */
5633 skb_pull(skb, IGB_TS_HDR_LEN);
5635 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5638 regval = rd32(E1000_RXSTMPL);
5639 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5642 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
/*
 * igb_get_hlen - extract the header length the hardware DMA'd into the
 * header buffer, clamped to the ring's buffer size.
 * @rx_ring: Rx ring (provides rx_buffer_len)
 * @rx_desc: write-back descriptor carrying HDRBUFLEN
 */
5644 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5645 union e1000_adv_rx_desc *rx_desc)
5647 /* HW will not DMA in data larger than the given buffer, even if it
5648 * parses the (NFS, of course) header to be larger. In that case, it
5649 * fills the header buffer and spills the rest into the page.
5651 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5652 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5653 if (hlen > rx_ring->rx_buffer_len)
5654 hlen = rx_ring->rx_buffer_len;
/*
 * igb_clean_rx_irq_adv - process received packets up to @budget
 * @q_vector: vector owning the Rx ring
 * @work_done: incremented per packet handled (elided line in listing)
 * @budget: NAPI budget
 *
 * Walks descriptors with the DD bit set, assembling header-split frames
 * (header in a mapped skb buffer, payload spilled into half-page
 * fragments), chaining multi-descriptor packets via the EOP check,
 * stripping HW timestamps, setting checksum state and VLAN tag, and
 * handing completed skbs to igb_receive_skb().  Buffers are re-armed in
 * batches of IGB_RX_BUFFER_WRITE and once more at the end.
 */
5658 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5659 int *work_done, int budget)
5661 struct igb_ring *rx_ring = q_vector->rx_ring;
5662 struct net_device *netdev = rx_ring->netdev;
5663 struct device *dev = rx_ring->dev;
5664 union e1000_adv_rx_desc *rx_desc , *next_rxd;
5665 struct igb_buffer *buffer_info , *next_buffer;
5666 struct sk_buff *skb;
5667 bool cleaned = false;
5668 int cleaned_count = 0;
5669 int current_node = numa_node_id();
5670 unsigned int total_bytes = 0, total_packets = 0;
5676 i = rx_ring->next_to_clean;
5677 buffer_info = &rx_ring->buffer_info[i];
5678 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5679 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5681 while (staterr & E1000_RXD_STAT_DD) {
5682 if (*work_done >= budget)
5685 rmb(); /* read descriptor and rx_buffer_info after status DD */
5687 skb = buffer_info->skb;
5688 prefetch(skb->data - NET_IP_ALIGN);
5689 buffer_info->skb = NULL;
/* prefetch the next descriptor/buffer before touching this one */
5692 if (i == rx_ring->count)
5695 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5697 next_buffer = &rx_ring->buffer_info[i];
5699 length = le16_to_cpu(rx_desc->wb.upper.length);
5703 if (buffer_info->dma) {
5704 dma_unmap_single(dev, buffer_info->dma,
5705 rx_ring->rx_buffer_len,
5707 buffer_info->dma = 0;
/* large single-buffer mode: whole frame is in the skb buffer */
5708 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5709 skb_put(skb, length);
/* header-split mode: only the parsed header is in the skb */
5712 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5716 dma_unmap_page(dev, buffer_info->page_dma,
5717 PAGE_SIZE / 2, DMA_FROM_DEVICE);
5718 buffer_info->page_dma = 0;
5720 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5722 buffer_info->page_offset,
/* recycle the half-page only if we own it and it is node-local */
5725 if ((page_count(buffer_info->page) != 1) ||
5726 (page_to_nid(buffer_info->page) != current_node))
5727 buffer_info->page = NULL;
5729 get_page(buffer_info->page);
5732 skb->data_len += length;
5733 skb->truesize += length;
/* not end-of-packet: stash the skb on the next buffer and continue */
5736 if (!(staterr & E1000_RXD_STAT_EOP)) {
5737 buffer_info->skb = next_buffer->skb;
5738 buffer_info->dma = next_buffer->dma;
5739 next_buffer->skb = skb;
5740 next_buffer->dma = 0;
5744 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5745 dev_kfree_skb_irq(skb);
5749 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5750 igb_rx_hwtstamp(q_vector, staterr, skb);
5751 total_bytes += skb->len;
5754 igb_rx_checksum_adv(rx_ring, staterr, skb);
5756 skb->protocol = eth_type_trans(skb, netdev);
5757 skb_record_rx_queue(skb, rx_ring->queue_index);
5759 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5760 le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5762 igb_receive_skb(q_vector, skb, vlan_tag);
5765 rx_desc->wb.upper.status_error = 0;
5767 /* return some buffers to hardware, one at a time is too slow */
5768 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5769 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5773 /* use prefetched values */
5775 buffer_info = next_buffer;
5776 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5779 rx_ring->next_to_clean = i;
5780 cleaned_count = igb_desc_unused(rx_ring);
5783 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5785 rx_ring->total_packets += total_packets;
5786 rx_ring->total_bytes += total_bytes;
5787 u64_stats_update_begin(&rx_ring->rx_syncp);
5788 rx_ring->rx_stats.packets += total_packets;
5789 rx_ring->rx_stats.bytes += total_bytes;
5790 u64_stats_update_end(&rx_ring->rx_syncp);
5795 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5796 * @adapter: address of board private structure
5798 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5800 struct net_device *netdev = rx_ring->netdev;
5801 union e1000_adv_rx_desc *rx_desc;
5802 struct igb_buffer *buffer_info;
5803 struct sk_buff *skb;
5807 i = rx_ring->next_to_use;
5808 buffer_info = &rx_ring->buffer_info[i];
5810 bufsz = rx_ring->rx_buffer_len;
5812 while (cleaned_count--) {
5813 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
/* header-split mode (small buffer): also need a half-page for data */
5815 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5816 if (!buffer_info->page) {
5817 buffer_info->page = netdev_alloc_page(netdev);
5818 if (unlikely(!buffer_info->page)) {
5819 u64_stats_update_begin(&rx_ring->rx_syncp);
5820 rx_ring->rx_stats.alloc_failed++;
5821 u64_stats_update_end(&rx_ring->rx_syncp);
5824 buffer_info->page_offset = 0;
/* recycled page: flip to the other half */
5826 buffer_info->page_offset ^= PAGE_SIZE / 2;
5828 buffer_info->page_dma =
5829 dma_map_page(rx_ring->dev, buffer_info->page,
5830 buffer_info->page_offset,
5833 if (dma_mapping_error(rx_ring->dev,
5834 buffer_info->page_dma)) {
5835 buffer_info->page_dma = 0;
5836 u64_stats_update_begin(&rx_ring->rx_syncp);
5837 rx_ring->rx_stats.alloc_failed++;
5838 u64_stats_update_end(&rx_ring->rx_syncp);
5843 skb = buffer_info->skb;
5845 skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5846 if (unlikely(!skb)) {
5847 u64_stats_update_begin(&rx_ring->rx_syncp);
5848 rx_ring->rx_stats.alloc_failed++;
5849 u64_stats_update_end(&rx_ring->rx_syncp);
5853 buffer_info->skb = skb;
5855 if (!buffer_info->dma) {
5856 buffer_info->dma = dma_map_single(rx_ring->dev,
5860 if (dma_mapping_error(rx_ring->dev,
5861 buffer_info->dma)) {
5862 buffer_info->dma = 0;
5863 u64_stats_update_begin(&rx_ring->rx_syncp);
5864 rx_ring->rx_stats.alloc_failed++;
5865 u64_stats_update_end(&rx_ring->rx_syncp);
5869 /* Refresh the desc even if buffer_addrs didn't change because
5870 * each write-back erases this info. */
5871 if (bufsz < IGB_RXBUFFER_1024) {
/* split: page holds packet data, skb buffer holds the header */
5872 rx_desc->read.pkt_addr =
5873 cpu_to_le64(buffer_info->page_dma);
5874 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5876 rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5877 rx_desc->read.hdr_addr = 0;
5881 if (i == rx_ring->count)
5883 buffer_info = &rx_ring->buffer_info[i];
5887 if (rx_ring->next_to_use != i) {
5888 rx_ring->next_to_use = i;
5890 i = (rx_ring->count - 1);
5894 /* Force memory writes to complete before letting h/w
5895 * know there are new descriptors to fetch. (Only
5896 * applicable for weak-ordered memory model archs,
5897 * such as IA-64). */
5899 writel(i, rx_ring->tail);
/* igb_mii_ioctl - handle MII ioctls (SIOCGMIIPHY/SIOCGMIIREG family).
 * Only copper PHYs are supported; reports the PHY address and reads a
 * 5-bit PHY register number via igb_read_phy_reg().
 * NOTE(review): the tail of this function (register read result handling
 * and return paths) is elided in this chunk.
 */
5909 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5911 struct igb_adapter *adapter = netdev_priv(netdev);
5912 struct mii_ioctl_data *data = if_mii(ifr);
5914 if (adapter->hw.phy.media_type != e1000_media_type_copper)
5919 data->phy_id = adapter->hw.phy.addr;
5922 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
/* Handler for the SIOCSHWTSTAMP ioctl: copies a hwtstamp_config from
 * userspace, maps the requested tx_type/rx_filter onto the TSYNCTXCTL/
 * TSYNCRXCTL/TSYNCRXCFG registers (with MAC-specific quirks for 82575,
 * 82576 and 82580), programs ETQF(3)/FTQF(3) packet filters for PTP
 * (ethertype 0x88F7 / UDP port 319), clears the stale timestamp latches,
 * and copies the (possibly adjusted) config back to userspace.
 * NOTE(review): many lines are elided here (break statements, the wr32 for
 * E1000_ETQF(3), return paths); surviving lines preserved verbatim.
 */
5934 * igb_hwtstamp_ioctl - control hardware time stamping
5939 * Outgoing time stamping can be enabled and disabled. Play nice and
5940 * disable it when requested, although it shouldn't case any overhead
5941 * when no packet needs it. At most one packet in the queue may be
5942 * marked for time stamping, otherwise it would be impossible to tell
5943 * for sure to which packet the hardware time stamp belongs.
5945 * Incoming time stamping has to be configured via the hardware
5946 * filters. Not all combinations are supported, in particular event
5947 * type has to be specified. Matching the kind of event packet is
5948 * not supported, with the exception of "all V2 events regardless of
5952 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5953 struct ifreq *ifr, int cmd)
5955 struct igb_adapter *adapter = netdev_priv(netdev);
5956 struct e1000_hw *hw = &adapter->hw;
5957 struct hwtstamp_config config;
5958 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5959 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5960 u32 tsync_rx_cfg = 0;
5965 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5968 /* reserved for future extensions */
/* map the requested TX timestamp mode onto tsync_tx_ctl */
5972 switch (config.tx_type) {
5973 case HWTSTAMP_TX_OFF:
5975 case HWTSTAMP_TX_ON:
/* map the requested RX filter onto tsync_rx_ctl / tsync_rx_cfg;
 * unsupported exact matches fall back to coarser filters below */
5981 switch (config.rx_filter) {
5982 case HWTSTAMP_FILTER_NONE:
5985 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5986 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5987 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5988 case HWTSTAMP_FILTER_ALL:
5990 * register TSYNCRXCFG must be set, therefore it is not
5991 * possible to time stamp both Sync and Delay_Req messages
5992 * => fall back to time stamping all packets
5994 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5995 config.rx_filter = HWTSTAMP_FILTER_ALL;
5997 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5998 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5999 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6002 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6003 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6004 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6007 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6008 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6009 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6010 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6013 config.rx_filter = HWTSTAMP_FILTER_SOME;
6015 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6016 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6017 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6018 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6021 config.rx_filter = HWTSTAMP_FILTER_SOME;
6023 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6024 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6025 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6026 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6027 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
/* 82575: silicon cannot timestamp — presumably rejects any enable;
 * the rejection branch is elided here. TODO confirm against upstream. */
6034 if (hw->mac.type == e1000_82575) {
6035 if (tsync_rx_ctl | tsync_tx_ctl)
6041 * Per-packet timestamping only works if all packets are
6042 * timestamped, so enable timestamping in all packets as
6043 * long as one rx filter was configured.
6045 if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6046 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6047 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6050 /* enable/disable TX */
6051 regval = rd32(E1000_TSYNCTXCTL);
6052 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6053 regval |= tsync_tx_ctl;
6054 wr32(E1000_TSYNCTXCTL, regval);
6056 /* enable/disable RX */
6057 regval = rd32(E1000_TSYNCRXCTL);
6058 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6059 regval |= tsync_rx_ctl;
6060 wr32(E1000_TSYNCRXCTL, regval);
6062 /* define which PTP packets are time stamped */
6063 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6065 /* define ethertype filter for timestamped packets */
6068 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6069 E1000_ETQF_1588 | /* enable timestamping */
6070 ETH_P_1588)); /* 1588 eth protocol type */
6072 wr32(E1000_ETQF(3), 0);
6074 #define PTP_PORT 319
6075 /* L4 Queue Filter[3]: filter by destination port and protocol */
6077 u32 ftqf = (IPPROTO_UDP /* UDP */
6078 | E1000_FTQF_VF_BP /* VF not compared */
6079 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6080 | E1000_FTQF_MASK); /* mask all inputs */
6081 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6083 wr32(E1000_IMIR(3), htons(PTP_PORT));
6084 wr32(E1000_IMIREXT(3),
6085 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6086 if (hw->mac.type == e1000_82576) {
6087 /* enable source port check */
6088 wr32(E1000_SPQF(3), htons(PTP_PORT));
6089 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6091 wr32(E1000_FTQF(3), ftqf);
6093 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6097 adapter->hwtstamp_config = config;
6099 /* clear TX/RX time stamp registers, just to be sure */
6100 regval = rd32(E1000_TXSTMPH);
6101 regval = rd32(E1000_RXSTMPH);
6103 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
/* igb_ioctl - top-level net_device ioctl dispatcher; routes MII requests to
 * igb_mii_ioctl() and SIOCSHWTSTAMP to igb_hwtstamp_ioctl().
 * NOTE(review): the switch scaffolding (case labels, default return) is
 * elided in this chunk.
 */
6113 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6119 return igb_mii_ioctl(netdev, ifr, cmd);
6121 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
/* igb_read_pcie_cap_reg - read a 16-bit register from the PCIe capability
 * block of the underlying PCI device. Returns -E1000_ERR_CONFIG when no
 * PCIe capability is found (the if-test on cap_offset is elided here).
 */
6127 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6129 struct igb_adapter *adapter = hw->back;
6132 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6134 return -E1000_ERR_CONFIG;
6136 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
/* igb_write_pcie_cap_reg - write a 16-bit value into the PCIe capability
 * block; mirror of igb_read_pcie_cap_reg(). Fails with -E1000_ERR_CONFIG
 * when the PCIe capability is absent (the test itself is elided here).
 */
6141 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6143 struct igb_adapter *adapter = hw->back;
6146 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6148 return -E1000_ERR_CONFIG;
6150 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
/* igb_vlan_rx_register - install (or clear) the VLAN group and toggle
 * hardware VLAN tag insert/strip (CTRL.VME) and the CFI check accordingly,
 * with IRQs disabled around the update; finishes by refreshing the max
 * packet size (rlpml) and re-enabling IRQs unless the device is DOWN.
 * NOTE(review): the if/else branch structure around grp is elided.
 */
6155 static void igb_vlan_rx_register(struct net_device *netdev,
6156 struct vlan_group *grp)
6158 struct igb_adapter *adapter = netdev_priv(netdev);
6159 struct e1000_hw *hw = &adapter->hw;
6162 igb_irq_disable(adapter);
6163 adapter->vlgrp = grp;
6166 /* enable VLAN tag insert/strip */
6167 ctrl = rd32(E1000_CTRL);
6168 ctrl |= E1000_CTRL_VME;
6169 wr32(E1000_CTRL, ctrl);
6171 /* Disable CFI check */
6172 rctl = rd32(E1000_RCTL);
6173 rctl &= ~E1000_RCTL_CFIEN;
6174 wr32(E1000_RCTL, rctl);
6176 /* disable VLAN tag insert/strip */
6177 ctrl = rd32(E1000_CTRL);
6178 ctrl &= ~E1000_CTRL_VME;
6179 wr32(E1000_CTRL, ctrl);
6182 igb_rlpml_set(adapter);
6184 if (!test_bit(__IGB_DOWN, &adapter->state))
6185 igb_irq_enable(adapter);
/* igb_vlan_rx_add_vid - add a VLAN id for the PF: try the VLVF pool filter
 * first, then always set the VFTA bit so the PF receives the VLAN even
 * without a VLVF entry.
 */
6188 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6190 struct igb_adapter *adapter = netdev_priv(netdev);
6191 struct e1000_hw *hw = &adapter->hw;
6192 int pf_id = adapter->vfs_allocated_count;
6194 /* attempt to add filter to vlvf array */
6195 igb_vlvf_set(adapter, vid, true, pf_id);
6197 /* add the filter since PF can receive vlans w/o entry in vlvf */
6198 igb_vfta_set(hw, vid, true);
/* igb_vlan_rx_kill_vid - remove a VLAN id: detach it from the vlan group
 * (with IRQs disabled), drop the VLVF entry, and clear the VFTA bit when
 * the vid was not present in VLVF (the err test itself is elided here).
 */
6201 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6203 struct igb_adapter *adapter = netdev_priv(netdev);
6204 struct e1000_hw *hw = &adapter->hw;
6205 int pf_id = adapter->vfs_allocated_count;
6208 igb_irq_disable(adapter);
6209 vlan_group_set_device(adapter->vlgrp, vid, NULL);
6211 if (!test_bit(__IGB_DOWN, &adapter->state))
6212 igb_irq_enable(adapter);
6214 /* remove vlan from VLVF table array */
6215 err = igb_vlvf_set(adapter, vid, false, pf_id);
6217 /* if vid was not present in VLVF just remove it from table */
6219 igb_vfta_set(hw, vid, false);
/* igb_restore_vlan - after a reset, re-register the saved vlan group and
 * re-add every vid currently present in it (skipping unset devices).
 */
6222 static void igb_restore_vlan(struct igb_adapter *adapter)
6224 igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6226 if (adapter->vlgrp) {
6228 for (vid = 0; vid < VLAN_N_VID; vid++) {
6229 if (!vlan_group_get_device(adapter->vlgrp, vid))
6231 igb_vlan_rx_add_vid(adapter->netdev, vid);
/* igb_set_spd_dplx - force a speed/duplex setting. Serdes (fiber) links
 * accept only 1000/Full; all other combinations program
 * mac->forced_speed_duplex, and 1000/Full re-enables autoneg advertising
 * 1000FULL only. 1000/Half and unknown values are rejected with an error.
 * NOTE(review): the switch scaffolding (break statements, autoneg setup,
 * return paths) is elided in this chunk.
 */
6236 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
6238 struct pci_dev *pdev = adapter->pdev;
6239 struct e1000_mac_info *mac = &adapter->hw.mac;
6243 /* Fiber NIC's only allow 1000 Gbps Full duplex */
6244 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6245 spddplx != (SPEED_1000 + DUPLEX_FULL)) {
6246 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6251 case SPEED_10 + DUPLEX_HALF:
6252 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6254 case SPEED_10 + DUPLEX_FULL:
6255 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6257 case SPEED_100 + DUPLEX_HALF:
6258 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6260 case SPEED_100 + DUPLEX_FULL:
6261 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6263 case SPEED_1000 + DUPLEX_FULL:
6265 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6267 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6269 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
/* __igb_shutdown - common suspend/shutdown path. Detaches the netdev,
 * tears down the interrupt scheme, saves PCI state, then programs the
 * Wake-on-LAN filters (WUFC): drops the link-change wake bit while link is
 * up, enables all-multi for multicast wake, advertises D3cold wake via
 * CTRL, quiesces PCIe master requests, and arms WUC/WUFC. Reports via
 * *enable_wake whether PME should stay armed, powers the link up or down
 * accordingly, releases hw control to firmware and disables the device.
 * NOTE(review): several branches (igb_down call, retval check, wufc==0
 * else-path) are elided in this chunk.
 */
6275 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6277 struct net_device *netdev = pci_get_drvdata(pdev);
6278 struct igb_adapter *adapter = netdev_priv(netdev);
6279 struct e1000_hw *hw = &adapter->hw;
6280 u32 ctrl, rctl, status;
6281 u32 wufc = adapter->wol;
6286 netif_device_detach(netdev);
6288 if (netif_running(netdev))
6291 igb_clear_interrupt_scheme(adapter);
6294 retval = pci_save_state(pdev);
6299 status = rd32(E1000_STATUS);
/* no point waking on link-change if the link is already up */
6300 if (status & E1000_STATUS_LU)
6301 wufc &= ~E1000_WUFC_LNKC;
6304 igb_setup_rctl(adapter);
6305 igb_set_rx_mode(netdev);
6307 /* turn on all-multi mode if wake on multicast is enabled */
6308 if (wufc & E1000_WUFC_MC) {
6309 rctl = rd32(E1000_RCTL);
6310 rctl |= E1000_RCTL_MPE;
6311 wr32(E1000_RCTL, rctl);
6314 ctrl = rd32(E1000_CTRL);
6315 /* advertise wake from D3Cold */
6316 #define E1000_CTRL_ADVD3WUC 0x00100000
6317 /* phy power management enable */
6318 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6319 ctrl |= E1000_CTRL_ADVD3WUC;
6320 wr32(E1000_CTRL, ctrl);
6322 /* Allow time for pending master requests to run */
6323 igb_disable_pcie_master(hw);
6325 wr32(E1000_WUC, E1000_WUC_PME_EN);
6326 wr32(E1000_WUFC, wufc);
6329 wr32(E1000_WUFC, 0);
/* manageability (en_mng_pt) also requires the device to stay awake */
6332 *enable_wake = wufc || adapter->en_mng_pt;
6334 igb_power_down_link(adapter);
6336 igb_power_up_link(adapter);
6338 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6339 * would have already happened in close and is redundant. */
6340 igb_release_hw_control(adapter);
6342 pci_disable_device(pdev);
/* igb_suspend - legacy PM suspend hook: run the common shutdown path,
 * then either prepare for PME wakeup or disarm wake and drop to D3hot.
 * NOTE(review): the retval check and the wake/no-wake branch structure
 * are elided in this chunk.
 */
6348 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6353 retval = __igb_shutdown(pdev, &wake);
6358 pci_prepare_to_sleep(pdev);
6360 pci_wake_from_d3(pdev, false);
6361 pci_set_power_state(pdev, PCI_D3hot);
/* igb_resume - legacy PM resume hook: restore PCI power/state, re-enable
 * the device and bus mastering, disarm D3 wake, rebuild the interrupt
 * scheme, reset the part, take hw control back from firmware, clear the
 * wake-up status register, reopen the interface if it was running, and
 * reattach the netdev.
 * NOTE(review): the igb_reset() call and some error returns appear elided.
 */
6367 static int igb_resume(struct pci_dev *pdev)
6369 struct net_device *netdev = pci_get_drvdata(pdev);
6370 struct igb_adapter *adapter = netdev_priv(netdev);
6371 struct e1000_hw *hw = &adapter->hw;
6374 pci_set_power_state(pdev, PCI_D0);
6375 pci_restore_state(pdev);
/* re-save so a later suspend starts from the restored state */
6376 pci_save_state(pdev);
6378 err = pci_enable_device_mem(pdev);
6381 "igb: Cannot enable PCI device from suspend\n");
6384 pci_set_master(pdev);
6386 pci_enable_wake(pdev, PCI_D3hot, 0);
6387 pci_enable_wake(pdev, PCI_D3cold, 0);
6389 if (igb_init_interrupt_scheme(adapter)) {
6390 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6396 /* let the f/w know that the h/w is now under the control of the
6398 igb_get_hw_control(adapter);
6400 wr32(E1000_WUS, ~0);
6402 if (netif_running(netdev)) {
6403 err = igb_open(netdev);
6408 netif_device_attach(netdev);
/* igb_shutdown - PCI shutdown hook: run the common shutdown path and, when
 * the system is powering off, arm/disarm D3 wake per the computed wake
 * flag and enter D3hot.
 */
6414 static void igb_shutdown(struct pci_dev *pdev)
6418 __igb_shutdown(pdev, &wake);
6420 if (system_state == SYSTEM_POWER_OFF) {
6421 pci_wake_from_d3(pdev, wake);
6422 pci_set_power_state(pdev, PCI_D3hot);
6426 #ifdef CONFIG_NET_POLL_CONTROLLER
6428 * Polling 'interrupt' - used by things like netconsole to send skbs
6429 * without having to re-enable interrupts. It's not called while
6430 * the interrupt routine is executing.
/* igb_netpoll - netconsole poll hook. Legacy/MSI mode: mask all IRQs and
 * schedule the single q_vector's NAPI. MSI-X mode: mask each vector via
 * EIMC and schedule its NAPI individually.
 * NOTE(review): the early return after the legacy branch is elided.
 */
6432 static void igb_netpoll(struct net_device *netdev)
6434 struct igb_adapter *adapter = netdev_priv(netdev);
6435 struct e1000_hw *hw = &adapter->hw;
6438 if (!adapter->msix_entries) {
6439 struct igb_q_vector *q_vector = adapter->q_vector[0];
6440 igb_irq_disable(adapter);
6441 napi_schedule(&q_vector->napi);
6445 for (i = 0; i < adapter->num_q_vectors; i++) {
6446 struct igb_q_vector *q_vector = adapter->q_vector[i];
6447 wr32(E1000_EIMC, q_vector->eims_value);
6448 napi_schedule(&q_vector->napi);
6451 #endif /* CONFIG_NET_POLL_CONTROLLER */
6454 * igb_io_error_detected - called when PCI error is detected
6455 * @pdev: Pointer to PCI device
6456 * @state: The current pci connection state
6458 * This function is called after a PCI bus error affecting
6459 * this device has been detected.
/* Detach the netdev; on permanent channel failure report DISCONNECT,
 * otherwise bring the interface down, disable the device and request a
 * slot reset. (The igb_down() call between the running-check and
 * pci_disable_device appears elided in this chunk.) */
6461 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6462 pci_channel_state_t state)
6464 struct net_device *netdev = pci_get_drvdata(pdev);
6465 struct igb_adapter *adapter = netdev_priv(netdev);
6467 netif_device_detach(netdev);
6469 if (state == pci_channel_io_perm_failure)
6470 return PCI_ERS_RESULT_DISCONNECT;
6472 if (netif_running(netdev))
6474 pci_disable_device(pdev);
6476 /* Request a slot slot reset. */
6477 return PCI_ERS_RESULT_NEED_RESET;
6481 * igb_io_slot_reset - called after the pci bus has been reset.
6482 * @pdev: Pointer to PCI device
6484 * Restart the card from scratch, as if from a cold-boot. Implementation
6485 * resembles the first-half of the igb_resume routine.
/* Re-enable the device, restore/re-save PCI state, disarm D3 wake, reset
 * the hardware and clear the wake-up status register; then clear any
 * pending uncorrectable AER status (failure there is non-fatal). Returns
 * RECOVERED on success, DISCONNECT if the device cannot be re-enabled.
 * NOTE(review): the igb_reset() call and the final return appear elided. */
6487 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6489 struct net_device *netdev = pci_get_drvdata(pdev);
6490 struct igb_adapter *adapter = netdev_priv(netdev);
6491 struct e1000_hw *hw = &adapter->hw;
6492 pci_ers_result_t result;
6495 if (pci_enable_device_mem(pdev)) {
6497 "Cannot re-enable PCI device after reset.\n");
6498 result = PCI_ERS_RESULT_DISCONNECT;
6500 pci_set_master(pdev);
6501 pci_restore_state(pdev);
6502 pci_save_state(pdev);
6504 pci_enable_wake(pdev, PCI_D3hot, 0);
6505 pci_enable_wake(pdev, PCI_D3cold, 0);
6508 wr32(E1000_WUS, ~0);
6509 result = PCI_ERS_RESULT_RECOVERED;
6512 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6514 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6515 "failed 0x%0x\n", err);
6516 /* non-fatal, continue */
6523 * igb_io_resume - called when traffic can start flowing again.
6524 * @pdev: Pointer to PCI device
6526 * This callback is called when the error recovery driver tells us that
6527 * its OK to resume normal operation. Implementation resembles the
6528 * second-half of the igb_resume routine.
/* Bring the interface back up if it was running, reattach the netdev, and
 * reclaim hardware control from firmware. */
6530 static void igb_io_resume(struct pci_dev *pdev)
6532 struct net_device *netdev = pci_get_drvdata(pdev);
6533 struct igb_adapter *adapter = netdev_priv(netdev);
6535 if (netif_running(netdev)) {
6536 if (igb_up(adapter)) {
6537 dev_err(&pdev->dev, "igb_up failed after reset\n");
6542 netif_device_attach(netdev);
6544 /* let the f/w know that the h/w is now under the control of the
6546 igb_get_hw_control(adapter);
/* igb_rar_set_qsel - program receive-address register pair `index` with a
 * MAC address and the pool/queue selector. Bytes are packed little-endian
 * into RAL/RAH, the Address-Valid bit is set, and the pool bits are encoded
 * differently on 82575 (multiplied) vs later MACs (shifted).
 */
6549 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6552 u32 rar_low, rar_high;
6553 struct e1000_hw *hw = &adapter->hw;
6555 /* HW expects these in little endian so we reverse the byte order
6556 * from network order (big endian) to little endian
6558 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6559 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6560 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6562 /* Indicate to hardware the Address is Valid. */
6563 rar_high |= E1000_RAH_AV;
6565 if (hw->mac.type == e1000_82575)
6566 rar_high |= E1000_RAH_POOL_1 * qsel;
6568 rar_high |= E1000_RAH_POOL_1 << qsel;
6570 wr32(E1000_RAL(index), rar_low);
6572 wr32(E1000_RAH(index), rar_high);
/* igb_set_vf_mac - store a VF's MAC address in adapter->vf_data and program
 * it into a RAR slot selected from the top of the RAR table downward
 * (rar_entry_count - (vf + 1)), with the VF number as the pool selector.
 */
6576 static int igb_set_vf_mac(struct igb_adapter *adapter,
6577 int vf, unsigned char *mac_addr)
6579 struct e1000_hw *hw = &adapter->hw;
6580 /* VF MAC addresses start at end of receive addresses and moves
6581 * torwards the first, as a result a collision should not be possible */
6582 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6584 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6586 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
/* igb_ndo_set_vf_mac - ndo hook for `ip link set ... vf N mac ...`:
 * validates the address and VF index, marks the VF as PF-administered
 * (IGB_VF_FLAG_PF_SET_MAC), logs guidance to the admin, and delegates the
 * actual programming to igb_set_vf_mac().
 */
6591 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6593 struct igb_adapter *adapter = netdev_priv(netdev);
6594 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6596 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6597 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6598 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6599 " change effective.");
6600 if (test_bit(__IGB_DOWN, &adapter->state)) {
6601 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6602 " but the PF device is not up.\n");
6603 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6604 " attempting to use the VF device.\n");
6606 return igb_set_vf_mac(adapter, vf, mac);
/* igb_link_mbps - translate the driver's internal link-speed constant into
 * Mbps. NOTE(review): the switch body (case labels and returns) is entirely
 * elided in this chunk; presumably it maps SPEED_100/SPEED_1000 etc. to
 * 100/1000 — confirm against upstream.
 */
6609 static int igb_link_mbps(int internal_link_speed)
6611 switch (internal_link_speed) {
/* igb_set_vf_rate_limit - program the per-VF TX rate limiter: compute the
 * integer and fractional rate factors (link_speed / tx_rate, scaled by
 * 2^E1000_RTTBCNRC_RF_INT_SHIFT), then select the VF's queue via RTTDQSEL
 * and write the enable + factors into RTTBCNRC.
 * NOTE(review): the tx_rate==0 (disable) branch appears elided here.
 */
6621 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6628 /* Calculate the rate factor values to set */
6629 rf_int = link_speed / tx_rate;
6630 rf_dec = (link_speed - (rf_int * tx_rate));
6631 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6633 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6634 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6635 E1000_RTTBCNRC_RF_INT_MASK);
6636 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6641 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6642 wr32(E1000_RTTBCNRC, bcnrc_val);
/* igb_check_vf_rate_limit - re-validate VF rate limits after a link event.
 * Only meaningful on 82576 when a VF rate was previously set. If the link
 * speed changed, the configured limits are invalidated (reset_rate) and a
 * notice is logged; each VF's limiter is then reprogrammed (or zeroed).
 * NOTE(review): the early return and the reset_rate conditional are elided.
 */
6645 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6647 int actual_link_speed, i;
6648 bool reset_rate = false;
6650 /* VF TX rate limit was not set or not supported */
6651 if ((adapter->vf_rate_link_speed == 0) ||
6652 (adapter->hw.mac.type != e1000_82576))
6655 actual_link_speed = igb_link_mbps(adapter->link_speed);
6656 if (actual_link_speed != adapter->vf_rate_link_speed) {
6658 adapter->vf_rate_link_speed = 0;
6659 dev_info(&adapter->pdev->dev,
6660 "Link speed has been changed. VF Transmit "
6661 "rate is disabled\n");
6664 for (i = 0; i < adapter->vfs_allocated_count; i++) {
6666 adapter->vf_data[i].tx_rate = 0;
6668 igb_set_vf_rate_limit(&adapter->hw, i,
6669 adapter->vf_data[i].tx_rate,
/* igb_ndo_set_vf_bw - ndo hook for `ip link set ... vf N rate R`: only
 * supported on 82576; rejects out-of-range VF index, link-down, and rates
 * outside [0, current link speed]. Records the rate and link speed for
 * later re-validation and programs the hardware limiter.
 */
6674 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6676 struct igb_adapter *adapter = netdev_priv(netdev);
6677 struct e1000_hw *hw = &adapter->hw;
6678 int actual_link_speed;
6680 if (hw->mac.type != e1000_82576)
6683 actual_link_speed = igb_link_mbps(adapter->link_speed);
6684 if ((vf >= adapter->vfs_allocated_count) ||
6685 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6686 (tx_rate < 0) || (tx_rate > actual_link_speed))
6689 adapter->vf_rate_link_speed = actual_link_speed;
6690 adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6691 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
/* igb_ndo_get_vf_config - ndo hook reporting a VF's current configuration
 * (MAC, tx rate, VLAN, QoS) from adapter->vf_data into ifla_vf_info.
 * Rejects VF indices beyond vfs_allocated_count.
 */
6696 static int igb_ndo_get_vf_config(struct net_device *netdev,
6697 int vf, struct ifla_vf_info *ivi)
6699 struct igb_adapter *adapter = netdev_priv(netdev);
6700 if (vf >= adapter->vfs_allocated_count)
6703 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6704 ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6705 ivi->vlan = adapter->vf_data[vf].pf_vlan;
6706 ivi->qos = adapter->vf_data[vf].pf_qos;
6710 static void igb_vmm_control(struct igb_adapter *adapter)
6712 struct e1000_hw *hw = &adapter->hw;
6715 switch (hw->mac.type) {
6718 /* replication is not supported for 82575 */
6721 /* notify HW that the MAC is adding vlan tags */
6722 reg = rd32(E1000_DTXCTL);
6723 reg |= E1000_DTXCTL_VLAN_ADDED;
6724 wr32(E1000_DTXCTL, reg);
6726 /* enable replication vlan tag stripping */
6727 reg = rd32(E1000_RPLOLR);
6728 reg |= E1000_RPLOLR_STRVLAN;
6729 wr32(E1000_RPLOLR, reg);
6731 /* none of the above registers are supported by i350 */
6735 if (adapter->vfs_allocated_count) {
6736 igb_vmdq_set_loopback_pf(hw, true);
6737 igb_vmdq_set_replication_pf(hw, true);
6738 igb_vmdq_set_anti_spoofing_pf(hw, true,
6739 adapter->vfs_allocated_count);
6741 igb_vmdq_set_loopback_pf(hw, false);
6742 igb_vmdq_set_replication_pf(hw, false);