/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2011 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#include <linux/prefetch.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
		    __stringify(BUILD) "-k"
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
		"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
static const struct e1000_info *igb_info_tbl[] = {
	[board_82575] = &e1000_82575_info,
};

static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
	/* required last entry */
	{0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static void igb_init_hw_timer(struct igb_adapter *adapter);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
						 struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static bool igb_clean_tx_irq(struct igb_q_vector *);
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_mode(struct net_device *netdev, u32 features);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
				 struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);
#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
	.next		= NULL,
	.priority	= 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
		 "per physical function");
#endif /* CONFIG_PCI_IOV */

static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
		     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
	.error_detected = igb_io_error_detected,
	.slot_reset = igb_io_slot_reset,
	.resume = igb_io_resume,
};

static struct pci_driver igb_driver = {
	.name     = igb_driver_name,
	.id_table = igb_pci_tbl,
	.probe    = igb_probe,
	.remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
	/* Power Management Hooks */
	.suspend  = igb_suspend,
	.resume   = igb_resume,
#endif
	.shutdown = igb_shutdown,
	.err_handler = &igb_err_handler
};
MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
struct igb_reg_info {
	u32 ofs;
	char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {

	/* General Registers */
	{E1000_CTRL, "CTRL"},
	{E1000_STATUS, "STATUS"},
	{E1000_CTRL_EXT, "CTRL_EXT"},

	/* Interrupt Registers */
	{E1000_ICR, "ICR"},

	/* RX Registers */
	{E1000_RCTL, "RCTL"},
	{E1000_RDLEN(0), "RDLEN"},
	{E1000_RDH(0), "RDH"},
	{E1000_RDT(0), "RDT"},
	{E1000_RXDCTL(0), "RXDCTL"},
	{E1000_RDBAL(0), "RDBAL"},
	{E1000_RDBAH(0), "RDBAH"},

	/* TX Registers */
	{E1000_TCTL, "TCTL"},
	{E1000_TDBAL(0), "TDBAL"},
	{E1000_TDBAH(0), "TDBAH"},
	{E1000_TDLEN(0), "TDLEN"},
	{E1000_TDH(0), "TDH"},
	{E1000_TDT(0), "TDT"},
	{E1000_TXDCTL(0), "TXDCTL"},
	{E1000_TDFH, "TDFH"},
	{E1000_TDFT, "TDFT"},
	{E1000_TDFHS, "TDFHS"},
	{E1000_TDFPC, "TDFPC"},

	/* List Terminator */
	{}
};
/*
 * igb_regdump - register printout routine
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
	int n = 0;
	char rname[16];
	u32 regs[8];

	switch (reginfo->ofs) {
	case E1000_RDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDLEN(n));
		break;
	case E1000_RDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDH(n));
		break;
	case E1000_RDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDT(n));
		break;
	case E1000_RXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RXDCTL(n));
		break;
	case E1000_RDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAL(n));
		break;
	case E1000_RDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAH(n));
		break;
	case E1000_TDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAL(n));
		break;
	case E1000_TDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAH(n));
		break;
	case E1000_TDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDLEN(n));
		break;
	case E1000_TDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDH(n));
		break;
	case E1000_TDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDT(n));
		break;
	case E1000_TXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TXDCTL(n));
		break;
	default:
		printk(KERN_INFO "%-15s %08x\n",
			reginfo->name, rd32(reginfo->ofs));
		return;
	}

	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
	printk(KERN_INFO "%-15s ", rname);
	for (n = 0; n < 4; n++)
		printk(KERN_CONT "%08x ", regs[n]);
	printk(KERN_CONT "\n");
}
/*
 * igb_dump - Print registers, tx-rings and rx-rings
 */
static void igb_dump(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	struct igb_reg_info *reginfo;
	int n = 0;
	struct igb_ring *tx_ring;
	union e1000_adv_tx_desc *tx_desc;
	struct my_u0 { u64 a; u64 b; } *u0;
	struct igb_buffer *buffer_info;
	struct igb_ring *rx_ring;
	union e1000_adv_rx_desc *rx_desc;
	u32 staterr;
	int i = 0;

	if (!netif_msg_hw(adapter))
		return;

	/* Print netdevice Info */
	if (netdev) {
		dev_info(&adapter->pdev->dev, "Net device Info\n");
		printk(KERN_INFO "Device Name     state            "
			"trans_start      last_rx\n");
		printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
			netdev->name,
			netdev->state,
			netdev->trans_start,
			netdev->last_rx);
	}

	/* Print Registers */
	dev_info(&adapter->pdev->dev, "Register Dump\n");
	printk(KERN_INFO " Register Name   Value\n");
	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
	     reginfo->name; reginfo++) {
		igb_regdump(hw, reginfo);
	}

	/* Print TX Ring Summary */
	if (!netdev || !netif_running(netdev))
		goto exit;

	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
	printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
		" leng ntw timestamp\n");
	for (n = 0; n < adapter->num_tx_queues; n++) {
		tx_ring = adapter->tx_ring[n];
		buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
		printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
			n, tx_ring->next_to_use, tx_ring->next_to_clean,
			(u64)buffer_info->dma,
			buffer_info->length,
			buffer_info->next_to_watch,
			(u64)buffer_info->time_stamp);
	}

	/* Print TX Rings */
	if (!netif_msg_tx_done(adapter))
		goto rx_ring_summary;

	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");

	/* Transmit Descriptor Formats
	 *
	 * Advanced Transmit Descriptor
	 *   +--------------------------------------------------------------+
	 * 0 |         Buffer Address [63:0]                                |
	 *   +--------------------------------------------------------------+
	 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN    |
	 *   +--------------------------------------------------------------+
	 *   63     46 45   40 39 38 36 35 32 31 24             15        0
	 */

	for (n = 0; n < adapter->num_tx_queues; n++) {
		tx_ring = adapter->tx_ring[n];
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "T [desc]     [address 63:0  ] "
			"[PlPOCIStDDM Ln] [bi->dma       ] "
			"leng ntw timestamp bi->skb\n");

		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
			tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
			buffer_info = &tx_ring->buffer_info[i];
			u0 = (struct my_u0 *)tx_desc;
			printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
				" %04X %3X %016llX %p", i,
				le64_to_cpu(u0->a),
				le64_to_cpu(u0->b),
				(u64)buffer_info->dma,
				buffer_info->length,
				buffer_info->next_to_watch,
				(u64)buffer_info->time_stamp,
				buffer_info->skb);
			if (i == tx_ring->next_to_use &&
				i == tx_ring->next_to_clean)
				printk(KERN_CONT " NTC/U\n");
			else if (i == tx_ring->next_to_use)
				printk(KERN_CONT " NTU\n");
			else if (i == tx_ring->next_to_clean)
				printk(KERN_CONT " NTC\n");
			else
				printk(KERN_CONT "\n");

			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
				print_hex_dump(KERN_INFO, "",
					DUMP_PREFIX_ADDRESS,
					16, 1, phys_to_virt(buffer_info->dma),
					buffer_info->length, true);
		}
	}

	/* Print RX Rings Summary */
rx_ring_summary:
	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
	printk(KERN_INFO "Queue [NTU] [NTC]\n");
	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		printk(KERN_INFO " %5d %5X %5X\n", n,
			rx_ring->next_to_use, rx_ring->next_to_clean);
	}

	/* Print RX Rings */
	if (!netif_msg_rx_status(adapter))
		goto exit;

	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");

	/* Advanced Receive Descriptor (Read) Format
	 *    63                                           1        0
	 *    +-----------------------------------------------------+
	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
	 *    +----------------------------------------------+------+
	 *  8 |       Header Buffer Address [63:1]           |  DD  |
	 *    +-----------------------------------------------------+
	 *
	 *
	 * Advanced Receive Descriptor (Write-Back) Format
	 *
	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
	 *   +------------------------------------------------------+
	 * 0 | Packet     IP     |SPH| HDR_LEN  | RSV|Packet|  RSS  |
	 *   | Checksum   Ident  |   |          |    | Type | Type  |
	 *   +------------------------------------------------------+
	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
	 *   +------------------------------------------------------+
	 *   63       48 47    32 31            20 19               0
	 */

	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
			"[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
			"<-- Adv Rx Read format\n");
		printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
			"[vl er S cks ln] ---------------- [bi->skb] "
			"<-- Adv Rx Write-Back format\n");

		for (i = 0; i < rx_ring->count; i++) {
			buffer_info = &rx_ring->buffer_info[i];
			rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
			u0 = (struct my_u0 *)rx_desc;
			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
			if (staterr & E1000_RXD_STAT_DD) {
				/* Descriptor Done */
				printk(KERN_INFO "RWB[0x%03X]     %016llX "
					"%016llX ---------------- %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					buffer_info->skb);
			} else {
				printk(KERN_INFO "R  [0x%03X]     %016llX "
					"%016llX %016llX %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					(u64)buffer_info->dma,
					buffer_info->skb);

				if (netif_msg_pktdata(adapter)) {
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(buffer_info->dma),
						rx_ring->rx_buffer_len, true);
					if (rx_ring->rx_buffer_len
						< IGB_RXBUFFER_1024)
						print_hex_dump(KERN_INFO, "",
						  DUMP_PREFIX_ADDRESS,
						  16, 1,
						  phys_to_virt(
						    buffer_info->page_dma +
						    buffer_info->page_offset),
						  PAGE_SIZE/2, true);
				}
			}

			if (i == rx_ring->next_to_use)
				printk(KERN_CONT " NTU\n");
			else if (i == rx_ring->next_to_clean)
				printk(KERN_CONT " NTC\n");
			else
				printk(KERN_CONT "\n");
		}
	}

exit:
	return;
}
/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 */
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
	struct igb_adapter *adapter =
		container_of(tc, struct igb_adapter, cycles);
	struct e1000_hw *hw = &adapter->hw;
	u64 stamp = 0;
	int shift = 0;

	/*
	 * The timestamp latches on lowest register read. For the 82580
	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
	 */
	if (hw->mac.type == e1000_82580) {
		stamp = rd32(E1000_SYSTIMR) >> 8;
		shift = IGB_82580_TSYNC_SHIFT;
	}

	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
	return stamp;
}
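
/*
 * Illustrative note (not part of the original source): on non-82580 parts
 * shift stays 0, so the value returned above is simply SYSTIMH:SYSTIML
 * assembled into one 64-bit cycle count.  On the 82580 both words are
 * shifted up by IGB_82580_TSYNC_SHIFT to leave room for the sub-LSB bits
 * that SYSTIMR would otherwise supply.
 */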
/**
 * igb_get_hw_dev - return device
 * used by hardware layer to print debugging information
 **/
struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
{
	struct igb_adapter *adapter = hw->back;
	return adapter->netdev;
}

/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
	int ret;

	printk(KERN_INFO "%s - version %s\n",
	       igb_driver_string, igb_driver_version);

	printk(KERN_INFO "%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
	dca_register_notify(&dca_notifier);
#endif
	ret = pci_register_driver(&igb_driver);
	return ret;
}

module_init(igb_init_module);

/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
	dca_unregister_notify(&dca_notifier);
#endif
	pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);
#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
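
/*
 * Worked example of Q_IDX_82576() (illustrative only):
 * i = 0 -> 0, i = 1 -> 8, i = 2 -> 1, i = 3 -> 9, i = 4 -> 2, ...
 * Even indices map to queues 0..7 and odd indices to queues 8..15, which
 * produces the VF queue pairing described in igb_cache_ring_register()
 * below.
 */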
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
	int i = 0, j = 0;
	u32 rbase_offset = adapter->vfs_allocated_count;

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		/* The queues are allocated for virtualization such that VF 0
		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
		 * In order to avoid collision we start at the first free queue
		 * and continue consuming queues in the same sequence
		 */
		if (adapter->vfs_allocated_count) {
			for (; i < adapter->rss_queues; i++)
				adapter->rx_ring[i]->reg_idx = rbase_offset +
							       Q_IDX_82576(i);
		}
	default:
		for (; i < adapter->num_rx_queues; i++)
			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
		for (; j < adapter->num_tx_queues; j++)
			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
		break;
	}
}
static void igb_free_queues(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		kfree(adapter->tx_ring[i]);
		adapter->tx_ring[i] = NULL;
	}
	for (i = 0; i < adapter->num_rx_queues; i++) {
		kfree(adapter->rx_ring[i]);
		adapter->rx_ring[i] = NULL;
	}
	adapter->num_rx_queues = 0;
	adapter->num_tx_queues = 0;
}

/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
	struct igb_ring *ring;
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->tx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		/* For 82575, context index must be unique per ring. */
		if (adapter->hw.mac.type == e1000_82575)
			ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
		adapter->tx_ring[i] = ring;
	}

	for (i = 0; i < adapter->num_rx_queues; i++) {
		ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->rx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
		ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
		/* set flag indicating ring supports SCTP checksum offload */
		if (adapter->hw.mac.type >= e1000_82576)
			ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
		adapter->rx_ring[i] = ring;
	}

	igb_cache_ring_register(adapter);

	return 0;

err:
	igb_free_queues(adapter);
	return -ENOMEM;
}
#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
	u32 msixbm = 0;
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	u32 ivar, index;
	int rx_queue = IGB_N0_QUEUE;
	int tx_queue = IGB_N0_QUEUE;

	if (q_vector->rx_ring)
		rx_queue = q_vector->rx_ring->reg_idx;
	if (q_vector->tx_ring)
		tx_queue = q_vector->tx_ring->reg_idx;

	switch (hw->mac.type) {
	case e1000_82575:
		/* The 82575 assigns vectors using a bitmask, which matches the
		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
		   or more queues to a vector, we write the appropriate bits
		   into the MSIXBM register for that vector. */
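		/*
		 * Example (illustrative only): mapping rx queue 1 and tx
		 * queue 1 to one vector yields
		 * msixbm = (E1000_EICR_RX_QUEUE0 << 1) |
		 *          (E1000_EICR_TX_QUEUE0 << 1).
		 */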
		if (rx_queue > IGB_N0_QUEUE)
			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
		if (tx_queue > IGB_N0_QUEUE)
			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
		if (!adapter->msix_entries && msix_vector == 0)
			msixbm |= E1000_EIMS_OTHER;
		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
		q_vector->eims_value = msixbm;
		break;
	case e1000_82576:
		/* 82576 uses a table-based method for assigning vectors.
		   Each queue has a single entry in the table to which we write
		   a vector number along with a "valid" bit.  Sadly, the layout
		   of the table is somewhat counterintuitive. */
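		/*
		 * Sketch of the layout implied by the masks below
		 * (illustrative only): each 32-bit IVAR entry N packs four
		 * one-byte fields -- byte 0 = rx queue N, byte 1 = tx queue N,
		 * byte 2 = rx queue N+8, byte 3 = tx queue N+8 -- with each
		 * byte holding the vector number OR'ed with E1000_IVAR_VALID.
		 */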
		if (rx_queue > IGB_N0_QUEUE) {
			index = (rx_queue & 0x7);
			ivar = array_rd32(E1000_IVAR0, index);
			if (rx_queue < 8) {
				/* vector goes into low byte of register */
				ivar = ivar & 0xFFFFFF00;
				ivar |= msix_vector | E1000_IVAR_VALID;
			} else {
				/* vector goes into third byte of register */
				ivar = ivar & 0xFF00FFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		if (tx_queue > IGB_N0_QUEUE) {
			index = (tx_queue & 0x7);
			ivar = array_rd32(E1000_IVAR0, index);
			if (tx_queue < 8) {
				/* vector goes into second byte of register */
				ivar = ivar & 0xFFFF00FF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
			} else {
				/* vector goes into high byte of register */
				ivar = ivar & 0x00FFFFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		q_vector->eims_value = 1 << msix_vector;
		break;
	case e1000_82580:
	case e1000_i350:
		/* 82580 uses the same table-based approach as 82576 but has
		   fewer entries; as a result we carry over for queues greater
		   than 4. */
		if (rx_queue > IGB_N0_QUEUE) {
			index = (rx_queue >> 1);
			ivar = array_rd32(E1000_IVAR0, index);
			if (rx_queue & 0x1) {
				/* vector goes into third byte of register */
				ivar = ivar & 0xFF00FFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
			} else {
				/* vector goes into low byte of register */
				ivar = ivar & 0xFFFFFF00;
				ivar |= msix_vector | E1000_IVAR_VALID;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		if (tx_queue > IGB_N0_QUEUE) {
			index = (tx_queue >> 1);
			ivar = array_rd32(E1000_IVAR0, index);
			if (tx_queue & 0x1) {
				/* vector goes into high byte of register */
				ivar = ivar & 0x00FFFFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
			} else {
				/* vector goes into second byte of register */
				ivar = ivar & 0xFFFF00FF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		q_vector->eims_value = 1 << msix_vector;
		break;
	default:
		BUG();
		break;
	}

	/* add q_vector eims value to global eims_enable_mask */
	adapter->eims_enable_mask |= q_vector->eims_value;

	/* configure q_vector to set itr on first interrupt */
	q_vector->set_itr = 1;
}
/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
	u32 tmp;
	int i, vector = 0;
	struct e1000_hw *hw = &adapter->hw;

	adapter->eims_enable_mask = 0;

	/* set vector for other causes, i.e. link changes */
	switch (hw->mac.type) {
	case e1000_82575:
		tmp = rd32(E1000_CTRL_EXT);
		/* enable MSI-X PBA support*/
		tmp |= E1000_CTRL_EXT_PBA_CLR;

		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;

		wr32(E1000_CTRL_EXT, tmp);

		/* enable msix_other interrupt */
		array_wr32(E1000_MSIXBM(0), vector++,
			   E1000_EIMS_OTHER);
		adapter->eims_other = E1000_EIMS_OTHER;

		break;

	case e1000_82576:
	case e1000_82580:
	case e1000_i350:
		/* Turn on MSI-X capability first, or our settings
		 * won't stick.  And it will take days to debug. */
		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
		     E1000_GPIE_PBA | E1000_GPIE_EIAME |
		     E1000_GPIE_NSICR);

		/* enable msix_other interrupt */
		adapter->eims_other = 1 << vector;
		tmp = (vector++ | E1000_IVAR_VALID) << 8;

		wr32(E1000_IVAR_MISC, tmp);
		break;
	default:
		/* do nothing, since nothing else supports MSI-X */
		break;
	} /* switch (hw->mac.type) */

	adapter->eims_enable_mask |= adapter->eims_other;

	for (i = 0; i < adapter->num_q_vectors; i++)
		igb_assign_vector(adapter->q_vector[i], vector++);

	wrfl();
}

/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	int i, err = 0, vector = 0;

	err = request_irq(adapter->msix_entries[vector].vector,
			  igb_msix_other, 0, netdev->name, adapter);
	if (err)
		goto out;
	vector++;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];

		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

		if (q_vector->rx_ring && q_vector->tx_ring)
			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
				q_vector->rx_ring->queue_index);
		else if (q_vector->tx_ring)
			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
				q_vector->tx_ring->queue_index);
		else if (q_vector->rx_ring)
			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
				q_vector->rx_ring->queue_index);
		else
			sprintf(q_vector->name, "%s-unused", netdev->name);

		err = request_irq(adapter->msix_entries[vector].vector,
				  igb_msix_ring, 0, q_vector->name,
				  q_vector);
		if (err)
			goto out;
		vector++;
	}

	igb_configure_msix(adapter);
	return 0;
out:
	return err;
}
static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
		pci_disable_msi(adapter->pdev);
	}
}

/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
		adapter->q_vector[v_idx] = NULL;
		if (!q_vector)
			continue;
		netif_napi_del(&q_vector->napi);
		kfree(q_vector);
	}
	adapter->num_q_vectors = 0;
}

/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}
/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
	int err;
	int numvecs, i;

	/* Number of supported queues. */
	adapter->num_rx_queues = adapter->rss_queues;
	if (adapter->vfs_allocated_count)
		adapter->num_tx_queues = 1;
	else
		adapter->num_tx_queues = adapter->rss_queues;

	/* start with one vector for every rx queue */
	numvecs = adapter->num_rx_queues;

	/* if tx handler is separate add 1 for every tx queue */
	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
		numvecs += adapter->num_tx_queues;

	/* store the number of vectors reserved for queues */
	adapter->num_q_vectors = numvecs;

	/* add 1 vector for link status interrupts */
	numvecs++;
	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
					GFP_KERNEL);
	if (!adapter->msix_entries)
		goto msi_only;

	for (i = 0; i < numvecs; i++)
		adapter->msix_entries[i].entry = i;

	err = pci_enable_msix(adapter->pdev,
			      adapter->msix_entries,
			      numvecs);
	if (err == 0)
		goto out;

	igb_reset_interrupt_capability(adapter);

	/* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
	/* disable SR-IOV for non MSI-X configurations */
	if (adapter->vf_data) {
		struct e1000_hw *hw = &adapter->hw;
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(adapter->pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		msleep(100);
		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
	}
#endif
	adapter->vfs_allocated_count = 0;
	adapter->rss_queues = 1;
	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_q_vectors = 1;
	if (!pci_enable_msi(adapter->pdev))
		adapter->flags |= IGB_FLAG_HAS_MSI;
out:
	/* Notify the stack of the (possibly) reduced queue counts. */
	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
	return netif_set_real_num_rx_queues(adapter->netdev,
					    adapter->num_rx_queues);
}
/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
	struct igb_q_vector *q_vector;
	struct e1000_hw *hw = &adapter->hw;
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
		if (!q_vector)
			goto err_out;
		q_vector->adapter = adapter;
		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
		q_vector->itr_val = IGB_START_ITR;
		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
		adapter->q_vector[v_idx] = q_vector;
	}
	return 0;

err_out:
	igb_free_q_vectors(adapter);
	return -ENOMEM;
}
static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
				      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->rx_ring = adapter->rx_ring[ring_idx];
	q_vector->rx_ring->q_vector = q_vector;
	q_vector->itr_val = adapter->rx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
				      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->tx_ring = adapter->tx_ring[ring_idx];
	q_vector->tx_ring->q_vector = q_vector;
	q_vector->itr_val = adapter->tx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
	int i;
	int v_idx = 0;

	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
	    (adapter->num_q_vectors < adapter->num_tx_queues))
		return -ENOMEM;

	if (adapter->num_q_vectors >=
	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
		for (i = 0; i < adapter->num_rx_queues; i++)
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		for (i = 0; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	} else {
		for (i = 0; i < adapter->num_rx_queues; i++) {
			if (i < adapter->num_tx_queues)
				igb_map_tx_ring_to_vector(adapter, i, v_idx);
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		}
		for (; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	}
	return 0;
}
/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int err;

	err = igb_set_interrupt_capability(adapter);
	if (err)
		return err;

	err = igb_alloc_q_vectors(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
		goto err_alloc_q_vectors;
	}

	err = igb_alloc_queues(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		goto err_alloc_queues;
	}

	err = igb_map_ring_to_vector(adapter);
	if (err) {
		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
		goto err_map_queues;
	}

	return 0;
err_map_queues:
	igb_free_queues(adapter);
err_alloc_queues:
	igb_free_q_vectors(adapter);
err_alloc_q_vectors:
	igb_reset_interrupt_capability(adapter);
	return err;
}

/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	int err = 0;

	if (adapter->msix_entries) {
		err = igb_request_msix(adapter);
		if (!err)
			goto request_done;
		/* fall back to MSI */
		igb_clear_interrupt_scheme(adapter);
		if (!pci_enable_msi(adapter->pdev))
			adapter->flags |= IGB_FLAG_HAS_MSI;
		igb_free_all_tx_resources(adapter);
		igb_free_all_rx_resources(adapter);
		adapter->num_tx_queues = 1;
		adapter->num_rx_queues = 1;
		adapter->num_q_vectors = 1;
		err = igb_alloc_q_vectors(adapter);
		if (err) {
			dev_err(&pdev->dev,
				"Unable to allocate memory for vectors\n");
			goto request_done;
		}
		err = igb_alloc_queues(adapter);
		if (err) {
			dev_err(&pdev->dev,
				"Unable to allocate memory for queues\n");
			igb_free_q_vectors(adapter);
			goto request_done;
		}
		igb_setup_all_tx_resources(adapter);
		igb_setup_all_rx_resources(adapter);
	} else {
		igb_assign_vector(adapter->q_vector[0], 0);
	}

	if (adapter->flags & IGB_FLAG_HAS_MSI) {
		err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
				  netdev->name, adapter);
		if (!err)
			goto request_done;

		/* fall back to legacy interrupts */
		igb_reset_interrupt_capability(adapter);
		adapter->flags &= ~IGB_FLAG_HAS_MSI;
	}

	err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
			  netdev->name, adapter);

	if (err)
		dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
			err);

request_done:
	return err;
}
static void igb_free_irq(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		int vector = 0, i;

		free_irq(adapter->msix_entries[vector++].vector, adapter);

		for (i = 0; i < adapter->num_q_vectors; i++) {
			struct igb_q_vector *q_vector = adapter->q_vector[i];
			free_irq(adapter->msix_entries[vector++].vector,
				 q_vector);
		}
	} else {
		free_irq(adapter->pdev->irq, adapter);
	}
}

/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	/*
	 * we need to be careful when disabling interrupts.  The VFs are also
	 * mapped into these registers and so clearing the bits can cause
	 * issues on the VF drivers so we only need to clear what we set
	 */
	if (adapter->msix_entries) {
		u32 regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
		wr32(E1000_EIMC, adapter->eims_enable_mask);
		regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
	}

	wr32(E1000_IAM, 0);
	wr32(E1000_IMC, ~0);
	wrfl();
	if (adapter->msix_entries) {
		int i;
		for (i = 0; i < adapter->num_q_vectors; i++)
			synchronize_irq(adapter->msix_entries[i].vector);
	} else {
		synchronize_irq(adapter->pdev->irq);
	}
}

/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (adapter->msix_entries) {
		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
		u32 regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
		regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
		wr32(E1000_EIMS, adapter->eims_enable_mask);
		if (adapter->vfs_allocated_count) {
			wr32(E1000_MBVFIMR, 0xFF);
			ims |= E1000_IMS_VMMB;
		}
		if (adapter->hw.mac.type == e1000_82580)
			ims |= E1000_IMS_DRSTA;

		wr32(E1000_IMS, ims);
	} else {
		wr32(E1000_IMS, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
		wr32(E1000_IAM, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
	}
}

static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 vid = adapter->hw.mng_cookie.vlan_id;
	u16 old_vid = adapter->mng_vlan_id;

	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
		/* add VID to filter table */
		igb_vfta_set(hw, vid, true);
		adapter->mng_vlan_id = vid;
	} else {
		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
	}

	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
	    (vid != old_vid) &&
	    !test_bit(old_vid, adapter->active_vlans)) {
		/* remove VID from filter table */
		igb_vfta_set(hw, old_vid, false);
	}
}
/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 *
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware take over control of h/w */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
	     ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 *
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware know the driver has taken over */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
	     ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}
/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int i;

	igb_get_hw_control(adapter);
	igb_set_rx_mode(netdev);

	igb_restore_vlan(adapter);

	igb_setup_tctl(adapter);
	igb_setup_mrqc(adapter);
	igb_setup_rctl(adapter);

	igb_configure_tx(adapter);
	igb_configure_rx(adapter);

	igb_rx_fifo_flush_82575(&adapter->hw);

	/* call igb_desc_unused which always leaves
	 * at least 1 descriptor unused to make sure
	 * next_to_use != next_to_clean */
	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igb_ring *ring = adapter->rx_ring[i];
		igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
	}
}
/**
 * igb_power_up_link - Power up the phy/serdes link
 * @adapter: address of board private structure
 **/
void igb_power_up_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_up_phy_copper(&adapter->hw);
	else
		igb_power_up_serdes_link_82575(&adapter->hw);
}

/**
 * igb_power_down_link - Power down the phy/serdes link
 * @adapter: address of board private structure
 */
static void igb_power_down_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_down_phy_copper_82575(&adapter->hw);
	else
		igb_shutdown_serdes_link_82575(&adapter->hw);
}
/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* hardware has been reset, we need to reload some things */
	igb_configure(adapter);

	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_enable(&q_vector->napi);
	}
	if (adapter->msix_entries)
		igb_configure_msix(adapter);
	else
		igb_assign_vector(adapter->q_vector[0], 0);

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(adapter->netdev);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return 0;
}
void igb_down(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl, rctl;
	int i;

	/* signal that we're down so the interrupt handler does not
	 * reschedule our watchdog timer */
	set_bit(__IGB_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = rd32(E1000_RCTL);
	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

	netif_tx_stop_all_queues(netdev);

	/* disable transmits in the hardware */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_EN;
	wr32(E1000_TCTL, tctl);
	/* flush both disables and wait for them to finish */
	wrfl();
	msleep(10);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_disable(&q_vector->napi);
	}

	igb_irq_disable(adapter);

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	netif_carrier_off(netdev);

	/* record the stats before reset*/
	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	spin_unlock(&adapter->stats64_lock);

	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	if (!pci_channel_offline(adapter->pdev))
		igb_reset(adapter);
	igb_clean_all_tx_rings(adapter);
	igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA
	/* since we reset the hardware DCA settings were cleared */
	igb_setup_dca(adapter);
#endif
}

void igb_reinit_locked(struct igb_adapter *adapter)
{
	WARN_ON(in_interrupt());
	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);
	igb_down(adapter);
	igb_up(adapter);
	clear_bit(__IGB_RESETTING, &adapter->state);
}
void igb_reset(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_mac_info *mac = &hw->mac;
	struct e1000_fc_info *fc = &hw->fc;
	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
	u16 hwm;

	/* Repartition Pba for greater than 9k mtu
	 * To take effect CTRL.RST is required.
	 */
	switch (mac->type) {
	case e1000_i350:
	case e1000_82580:
		pba = rd32(E1000_RXPBS);
		pba = igb_rxpbs_adjust_82580(pba);
		break;
	case e1000_82576:
		pba = rd32(E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;
	case e1000_82575:
	default:
		pba = E1000_PBA_34K;
		break;
	}

	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
	    (mac->type < e1000_82576)) {
		/* adjust PBA for jumbo frames */
		wr32(E1000_PBA, pba);

		/* To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
		 * expressed in KB. */
		pba = rd32(E1000_PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
		/* the tx fifo also stores 16 bytes of information about the tx
		 * but don't include ethernet FCS because hardware appends it */
		min_tx_space = (adapter->max_frame_size +
				sizeof(union e1000_adv_tx_desc) -
				ETH_FCS_LEN) * 2;
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;
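
		/*
		 * Worked example (illustrative only): with a 1522-byte max
		 * frame and a 16-byte advanced Tx descriptor, min_tx_space is
		 * (1522 + 16 - 4) * 2 = 3068 bytes, aligned up to 3072 and
		 * expressed in KB as 3; min_rx_space is 1522 aligned up to
		 * 2048, i.e. 2KB.
		 */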
		/* If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
		 * allocation, take space away from current Rx allocation */
		if (tx_space < min_tx_space &&
		    ((min_tx_space - tx_space) < pba)) {
			pba = pba - (min_tx_space - tx_space);

			/* if short on rx space, rx wins and must trump tx
			 * adjustment */
			if (pba < min_rx_space)
				pba = min_rx_space;
		}
		wr32(E1000_PBA, pba);
	}

	/* flow control settings */
	/* The high water mark must be low enough to fit one full frame
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
	 * - the full Rx FIFO size minus one full frame */
	hwm = min(((pba << 10) * 9 / 10),
		  ((pba << 10) - 2 * adapter->max_frame_size));

	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
	fc->low_water = fc->high_water - 16;
	fc->pause_time = 0xFFFF;
	fc->send_xon = 1;
	fc->current_mode = fc->requested_mode;
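
	/*
	 * Worked example (illustrative only): with the default 34KB PBA and
	 * a 1522-byte max frame, hwm = min(34816 * 9 / 10, 34816 - 3044) =
	 * min(31334, 31772) = 31334; masking to 16-byte granularity gives a
	 * high water mark of 31328 bytes and a low water mark of 31312.
	 */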
	/* disable receive for all VFs and wait one second */
	if (adapter->vfs_allocated_count) {
		int i;
		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;

		/* ping all the active vfs to let them know we are going down */
		igb_ping_all_vfs(adapter);

		/* disable transmits and receives */
		wr32(E1000_VFRE, 0);
		wr32(E1000_VFTE, 0);
	}

	/* Allow time for pending master requests to run */
	hw->mac.ops.reset_hw(hw);
	wr32(E1000_WUC, 0);

	if (hw->mac.ops.init_hw(hw))
		dev_err(&pdev->dev, "Hardware Error\n");
	if (hw->mac.type > e1000_82580) {
		if (adapter->flags & IGB_FLAG_DMAC) {
			u32 reg;

			/*
			 * DMA Coalescing high water mark needs to be higher
			 * than the Rx threshold.  The Rx threshold is
			 * currently pba - 6, so we should use a high water
			 * mark of pba - 4.
			 */
			hwm = (pba - 4) << 10;

			reg = (((pba - 6) << E1000_DMACR_DMACTHR_SHIFT)
			       & E1000_DMACR_DMACTHR_MASK);

			/* transition to L0x or L1 if available..*/
			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);

			/* watchdog timer= +-1000 usec in 32usec intervals */
			reg |= (1000 >> 5);
			wr32(E1000_DMACR, reg);

			/* no lower threshold to disable coalescing (smart fifb)
			 * -UTRESH=0 */
			wr32(E1000_DMCRTRH, 0);

			/* set hwm to PBA - 2 * max frame size */
			wr32(E1000_FCRTC, hwm);

			/*
			 * This sets the time to wait before requesting
			 * transition to low power state to number of usecs
			 * needed to receive 1 512 byte frame at gigabit
			 * line rate.
			 */
			reg = rd32(E1000_DMCTLX);
			reg |= IGB_DMCTLX_DCFLUSH_DIS;

			/* Delay 255 usec before entering Lx state. */
			reg |= 0xFF;
			wr32(E1000_DMCTLX, reg);

			/* free space in Tx packet buffer to wake from DMAC */
			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
			     (IGB_TX_BUF_4096 + adapter->max_frame_size))
			     >> 6);

			/* make low power state decision controlled by DMAC */
			reg = rd32(E1000_PCIEMISC);
			reg |= E1000_PCIEMISC_LX_DECISION;
			wr32(E1000_PCIEMISC, reg);
		} /* end if IGB_FLAG_DMAC set */
	}
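
	/*
	 * Illustrative numbers only (not from the datasheet): with pba = 34
	 * (KB), the Rx threshold field programmed into DMACR corresponds to
	 * 34 - 6 = 28KB, and the coalescing high water mark written to FCRTC
	 * is (34 - 4) << 10 = 30720 bytes, keeping the wake threshold above
	 * the Rx threshold.
	 */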
	if (hw->mac.type == e1000_82580) {
		u32 reg = rd32(E1000_PCIEMISC);
		wr32(E1000_PCIEMISC,
		     reg & ~E1000_PCIEMISC_LX_DECISION);
	}
	if (!netif_running(adapter->netdev))
		igb_power_down_link(adapter);

	igb_update_mng_vlan(adapter);

	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

	igb_get_phy_info(hw);
}
static u32 igb_fix_features(struct net_device *netdev, u32 features)
{
	/*
	 * Since there is no support for separate rx/tx vlan accel
	 * enable/disable make sure tx flag is always in same state as rx.
	 */
	if (features & NETIF_F_HW_VLAN_RX)
		features |= NETIF_F_HW_VLAN_TX;
	else
		features &= ~NETIF_F_HW_VLAN_TX;

	return features;
}

static int igb_set_features(struct net_device *netdev, u32 features)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	int i;
	u32 changed = netdev->features ^ features;

	for (i = 0; i < adapter->num_rx_queues; i++) {
		if (features & NETIF_F_RXCSUM)
			adapter->rx_ring[i]->flags |= IGB_RING_FLAG_RX_CSUM;
		else
			adapter->rx_ring[i]->flags &= ~IGB_RING_FLAG_RX_CSUM;
	}

	if (changed & NETIF_F_HW_VLAN_RX)
		igb_vlan_mode(netdev, features);

	return 0;
}
static const struct net_device_ops igb_netdev_ops = {
	.ndo_open		= igb_open,
	.ndo_stop		= igb_close,
	.ndo_start_xmit		= igb_xmit_frame_adv,
	.ndo_get_stats64	= igb_get_stats64,
	.ndo_set_rx_mode	= igb_set_rx_mode,
	.ndo_set_multicast_list	= igb_set_rx_mode,
	.ndo_set_mac_address	= igb_set_mac,
	.ndo_change_mtu		= igb_change_mtu,
	.ndo_do_ioctl		= igb_ioctl,
	.ndo_tx_timeout		= igb_tx_timeout,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
	.ndo_get_vf_config	= igb_ndo_get_vf_config,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= igb_netpoll,
#endif
	.ndo_fix_features	= igb_fix_features,
	.ndo_set_features	= igb_set_features,
};
1811 * igb_probe - Device Initialization Routine
1812 * @pdev: PCI device information struct
1813 * @ent: entry in igb_pci_tbl
1815 * Returns 0 on success, negative on failure
1817 * igb_probe initializes an adapter identified by a pci_dev structure.
1818 * The OS initialization, configuring of the adapter private structure,
1819 * and a hardware reset occur.
1821 static int __devinit igb_probe(struct pci_dev *pdev,
1822 const struct pci_device_id *ent)
1824 struct net_device *netdev;
1825 struct igb_adapter *adapter;
1826 struct e1000_hw *hw;
1827 u16 eeprom_data = 0;
1829 static int global_quad_port_a; /* global quad port a indication */
1830 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1831 unsigned long mmio_start, mmio_len;
1832 int err, pci_using_dac;
1833 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1834 u8 part_str[E1000_PBANUM_LENGTH];
1836 /* Catch broken hardware that put the wrong VF device ID in
1837 * the PCIe SR-IOV capability.
1839 if (pdev->is_virtfn) {
1840 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1841 pci_name(pdev), pdev->vendor, pdev->device);
1845 err = pci_enable_device_mem(pdev);
1850 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1852 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1856 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1858 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1860 dev_err(&pdev->dev, "No usable DMA "
1861 "configuration, aborting\n");
1867 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1873 pci_enable_pcie_error_reporting(pdev);
1875 pci_set_master(pdev);
1876 pci_save_state(pdev);
1879 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1880 IGB_ABS_MAX_TX_QUEUES);
1882 goto err_alloc_etherdev;
1884 SET_NETDEV_DEV(netdev, &pdev->dev);
1886 pci_set_drvdata(pdev, netdev);
1887 adapter = netdev_priv(netdev);
1888 adapter->netdev = netdev;
1889 adapter->pdev = pdev;
1892 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1894 mmio_start = pci_resource_start(pdev, 0);
1895 mmio_len = pci_resource_len(pdev, 0);
1898 hw->hw_addr = ioremap(mmio_start, mmio_len);
1902 netdev->netdev_ops = &igb_netdev_ops;
1903 igb_set_ethtool_ops(netdev);
1904 netdev->watchdog_timeo = 5 * HZ;
1906 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1908 netdev->mem_start = mmio_start;
1909 netdev->mem_end = mmio_start + mmio_len;
1911 /* PCI config space info */
1912 hw->vendor_id = pdev->vendor;
1913 hw->device_id = pdev->device;
1914 hw->revision_id = pdev->revision;
1915 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1916 hw->subsystem_device_id = pdev->subsystem_device;
1918 /* Copy the default MAC, PHY and NVM function pointers */
1919 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1920 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1921 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1922 /* Initialize skew-specific constants */
1923 err = ei->get_invariants(hw);
1927 /* setup the private structure */
1928 err = igb_sw_init(adapter);
1932 igb_get_bus_info_pcie(hw);
1934 hw->phy.autoneg_wait_to_complete = false;
1936 /* Copper options */
1937 if (hw->phy.media_type == e1000_media_type_copper) {
1938 hw->phy.mdix = AUTO_ALL_MODES;
1939 hw->phy.disable_polarity_correction = false;
1940 hw->phy.ms_type = e1000_ms_hw_default;
1943 if (igb_check_reset_block(hw))
1944 dev_info(&pdev->dev,
1945 "PHY reset is blocked due to SOL/IDER session.\n");
1947 netdev->hw_features = NETIF_F_SG |
1955 netdev->features = netdev->hw_features |
1956 NETIF_F_HW_VLAN_TX |
1957 NETIF_F_HW_VLAN_FILTER;
1959 netdev->vlan_features |= NETIF_F_TSO;
1960 netdev->vlan_features |= NETIF_F_TSO6;
1961 netdev->vlan_features |= NETIF_F_IP_CSUM;
1962 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1963 netdev->vlan_features |= NETIF_F_SG;
1965 if (pci_using_dac) {
1966 netdev->features |= NETIF_F_HIGHDMA;
1967 netdev->vlan_features |= NETIF_F_HIGHDMA;
1970 if (hw->mac.type >= e1000_82576) {
1971 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1972 netdev->features |= NETIF_F_SCTP_CSUM;
1975 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1977 /* before reading the NVM, reset the controller to put the device in a
1978 * known good starting state */
1979 hw->mac.ops.reset_hw(hw);
1981 /* make sure the NVM is good */
1982 if (hw->nvm.ops.validate(hw) < 0) {
1983 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1988 /* copy the MAC address out of the NVM */
1989 if (hw->mac.ops.read_mac_addr(hw))
1990 dev_err(&pdev->dev, "NVM Read Error\n");
1992 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1993 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1995 if (!is_valid_ether_addr(netdev->perm_addr)) {
1996 dev_err(&pdev->dev, "Invalid MAC Address\n");
2001 setup_timer(&adapter->watchdog_timer, igb_watchdog,
2002 (unsigned long) adapter);
2003 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2004 (unsigned long) adapter);
2006 INIT_WORK(&adapter->reset_task, igb_reset_task);
2007 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2009 /* Initialize link properties that are user-changeable */
2010 adapter->fc_autoneg = true;
2011 hw->mac.autoneg = true;
2012 hw->phy.autoneg_advertised = 0x2f;
2014 hw->fc.requested_mode = e1000_fc_default;
2015 hw->fc.current_mode = e1000_fc_default;
2017 igb_validate_mdi_setting(hw);
2019 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
2020 * enable the ACPI Magic Packet filter
2023 if (hw->bus.func == 0)
2024 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2025 else if (hw->mac.type == e1000_82580)
2026 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2027 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2029 else if (hw->bus.func == 1)
2030 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2032 if (eeprom_data & eeprom_apme_mask)
2033 adapter->eeprom_wol |= E1000_WUFC_MAG;
2035 /* now that we have the eeprom settings, apply the special cases where
2036 * the eeprom may be wrong or the board simply won't support wake on
2037 * lan on a particular port */
2038 switch (pdev->device) {
2039 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2040 adapter->eeprom_wol = 0;
2042 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2043 case E1000_DEV_ID_82576_FIBER:
2044 case E1000_DEV_ID_82576_SERDES:
2045 /* Wake events only supported on port A for dual fiber
2046 * regardless of eeprom setting */
2047 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2048 adapter->eeprom_wol = 0;
2050 case E1000_DEV_ID_82576_QUAD_COPPER:
2051 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2052 /* if quad port adapter, disable WoL on all but port A */
2053 if (global_quad_port_a != 0)
2054 adapter->eeprom_wol = 0;
2055 else
2056 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2057 /* Reset for multiple quad port adapters */
2058 if (++global_quad_port_a == 4)
2059 global_quad_port_a = 0;
2060 break;
2061 }
2063 /* initialize the wol settings based on the eeprom settings */
2064 adapter->wol = adapter->eeprom_wol;
2065 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2067 /* reset the hardware with the new settings */
2068 igb_reset(adapter);
2070 /* let the f/w know that the h/w is now under the control of the
2071 * driver. */
2072 igb_get_hw_control(adapter);
2074 strcpy(netdev->name, "eth%d");
2075 err = register_netdev(netdev);
2076 if (err)
2077 goto err_register;
2079 igb_vlan_mode(netdev, netdev->features);
2081 /* carrier off reporting is important to ethtool even BEFORE open */
2082 netif_carrier_off(netdev);
2084 #ifdef CONFIG_IGB_DCA
2085 if (dca_add_requester(&pdev->dev) == 0) {
2086 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2087 dev_info(&pdev->dev, "DCA enabled\n");
2088 igb_setup_dca(adapter);
2089 }
2090 #endif
2092 /* do hw tstamp init after resetting */
2093 igb_init_hw_timer(adapter);
2095 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2096 /* print bus type/speed/width info */
2097 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2098 netdev->name,
2099 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2100 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2101 "unknown"),
2102 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2103 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2104 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2105 "unknown"),
2106 netdev->dev_addr);
2108 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2109 if (ret_val)
2110 strcpy(part_str, "Unknown");
2111 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2112 dev_info(&pdev->dev,
2113 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2114 adapter->msix_entries ? "MSI-X" :
2115 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2116 adapter->num_rx_queues, adapter->num_tx_queues);
2117 switch (hw->mac.type) {
2118 case e1000_i350:
2119 igb_set_eee_i350(hw);
2120 break;
2121 default:
2122 break;
2123 }
2125 return 0;
2126 err_register:
2127 igb_release_hw_control(adapter);
2128 err_eeprom:
2129 if (!igb_check_reset_block(hw))
2130 igb_reset_phy(hw);
2132 if (hw->flash_address)
2133 iounmap(hw->flash_address);
2134 err_sw_init:
2135 igb_clear_interrupt_scheme(adapter);
2136 iounmap(hw->hw_addr);
2137 err_ioremap:
2138 free_netdev(netdev);
2139 err_alloc_etherdev:
2140 pci_release_selected_regions(pdev,
2141 pci_select_bars(pdev, IORESOURCE_MEM));
2142 err_pci_reg:
2143 err_dma:
2144 pci_disable_device(pdev);
2145 return err;
2146 }
2149 * igb_remove - Device Removal Routine
2150 * @pdev: PCI device information struct
2152 * igb_remove is called by the PCI subsystem to alert the driver
2153 * that it should release a PCI device. This could be caused by a
2154 * Hot-Plug event, or because the driver is going to be removed from
2155 * memory.
2156 **/
2157 static void __devexit igb_remove(struct pci_dev *pdev)
2158 {
2159 struct net_device *netdev = pci_get_drvdata(pdev);
2160 struct igb_adapter *adapter = netdev_priv(netdev);
2161 struct e1000_hw *hw = &adapter->hw;
2163 /*
2164 * The watchdog timer may be rescheduled, so explicitly
2165 * disable watchdog from being rescheduled.
2166 */
2167 set_bit(__IGB_DOWN, &adapter->state);
2168 del_timer_sync(&adapter->watchdog_timer);
2169 del_timer_sync(&adapter->phy_info_timer);
2171 cancel_work_sync(&adapter->reset_task);
2172 cancel_work_sync(&adapter->watchdog_task);
2174 #ifdef CONFIG_IGB_DCA
2175 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2176 dev_info(&pdev->dev, "DCA disabled\n");
2177 dca_remove_requester(&pdev->dev);
2178 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2179 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2180 }
2181 #endif
2183 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2184 * would have already happened in close and is redundant. */
2185 igb_release_hw_control(adapter);
2187 unregister_netdev(netdev);
2189 igb_clear_interrupt_scheme(adapter);
2191 #ifdef CONFIG_PCI_IOV
2192 /* reclaim resources allocated to VFs */
2193 if (adapter->vf_data) {
2194 /* disable iov and allow time for transactions to clear */
2195 pci_disable_sriov(pdev);
2196 msleep(500);
2198 kfree(adapter->vf_data);
2199 adapter->vf_data = NULL;
2200 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2202 dev_info(&pdev->dev, "IOV Disabled\n");
2203 }
2204 #endif
2206 iounmap(hw->hw_addr);
2207 if (hw->flash_address)
2208 iounmap(hw->flash_address);
2209 pci_release_selected_regions(pdev,
2210 pci_select_bars(pdev, IORESOURCE_MEM));
2212 free_netdev(netdev);
2214 pci_disable_pcie_error_reporting(pdev);
2216 pci_disable_device(pdev);
2217 }
2220 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2221 * @adapter: board private structure to initialize
2223 * This function initializes the vf specific data storage and then attempts to
2224 * allocate the VFs. The reason for ordering it this way is because it is much
2225 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2226 * the memory for the VFs.
2228 static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
2229 {
2230 #ifdef CONFIG_PCI_IOV
2231 struct pci_dev *pdev = adapter->pdev;
2233 if (adapter->vfs_allocated_count) {
2234 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2235 sizeof(struct vf_data_storage),
2236 GFP_KERNEL);
2237 /* if allocation failed then we do not support SR-IOV */
2238 if (!adapter->vf_data) {
2239 adapter->vfs_allocated_count = 0;
2240 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2241 "Data Storage\n");
2242 }
2243 }
2245 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2246 kfree(adapter->vf_data);
2247 adapter->vf_data = NULL;
2248 #endif /* CONFIG_PCI_IOV */
2249 adapter->vfs_allocated_count = 0;
2250 #ifdef CONFIG_PCI_IOV
2251 } else {
2252 unsigned char mac_addr[ETH_ALEN];
2253 int i;
2254 dev_info(&pdev->dev, "%d vfs allocated\n",
2255 adapter->vfs_allocated_count);
2256 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2257 random_ether_addr(mac_addr);
2258 igb_set_vf_mac(adapter, i, mac_addr);
2259 }
2260 /* DMA Coalescing is not supported in IOV mode. */
2261 if (adapter->flags & IGB_FLAG_DMAC)
2262 adapter->flags &= ~IGB_FLAG_DMAC;
2263 }
2264 #endif /* CONFIG_PCI_IOV */
2265 }
2269 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2270 * @adapter: board private structure to initialize
2272 * igb_init_hw_timer initializes the function pointer and values for the hw
2273 * timer found in hardware.
2275 static void igb_init_hw_timer(struct igb_adapter *adapter)
2276 {
2277 struct e1000_hw *hw = &adapter->hw;
2279 switch (hw->mac.type) {
2280 case e1000_i350:
2281 case e1000_82580:
2282 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2283 adapter->cycles.read = igb_read_clock;
2284 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2285 adapter->cycles.mult = 1;
2286 /*
2287 * The 82580 timesync updates the system timer every 8 ns by 8 ns,
2288 * and the value cannot be shifted. Instead we need to shift
2289 * the registers to generate a 64-bit timer value. As a result
2290 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2291 * 24 in order to generate a larger value for synchronization.
2292 */
2293 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
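/*
 * Worked example (assuming IGB_82580_TSYNC_SHIFT == 24): with
 * cycles.mult == 1 the timecounter computes ns = cycle_delta >> 24,
 * so reading SYSTIMR/L/H shifted left by 24 bits hands the
 * timecounter a value whose low 24 bits are sub-nanosecond
 * resolution, and the shift here simply undoes that scaling.
 */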
2294 /* disable system timer temporarily by setting bit 31 */
2295 wr32(E1000_TSAUXC, 0x80000000);
2298 /* Set registers so that rollover occurs soon to test this. */
2299 wr32(E1000_SYSTIMR, 0x00000000);
2300 wr32(E1000_SYSTIML, 0x80000000);
2301 wr32(E1000_SYSTIMH, 0x000000FF);
2302 wrfl();
2304 /* enable system timer by clearing bit 31 */
2305 wr32(E1000_TSAUXC, 0x0);
2308 timecounter_init(&adapter->clock,
2309 &adapter->cycles,
2310 ktime_to_ns(ktime_get_real()));
2311 /*
2312 * Synchronize our NIC clock against system wall clock. NIC
2313 * time stamp reading requires ~3us per sample, each sample
2314 * was pretty stable even under load => only require 10
2315 * samples for each offset comparison.
2316 */
2317 memset(&adapter->compare, 0, sizeof(adapter->compare));
2318 adapter->compare.source = &adapter->clock;
2319 adapter->compare.target = ktime_get_real;
2320 adapter->compare.num_samples = 10;
2321 timecompare_update(&adapter->compare, 0);
2322 break;
2323 case e1000_82576:
2324 /*
2325 * Initialize hardware timer: we keep it running just in case
2326 * that some program needs it later on.
2327 */
2328 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2329 adapter->cycles.read = igb_read_clock;
2330 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2331 adapter->cycles.mult = 1;
2332 /*
2333 * Scale the NIC clock cycle by a large factor so that
2334 * relatively small clock corrections can be added or
2335 * subtracted at each clock tick. The drawbacks of a large
2336 * factor are a) that the clock register overflows more quickly
2337 * (not such a big deal) and b) that the increment per tick has
2338 * to fit into 24 bits. As a result we need to use a shift of
2339 * 19 so we can fit a value of 16 into the TIMINCA register.
2340 */
2341 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2342 wr32(E1000_TIMINCA,
2343 (1 << E1000_TIMINCA_16NS_SHIFT) |
2344 (16 << IGB_82576_TSYNC_SHIFT));
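/*
 * Worked arithmetic (assuming IGB_82576_TSYNC_SHIFT == 19): the
 * per-tick increment programmed here is 16 << 19 = 2^23, which still
 * fits in TIMINCA's 24-bit increment field as the comment above
 * requires, while the timecounter undoes the scaling with
 * ns = cycles >> 19 (cycles.mult == 1, cycles.shift == 19).
 */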
2346 /* Set registers so that rollover occurs soon to test this. */
2347 wr32(E1000_SYSTIML, 0x00000000);
2348 wr32(E1000_SYSTIMH, 0xFF800000);
2349 wrfl();
2351 timecounter_init(&adapter->clock,
2352 &adapter->cycles,
2353 ktime_to_ns(ktime_get_real()));
2354 /*
2355 * Synchronize our NIC clock against system wall clock. NIC
2356 * time stamp reading requires ~3us per sample, each sample
2357 * was pretty stable even under load => only require 10
2358 * samples for each offset comparison.
2359 */
2360 memset(&adapter->compare, 0, sizeof(adapter->compare));
2361 adapter->compare.source = &adapter->clock;
2362 adapter->compare.target = ktime_get_real;
2363 adapter->compare.num_samples = 10;
2364 timecompare_update(&adapter->compare, 0);
2365 break;
2366 case e1000_82575:
2367 /* 82575 does not support timesync */
2368 default:
2369 break;
2370 }
2371 }
2375 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2376 * @adapter: board private structure to initialize
2378 * igb_sw_init initializes the Adapter private data structure.
2379 * Fields are initialized based on PCI device information and
2380 * OS network device settings (MTU size).
2382 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2383 {
2384 struct e1000_hw *hw = &adapter->hw;
2385 struct net_device *netdev = adapter->netdev;
2386 struct pci_dev *pdev = adapter->pdev;
2388 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2390 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2391 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2392 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2393 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2395 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2396 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2398 spin_lock_init(&adapter->stats64_lock);
2399 #ifdef CONFIG_PCI_IOV
2400 switch (hw->mac.type) {
2401 case e1000_82576:
2402 case e1000_i350:
2403 if (max_vfs > 7) {
2404 dev_warn(&pdev->dev,
2405 "Maximum of 7 VFs per PF, using max\n");
2406 adapter->vfs_allocated_count = 7;
2407 } else
2408 adapter->vfs_allocated_count = max_vfs;
2409 break;
2410 default:
2411 break;
2412 }
2413 #endif /* CONFIG_PCI_IOV */
2414 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2415 /* i350 cannot do RSS and SR-IOV at the same time */
2416 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2417 adapter->rss_queues = 1;
2419 /*
2420 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2421 * then we should combine the queues into a queue pair in order to
2422 * conserve interrupts due to limited supply
2423 */
2424 if ((adapter->rss_queues > 4) ||
2425 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2426 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
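/*
 * Worked example of the pairing rule above, assuming one MSI-X vector
 * per unpaired Tx and Rx ring: with rss_queues == 8, unpaired
 * operation would want 16 ring vectors plus one for "other" causes,
 * which exceeds the adapter's MSI-X supply; pairing each Tx ring with
 * an Rx ring on a shared vector halves the demand.
 */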
2428 /* This call may decrease the number of queues */
2429 if (igb_init_interrupt_scheme(adapter)) {
2430 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2431 return -ENOMEM;
2432 }
2434 igb_probe_vfs(adapter);
2436 /* Explicitly disable IRQ since the NIC can be in any state. */
2437 igb_irq_disable(adapter);
2439 if (hw->mac.type == e1000_i350)
2440 adapter->flags &= ~IGB_FLAG_DMAC;
2442 set_bit(__IGB_DOWN, &adapter->state);
2443 return 0;
2444 }
2447 * igb_open - Called when a network interface is made active
2448 * @netdev: network interface device structure
2450 * Returns 0 on success, negative value on failure
2452 * The open entry point is called when a network interface is made
2453 * active by the system (IFF_UP). At this point all resources needed
2454 * for transmit and receive operations are allocated, the interrupt
2455 * handler is registered with the OS, the watchdog timer is started,
2456 * and the stack is notified that the interface is ready.
2458 static int igb_open(struct net_device *netdev)
2459 {
2460 struct igb_adapter *adapter = netdev_priv(netdev);
2461 struct e1000_hw *hw = &adapter->hw;
2462 int err;
2463 int i;
2465 /* disallow open during test */
2466 if (test_bit(__IGB_TESTING, &adapter->state))
2467 return -EBUSY;
2469 netif_carrier_off(netdev);
2471 /* allocate transmit descriptors */
2472 err = igb_setup_all_tx_resources(adapter);
2473 if (err)
2474 goto err_setup_tx;
2476 /* allocate receive descriptors */
2477 err = igb_setup_all_rx_resources(adapter);
2478 if (err)
2479 goto err_setup_rx;
2481 igb_power_up_link(adapter);
2483 /* before we allocate an interrupt, we must be ready to handle it.
2484 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2485 * as soon as we call pci_request_irq, so we have to setup our
2486 * clean_rx handler before we do so. */
2487 igb_configure(adapter);
2489 err = igb_request_irq(adapter);
2490 if (err)
2491 goto err_req_irq;
2493 /* From here on the code is the same as igb_up() */
2494 clear_bit(__IGB_DOWN, &adapter->state);
2496 for (i = 0; i < adapter->num_q_vectors; i++) {
2497 struct igb_q_vector *q_vector = adapter->q_vector[i];
2498 napi_enable(&q_vector->napi);
2499 }
2501 /* Clear any pending interrupts. */
2502 rd32(E1000_ICR);
2504 igb_irq_enable(adapter);
2506 /* notify VFs that reset has been completed */
2507 if (adapter->vfs_allocated_count) {
2508 u32 reg_data = rd32(E1000_CTRL_EXT);
2509 reg_data |= E1000_CTRL_EXT_PFRSTD;
2510 wr32(E1000_CTRL_EXT, reg_data);
2511 }
2513 netif_tx_start_all_queues(netdev);
2515 /* start the watchdog. */
2516 hw->mac.get_link_status = 1;
2517 schedule_work(&adapter->watchdog_task);
2519 return 0;
2521 err_req_irq:
2522 igb_release_hw_control(adapter);
2523 igb_power_down_link(adapter);
2524 igb_free_all_rx_resources(adapter);
2525 err_setup_rx:
2526 igb_free_all_tx_resources(adapter);
2527 err_setup_tx:
2528 igb_reset(adapter);
2530 return err;
2531 }
2534 * igb_close - Disables a network interface
2535 * @netdev: network interface device structure
2537 * Returns 0, this is not allowed to fail
2539 * The close entry point is called when an interface is de-activated
2540 * by the OS. The hardware is still under the driver's control, but
2541 * needs to be disabled. A global MAC reset is issued to stop the
2542 * hardware, and all transmit and receive resources are freed.
2544 static int igb_close(struct net_device *netdev)
2545 {
2546 struct igb_adapter *adapter = netdev_priv(netdev);
2548 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2549 igb_down(adapter);
2551 igb_free_irq(adapter);
2553 igb_free_all_tx_resources(adapter);
2554 igb_free_all_rx_resources(adapter);
2555 return 0;
2556 }
2560 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2561 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2563 * Return 0 on success, negative on failure
2565 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2566 {
2567 struct device *dev = tx_ring->dev;
2568 int size;
2570 size = sizeof(struct igb_buffer) * tx_ring->count;
2571 tx_ring->buffer_info = vzalloc(size);
2572 if (!tx_ring->buffer_info)
2573 goto err;
2575 /* round up to nearest 4K */
2576 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2577 tx_ring->size = ALIGN(tx_ring->size, 4096);
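/*
 * Worked example: each advanced Tx descriptor is 16 bytes, so a
 * 256-descriptor ring needs exactly 4096 bytes and ALIGN() leaves it
 * unchanged; only counts that are not multiples of 256 descriptors
 * actually get rounded up to the next 4K boundary here.
 */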
2579 tx_ring->desc = dma_alloc_coherent(dev,
2580 tx_ring->size,
2581 &tx_ring->dma,
2582 GFP_KERNEL);
2584 if (!tx_ring->desc)
2585 goto err;
2587 tx_ring->next_to_use = 0;
2588 tx_ring->next_to_clean = 0;
2589 return 0;
2591 err:
2592 vfree(tx_ring->buffer_info);
2593 dev_err(dev,
2594 "Unable to allocate memory for the transmit descriptor ring\n");
2595 return -ENOMEM;
2596 }
2599 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2600 * (Descriptors) for all queues
2601 * @adapter: board private structure
2603 * Return 0 on success, negative on failure
2605 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2606 {
2607 struct pci_dev *pdev = adapter->pdev;
2608 int i, err = 0;
2610 for (i = 0; i < adapter->num_tx_queues; i++) {
2611 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2612 if (err) {
2613 dev_err(&pdev->dev,
2614 "Allocation for Tx Queue %u failed\n", i);
2615 for (i--; i >= 0; i--)
2616 igb_free_tx_resources(adapter->tx_ring[i]);
2617 break;
2618 }
2619 }
2621 for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2622 int r_idx = i % adapter->num_tx_queues;
2623 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2624 }
2626 return err;
2627 }
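/*
 * Worked example: with num_tx_queues == 4 the modulo mapping above
 * fills multi_tx_table[0..15] with rings 0,1,2,3,0,1,2,3,... so any
 * queue index the stack selects always lands on a valid Tx ring.
 */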
2629 * igb_setup_tctl - configure the transmit control registers
2630 * @adapter: Board private structure
2632 void igb_setup_tctl(struct igb_adapter *adapter)
2633 {
2634 struct e1000_hw *hw = &adapter->hw;
2635 u32 tctl;
2637 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2638 wr32(E1000_TXDCTL(0), 0);
2640 /* Program the Transmit Control Register */
2641 tctl = rd32(E1000_TCTL);
2642 tctl &= ~E1000_TCTL_CT;
2643 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2644 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2646 igb_config_collision_dist(hw);
2648 /* Enable transmits */
2649 tctl |= E1000_TCTL_EN;
2651 wr32(E1000_TCTL, tctl);
2652 }
2655 * igb_configure_tx_ring - Configure transmit ring after Reset
2656 * @adapter: board private structure
2657 * @ring: tx ring to configure
2659 * Configure a transmit ring after a reset.
2661 void igb_configure_tx_ring(struct igb_adapter *adapter,
2662 struct igb_ring *ring)
2663 {
2664 struct e1000_hw *hw = &adapter->hw;
2665 u32 txdctl;
2666 u64 tdba = ring->dma;
2667 int reg_idx = ring->reg_idx;
2669 /* disable the queue */
2670 txdctl = rd32(E1000_TXDCTL(reg_idx));
2671 wr32(E1000_TXDCTL(reg_idx),
2672 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2673 wrfl();
2674 mdelay(10);
2676 wr32(E1000_TDLEN(reg_idx),
2677 ring->count * sizeof(union e1000_adv_tx_desc));
2678 wr32(E1000_TDBAL(reg_idx),
2679 tdba & 0x00000000ffffffffULL);
2680 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2682 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2683 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2684 writel(0, ring->head);
2685 writel(0, ring->tail);
2687 txdctl |= IGB_TX_PTHRESH;
2688 txdctl |= IGB_TX_HTHRESH << 8;
2689 txdctl |= IGB_TX_WTHRESH << 16;
2691 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2692 wr32(E1000_TXDCTL(reg_idx), txdctl);
2693 }
2696 * igb_configure_tx - Configure transmit Unit after Reset
2697 * @adapter: board private structure
2699 * Configure the Tx unit of the MAC after a reset.
2701 static void igb_configure_tx(struct igb_adapter *adapter)
2702 {
2703 int i;
2705 for (i = 0; i < adapter->num_tx_queues; i++)
2706 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2707 }
2710 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2711 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2713 * Returns 0 on success, negative on failure
2715 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2716 {
2717 struct device *dev = rx_ring->dev;
2718 int size, desc_len;
2720 size = sizeof(struct igb_buffer) * rx_ring->count;
2721 rx_ring->buffer_info = vzalloc(size);
2722 if (!rx_ring->buffer_info)
2723 goto err;
2725 desc_len = sizeof(union e1000_adv_rx_desc);
2727 /* Round up to nearest 4K */
2728 rx_ring->size = rx_ring->count * desc_len;
2729 rx_ring->size = ALIGN(rx_ring->size, 4096);
2731 rx_ring->desc = dma_alloc_coherent(dev,
2732 rx_ring->size,
2733 &rx_ring->dma,
2734 GFP_KERNEL);
2736 if (!rx_ring->desc)
2737 goto err;
2739 rx_ring->next_to_clean = 0;
2740 rx_ring->next_to_use = 0;
2741 return 0;
2743 err:
2745 vfree(rx_ring->buffer_info);
2746 rx_ring->buffer_info = NULL;
2747 dev_err(dev, "Unable to allocate memory for the receive descriptor "
2748 "ring\n");
2749 return -ENOMEM;
2750 }
2753 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2754 * (Descriptors) for all queues
2755 * @adapter: board private structure
2757 * Return 0 on success, negative on failure
2759 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2760 {
2761 struct pci_dev *pdev = adapter->pdev;
2762 int i, err = 0;
2764 for (i = 0; i < adapter->num_rx_queues; i++) {
2765 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2766 if (err) {
2767 dev_err(&pdev->dev,
2768 "Allocation for Rx Queue %u failed\n", i);
2769 for (i--; i >= 0; i--)
2770 igb_free_rx_resources(adapter->rx_ring[i]);
2771 break;
2772 }
2773 }
2775 return err;
2776 }
2779 * igb_setup_mrqc - configure the multiple receive queue control registers
2780 * @adapter: Board private structure
2782 static void igb_setup_mrqc(struct igb_adapter *adapter)
2783 {
2784 struct e1000_hw *hw = &adapter->hw;
2785 u32 mrqc, rxcsum;
2786 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2787 union e1000_reta {
2788 u32 dword;
2789 u8 bytes[4];
2790 } reta;
2791 static const u8 rsshash[40] = {
2792 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2793 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2794 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2795 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2797 /* Fill out hash function seeds */
2798 for (j = 0; j < 10; j++) {
2799 u32 rsskey = rsshash[(j * 4)];
2800 rsskey |= rsshash[(j * 4) + 1] << 8;
2801 rsskey |= rsshash[(j * 4) + 2] << 16;
2802 rsskey |= rsshash[(j * 4) + 3] << 24;
2803 array_wr32(E1000_RSSRK(0), j, rsskey);
2804 }
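/*
 * Worked example: the loop above packs four seed bytes little-endian
 * into each 32-bit RSSRK register. For j == 0 the bytes 0x6d, 0x5a,
 * 0x56, 0xda combine to rsskey == 0xda565a6d.
 */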
2806 num_rx_queues = adapter->rss_queues;
2808 if (adapter->vfs_allocated_count) {
2809 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2810 switch (hw->mac.type) {
2811 case e1000_i350:
2812 case e1000_82580:
2813 num_rx_queues = 1;
2814 shift = 0;
2815 break;
2816 case e1000_82576:
2817 shift = 3;
2818 num_rx_queues = 2;
2819 break;
2820 case e1000_82575:
2821 shift = 2;
2822 shift2 = 6;
2823 default:
2824 break;
2825 }
2826 } else {
2827 if (hw->mac.type == e1000_82575)
2828 shift = 6;
2829 }
2831 for (j = 0; j < (32 * 4); j++) {
2832 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2833 if (shift2)
2834 reta.bytes[j & 3] |= num_rx_queues << shift2;
2835 if ((j & 3) == 3)
2836 wr32(E1000_RETA(j >> 2), reta.dword);
2837 }
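/*
 * Worked example: with num_rx_queues == 4 and shift == 0 the 128
 * redirection entries repeat the pattern 0,1,2,3,... and every fourth
 * iteration flushes the packed reta.dword into one of the 32 RETA
 * registers, spreading RSS hash buckets evenly over the queues.
 */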
2839 /*
2840 * Disable raw packet checksumming so that RSS hash is placed in
2841 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2842 * offloads as they are enabled by default
2843 */
2844 rxcsum = rd32(E1000_RXCSUM);
2845 rxcsum |= E1000_RXCSUM_PCSD;
2847 if (adapter->hw.mac.type >= e1000_82576)
2848 /* Enable Receive Checksum Offload for SCTP */
2849 rxcsum |= E1000_RXCSUM_CRCOFL;
2851 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2852 wr32(E1000_RXCSUM, rxcsum);
2854 /* If VMDq is enabled then we set the appropriate mode for that, else
2855 * we default to RSS so that an RSS hash is calculated per packet even
2856 * if we are only using one queue */
2857 if (adapter->vfs_allocated_count) {
2858 if (hw->mac.type > e1000_82575) {
2859 /* Set the default pool for the PF's first queue */
2860 u32 vtctl = rd32(E1000_VT_CTL);
2861 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2862 E1000_VT_CTL_DISABLE_DEF_POOL);
2863 vtctl |= adapter->vfs_allocated_count <<
2864 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2865 wr32(E1000_VT_CTL, vtctl);
2867 if (adapter->rss_queues > 1)
2868 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2869 else
2870 mrqc = E1000_MRQC_ENABLE_VMDQ;
2871 } else {
2872 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2873 }
2874 igb_vmm_control(adapter);
2876 /*
2877 * Generate RSS hash based on TCP port numbers and/or
2878 * IPv4/v6 src and dst addresses since UDP cannot be
2879 * hashed reliably due to IP fragmentation
2880 */
2881 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2882 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2883 E1000_MRQC_RSS_FIELD_IPV6 |
2884 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2885 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2887 wr32(E1000_MRQC, mrqc);
2888 }
2891 * igb_setup_rctl - configure the receive control registers
2892 * @adapter: Board private structure
2894 void igb_setup_rctl(struct igb_adapter *adapter)
2895 {
2896 struct e1000_hw *hw = &adapter->hw;
2897 u32 rctl;
2899 rctl = rd32(E1000_RCTL);
2901 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2902 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2904 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2905 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2907 /*
2908 * enable stripping of CRC. It's unlikely this will break BMC
2909 * redirection as it did with e1000. Newer features require
2910 * that the HW strips the CRC.
2911 */
2912 rctl |= E1000_RCTL_SECRC;
2914 /* disable store bad packets and clear size bits. */
2915 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2917 /* enable LPE to prevent packets larger than max_frame_size */
2918 rctl |= E1000_RCTL_LPE;
2920 /* disable queue 0 to prevent tail write w/o re-config */
2921 wr32(E1000_RXDCTL(0), 0);
2923 /* Attention!!! For SR-IOV PF driver operations you must enable
2924 * queue drop for all VF and PF queues to prevent head of line blocking
2925 * if an un-trusted VF does not provide descriptors to hardware.
2927 if (adapter->vfs_allocated_count) {
2928 /* set all queue drop enable bits */
2929 wr32(E1000_QDE, ALL_QUEUES);
2930 }
2932 wr32(E1000_RCTL, rctl);
2933 }
2935 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2936 int vfn)
2937 {
2938 struct e1000_hw *hw = &adapter->hw;
2939 u32 vmolr;
2941 /* if it isn't the PF check to see if VFs are enabled and
2942 * increase the size to support vlan tags */
2943 if (vfn < adapter->vfs_allocated_count &&
2944 adapter->vf_data[vfn].vlans_enabled)
2945 size += VLAN_TAG_SIZE;
2947 vmolr = rd32(E1000_VMOLR(vfn));
2948 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2949 vmolr |= size | E1000_VMOLR_LPE;
2950 wr32(E1000_VMOLR(vfn), vmolr);
2952 return 0;
2953 }
2956 * igb_rlpml_set - set maximum receive packet size
2957 * @adapter: board private structure
2959 * Configure maximum receivable packet size.
2961 static void igb_rlpml_set(struct igb_adapter *adapter)
2962 {
2963 u32 max_frame_size;
2964 struct e1000_hw *hw = &adapter->hw;
2965 u16 pf_id = adapter->vfs_allocated_count;
2967 max_frame_size = adapter->max_frame_size + VLAN_TAG_SIZE;
2969 /* if vfs are enabled we set RLPML to the largest possible request
2970 * size and set the VMOLR RLPML to the size we need */
2971 if (pf_id) {
2972 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2973 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2974 }
2976 wr32(E1000_RLPML, max_frame_size);
2977 }
2979 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2980 int vfn, bool aupe)
2981 {
2982 struct e1000_hw *hw = &adapter->hw;
2983 u32 vmolr;
2985 /*
2986 * This register exists only on 82576 and newer so if we are older then
2987 * we should exit and do nothing
2988 */
2989 if (hw->mac.type < e1000_82576)
2990 return;
2992 vmolr = rd32(E1000_VMOLR(vfn));
2993 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
2994 if (aupe)
2995 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
2996 else
2997 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2999 /* clear all bits that might not be set */
3000 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3002 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3003 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3004 /*
3005 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3006 * multicast packets
3007 */
3008 if (vfn <= adapter->vfs_allocated_count)
3009 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3011 wr32(E1000_VMOLR(vfn), vmolr);
3012 }
3015 * igb_configure_rx_ring - Configure a receive ring after Reset
3016 * @adapter: board private structure
3017 * @ring: receive ring to be configured
3019 * Configure the Rx unit of the MAC after a reset.
3021 void igb_configure_rx_ring(struct igb_adapter *adapter,
3022 struct igb_ring *ring)
3023 {
3024 struct e1000_hw *hw = &adapter->hw;
3025 u64 rdba = ring->dma;
3026 int reg_idx = ring->reg_idx;
3027 u32 srrctl = 0, rxdctl;
3029 /* disable the queue */
3030 rxdctl = rd32(E1000_RXDCTL(reg_idx));
3031 wr32(E1000_RXDCTL(reg_idx),
3032 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
3034 /* Set DMA base address registers */
3035 wr32(E1000_RDBAL(reg_idx),
3036 rdba & 0x00000000ffffffffULL);
3037 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3038 wr32(E1000_RDLEN(reg_idx),
3039 ring->count * sizeof(union e1000_adv_rx_desc));
3041 /* initialize head and tail */
3042 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
3043 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3044 writel(0, ring->head);
3045 writel(0, ring->tail);
3047 /* set descriptor configuration */
3048 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
3049 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
3050 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3051 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3052 srrctl |= IGB_RXBUFFER_16384 >>
3053 E1000_SRRCTL_BSIZEPKT_SHIFT;
3054 #else
3055 srrctl |= (PAGE_SIZE / 2) >>
3056 E1000_SRRCTL_BSIZEPKT_SHIFT;
3057 #endif
3058 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3059 } else {
3060 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
3061 E1000_SRRCTL_BSIZEPKT_SHIFT;
3062 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3063 }
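/*
 * Worked example (assuming E1000_SRRCTL_BSIZEPKT_SHIFT == 10, i.e.
 * 1 KB packet-buffer units): a 2048-byte rx_buffer_len in the
 * one-buffer path encodes as 2048 >> 10 = 2, while the header-split
 * path programs half a page per packet buffer instead.
 */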
3064 if (hw->mac.type == e1000_82580)
3065 srrctl |= E1000_SRRCTL_TIMESTAMP;
3066 /* Only set Drop Enable if we are supporting multiple queues */
3067 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3068 srrctl |= E1000_SRRCTL_DROP_EN;
3070 wr32(E1000_SRRCTL(reg_idx), srrctl);
3072 /* set filtering for VMDQ pools */
3073 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3075 /* enable receive descriptor fetching */
3076 rxdctl = rd32(E1000_RXDCTL(reg_idx));
3077 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3078 rxdctl &= 0xFFF00000;
3079 rxdctl |= IGB_RX_PTHRESH;
3080 rxdctl |= IGB_RX_HTHRESH << 8;
3081 rxdctl |= IGB_RX_WTHRESH << 16;
3082 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3083 }
3086 * igb_configure_rx - Configure receive Unit after Reset
3087 * @adapter: board private structure
3089 * Configure the Rx unit of the MAC after a reset.
3091 static void igb_configure_rx(struct igb_adapter *adapter)
3092 {
3093 int i;
3095 /* set UTA to appropriate mode */
3096 igb_set_uta(adapter);
3098 /* set the correct pool for the PF default MAC address in entry 0 */
3099 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3100 adapter->vfs_allocated_count);
3102 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3103 * the Base and Length of the Rx Descriptor Ring */
3104 for (i = 0; i < adapter->num_rx_queues; i++)
3105 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3106 }
3109 * igb_free_tx_resources - Free Tx Resources per Queue
3110 * @tx_ring: Tx descriptor ring for a specific queue
3112 * Free all transmit software resources
3114 void igb_free_tx_resources(struct igb_ring *tx_ring)
3115 {
3116 igb_clean_tx_ring(tx_ring);
3118 vfree(tx_ring->buffer_info);
3119 tx_ring->buffer_info = NULL;
3121 /* if not set, then don't free */
3122 if (!tx_ring->desc)
3123 return;
3125 dma_free_coherent(tx_ring->dev, tx_ring->size,
3126 tx_ring->desc, tx_ring->dma);
3128 tx_ring->desc = NULL;
3129 }
3132 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3133 * @adapter: board private structure
3135 * Free all transmit software resources
3137 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3138 {
3139 int i;
3141 for (i = 0; i < adapter->num_tx_queues; i++)
3142 igb_free_tx_resources(adapter->tx_ring[i]);
3143 }
3145 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3146 struct igb_buffer *buffer_info)
3147 {
3148 if (buffer_info->dma) {
3149 if (buffer_info->mapped_as_page)
3150 dma_unmap_page(tx_ring->dev,
3151 buffer_info->dma,
3152 buffer_info->length,
3153 DMA_TO_DEVICE);
3154 else
3155 dma_unmap_single(tx_ring->dev,
3156 buffer_info->dma,
3157 buffer_info->length,
3158 DMA_TO_DEVICE);
3159 buffer_info->dma = 0;
3160 }
3161 if (buffer_info->skb) {
3162 dev_kfree_skb_any(buffer_info->skb);
3163 buffer_info->skb = NULL;
3165 buffer_info->time_stamp = 0;
3166 buffer_info->length = 0;
3167 buffer_info->next_to_watch = 0;
3168 buffer_info->mapped_as_page = false;
3169 }
3172 * igb_clean_tx_ring - Free Tx Buffers
3173 * @tx_ring: ring to be cleaned
3175 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3176 {
3177 struct igb_buffer *buffer_info;
3178 unsigned long size;
3179 unsigned int i;
3181 if (!tx_ring->buffer_info)
3182 return;
3183 /* Free all the Tx ring sk_buffs */
3185 for (i = 0; i < tx_ring->count; i++) {
3186 buffer_info = &tx_ring->buffer_info[i];
3187 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3188 }
3190 size = sizeof(struct igb_buffer) * tx_ring->count;
3191 memset(tx_ring->buffer_info, 0, size);
3193 /* Zero out the descriptor ring */
3194 memset(tx_ring->desc, 0, tx_ring->size);
3196 tx_ring->next_to_use = 0;
3197 tx_ring->next_to_clean = 0;
3198 }
3201 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3202 * @adapter: board private structure
3204 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3205 {
3206 int i;
3208 for (i = 0; i < adapter->num_tx_queues; i++)
3209 igb_clean_tx_ring(adapter->tx_ring[i]);
3210 }
3213 * igb_free_rx_resources - Free Rx Resources
3214 * @rx_ring: ring to clean the resources from
3216 * Free all receive software resources
3218 void igb_free_rx_resources(struct igb_ring *rx_ring)
3219 {
3220 igb_clean_rx_ring(rx_ring);
3222 vfree(rx_ring->buffer_info);
3223 rx_ring->buffer_info = NULL;
3225 /* if not set, then don't free */
3226 if (!rx_ring->desc)
3227 return;
3229 dma_free_coherent(rx_ring->dev, rx_ring->size,
3230 rx_ring->desc, rx_ring->dma);
3232 rx_ring->desc = NULL;
3233 }
3236 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3237 * @adapter: board private structure
3239 * Free all receive software resources
3241 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3242 {
3243 int i;
3245 for (i = 0; i < adapter->num_rx_queues; i++)
3246 igb_free_rx_resources(adapter->rx_ring[i]);
3247 }
3250 * igb_clean_rx_ring - Free Rx Buffers per Queue
3251 * @rx_ring: ring to free buffers from
3253 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3254 {
3255 struct igb_buffer *buffer_info;
3256 unsigned long size;
3257 unsigned int i;
3259 if (!rx_ring->buffer_info)
3260 return;
3262 /* Free all the Rx ring sk_buffs */
3263 for (i = 0; i < rx_ring->count; i++) {
3264 buffer_info = &rx_ring->buffer_info[i];
3265 if (buffer_info->dma) {
3266 dma_unmap_single(rx_ring->dev,
3267 buffer_info->dma,
3268 rx_ring->rx_buffer_len,
3269 DMA_FROM_DEVICE);
3270 buffer_info->dma = 0;
3271 }
3273 if (buffer_info->skb) {
3274 dev_kfree_skb(buffer_info->skb);
3275 buffer_info->skb = NULL;
3277 if (buffer_info->page_dma) {
3278 dma_unmap_page(rx_ring->dev,
3279 buffer_info->page_dma,
3280 PAGE_SIZE / 2,
3281 DMA_FROM_DEVICE);
3282 buffer_info->page_dma = 0;
3283 }
3284 if (buffer_info->page) {
3285 put_page(buffer_info->page);
3286 buffer_info->page = NULL;
3287 buffer_info->page_offset = 0;
3288 }
3289 }
3291 size = sizeof(struct igb_buffer) * rx_ring->count;
3292 memset(rx_ring->buffer_info, 0, size);
3294 /* Zero out the descriptor ring */
3295 memset(rx_ring->desc, 0, rx_ring->size);
3297 rx_ring->next_to_clean = 0;
3298 rx_ring->next_to_use = 0;
3299 }
3302 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3303 * @adapter: board private structure
3305 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3306 {
3307 int i;
3309 for (i = 0; i < adapter->num_rx_queues; i++)
3310 igb_clean_rx_ring(adapter->rx_ring[i]);
3311 }
3314 * igb_set_mac - Change the Ethernet Address of the NIC
3315 * @netdev: network interface device structure
3316 * @p: pointer to an address structure
3318 * Returns 0 on success, negative on failure
3320 static int igb_set_mac(struct net_device *netdev, void *p)
3321 {
3322 struct igb_adapter *adapter = netdev_priv(netdev);
3323 struct e1000_hw *hw = &adapter->hw;
3324 struct sockaddr *addr = p;
3326 if (!is_valid_ether_addr(addr->sa_data))
3327 return -EADDRNOTAVAIL;
3329 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3330 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3332 /* set the correct pool for the new PF MAC address in entry 0 */
3333 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3334 adapter->vfs_allocated_count);
3336 return 0;
3337 }
3340 * igb_write_mc_addr_list - write multicast addresses to MTA
3341 * @netdev: network interface device structure
3343 * Writes multicast address list to the MTA hash table.
3344 * Returns: -ENOMEM on failure
3345 * 0 on no addresses written
3346 * X on writing X addresses to MTA
3348 static int igb_write_mc_addr_list(struct net_device *netdev)
3349 {
3350 struct igb_adapter *adapter = netdev_priv(netdev);
3351 struct e1000_hw *hw = &adapter->hw;
3352 struct netdev_hw_addr *ha;
3353 u8 *mta_list;
3354 int i;
3356 if (netdev_mc_empty(netdev)) {
3357 /* nothing to program, so clear mc list */
3358 igb_update_mc_addr_list(hw, NULL, 0);
3359 igb_restore_vf_multicasts(adapter);
3360 return 0;
3361 }
3363 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3364 if (!mta_list)
3365 return -ENOMEM;
3367 /* The shared function expects a packed array of only addresses. */
3368 i = 0;
3369 netdev_for_each_mc_addr(ha, netdev)
3370 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3372 igb_update_mc_addr_list(hw, mta_list, i);
3373 kfree(mta_list);
3375 return netdev_mc_count(netdev);
3376 }
3379 * igb_write_uc_addr_list - write unicast addresses to RAR table
3380 * @netdev: network interface device structure
3382 * Writes unicast address list to the RAR table.
3383 * Returns: -ENOMEM on failure/insufficient address space
3384 * 0 on no addresses written
3385 * X on writing X addresses to the RAR table
3387 static int igb_write_uc_addr_list(struct net_device *netdev)
3388 {
3389 struct igb_adapter *adapter = netdev_priv(netdev);
3390 struct e1000_hw *hw = &adapter->hw;
3391 unsigned int vfn = adapter->vfs_allocated_count;
3392 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3393 int count = 0;
3395 /* return ENOMEM indicating insufficient memory for addresses */
3396 if (netdev_uc_count(netdev) > rar_entries)
3397 return -ENOMEM;
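/*
 * Worked example (assuming a 24-entry RAR table as on 82576): with
 * 7 VFs, vfn == 7, entry 0 holds the PF MAC and entries 1..7 are
 * reserved for the VFs, leaving rar_entries == 24 - 8 == 16 slots
 * for additional unicast filters.
 */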
3399 if (!netdev_uc_empty(netdev) && rar_entries) {
3400 struct netdev_hw_addr *ha;
3402 netdev_for_each_uc_addr(ha, netdev) {
3403 if (!rar_entries)
3404 break;
3405 igb_rar_set_qsel(adapter, ha->addr,
3406 rar_entries--,
3407 vfn);
3408 count++;
3409 }
3410 }
3411 /* write the addresses in reverse order to avoid write combining */
3412 for (; rar_entries > 0 ; rar_entries--) {
3413 wr32(E1000_RAH(rar_entries), 0);
3414 wr32(E1000_RAL(rar_entries), 0);
3415 }
3416 wrfl();
3418 return count;
3419 }
3422 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3423 * @netdev: network interface device structure
3425 * The set_rx_mode entry point is called whenever the unicast or multicast
3426 * address lists or the network interface flags are updated. This routine is
3427 * responsible for configuring the hardware for proper unicast, multicast,
3428 * promiscuous mode, and all-multi behavior.
3430 static void igb_set_rx_mode(struct net_device *netdev)
3431 {
3432 struct igb_adapter *adapter = netdev_priv(netdev);
3433 struct e1000_hw *hw = &adapter->hw;
3434 unsigned int vfn = adapter->vfs_allocated_count;
3435 u32 rctl, vmolr = 0;
3436 int count;
3438 /* Check for Promiscuous and All Multicast modes */
3439 rctl = rd32(E1000_RCTL);
3441 /* clear the effected bits */
3442 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3444 if (netdev->flags & IFF_PROMISC) {
3445 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3446 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3447 } else {
3448 if (netdev->flags & IFF_ALLMULTI) {
3449 rctl |= E1000_RCTL_MPE;
3450 vmolr |= E1000_VMOLR_MPME;
3451 } else {
3452 /*
3453 * Write addresses to the MTA, if the attempt fails
3454 * then we should just turn on promiscuous mode so
3455 * that we can at least receive multicast traffic
3456 */
3457 count = igb_write_mc_addr_list(netdev);
3458 if (count < 0) {
3459 rctl |= E1000_RCTL_MPE;
3460 vmolr |= E1000_VMOLR_MPME;
3461 } else if (count) {
3462 vmolr |= E1000_VMOLR_ROMPE;
3463 }
3464 }
3465 /*
3466 * Write addresses to available RAR registers, if there is not
3467 * sufficient space to store all the addresses then enable
3468 * unicast promiscuous mode
3469 */
3470 count = igb_write_uc_addr_list(netdev);
3471 if (count < 0) {
3472 rctl |= E1000_RCTL_UPE;
3473 vmolr |= E1000_VMOLR_ROPE;
3474 }
3475 rctl |= E1000_RCTL_VFE;
3476 }
3477 wr32(E1000_RCTL, rctl);
3479 /*
3480 * In order to support SR-IOV and eventually VMDq it is necessary to set
3481 * the VMOLR to enable the appropriate modes. Without this workaround
3482 * we will have issues with VLAN tag stripping not being done for frames
3483 * that are only arriving because we are the default pool
3484 */
3485 if (hw->mac.type < e1000_82576)
3486 return;
3488 vmolr |= rd32(E1000_VMOLR(vfn)) &
3489 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3490 wr32(E1000_VMOLR(vfn), vmolr);
3491 igb_restore_vf_multicasts(adapter);
3492 }
3494 static void igb_check_wvbr(struct igb_adapter *adapter)
3495 {
3496 struct e1000_hw *hw = &adapter->hw;
3497 u32 wvbr = 0;
3499 switch (hw->mac.type) {
3500 case e1000_82576:
3501 case e1000_i350:
3502 if (!(wvbr = rd32(E1000_WVBR)))
3503 return;
3504 break;
3505 default:
3506 break;
3507 }
3509 adapter->wvbr |= wvbr;
3510 }
3512 #define IGB_STAGGERED_QUEUE_OFFSET 8
3514 static void igb_spoof_check(struct igb_adapter *adapter)
3515 {
3516 int j;
3518 if (!adapter->wvbr)
3519 return;
3521 for (j = 0; j < adapter->vfs_allocated_count; j++) {
3522 if (adapter->wvbr & (1 << j) ||
3523 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3524 dev_warn(&adapter->pdev->dev,
3525 "Spoof event(s) detected on VF %d\n", j);
3526 adapter->wvbr &=
3527 ~((1 << j) |
3528 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3529 }
3530 }
3531 }
3533 /* Need to wait a few seconds after link up to get diagnostic information from
3534 * the phy */
3535 static void igb_update_phy_info(unsigned long data)
3536 {
3537 struct igb_adapter *adapter = (struct igb_adapter *) data;
3538 igb_get_phy_info(&adapter->hw);
3539 }
3542 * igb_has_link - check shared code for link and determine up/down
3543 * @adapter: pointer to driver private info
3545 bool igb_has_link(struct igb_adapter *adapter)
3546 {
3547 struct e1000_hw *hw = &adapter->hw;
3548 bool link_active = false;
3549 s32 ret_val = 0;
3551 /* get_link_status is set on LSC (link status) interrupt or
3552 * rx sequence error interrupt. get_link_status will stay
3553 * false until the e1000_check_for_link establishes link
3554 * for copper adapters ONLY
3555 */
3556 switch (hw->phy.media_type) {
3557 case e1000_media_type_copper:
3558 if (hw->mac.get_link_status) {
3559 ret_val = hw->mac.ops.check_for_link(hw);
3560 link_active = !hw->mac.get_link_status;
3561 } else {
3562 link_active = true;
3563 }
3564 break;
3565 case e1000_media_type_internal_serdes:
3566 ret_val = hw->mac.ops.check_for_link(hw);
3567 link_active = hw->mac.serdes_has_link;
3568 break;
3569 case e1000_media_type_unknown:
3570 default:
3571 break;
3572 }
3574 return link_active;
3575 }
3577 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3578 {
3579 bool ret = false;
3580 u32 ctrl_ext, thstat;
3582 /* check for thermal sensor event on i350, copper only */
3583 if (hw->mac.type == e1000_i350) {
3584 thstat = rd32(E1000_THSTAT);
3585 ctrl_ext = rd32(E1000_CTRL_EXT);
3587 if ((hw->phy.media_type == e1000_media_type_copper) &&
3588 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3589 ret = !!(thstat & event);
3590 }
3591 }
3593 return ret;
3594 }
3597 * igb_watchdog - Timer Call-back
3598 * @data: pointer to adapter cast into an unsigned long
3600 static void igb_watchdog(unsigned long data)
3601 {
3602 struct igb_adapter *adapter = (struct igb_adapter *)data;
3603 /* Do the rest outside of interrupt context */
3604 schedule_work(&adapter->watchdog_task);
3605 }
3607 static void igb_watchdog_task(struct work_struct *work)
3608 {
3609 struct igb_adapter *adapter = container_of(work,
3610 struct igb_adapter,
3611 watchdog_task);
3612 struct e1000_hw *hw = &adapter->hw;
3613 struct net_device *netdev = adapter->netdev;
3614 u32 link;
3615 int i;
3617 link = igb_has_link(adapter);
3618 if (link) {
3619 if (!netif_carrier_ok(netdev)) {
3620 u32 ctrl;
3621 hw->mac.ops.get_speed_and_duplex(hw,
3622 &adapter->link_speed,
3623 &adapter->link_duplex);
3625 ctrl = rd32(E1000_CTRL);
3626 /* Links status message must follow this format */
3627 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3628 "Flow Control: %s\n",
3629 netdev->name,
3630 adapter->link_speed,
3631 adapter->link_duplex == FULL_DUPLEX ?
3632 "Full Duplex" : "Half Duplex",
3633 ((ctrl & E1000_CTRL_TFCE) &&
3634 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3635 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3636 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3638 /* check for thermal sensor event */
3639 if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3640 printk(KERN_INFO "igb: %s The network adapter "
3641 "link speed was downshifted "
3642 "because it overheated.\n",
3643 netdev->name);
3644 }
3646 /* adjust timeout factor according to speed/duplex */
3647 adapter->tx_timeout_factor = 1;
3648 switch (adapter->link_speed) {
3649 case SPEED_10:
3650 adapter->tx_timeout_factor = 14;
3651 break;
3652 case SPEED_100:
3653 /* maybe add some timeout factor ? */
3654 break;
3655 }
3657 netif_carrier_on(netdev);
3659 igb_ping_all_vfs(adapter);
3660 igb_check_vf_rate_limit(adapter);
3662 /* link state has changed, schedule phy info update */
3663 if (!test_bit(__IGB_DOWN, &adapter->state))
3664 mod_timer(&adapter->phy_info_timer,
3665 round_jiffies(jiffies + 2 * HZ));
3666 }
3667 } else {
3668 if (netif_carrier_ok(netdev)) {
3669 adapter->link_speed = 0;
3670 adapter->link_duplex = 0;
3672 /* check for thermal sensor event */
3673 if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3674 printk(KERN_ERR "igb: %s The network adapter "
3675 "was stopped because it "
3676 "overheated.\n",
3677 netdev->name);
3678 }
3680 /* Links status message must follow this format */
3681 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3682 netdev->name);
3683 netif_carrier_off(netdev);
3685 igb_ping_all_vfs(adapter);
3687 /* link state has changed, schedule phy info update */
3688 if (!test_bit(__IGB_DOWN, &adapter->state))
3689 mod_timer(&adapter->phy_info_timer,
3690 round_jiffies(jiffies + 2 * HZ));
3691 }
3692 }
3694 spin_lock(&adapter->stats64_lock);
3695 igb_update_stats(adapter, &adapter->stats64);
3696 spin_unlock(&adapter->stats64_lock);
3698 for (i = 0; i < adapter->num_tx_queues; i++) {
3699 struct igb_ring *tx_ring = adapter->tx_ring[i];
3700 if (!netif_carrier_ok(netdev)) {
3701 /* We've lost link, so the controller stops DMA,
3702 * but we've got queued Tx work that's never going
3703 * to get done, so reset controller to flush Tx.
3704 * (Do the reset outside of interrupt context). */
3705 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3706 adapter->tx_timeout_count++;
3707 schedule_work(&adapter->reset_task);
3708 /* return immediately since reset is imminent */
3709 return;
3710 }
3711 }
3713 /* Force detection of hung controller every watchdog period */
3714 tx_ring->detect_tx_hung = true;
3715 }
3717 /* Cause software interrupt to ensure rx ring is cleaned */
3718 if (adapter->msix_entries) {
3719 u32 eics = 0;
3720 for (i = 0; i < adapter->num_q_vectors; i++) {
3721 struct igb_q_vector *q_vector = adapter->q_vector[i];
3722 eics |= q_vector->eims_value;
3723 }
3724 wr32(E1000_EICS, eics);
3725 } else {
3726 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3727 }
3729 igb_spoof_check(adapter);
3731 /* Reset the timer */
3732 if (!test_bit(__IGB_DOWN, &adapter->state))
3733 mod_timer(&adapter->watchdog_timer,
3734 round_jiffies(jiffies + 2 * HZ));
3735 }
3737 enum latency_range {
3738 lowest_latency = 0,
3739 low_latency = 1,
3740 bulk_latency = 2,
3741 latency_invalid = 255
3742 };
3745 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3747 * Stores a new ITR value based strictly on packet size. This
3748 * algorithm is less sophisticated than that used in igb_update_itr,
3749 * due to the difficulty of synchronizing statistics across multiple
3750 * receive rings. The divisors and thresholds used by this function
3751 * were determined based on theoretical maximum wire speed and testing
3752 * data, in order to minimize response time while increasing bulk
3753 * throughput.
3754 * This functionality is controlled by the InterruptThrottleRate module
3755 * parameter (see igb_param.c)
3756 * NOTE: This function is called only when operating in a multiqueue
3757 * receive environment.
3758 * @q_vector: pointer to q_vector
3760 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3761 {
3762 int new_val = q_vector->itr_val;
3763 int avg_wire_size = 0;
3764 struct igb_adapter *adapter = q_vector->adapter;
3765 struct igb_ring *ring;
3766 unsigned int packets;
3768 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3769 * ints/sec - ITR timer value of 120 ticks.
3771 if (adapter->link_speed != SPEED_1000) {
3772 new_val = 976;
3773 goto set_itr_val;
3774 }
3776 ring = q_vector->rx_ring;
3777 if (ring) {
3778 packets = ACCESS_ONCE(ring->total_packets);
3779 if (packets)
3780 avg_wire_size = ring->total_bytes / packets;
3781 }
3784 ring = q_vector->tx_ring;
3785 if (ring) {
3786 packets = ACCESS_ONCE(ring->total_packets);
3787 if (packets)
3788 avg_wire_size = max_t(u32, avg_wire_size,
3789 ring->total_bytes / packets);
3790 }
3793 /* if avg_wire_size isn't set no work was done */
3794 if (!avg_wire_size)
3795 goto clear_counts;
3797 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3798 avg_wire_size += 24;
3800 /* Don't starve jumbo frames */
3801 avg_wire_size = min(avg_wire_size, 3000);
3803 /* Give a little boost to mid-size frames */
3804 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3805 new_val = avg_wire_size / 3;
3806 else
3807 new_val = avg_wire_size / 2;
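/*
 * Worked example: minimum-size 60-byte frames average 84 bytes after
 * the +24 adjustment above and yield new_val = 84 / 2 = 42 (a high
 * interrupt rate that favors latency), while 1500-byte frames yield
 * roughly 1524 / 2 = 762 (a low, bulk-friendly rate).
 */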
3809 /* when in itr mode 3 do not exceed 20K ints/sec */
3810 if (adapter->rx_itr_setting == 3 && new_val < 196)
3811 new_val = 196;
3813 set_itr_val:
3814 if (new_val != q_vector->itr_val) {
3815 q_vector->itr_val = new_val;
3816 q_vector->set_itr = 1;
3817 }
3818 clear_counts:
3819 if (q_vector->rx_ring) {
3820 q_vector->rx_ring->total_bytes = 0;
3821 q_vector->rx_ring->total_packets = 0;
3822 }
3823 if (q_vector->tx_ring) {
3824 q_vector->tx_ring->total_bytes = 0;
3825 q_vector->tx_ring->total_packets = 0;
3826 }
3827 }
3830 * igb_update_itr - update the dynamic ITR value based on statistics
3831 * Stores a new ITR value based on packets and byte
3832 * counts during the last interrupt. The advantage of per interrupt
3833 * computation is faster updates and more accurate ITR for the current
3834 * traffic pattern. Constants in this function were computed
3835 * based on theoretical maximum wire speed and thresholds were set based
3836 * on testing data as well as attempting to minimize response time
3837 * while increasing bulk throughput.
3838 * this functionality is controlled by the InterruptThrottleRate module
3839 * parameter (see igb_param.c)
3840 * NOTE: These calculations are only valid when operating in a single-
3841 * queue environment.
3842 * @adapter: pointer to adapter
3843 * @itr_setting: current q_vector->itr_val
3844 * @packets: the number of packets during this measurement interval
3845 * @bytes: the number of bytes during this measurement interval
3847 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3848 int packets, int bytes)
3849 {
3850 unsigned int retval = itr_setting;
3852 if (packets == 0)
3853 goto update_itr_done;
3855 switch (itr_setting) {
3856 case lowest_latency:
3857 /* handle TSO and jumbo frames */
3858 if (bytes/packets > 8000)
3859 retval = bulk_latency;
3860 else if ((packets < 5) && (bytes > 512))
3861 retval = low_latency;
3862 break;
3863 case low_latency: /* 50 usec aka 20000 ints/s */
3864 if (bytes > 10000) {
3865 /* this if handles the TSO accounting */
3866 if (bytes/packets > 8000) {
3867 retval = bulk_latency;
3868 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3869 retval = bulk_latency;
3870 } else if ((packets > 35)) {
3871 retval = lowest_latency;
3872 }
3873 } else if (bytes/packets > 2000) {
3874 retval = bulk_latency;
3875 } else if (packets <= 2 && bytes < 512) {
3876 retval = lowest_latency;
3877 }
3878 break;
3879 case bulk_latency: /* 250 usec aka 4000 ints/s */
3880 if (bytes > 25000) {
3881 if (packets > 35)
3882 retval = low_latency;
3883 } else if (bytes < 1500) {
3884 retval = low_latency;
3885 }
3886 break;
3887 }
3889 update_itr_done:
3890 return retval;
3891 }
3893 static void igb_set_itr(struct igb_adapter *adapter)
3894 {
3895 struct igb_q_vector *q_vector = adapter->q_vector[0];
3896 u16 current_itr;
3897 u32 new_itr = q_vector->itr_val;
3899 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3900 if (adapter->link_speed != SPEED_1000) {
3901 current_itr = 0;
3902 new_itr = 980;
3903 goto set_itr_now;
3904 }
3906 adapter->rx_itr = igb_update_itr(adapter,
3907 adapter->rx_itr,
3908 q_vector->rx_ring->total_packets,
3909 q_vector->rx_ring->total_bytes);
3911 adapter->tx_itr = igb_update_itr(adapter,
3912 adapter->tx_itr,
3913 q_vector->tx_ring->total_packets,
3914 q_vector->tx_ring->total_bytes);
3915 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3917 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3918 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3919 current_itr = low_latency;
3921 switch (current_itr) {
3922 /* counts and packets in update_itr are dependent on these numbers */
3923 case lowest_latency:
3924 new_itr = 56; /* aka 70,000 ints/sec */
3925 break;
3926 case low_latency:
3927 new_itr = 196; /* aka 20,000 ints/sec */
3928 break;
3929 case bulk_latency:
3930 new_itr = 980; /* aka 4,000 ints/sec */
3931 break;
3932 default:
3933 break;
3934 }
3936 set_itr_now:
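/*
 * The constants above appear to be EITR counts in ~256 ns units:
 * 56 * 256 ns = 14.3 us = ~70,000 ints/sec, 196 = ~20,000 ints/sec,
 * and 980 = ~4,000 ints/sec, matching the inline comments.
 */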
3937 q_vector->rx_ring->total_bytes = 0;
3938 q_vector->rx_ring->total_packets = 0;
3939 q_vector->tx_ring->total_bytes = 0;
3940 q_vector->tx_ring->total_packets = 0;
3942 if (new_itr != q_vector->itr_val) {
3943 /* this attempts to bias the interrupt rate towards Bulk
3944 * by adding intermediate steps when interrupt rate is
3945 * increasing */
3946 new_itr = new_itr > q_vector->itr_val ?
3947 max((new_itr * q_vector->itr_val) /
3948 (new_itr + (q_vector->itr_val >> 2)),
3949 new_itr) :
3950 new_itr;
3951 /* Don't write the value here; it resets the adapter's
3952 * internal timer, and causes us to delay far longer than
3953 * we should between interrupts. Instead, we write the ITR
3954 * value at the beginning of the next interrupt so the timing
3955 * ends up being correct.
3957 q_vector->itr_val = new_itr;
3958 q_vector->set_itr = 1;
3959 }
3960 }
3962 #define IGB_TX_FLAGS_CSUM 0x00000001
3963 #define IGB_TX_FLAGS_VLAN 0x00000002
3964 #define IGB_TX_FLAGS_TSO 0x00000004
3965 #define IGB_TX_FLAGS_IPV4 0x00000008
3966 #define IGB_TX_FLAGS_TSTAMP 0x00000010
3967 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
3968 #define IGB_TX_FLAGS_VLAN_SHIFT 16
3970 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3971 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3972 {
3973 struct e1000_adv_tx_context_desc *context_desc;
3974 unsigned int i;
3975 int err;
3976 struct igb_buffer *buffer_info;
3977 u32 info = 0, tu_cmd = 0;
3978 u32 mss_l4len_idx;
3979 u8 l4len;
3981 if (skb_header_cloned(skb)) {
3982 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3983 if (err)
3984 return err;
3985 }
3987 l4len = tcp_hdrlen(skb);
3988 *hdr_len += l4len;
3990 if (skb->protocol == htons(ETH_P_IP)) {
3991 struct iphdr *iph = ip_hdr(skb);
3992 iph->tot_len = 0;
3993 iph->check = 0;
3994 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3995 iph->daddr, 0,
3996 IPPROTO_TCP,
3997 0);
3998 } else if (skb_is_gso_v6(skb)) {
3999 ipv6_hdr(skb)->payload_len = 0;
4000 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4001 &ipv6_hdr(skb)->daddr,
4002 0, IPPROTO_TCP, 0);
4003 }
4005 i = tx_ring->next_to_use;
4007 buffer_info = &tx_ring->buffer_info[i];
4008 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4009 /* VLAN MACLEN IPLEN */
4010 if (tx_flags & IGB_TX_FLAGS_VLAN)
4011 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4012 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4013 *hdr_len += skb_network_offset(skb);
4014 info |= skb_network_header_len(skb);
4015 *hdr_len += skb_network_header_len(skb);
4016 context_desc->vlan_macip_lens = cpu_to_le32(info);
4018 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4019 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4021 if (skb->protocol == htons(ETH_P_IP))
4022 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4023 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4025 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4028 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
4029 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
4031 /* For 82575, context index must be unique per ring. */
4032 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4033 mss_l4len_idx |= tx_ring->reg_idx << 4;
4035 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4036 context_desc->seqnum_seed = 0;
4038 buffer_info->time_stamp = jiffies;
4039 buffer_info->next_to_watch = i;
4040 buffer_info->dma = 0;
4041 i++;
4042 if (i == tx_ring->count)
4043 i = 0;
4045 tx_ring->next_to_use = i;
4047 return true;
4048 }
4050 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
4051 struct sk_buff *skb, u32 tx_flags)
4052 {
4053 struct e1000_adv_tx_context_desc *context_desc;
4054 struct device *dev = tx_ring->dev;
4055 struct igb_buffer *buffer_info;
4056 u32 info = 0, tu_cmd = 0;
4057 unsigned int i;
4059 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4060 (tx_flags & IGB_TX_FLAGS_VLAN)) {
4061 i = tx_ring->next_to_use;
4062 buffer_info = &tx_ring->buffer_info[i];
4063 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4065 if (tx_flags & IGB_TX_FLAGS_VLAN)
4066 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4068 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4069 if (skb->ip_summed == CHECKSUM_PARTIAL)
4070 info |= skb_network_header_len(skb);
4072 context_desc->vlan_macip_lens = cpu_to_le32(info);
4074 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4076 if (skb->ip_summed == CHECKSUM_PARTIAL) {
4077 __be16 protocol;
4079 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4080 const struct vlan_ethhdr *vhdr =
4081 (const struct vlan_ethhdr*)skb->data;
4083 protocol = vhdr->h_vlan_encapsulated_proto;
4084 } else {
4085 protocol = skb->protocol;
4086 }
4088 switch (protocol) {
4089 case cpu_to_be16(ETH_P_IP):
4090 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4091 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4092 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4093 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4094 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4095 break;
4096 case cpu_to_be16(ETH_P_IPV6):
4097 /* XXX what about other V6 headers?? */
4098 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4099 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4100 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4101 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4102 break;
4103 default:
4104 if (unlikely(net_ratelimit()))
4105 dev_warn(dev,
4106 "partial checksum but proto=%x!\n",
4107 skb->protocol);
4108 break;
4109 }
4110 }
4112 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4113 context_desc->seqnum_seed = 0;
4114 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4115 context_desc->mss_l4len_idx =
4116 cpu_to_le32(tx_ring->reg_idx << 4);
4118 buffer_info->time_stamp = jiffies;
4119 buffer_info->next_to_watch = i;
4120 buffer_info->dma = 0;
4122 i++;
4123 if (i == tx_ring->count)
4124 i = 0;
4125 tx_ring->next_to_use = i;
4127 return true;
4128 }
4130 return false;
4131 }
4132 #define IGB_MAX_TXD_PWR 16
4133 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
4135 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
4136 int first)
4137 {
4138 struct igb_buffer *buffer_info;
4139 struct device *dev = tx_ring->dev;
4140 unsigned int hlen = skb_headlen(skb);
4141 unsigned int count = 0, i;
4142 unsigned int f;
4143 u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4145 i = tx_ring->next_to_use;
4147 buffer_info = &tx_ring->buffer_info[i];
4148 BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4149 buffer_info->length = hlen;
4150 /* set time_stamp *before* dma to help avoid a possible race */
4151 buffer_info->time_stamp = jiffies;
4152 buffer_info->next_to_watch = i;
4153 buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4154 DMA_TO_DEVICE);
4155 if (dma_mapping_error(dev, buffer_info->dma))
4156 goto dma_error;
4158 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4159 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4160 unsigned int len = frag->size;
4162 count++;
4163 i++;
4164 if (i == tx_ring->count)
4165 i = 0;
4167 buffer_info = &tx_ring->buffer_info[i];
4168 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4169 buffer_info->length = len;
4170 buffer_info->time_stamp = jiffies;
4171 buffer_info->next_to_watch = i;
4172 buffer_info->mapped_as_page = true;
4173 buffer_info->dma = dma_map_page(dev,
4174 frag->page,
4175 frag->page_offset,
4176 len,
4177 DMA_TO_DEVICE);
4178 if (dma_mapping_error(dev, buffer_info->dma))
4179 goto dma_error;
4180 }
4183 tx_ring->buffer_info[i].skb = skb;
4184 tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4185 /* multiply data chunks by size of headers */
4186 tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4187 tx_ring->buffer_info[i].gso_segs = gso_segs;
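/*
 * Worked example for the bytecount math above: a TSO skb with a
 * 66-byte header, 2928 bytes of payload and gso_segs == 2 has
 * skb->len == 2994, so bytecount == (2 - 1) * 66 + 2994 == 3060,
 * counting the header once per wire segment for accurate statistics.
 */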
4188 tx_ring->buffer_info[first].next_to_watch = i;
4190 return ++count;
4192 dma_error:
4193 dev_err(dev, "TX DMA map failed\n");
4195 /* clear timestamp and dma mappings for failed buffer_info mapping */
4196 buffer_info->dma = 0;
4197 buffer_info->time_stamp = 0;
4198 buffer_info->length = 0;
4199 buffer_info->next_to_watch = 0;
4200 buffer_info->mapped_as_page = false;
4202 /* clear timestamp and dma mappings for remaining portion of packet */
4203 while (count--) {
4204 if (i == 0)
4205 i = tx_ring->count;
4206 i--;
4207 buffer_info = &tx_ring->buffer_info[i];
4208 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4209 }
4211 return 0;
4212 }
4214 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4215 u32 tx_flags, int count, u32 paylen,
4216 u8 hdr_len)
4217 {
4218 union e1000_adv_tx_desc *tx_desc;
4219 struct igb_buffer *buffer_info;
4220 u32 olinfo_status = 0, cmd_type_len;
4221 unsigned int i = tx_ring->next_to_use;
4223 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4224 E1000_ADVTXD_DCMD_DEXT);
4226 if (tx_flags & IGB_TX_FLAGS_VLAN)
4227 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4229 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4230 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4232 if (tx_flags & IGB_TX_FLAGS_TSO) {
4233 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4235 /* insert tcp checksum */
4236 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4238 /* insert ip checksum */
4239 if (tx_flags & IGB_TX_FLAGS_IPV4)
4240 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4242 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4243 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4246 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4247 (tx_flags & (IGB_TX_FLAGS_CSUM |
4249 IGB_TX_FLAGS_VLAN)))
4250 olinfo_status |= tx_ring->reg_idx << 4;
4252 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4255 buffer_info = &tx_ring->buffer_info[i];
4256 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4257 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4258 tx_desc->read.cmd_type_len =
4259 cpu_to_le32(cmd_type_len | buffer_info->length);
4260 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4263 if (i == tx_ring->count)
4265 } while (count > 0);
4267 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4268 /* Force memory writes to complete before letting h/w
4269 * know there are new descriptors to fetch. (Only
4270 * applicable for weak-ordered memory model archs,
4271 * such as IA-64). */
4274 tx_ring->next_to_use = i;
4275 writel(i, tx_ring->tail);
4276 /* we need this if more than one processor can write to our tail
4277 * at a time, it syncronizes IO on IA64/Altix systems */
static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
{
	struct net_device *netdev = tx_ring->netdev;

	netif_stop_subqueue(netdev, tx_ring->queue_index);

	/* Herbert's original patch had:
	 *  smp_mb__after_netif_stop_queue();
	 * but since that doesn't exist yet, just open code it. */
	smp_mb();

	/* We need to check again in case another CPU has just
	 * made room available. */
	if (igb_desc_unused(tx_ring) < size)
		return -EBUSY;

	/* A reprieve! */
	netif_wake_subqueue(netdev, tx_ring->queue_index);

	u64_stats_update_begin(&tx_ring->tx_syncp2);
	tx_ring->tx_stats.restart_queue2++;
	u64_stats_update_end(&tx_ring->tx_syncp2);

	return 0;
}

static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
{
	if (igb_desc_unused(tx_ring) >= size)
		return 0;
	return __igb_maybe_stop_tx(tx_ring, size);
}
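
/*
 * Worked example of the descriptor budget used below: an skb with
 * three page fragments needs up to 3 (one per frag) + 1 (skb->data)
 * + 1 (context descriptor) + 2 (gap so the tail never touches the
 * head) = 7 free descriptors, i.e. nr_frags + 4.
 */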
netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
				    struct igb_ring *tx_ring)
{
	int tso = 0, count;
	u32 tx_flags = 0;
	u16 first;
	u8 hdr_len = 0;

	/* need: 1 descriptor per page,
	 *       + 2 desc gap to keep tail from touching head,
	 *       + 1 desc for skb->data,
	 *       + 1 desc for context descriptor,
	 * otherwise try next time */
	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
		/* this is a hard error */
		return NETDEV_TX_BUSY;
	}

	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
		tx_flags |= IGB_TX_FLAGS_TSTAMP;
	}

	if (vlan_tx_tag_present(skb)) {
		tx_flags |= IGB_TX_FLAGS_VLAN;
		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
	}

	if (skb->protocol == htons(ETH_P_IP))
		tx_flags |= IGB_TX_FLAGS_IPV4;

	first = tx_ring->next_to_use;
	if (skb_is_gso(skb)) {
		tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);

		if (tso < 0) {
			dev_kfree_skb_any(skb);
			return NETDEV_TX_OK;
		}
	}

	if (tso)
		tx_flags |= IGB_TX_FLAGS_TSO;
	else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
		 (skb->ip_summed == CHECKSUM_PARTIAL))
		tx_flags |= IGB_TX_FLAGS_CSUM;

	/*
	 * count reflects descriptors mapped, if 0 or less then mapping error
	 * has occurred and we need to rewind the descriptor queue
	 */
	count = igb_tx_map_adv(tx_ring, skb, first);
	if (!count) {
		dev_kfree_skb_any(skb);
		tx_ring->buffer_info[first].time_stamp = 0;
		tx_ring->next_to_use = first;
		return NETDEV_TX_OK;
	}

	igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);

	/* Make sure there is space in the ring for the next send. */
	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);

	return NETDEV_TX_OK;
}
static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
				      struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct igb_ring *tx_ring;
	int r_idx = 0;

	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	if (skb->len <= 0) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
	tx_ring = adapter->multi_tx_table[r_idx];

	/* This goes back to the question of how to logically map a tx queue
	 * to a flow.  Right now, performance is impacted slightly negatively
	 * if using multiple tx queues.  If the stack breaks away from a
	 * single qdisc implementation, we can look at this again. */
	return igb_xmit_frame_ring_adv(skb, tx_ring);
}
/**
 * igb_tx_timeout - Respond to a Tx Hang
 * @netdev: network interface device structure
 **/
static void igb_tx_timeout(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;

	/* Do the reset outside of interrupt context */
	adapter->tx_timeout_count++;

	if (hw->mac.type == e1000_82580)
		hw->dev_spec._82575.global_device_reset = true;

	schedule_work(&adapter->reset_task);
	wr32(E1000_EICS,
	     (adapter->eims_enable_mask & ~adapter->eims_other));
}
static void igb_reset_task(struct work_struct *work)
{
	struct igb_adapter *adapter;
	adapter = container_of(work, struct igb_adapter, reset_task);

	igb_dump(adapter);
	netdev_err(adapter->netdev, "Reset adapter\n");
	igb_reinit_locked(adapter);
}
/**
 * igb_get_stats64 - Get System Network Statistics
 * @netdev: network interface device structure
 * @stats: rtnl_link_stats64 pointer
 **/
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
						 struct rtnl_link_stats64 *stats)
{
	struct igb_adapter *adapter = netdev_priv(netdev);

	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	memcpy(stats, &adapter->stats64, sizeof(*stats));
	spin_unlock(&adapter->stats64_lock);

	return stats;
}
/**
 * igb_change_mtu - Change the Maximum Transfer Unit
 * @netdev: network interface device structure
 * @new_mtu: new value for maximum frame size
 *
 * Returns 0 on success, negative on failure
 **/
static int igb_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct pci_dev *pdev = adapter->pdev;
	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
	u32 rx_buffer_len, i;

	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
		dev_err(&pdev->dev, "Invalid MTU setting\n");
		return -EINVAL;
	}

	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
		return -EINVAL;
	}

	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);

	/* igb_down has a dependency on max_frame_size */
	adapter->max_frame_size = max_frame;

	/* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
	 * means we reserve 2 more, this pushes us to allocate from the next
	 * larger slab size.
	 * i.e. RXBUFFER_2048 --> size-4096 slab
	 */

	if (adapter->hw.mac.type == e1000_82580)
		max_frame += IGB_TS_HDR_LEN;

	if (max_frame <= IGB_RXBUFFER_1024)
		rx_buffer_len = IGB_RXBUFFER_1024;
	else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
		rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
	else
		rx_buffer_len = IGB_RXBUFFER_128;

	if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
	    (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
		rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;

	if ((adapter->hw.mac.type == e1000_82580) &&
	    (rx_buffer_len == IGB_RXBUFFER_128))
		rx_buffer_len += IGB_RXBUFFER_64;

	if (netif_running(netdev))
		igb_down(adapter);

	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
		 netdev->mtu, new_mtu);
	netdev->mtu = new_mtu;

	for (i = 0; i < adapter->num_rx_queues; i++)
		adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;

	if (netif_running(netdev))
		igb_up(adapter);
	else
		igb_reset(adapter);

	clear_bit(__IGB_RESETTING, &adapter->state);

	return 0;
}
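
/*
 * Rx buffer sizing example for igb_change_mtu() above (assuming the
 * usual 1522-byte VLAN-tagged maximum frame): a standard MTU of 1500
 * gives max_frame = 1500 + 14 + 4 = 1518, which falls into the
 * MAXIMUM_ETHERNET_VLAN_SIZE bucket, while a 9000-byte jumbo MTU
 * exceeds it and uses the small IGB_RXBUFFER_128 header buffer with
 * the remainder of each packet landing in half-page data buffers.
 */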
/**
 * igb_update_stats - Update the board statistics counters
 * @adapter: board private structure
 **/
void igb_update_stats(struct igb_adapter *adapter,
		      struct rtnl_link_stats64 *net_stats)
{
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pdev = adapter->pdev;
	u32 reg, mpc;
	u16 phy_tmp;
	int i;
	u64 bytes, packets;
	unsigned int start;
	u64 _bytes, _packets;

#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF

	/*
	 * Prevent stats update while adapter is being reset, or if the pci
	 * connection is down.
	 */
	if (adapter->link_speed == 0)
		return;
	if (pci_channel_offline(pdev))
		return;

	bytes = 0;
	packets = 0;
	for (i = 0; i < adapter->num_rx_queues; i++) {
		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
		struct igb_ring *ring = adapter->rx_ring[i];

		ring->rx_stats.drops += rqdpc_tmp;
		net_stats->rx_fifo_errors += rqdpc_tmp;

		do {
			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
			_bytes = ring->rx_stats.bytes;
			_packets = ring->rx_stats.packets;
		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
		bytes += _bytes;
		packets += _packets;
	}

	net_stats->rx_bytes = bytes;
	net_stats->rx_packets = packets;

	bytes = 0;
	packets = 0;
	for (i = 0; i < adapter->num_tx_queues; i++) {
		struct igb_ring *ring = adapter->tx_ring[i];
		do {
			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
			_bytes = ring->tx_stats.bytes;
			_packets = ring->tx_stats.packets;
		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
		bytes += _bytes;
		packets += _packets;
	}
	net_stats->tx_bytes = bytes;
	net_stats->tx_packets = packets;

	/* read stats registers */
	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
	adapter->stats.gprc += rd32(E1000_GPRC);
	adapter->stats.gorc += rd32(E1000_GORCL);
	rd32(E1000_GORCH); /* clear GORCL */
	adapter->stats.bprc += rd32(E1000_BPRC);
	adapter->stats.mprc += rd32(E1000_MPRC);
	adapter->stats.roc += rd32(E1000_ROC);

	adapter->stats.prc64 += rd32(E1000_PRC64);
	adapter->stats.prc127 += rd32(E1000_PRC127);
	adapter->stats.prc255 += rd32(E1000_PRC255);
	adapter->stats.prc511 += rd32(E1000_PRC511);
	adapter->stats.prc1023 += rd32(E1000_PRC1023);
	adapter->stats.prc1522 += rd32(E1000_PRC1522);
	adapter->stats.symerrs += rd32(E1000_SYMERRS);
	adapter->stats.sec += rd32(E1000_SEC);

	mpc = rd32(E1000_MPC);
	adapter->stats.mpc += mpc;
	net_stats->rx_fifo_errors += mpc;
	adapter->stats.scc += rd32(E1000_SCC);
	adapter->stats.ecol += rd32(E1000_ECOL);
	adapter->stats.mcc += rd32(E1000_MCC);
	adapter->stats.latecol += rd32(E1000_LATECOL);
	adapter->stats.dc += rd32(E1000_DC);
	adapter->stats.rlec += rd32(E1000_RLEC);
	adapter->stats.xonrxc += rd32(E1000_XONRXC);
	adapter->stats.xontxc += rd32(E1000_XONTXC);
	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
	adapter->stats.fcruc += rd32(E1000_FCRUC);
	adapter->stats.gptc += rd32(E1000_GPTC);
	adapter->stats.gotc += rd32(E1000_GOTCL);
	rd32(E1000_GOTCH); /* clear GOTCL */
	adapter->stats.rnbc += rd32(E1000_RNBC);
	adapter->stats.ruc += rd32(E1000_RUC);
	adapter->stats.rfc += rd32(E1000_RFC);
	adapter->stats.rjc += rd32(E1000_RJC);
	adapter->stats.tor += rd32(E1000_TORH);
	adapter->stats.tot += rd32(E1000_TOTH);
	adapter->stats.tpr += rd32(E1000_TPR);

	adapter->stats.ptc64 += rd32(E1000_PTC64);
	adapter->stats.ptc127 += rd32(E1000_PTC127);
	adapter->stats.ptc255 += rd32(E1000_PTC255);
	adapter->stats.ptc511 += rd32(E1000_PTC511);
	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
	adapter->stats.ptc1522 += rd32(E1000_PTC1522);

	adapter->stats.mptc += rd32(E1000_MPTC);
	adapter->stats.bptc += rd32(E1000_BPTC);

	adapter->stats.tpt += rd32(E1000_TPT);
	adapter->stats.colc += rd32(E1000_COLC);

	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
	/* read internal phy specific stats */
	reg = rd32(E1000_CTRL_EXT);
	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
		adapter->stats.rxerrc += rd32(E1000_RXERRC);
		adapter->stats.tncrs += rd32(E1000_TNCRS);
	}

	adapter->stats.tsctc += rd32(E1000_TSCTC);
	adapter->stats.tsctfc += rd32(E1000_TSCTFC);

	adapter->stats.iac += rd32(E1000_IAC);
	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);

	/* Fill out the OS statistics structure */
	net_stats->multicast = adapter->stats.mprc;
	net_stats->collisions = adapter->stats.colc;

	/* Rx Errors */

	/* RLEC on some newer hardware can be incorrect so build
	 * our own version based on RUC and ROC */
	net_stats->rx_errors = adapter->stats.rxerrc +
		adapter->stats.crcerrs + adapter->stats.algnerrc +
		adapter->stats.ruc + adapter->stats.roc +
		adapter->stats.cexterr;
	net_stats->rx_length_errors = adapter->stats.ruc +
				      adapter->stats.roc;
	net_stats->rx_crc_errors = adapter->stats.crcerrs;
	net_stats->rx_frame_errors = adapter->stats.algnerrc;
	net_stats->rx_missed_errors = adapter->stats.mpc;

	/* Tx Errors */
	net_stats->tx_errors = adapter->stats.ecol +
			       adapter->stats.latecol;
	net_stats->tx_aborted_errors = adapter->stats.ecol;
	net_stats->tx_window_errors = adapter->stats.latecol;
	net_stats->tx_carrier_errors = adapter->stats.tncrs;

	/* Tx Dropped needs to be maintained elsewhere */

	/* Phy Stats */
	if (hw->phy.media_type == e1000_media_type_copper) {
		if ((adapter->link_speed == SPEED_1000) &&
		    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
			adapter->phy_stats.idle_errors += phy_tmp;
		}
	}

	/* Management Stats */
	adapter->stats.mgptc += rd32(E1000_MGTPTC);
	adapter->stats.mgprc += rd32(E1000_MGTPRC);
	adapter->stats.mgpdc += rd32(E1000_MGTPDC);

	/* OS2BMC Stats */
	reg = rd32(E1000_MANC);
	if (reg & E1000_MANC_EN_BMC2OS) {
		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
	}
}
static irqreturn_t igb_msix_other(int irq, void *data)
{
	struct igb_adapter *adapter = data;
	struct e1000_hw *hw = &adapter->hw;
	u32 icr = rd32(E1000_ICR);
	/* reading ICR causes bit 31 of EICR to be cleared */

	if (icr & E1000_ICR_DRSTA)
		schedule_work(&adapter->reset_task);

	if (icr & E1000_ICR_DOUTSYNC) {
		/* HW is reporting DMA is out of sync */
		adapter->stats.doosync++;
		/* The DMA Out of Sync is also indication of a spoof event
		 * in IOV mode. Check the Wrong VM Behavior register to
		 * see if it is really a spoof event. */
		igb_check_wvbr(adapter);
	}

	/* Check for a mailbox event */
	if (icr & E1000_ICR_VMMB)
		igb_msg_task(adapter);

	if (icr & E1000_ICR_LSC) {
		hw->mac.get_link_status = 1;
		/* guard against interrupt when we're going down */
		if (!test_bit(__IGB_DOWN, &adapter->state))
			mod_timer(&adapter->watchdog_timer, jiffies + 1);
	}

	if (adapter->vfs_allocated_count)
		wr32(E1000_IMS, E1000_IMS_LSC |
				E1000_IMS_VMMB |
				E1000_IMS_DOUTSYNC);
	else
		wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
	wr32(E1000_EIMS, adapter->eims_other);

	return IRQ_HANDLED;
}
static void igb_write_itr(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	u32 itr_val = q_vector->itr_val & 0x7FFC;

	if (!q_vector->set_itr)
		return;

	if (!itr_val)
		itr_val = 0x4;

	if (adapter->hw.mac.type == e1000_82575)
		itr_val |= itr_val << 16;
	else
		itr_val |= 0x8000000;

	writel(itr_val, q_vector->itr_register);
	q_vector->set_itr = 0;
}
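
/*
 * Note on igb_write_itr() above: the 82575 expects the interval
 * replicated in both 16-bit halves of its ITR register (hence
 * itr_val |= itr_val << 16), while later parts use a different EITR
 * layout and only need the extra mode bit OR'd in.  The exact bit
 * semantics are part-specific; see the (E)ITR register description
 * in the respective datasheet.
 */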
static irqreturn_t igb_msix_ring(int irq, void *data)
{
	struct igb_q_vector *q_vector = data;

	/* Write the ITR value calculated from the previous interrupt. */
	igb_write_itr(q_vector);

	napi_schedule(&q_vector->napi);

	return IRQ_HANDLED;
}
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	int cpu = get_cpu();

	if (q_vector->cpu == cpu)
		goto out_no_update;

	if (q_vector->tx_ring) {
		int q = q_vector->tx_ring->reg_idx;
		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
		if (hw->mac.type == e1000_82575) {
			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
		} else {
			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
			              E1000_DCA_TXCTRL_CPUID_SHIFT;
		}
		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
	}
	if (q_vector->rx_ring) {
		int q = q_vector->rx_ring->reg_idx;
		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
		if (hw->mac.type == e1000_82575) {
			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
		} else {
			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
			              E1000_DCA_RXCTRL_CPUID_SHIFT;
		}
		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
	}
	q_vector->cpu = cpu;
out_no_update:
	put_cpu();
}
static void igb_setup_dca(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
		return;

	/* Always use CB2 mode, difference is masked in the CB driver. */
	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		adapter->q_vector[i]->cpu = -1;
		igb_update_dca(adapter->q_vector[i]);
	}
}
static int __igb_notify_dca(struct device *dev, void *data)
{
	struct net_device *netdev = dev_get_drvdata(dev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	unsigned long event = *(unsigned long *)data;

	switch (event) {
	case DCA_PROVIDER_ADD:
		/* if already enabled, don't do it again */
		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
			break;
		if (dca_add_requester(dev) == 0) {
			adapter->flags |= IGB_FLAG_DCA_ENABLED;
			dev_info(&pdev->dev, "DCA enabled\n");
			igb_setup_dca(adapter);
			break;
		}
		/* Fall Through since DCA is disabled. */
	case DCA_PROVIDER_REMOVE:
		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
			/* without this a class_device is left
			 * hanging around in the sysfs model */
			dca_remove_requester(dev);
			dev_info(&pdev->dev, "DCA disabled\n");
			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
		}
		break;
	}

	return 0;
}

static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
			  void *p)
{
	int ret_val;

	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
					 __igb_notify_dca);

	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
}
#endif /* CONFIG_IGB_DCA */
static void igb_ping_all_vfs(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ping;
	int i;

	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
		ping = E1000_PF_CONTROL_MSG;
		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
			ping |= E1000_VT_MSGTYPE_CTS;
		igb_write_mbx(hw, &ping, 1, i);
	}
}
static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vmolr = rd32(E1000_VMOLR(vf));
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];

	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
	                    IGB_VF_FLAG_MULTI_PROMISC);
	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);

	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
		vmolr |= E1000_VMOLR_MPME;
		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
	} else {
		/*
		 * if we have hashes and we are clearing a multicast promisc
		 * flag we need to write the hashes to the MTA as this step
		 * was previously skipped
		 */
		if (vf_data->num_vf_mc_hashes > 30) {
			vmolr |= E1000_VMOLR_MPME;
		} else if (vf_data->num_vf_mc_hashes) {
			int j;
			vmolr |= E1000_VMOLR_ROMPE;
			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
		}
	}

	wr32(E1000_VMOLR(vf), vmolr);

	/* there are flags left unprocessed, likely not supported */
	if (*msgbuf & E1000_VT_MSGINFO_MASK)
		return -EINVAL;

	return 0;
}
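
/*
 * Mailbox message layout assumed throughout the VF handlers below:
 * the low 16 bits of msgbuf[0] carry the command and the
 * E1000_VT_MSGINFO field carries a small argument.  For example, a
 * VF registering five multicast hashes would send
 * msgbuf[0] = E1000_VF_SET_MULTICAST | (5 << E1000_VT_MSGINFO_SHIFT)
 * with the 16-bit hash values packed starting at msgbuf[1].
 */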
static int igb_set_vf_multicasts(struct igb_adapter *adapter,
				 u32 *msgbuf, u32 vf)
{
	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
	u16 *hash_list = (u16 *)&msgbuf[1];
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
	int i;

	/* salt away the number of multicast addresses assigned
	 * to this VF for later use to restore when the PF multicast
	 * list changes
	 */
	vf_data->num_vf_mc_hashes = n;

	/* only up to 30 hash values supported */
	if (n > 30)
		n = 30;

	/* store the hashes for later use */
	for (i = 0; i < n; i++)
		vf_data->vf_mc_hashes[i] = hash_list[i];

	/* Flush and reset the mta with the new values */
	igb_set_rx_mode(adapter->netdev);

	return 0;
}
static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct vf_data_storage *vf_data;
	int i, j;

	for (i = 0; i < adapter->vfs_allocated_count; i++) {
		u32 vmolr = rd32(E1000_VMOLR(i));
		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);

		vf_data = &adapter->vf_data[i];

		if ((vf_data->num_vf_mc_hashes > 30) ||
		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
			vmolr |= E1000_VMOLR_MPME;
		} else if (vf_data->num_vf_mc_hashes) {
			vmolr |= E1000_VMOLR_ROMPE;
			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
		}
		wr32(E1000_VMOLR(i), vmolr);
	}
}
static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 pool_mask, reg, vid;
	int i;

	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);

	/* Find the vlan filter for this id */
	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
		reg = rd32(E1000_VLVF(i));

		/* remove the vf from the pool */
		reg &= ~pool_mask;

		/* if pool is empty then remove entry from vfta */
		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
		    (reg & E1000_VLVF_VLANID_ENABLE)) {
			vid = reg & E1000_VLVF_VLANID_MASK;
			igb_vfta_set(hw, vid, false);
			reg = 0;
		}

		wr32(E1000_VLVF(i), reg);
	}

	adapter->vf_data[vf].vlans_enabled = 0;
}
static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg, i;

	/* The vlvf table only exists on 82576 hardware and newer */
	if (hw->mac.type < e1000_82576)
		return -1;

	/* we only need to do this if VMDq is enabled */
	if (!adapter->vfs_allocated_count)
		return -1;

	/* Find the vlan filter for this id */
	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
		reg = rd32(E1000_VLVF(i));
		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
		    vid == (reg & E1000_VLVF_VLANID_MASK))
			break;
	}

	if (add) {
		if (i == E1000_VLVF_ARRAY_SIZE) {
			/* Did not find a matching VLAN ID entry that was
			 * enabled.  Search for a free filter entry, i.e.
			 * one without the enable bit set
			 */
			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
				reg = rd32(E1000_VLVF(i));
				if (!(reg & E1000_VLVF_VLANID_ENABLE))
					break;
			}
		}
		if (i < E1000_VLVF_ARRAY_SIZE) {
			/* Found an enabled/available entry */
			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);

			/* if !enabled we need to set this up in vfta */
			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
				/* add VID to filter table */
				igb_vfta_set(hw, vid, true);
				reg |= E1000_VLVF_VLANID_ENABLE;
			}
			reg &= ~E1000_VLVF_VLANID_MASK;
			reg |= vid;
			wr32(E1000_VLVF(i), reg);

			/* do not modify RLPML for PF devices */
			if (vf >= adapter->vfs_allocated_count)
				return 0;

			if (!adapter->vf_data[vf].vlans_enabled) {
				u32 size;
				reg = rd32(E1000_VMOLR(vf));
				size = reg & E1000_VMOLR_RLPML_MASK;
				size += 4;
				reg &= ~E1000_VMOLR_RLPML_MASK;
				reg |= size;
				wr32(E1000_VMOLR(vf), reg);
			}

			adapter->vf_data[vf].vlans_enabled++;
		}
	} else {
		if (i < E1000_VLVF_ARRAY_SIZE) {
			/* remove vf from the pool */
			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
			/* if pool is empty then remove entry from vfta */
			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
				reg = 0;
				igb_vfta_set(hw, vid, false);
			}
			wr32(E1000_VLVF(i), reg);

			/* do not modify RLPML for PF devices */
			if (vf >= adapter->vfs_allocated_count)
				return 0;

			adapter->vf_data[vf].vlans_enabled--;
			if (!adapter->vf_data[vf].vlans_enabled) {
				u32 size;
				reg = rd32(E1000_VMOLR(vf));
				size = reg & E1000_VMOLR_RLPML_MASK;
				size -= 4;
				reg &= ~E1000_VMOLR_RLPML_MASK;
				reg |= size;
				wr32(E1000_VMOLR(vf), reg);
			}
		}
	}
	return 0;
}
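
/*
 * VLVF entry layout relied on above: the low bits hold the VLAN ID
 * (E1000_VLVF_VLANID_MASK), E1000_VLVF_VLANID_ENABLE marks the entry
 * valid, and each pool (VF or PF) owns one bit starting at
 * E1000_VLVF_POOLSEL_SHIFT.  The RLPML adjustment grows or shrinks a
 * VF's maximum receive frame by 4 bytes, the size of one VLAN tag,
 * the first time a VLAN is added and the last time one is removed.
 */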
static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;

	if (vid)
		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
	else
		wr32(E1000_VMVIR(vf), 0);
}
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos)
{
	int err = 0;
	struct igb_adapter *adapter = netdev_priv(netdev);

	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
		return -EINVAL;
	if (vlan || qos) {
		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
		if (err)
			goto out;
		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
		igb_set_vmolr(adapter, vf, !vlan);
		adapter->vf_data[vf].pf_vlan = vlan;
		adapter->vf_data[vf].pf_qos = qos;
		dev_info(&adapter->pdev->dev,
			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
		if (test_bit(__IGB_DOWN, &adapter->state)) {
			dev_warn(&adapter->pdev->dev,
				 "The VF VLAN has been set,"
				 " but the PF device is not up.\n");
			dev_warn(&adapter->pdev->dev,
				 "Bring the PF device up before"
				 " attempting to use the VF device.\n");
		}
	} else {
		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
			     false, vf);
		igb_set_vmvir(adapter, vlan, vf);
		igb_set_vmolr(adapter, vf, true);
		adapter->vf_data[vf].pf_vlan = 0;
		adapter->vf_data[vf].pf_qos = 0;
	}
out:
	return err;
}
static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
{
	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);

	return igb_vlvf_set(adapter, vid, add, vf);
}
static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
{
	/* clear flags - except flag that indicates PF has set the MAC */
	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
	adapter->vf_data[vf].last_nack = jiffies;

	/* reset offloads to defaults */
	igb_set_vmolr(adapter, vf, true);

	/* reset vlans for device */
	igb_clear_vf_vfta(adapter, vf);
	if (adapter->vf_data[vf].pf_vlan)
		igb_ndo_set_vf_vlan(adapter->netdev, vf,
				    adapter->vf_data[vf].pf_vlan,
				    adapter->vf_data[vf].pf_qos);
	else
		igb_clear_vf_vfta(adapter, vf);

	/* reset multicast table array for vf */
	adapter->vf_data[vf].num_vf_mc_hashes = 0;

	/* Flush and reset the mta with the new values */
	igb_set_rx_mode(adapter->netdev);
}
static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
{
	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;

	/* generate a new mac address as we were hotplug removed/added */
	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
		random_ether_addr(vf_mac);

	/* process remaining reset events */
	igb_vf_reset(adapter, vf);
}
static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
	u32 reg, msgbuf[3];
	u8 *addr = (u8 *)(&msgbuf[1]);

	/* process all the same items cleared in a function level reset */
	igb_vf_reset(adapter, vf);

	/* set vf mac address */
	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);

	/* enable transmit and receive for vf */
	reg = rd32(E1000_VFTE);
	wr32(E1000_VFTE, reg | (1 << vf));
	reg = rd32(E1000_VFRE);
	wr32(E1000_VFRE, reg | (1 << vf));

	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;

	/* reply to reset with ack and vf mac address */
	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
	memcpy(addr, vf_mac, 6);
	igb_write_mbx(hw, msgbuf, 3, vf);
}
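
/*
 * Reset reply layout produced above: msgbuf[0] holds
 * E1000_VF_RESET | E1000_VT_MSGTYPE_ACK, and the six MAC address
 * bytes are packed into msgbuf[1] and msgbuf[2], which is why the
 * mailbox write covers three 32-bit words.
 */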
static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
{
	/*
	 * The VF MAC Address is stored in a packed array of bytes
	 * starting at the second 32 bit word of the msg array
	 */
	unsigned char *addr = (char *)&msg[1];
	int err = -1;

	if (is_valid_ether_addr(addr))
		err = igb_set_vf_mac(adapter, vf, addr);

	return err;
}
static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
	u32 msg = E1000_VT_MSGTYPE_NACK;

	/* if device isn't clear to send it shouldn't be reading either */
	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
		igb_write_mbx(hw, &msg, 1, vf);
		vf_data->last_nack = jiffies;
	}
}
static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
{
	struct pci_dev *pdev = adapter->pdev;
	u32 msgbuf[E1000_VFMAILBOX_SIZE];
	struct e1000_hw *hw = &adapter->hw;
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
	s32 retval;

	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);

	if (retval) {
		/* if receive failed revoke VF CTS stats and restart init */
		dev_err(&pdev->dev, "Error receiving message from VF\n");
		vf_data->flags &= ~IGB_VF_FLAG_CTS;
		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
			return;
		goto out;
	}

	/* this is a message we already processed, do nothing */
	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
		return;

	/*
	 * until the vf completes a reset it should not be
	 * allowed to start any configuration.
	 */

	if (msgbuf[0] == E1000_VF_RESET) {
		igb_vf_reset_msg(adapter, vf);
		return;
	}

	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
			return;
		retval = -1;
		goto out;
	}

	switch ((msgbuf[0] & 0xFFFF)) {
	case E1000_VF_SET_MAC_ADDR:
		retval = -EINVAL;
		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
		else
			dev_warn(&pdev->dev,
				 "VF %d attempted to override administratively "
				 "set MAC address\nReload the VF driver to "
				 "resume operations\n", vf);
		break;
	case E1000_VF_SET_PROMISC:
		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
		break;
	case E1000_VF_SET_MULTICAST:
		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
		break;
	case E1000_VF_SET_LPE:
		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
		break;
	case E1000_VF_SET_VLAN:
		retval = -1;
		if (vf_data->pf_vlan)
			dev_warn(&pdev->dev,
				 "VF %d attempted to override administratively "
				 "set VLAN tag\nReload the VF driver to "
				 "resume operations\n", vf);
		else
			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
		break;
	default:
		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
		retval = -1;
		break;
	}

	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
out:
	/* notify the VF of the results of what it sent us */
	if (retval)
		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
	else
		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;

	igb_write_mbx(hw, msgbuf, 1, vf);
}
static void igb_msg_task(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vf;

	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
		/* process any reset requests */
		if (!igb_check_for_rst(hw, vf))
			igb_vf_reset_event(adapter, vf);

		/* process any messages pending */
		if (!igb_check_for_msg(hw, vf))
			igb_rcv_msg_from_vf(adapter, vf);

		/* process any acks */
		if (!igb_check_for_ack(hw, vf))
			igb_rcv_ack_from_vf(adapter, vf);
	}
}
/**
 *  igb_set_uta - Set unicast filter table address
 *  @adapter: board private structure
 *
 *  The unicast table address is a register array of 32-bit registers.
 *  The table is meant to be used in a way similar to how the MTA is used
 *  however due to certain limitations in the hardware it is necessary to
 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
 *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
 **/
static void igb_set_uta(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* The UTA table only exists on 82576 hardware and newer */
	if (hw->mac.type < e1000_82576)
		return;

	/* we only need to do this if VMDq is enabled */
	if (!adapter->vfs_allocated_count)
		return;

	for (i = 0; i < hw->mac.uta_reg_count; i++)
		array_wr32(E1000_UTA, i, ~0);
}
/**
 * igb_intr_msi - Interrupt Handler
 * @irq: interrupt number
 * @data: pointer to a network interface device structure
 **/
static irqreturn_t igb_intr_msi(int irq, void *data)
{
	struct igb_adapter *adapter = data;
	struct igb_q_vector *q_vector = adapter->q_vector[0];
	struct e1000_hw *hw = &adapter->hw;
	/* read ICR disables interrupts using IAM */
	u32 icr = rd32(E1000_ICR);

	igb_write_itr(q_vector);

	if (icr & E1000_ICR_DRSTA)
		schedule_work(&adapter->reset_task);

	if (icr & E1000_ICR_DOUTSYNC) {
		/* HW is reporting DMA is out of sync */
		adapter->stats.doosync++;
	}

	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		hw->mac.get_link_status = 1;
		if (!test_bit(__IGB_DOWN, &adapter->state))
			mod_timer(&adapter->watchdog_timer, jiffies + 1);
	}

	napi_schedule(&q_vector->napi);

	return IRQ_HANDLED;
}
/**
 * igb_intr - Legacy Interrupt Handler
 * @irq: interrupt number
 * @data: pointer to a network interface device structure
 **/
static irqreturn_t igb_intr(int irq, void *data)
{
	struct igb_adapter *adapter = data;
	struct igb_q_vector *q_vector = adapter->q_vector[0];
	struct e1000_hw *hw = &adapter->hw;
	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
	 * need for the IMC write */
	u32 icr = rd32(E1000_ICR);
	if (!icr)
		return IRQ_NONE;  /* Not our interrupt */

	igb_write_itr(q_vector);

	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
	 * not set, then the adapter didn't send an interrupt */
	if (!(icr & E1000_ICR_INT_ASSERTED))
		return IRQ_NONE;

	if (icr & E1000_ICR_DRSTA)
		schedule_work(&adapter->reset_task);

	if (icr & E1000_ICR_DOUTSYNC) {
		/* HW is reporting DMA is out of sync */
		adapter->stats.doosync++;
	}

	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		hw->mac.get_link_status = 1;
		/* guard against interrupt when we're going down */
		if (!test_bit(__IGB_DOWN, &adapter->state))
			mod_timer(&adapter->watchdog_timer, jiffies + 1);
	}

	napi_schedule(&q_vector->napi);

	return IRQ_HANDLED;
}
static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;

	if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
	    (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
		if (!adapter->msix_entries)
			igb_set_itr(adapter);
		else
			igb_update_ring_itr(q_vector);
	}

	if (!test_bit(__IGB_DOWN, &adapter->state)) {
		if (adapter->msix_entries)
			wr32(E1000_EIMS, q_vector->eims_value);
		else
			igb_irq_enable(adapter);
	}
}
/**
 * igb_poll - NAPI Rx polling callback
 * @napi: napi polling structure
 * @budget: count of how many packets we should handle
 **/
static int igb_poll(struct napi_struct *napi, int budget)
{
	struct igb_q_vector *q_vector = container_of(napi,
	                                             struct igb_q_vector,
	                                             napi);
	int tx_clean_complete = 1, work_done = 0;

#ifdef CONFIG_IGB_DCA
	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
		igb_update_dca(q_vector);
#endif
	if (q_vector->tx_ring)
		tx_clean_complete = igb_clean_tx_irq(q_vector);

	if (q_vector->rx_ring)
		igb_clean_rx_irq_adv(q_vector, &work_done, budget);

	if (!tx_clean_complete)
		work_done = budget;

	/* If not enough Rx work done, exit the polling mode */
	if (work_done < budget) {
		napi_complete(napi);
		igb_ring_irq_enable(q_vector);
	}

	return work_done;
}
/**
 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
 * @adapter: board private structure
 * @shhwtstamps: timestamp structure to update
 * @regval: unsigned 64bit system time value.
 *
 * We need to convert the system time value stored in the RX/TXSTMP registers
 * into a hwtstamp which can be used by the upper level timestamping functions
 **/
static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
				   struct skb_shared_hwtstamps *shhwtstamps,
				   u64 regval)
{
	u64 ns;

	/*
	 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
	 * 24 to match clock shift we setup earlier.
	 */
	if (adapter->hw.mac.type == e1000_82580)
		regval <<= IGB_82580_TSYNC_SHIFT;

	ns = timecounter_cyc2time(&adapter->clock, regval);
	timecompare_update(&adapter->compare, ns);
	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
	shhwtstamps->hwtstamp = ns_to_ktime(ns);
	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
}
/**
 * igb_tx_hwtstamp - utility function which checks for TX time stamp
 * @q_vector: pointer to q_vector containing needed info
 * @buffer: pointer to igb_buffer structure
 *
 * If we were asked to do hardware stamping and such a time stamp is
 * available, then it must have been for this skb here because we only
 * allow one such packet into the queue.
 **/
static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	struct skb_shared_hwtstamps shhwtstamps;
	u64 regval;

	/* if skb does not support hw timestamp or TX stamp not valid exit */
	if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
		return;

	regval = rd32(E1000_TXSTMPL);
	regval |= (u64)rd32(E1000_TXSTMPH) << 32;

	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
}
/**
 * igb_clean_tx_irq - Reclaim resources after transmit completes
 * @q_vector: pointer to q_vector containing needed info
 * returns true if ring is completely cleaned
 **/
static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct igb_ring *tx_ring = q_vector->tx_ring;
	struct net_device *netdev = tx_ring->netdev;
	struct e1000_hw *hw = &adapter->hw;
	struct igb_buffer *buffer_info;
	union e1000_adv_tx_desc *tx_desc, *eop_desc;
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int i, eop, count = 0;
	bool cleaned = false;

	i = tx_ring->next_to_clean;
	eop = tx_ring->buffer_info[i].next_to_watch;
	eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);

	while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
	       (count < tx_ring->count)) {
		rmb();	/* read buffer_info after eop_desc status */
		for (cleaned = false; !cleaned; count++) {
			tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
			buffer_info = &tx_ring->buffer_info[i];
			cleaned = (i == eop);

			if (buffer_info->skb) {
				total_bytes += buffer_info->bytecount;
				/* gso_segs is currently only valid for tcp */
				total_packets += buffer_info->gso_segs;
				igb_tx_hwtstamp(q_vector, buffer_info);
			}

			igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
			tx_desc->wb.status = 0;

			i++;
			if (i == tx_ring->count)
				i = 0;
		}
		eop = tx_ring->buffer_info[i].next_to_watch;
		eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
	}

	tx_ring->next_to_clean = i;

	if (unlikely(count &&
		     netif_carrier_ok(netdev) &&
		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
		    !(test_bit(__IGB_DOWN, &adapter->state))) {
			netif_wake_subqueue(netdev, tx_ring->queue_index);

			u64_stats_update_begin(&tx_ring->tx_syncp);
			tx_ring->tx_stats.restart_queue++;
			u64_stats_update_end(&tx_ring->tx_syncp);
		}
	}

	if (tx_ring->detect_tx_hung) {
		/* Detect a transmit hang in hardware, this serializes the
		 * check with the clearing of time_stamp and movement of i */
		tx_ring->detect_tx_hung = false;
		if (tx_ring->buffer_info[i].time_stamp &&
		    time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
			       (adapter->tx_timeout_factor * HZ)) &&
		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {

			/* detected Tx unit hang */
			dev_err(tx_ring->dev,
				"Detected Tx Unit Hang\n"
				"  Tx Queue             <%d>\n"
				"  TDH                  <%x>\n"
				"  TDT                  <%x>\n"
				"  next_to_use          <%x>\n"
				"  next_to_clean        <%x>\n"
				"buffer_info[next_to_clean]\n"
				"  time_stamp           <%lx>\n"
				"  next_to_watch        <%x>\n"
				"  jiffies              <%lx>\n"
				"  desc.status          <%x>\n",
				tx_ring->queue_index,
				readl(tx_ring->head),
				readl(tx_ring->tail),
				tx_ring->next_to_use,
				tx_ring->next_to_clean,
				tx_ring->buffer_info[eop].time_stamp,
				eop,
				jiffies,
				eop_desc->wb.status);
			netif_stop_subqueue(netdev, tx_ring->queue_index);
		}
	}
	tx_ring->total_bytes += total_bytes;
	tx_ring->total_packets += total_packets;
	u64_stats_update_begin(&tx_ring->tx_syncp);
	tx_ring->tx_stats.bytes += total_bytes;
	tx_ring->tx_stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->tx_syncp);
	return count < tx_ring->count;
}
static inline void igb_rx_checksum_adv(struct igb_ring *ring,
				       u32 status_err, struct sk_buff *skb)
{
	skb_checksum_none_assert(skb);

	/* Ignore Checksum bit is set or checksum is disabled through ethtool */
	if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
	    (status_err & E1000_RXD_STAT_IXSM))
		return;

	/* TCP/UDP checksum error bit is set */
	if (status_err &
	    (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
		/*
		 * work around errata with sctp packets where the TCPE aka
		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
		 * packets, (aka let the stack check the crc32c)
		 */
		if ((skb->len == 60) &&
		    (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
			u64_stats_update_begin(&ring->rx_syncp);
			ring->rx_stats.csum_err++;
			u64_stats_update_end(&ring->rx_syncp);
		}
		/* let the stack verify checksum errors */
		return;
	}
	/* It must be a TCP or UDP packet with a valid checksum */
	if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
}
static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
			    struct sk_buff *skb)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	u64 regval;

	/*
	 * If this bit is set, then the RX registers contain the time stamp. No
	 * other packet will be time stamped until we read these registers, so
	 * read the registers to make them available again. Because only one
	 * packet can be time stamped at a time, we know that the register
	 * values must belong to this one here and therefore we don't need to
	 * compare any of the additional attributes stored for it.
	 *
	 * If nothing went wrong, then it should have a shared tx_flags that we
	 * can turn into a skb_shared_hwtstamps.
	 */
	if (staterr & E1000_RXDADV_STAT_TSIP) {
		u32 *stamp = (u32 *)skb->data;
		regval = le32_to_cpu(*(stamp + 2));
		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
		skb_pull(skb, IGB_TS_HDR_LEN);
	} else {
		if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
			return;

		regval = rd32(E1000_RXSTMPL);
		regval |= (u64)rd32(E1000_RXSTMPH) << 32;
	}

	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
}
static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
			       union e1000_adv_rx_desc *rx_desc)
{
	/* HW will not DMA in data larger than the given buffer, even if it
	 * parses the (NFS, of course) header to be larger.  In that case, it
	 * fills the header buffer and spills the rest into the page.
	 */
	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
	           E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
	if (hlen > rx_ring->rx_buffer_len)
		hlen = rx_ring->rx_buffer_len;
	return hlen;
}
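
/*
 * Example of the clamping above: if the descriptor's hdr_info field
 * claims a 256-byte header split but the ring was configured with
 * rx_buffer_len = 128, igb_get_hlen() returns 128, since the
 * hardware never DMAs more into the header buffer than it can hold
 * and the remainder of the packet lands in the page buffer.
 */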
static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
				 int *work_done, int budget)
{
	struct igb_ring *rx_ring = q_vector->rx_ring;
	struct net_device *netdev = rx_ring->netdev;
	struct device *dev = rx_ring->dev;
	union e1000_adv_rx_desc *rx_desc, *next_rxd;
	struct igb_buffer *buffer_info, *next_buffer;
	struct sk_buff *skb;
	bool cleaned = false;
	int cleaned_count = 0;
	int current_node = numa_node_id();
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int i;
	u32 staterr;
	u16 length;

	i = rx_ring->next_to_clean;
	buffer_info = &rx_ring->buffer_info[i];
	rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
	staterr = le32_to_cpu(rx_desc->wb.upper.status_error);

	while (staterr & E1000_RXD_STAT_DD) {
		if (*work_done >= budget)
			break;
		(*work_done)++;
		rmb(); /* read descriptor and rx_buffer_info after status DD */

		skb = buffer_info->skb;
		prefetch(skb->data - NET_IP_ALIGN);
		buffer_info->skb = NULL;

		i++;
		if (i == rx_ring->count)
			i = 0;

		next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
		prefetch(next_rxd);
		next_buffer = &rx_ring->buffer_info[i];

		length = le16_to_cpu(rx_desc->wb.upper.length);
		cleaned = true;
		cleaned_count++;

		if (buffer_info->dma) {
			dma_unmap_single(dev, buffer_info->dma,
					 rx_ring->rx_buffer_len,
					 DMA_FROM_DEVICE);
			buffer_info->dma = 0;
			if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
				skb_put(skb, length);
				goto send_up;
			}
			skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
		}

		if (length) {
			dma_unmap_page(dev, buffer_info->page_dma,
				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
			buffer_info->page_dma = 0;

			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
					   buffer_info->page,
					   buffer_info->page_offset,
					   length);

			if ((page_count(buffer_info->page) != 1) ||
			    (page_to_nid(buffer_info->page) != current_node))
				buffer_info->page = NULL;
			else
				get_page(buffer_info->page);

			skb->len += length;
			skb->data_len += length;
			skb->truesize += length;
		}

		if (!(staterr & E1000_RXD_STAT_EOP)) {
			buffer_info->skb = next_buffer->skb;
			buffer_info->dma = next_buffer->dma;
			next_buffer->skb = skb;
			next_buffer->dma = 0;
			goto next_desc;
		}
send_up:
		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
			dev_kfree_skb_irq(skb);
			goto next_desc;
		}

		if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
			igb_rx_hwtstamp(q_vector, staterr, skb);
		total_bytes += skb->len;
		total_packets++;

		igb_rx_checksum_adv(rx_ring, staterr, skb);

		skb->protocol = eth_type_trans(skb, netdev);
		skb_record_rx_queue(skb, rx_ring->queue_index);

		if (staterr & E1000_RXD_STAT_VP) {
			u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);

			__vlan_hwaccel_put_tag(skb, vid);
		}
		napi_gro_receive(&q_vector->napi, skb);

next_desc:
		rx_desc->wb.upper.status_error = 0;

		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
			igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
			cleaned_count = 0;
		}

		/* use prefetched values */
		rx_desc = next_rxd;
		buffer_info = next_buffer;
		staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
	}

	rx_ring->next_to_clean = i;
	cleaned_count = igb_desc_unused(rx_ring);

	if (cleaned_count)
		igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);

	rx_ring->total_packets += total_packets;
	rx_ring->total_bytes += total_bytes;
	u64_stats_update_begin(&rx_ring->rx_syncp);
	rx_ring->rx_stats.packets += total_packets;
	rx_ring->rx_stats.bytes += total_bytes;
	u64_stats_update_end(&rx_ring->rx_syncp);
	return cleaned;
}
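
/*
 * Buffer replenishment is batched: the clean loop above hands
 * buffers back to hardware IGB_RX_BUFFER_WRITE descriptors at a
 * time (one tail write per batch), because a tail write per buffer
 * would be far too slow.
 */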
/**
 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
 * @adapter: address of board private structure
 **/
void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
{
	struct net_device *netdev = rx_ring->netdev;
	union e1000_adv_rx_desc *rx_desc;
	struct igb_buffer *buffer_info;
	struct sk_buff *skb;
	unsigned int i;
	int bufsz;

	i = rx_ring->next_to_use;
	buffer_info = &rx_ring->buffer_info[i];

	bufsz = rx_ring->rx_buffer_len;

	while (cleaned_count--) {
		rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);

		if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
			if (!buffer_info->page) {
				buffer_info->page = netdev_alloc_page(netdev);
				if (unlikely(!buffer_info->page)) {
					u64_stats_update_begin(&rx_ring->rx_syncp);
					rx_ring->rx_stats.alloc_failed++;
					u64_stats_update_end(&rx_ring->rx_syncp);
					goto no_buffers;
				}
				buffer_info->page_offset = 0;
			} else {
				buffer_info->page_offset ^= PAGE_SIZE / 2;
			}
			buffer_info->page_dma =
				dma_map_page(rx_ring->dev, buffer_info->page,
					     buffer_info->page_offset,
					     PAGE_SIZE / 2,
					     DMA_FROM_DEVICE);
			if (dma_mapping_error(rx_ring->dev,
					      buffer_info->page_dma)) {
				buffer_info->page_dma = 0;
				u64_stats_update_begin(&rx_ring->rx_syncp);
				rx_ring->rx_stats.alloc_failed++;
				u64_stats_update_end(&rx_ring->rx_syncp);
				goto no_buffers;
			}
		}

		skb = buffer_info->skb;
		if (!skb) {
			skb = netdev_alloc_skb_ip_align(netdev, bufsz);
			if (unlikely(!skb)) {
				u64_stats_update_begin(&rx_ring->rx_syncp);
				rx_ring->rx_stats.alloc_failed++;
				u64_stats_update_end(&rx_ring->rx_syncp);
				goto no_buffers;
			}

			buffer_info->skb = skb;
		}
		if (!buffer_info->dma) {
			buffer_info->dma = dma_map_single(rx_ring->dev,
							  skb->data,
							  bufsz,
							  DMA_FROM_DEVICE);
			if (dma_mapping_error(rx_ring->dev,
					      buffer_info->dma)) {
				buffer_info->dma = 0;
				u64_stats_update_begin(&rx_ring->rx_syncp);
				rx_ring->rx_stats.alloc_failed++;
				u64_stats_update_end(&rx_ring->rx_syncp);
				goto no_buffers;
			}
		}
		/* Refresh the desc even if buffer_addrs didn't change because
		 * each write-back erases this info. */
		if (bufsz < IGB_RXBUFFER_1024) {
			rx_desc->read.pkt_addr =
				cpu_to_le64(buffer_info->page_dma);
			rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
		} else {
			rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
			rx_desc->read.hdr_addr = 0;
		}

		i++;
		if (i == rx_ring->count)
			i = 0;
		buffer_info = &rx_ring->buffer_info[i];
	}

no_buffers:
	if (rx_ring->next_to_use != i) {
		rx_ring->next_to_use = i;
		if (i == 0)
			i = (rx_ring->count - 1);
		else
			i--;

		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch.  (Only
		 * applicable for weak-ordered memory model archs,
		 * such as IA-64). */
		wmb();
		writel(i, rx_ring->tail);
	}
}
static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct mii_ioctl_data *data = if_mii(ifr);

	if (adapter->hw.phy.media_type != e1000_media_type_copper)
		return -EOPNOTSUPP;

	switch (cmd) {
	case SIOCGMIIPHY:
		data->phy_id = adapter->hw.phy.addr;
		break;
	case SIOCGMIIREG:
		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
				     &data->val_out))
			return -EIO;
		break;
	case SIOCSMIIREG:
	default:
		return -EOPNOTSUPP;
	}
	return 0;
}

/**
 * igb_hwtstamp_ioctl - control hardware time stamping
 * @netdev: network interface device structure
 * @ifr: interface request structure
 * @cmd: ioctl command
 *
 * Outgoing time stamping can be enabled and disabled. Play nice and
 * disable it when requested, although it shouldn't cause any overhead
 * when no packet needs it. At most one packet in the queue may be
 * marked for time stamping, otherwise it would be impossible to tell
 * for sure to which packet the hardware time stamp belongs.
 *
 * Incoming time stamping has to be configured via the hardware
 * filters. Not all combinations are supported, in particular event
 * type has to be specified. Matching the kind of event packet is
 * not supported, with the exception of "all V2 events regardless of
 * level 2 or 4".
 *
 **/
static int igb_hwtstamp_ioctl(struct net_device *netdev,
			      struct ifreq *ifr, int cmd)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct hwtstamp_config config;
	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
	u32 tsync_rx_cfg = 0;
	bool is_l4 = false;
	bool is_l2 = false;
	u32 regval;

	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
		return -EFAULT;

	/* reserved for future extensions */
	if (config.flags)
		return -EINVAL;

	switch (config.tx_type) {
	case HWTSTAMP_TX_OFF:
		tsync_tx_ctl = 0;
	case HWTSTAMP_TX_ON:
		break;
	default:
		return -ERANGE;
	}

	switch (config.rx_filter) {
	case HWTSTAMP_FILTER_NONE:
		tsync_rx_ctl = 0;
		break;
	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
	case HWTSTAMP_FILTER_ALL:
		/*
		 * register TSYNCRXCFG must be set, therefore it is not
		 * possible to time stamp both Sync and Delay_Req messages
		 * => fall back to time stamping all packets
		 */
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
		config.rx_filter = HWTSTAMP_FILTER_ALL;
		break;
	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
		is_l4 = true;
		break;
	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
		is_l4 = true;
		break;
	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
		is_l2 = true;
		is_l4 = true;
		config.rx_filter = HWTSTAMP_FILTER_SOME;
		break;
	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
		is_l2 = true;
		is_l4 = true;
		config.rx_filter = HWTSTAMP_FILTER_SOME;
		break;
	case HWTSTAMP_FILTER_PTP_V2_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
		is_l2 = true;
		is_l4 = true;
		break;
	default:
		return -ERANGE;
	}

	if (hw->mac.type == e1000_82575) {
		if (tsync_rx_ctl | tsync_tx_ctl)
			return -EINVAL;
		return 0;
	}

	/*
	 * Per-packet timestamping only works if all packets are
	 * timestamped, so enable timestamping in all packets as
	 * long as one rx filter was configured.
	 */
	if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
	}

	/* enable/disable TX */
	regval = rd32(E1000_TSYNCTXCTL);
	regval &= ~E1000_TSYNCTXCTL_ENABLED;
	regval |= tsync_tx_ctl;
	wr32(E1000_TSYNCTXCTL, regval);

	/* enable/disable RX */
	regval = rd32(E1000_TSYNCRXCTL);
	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
	regval |= tsync_rx_ctl;
	wr32(E1000_TSYNCRXCTL, regval);

	/* define which PTP packets are time stamped */
	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);

	/* define ethertype filter for timestamped packets */
	if (is_l2)
		wr32(E1000_ETQF(3),
		     (E1000_ETQF_FILTER_ENABLE | /* enable filter */
		      E1000_ETQF_1588 | /* enable timestamping */
		      ETH_P_1588));     /* 1588 eth protocol type */
	else
		wr32(E1000_ETQF(3), 0);

#define PTP_PORT 319
	/* L4 Queue Filter[3]: filter by destination port and protocol */
	if (is_l4) {
		u32 ftqf = (IPPROTO_UDP /* UDP */
			| E1000_FTQF_VF_BP /* VF not compared */
			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
			| E1000_FTQF_MASK); /* mask all inputs */
		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */

		wr32(E1000_IMIR(3), htons(PTP_PORT));
		wr32(E1000_IMIREXT(3),
		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
		if (hw->mac.type == e1000_82576) {
			/* enable source port check */
			wr32(E1000_SPQF(3), htons(PTP_PORT));
			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
		}
		wr32(E1000_FTQF(3), ftqf);
	} else {
		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
	}
	wrfl();

	adapter->hwtstamp_config = config;

	/* clear TX/RX time stamp registers, just to be sure */
	regval = rd32(E1000_TXSTMPH);
	regval = rd32(E1000_RXSTMPH);

	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
		-EFAULT : 0;
}
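
/*
 * Userspace drives the handler above through the standard
 * SIOCSHWTSTAMP ioctl; a minimal sketch (error handling omitted):
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_ALL,
 *	};
 *	struct ifreq ifr;
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(sock_fd, SIOCSHWTSTAMP, &ifr);
 *
 * On return, cfg.rx_filter reports the filter actually programmed
 * (e.g. HWTSTAMP_FILTER_ALL when the requested mode was widened).
 */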
/**
 * igb_ioctl -
 * @netdev: network interface device structure
 * @ifr: interface request structure
 * @cmd: ioctl command
 **/
static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
	switch (cmd) {
	case SIOCGMIIPHY:
	case SIOCGMIIREG:
	case SIOCSMIIREG:
		return igb_mii_ioctl(netdev, ifr, cmd);
	case SIOCSHWTSTAMP:
		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
	default:
		return -EOPNOTSUPP;
	}
}
s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
{
	struct igb_adapter *adapter = hw->back;
	u16 cap_offset;

	cap_offset = adapter->pdev->pcie_cap;
	if (!cap_offset)
		return -E1000_ERR_CONFIG;

	pci_read_config_word(adapter->pdev, cap_offset + reg, value);

	return 0;
}

s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
{
	struct igb_adapter *adapter = hw->back;
	u16 cap_offset;

	cap_offset = adapter->pdev->pcie_cap;
	if (!cap_offset)
		return -E1000_ERR_CONFIG;

	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);

	return 0;
}
static void igb_vlan_mode(struct net_device *netdev, u32 features)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl;

	igb_irq_disable(adapter);

	if (features & NETIF_F_HW_VLAN_RX) {
		/* enable VLAN tag insert/strip */
		ctrl = rd32(E1000_CTRL);
		ctrl |= E1000_CTRL_VME;
		wr32(E1000_CTRL, ctrl);

		/* Disable CFI check */
		rctl = rd32(E1000_RCTL);
		rctl &= ~E1000_RCTL_CFIEN;
		wr32(E1000_RCTL, rctl);
	} else {
		/* disable VLAN tag insert/strip */
		ctrl = rd32(E1000_CTRL);
		ctrl &= ~E1000_CTRL_VME;
		wr32(E1000_CTRL, ctrl);
	}

	igb_rlpml_set(adapter);

	if (!test_bit(__IGB_DOWN, &adapter->state))
		igb_irq_enable(adapter);
}
static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int pf_id = adapter->vfs_allocated_count;

	/* attempt to add filter to vlvf array */
	igb_vlvf_set(adapter, vid, true, pf_id);

	/* add the filter since PF can receive vlans w/o entry in vlvf */
	igb_vfta_set(hw, vid, true);

	set_bit(vid, adapter->active_vlans);
}

static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int pf_id = adapter->vfs_allocated_count;
	s32 err;

	igb_irq_disable(adapter);

	if (!test_bit(__IGB_DOWN, &adapter->state))
		igb_irq_enable(adapter);

	/* remove vlan from VLVF table array */
	err = igb_vlvf_set(adapter, vid, false, pf_id);

	/* if vid was not present in VLVF just remove it from table */
	if (err)
		igb_vfta_set(hw, vid, false);

	clear_bit(vid, adapter->active_vlans);
}
static void igb_restore_vlan(struct igb_adapter *adapter)
{
	u16 vid;

	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
		igb_vlan_rx_add_vid(adapter->netdev, vid);
}
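
/*
 * For reference, a sketch of the VFTA indexing assumed by igb_vfta_set()
 * (not shown in this file), based on the e1000-family register layout: the
 * 4096 possible VLAN IDs map onto 128 32-bit filter-table registers, so a
 * VLAN ID selects a register by its upper bits and a bit within that
 * register by its lower five bits.
 *
 *	u32 index = (vid >> 5) & 0x7F;	// which VFTA register
 *	u32 mask  = 1 << (vid & 0x1F);	// which bit inside it
 *	// set:   vfta[index] |=  mask;
 *	// clear: vfta[index] &= ~mask;
 */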
int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_mac_info *mac = &adapter->hw.mac;

	mac->autoneg = 0;

	/* Make sure dplx is at most 1 bit and lsb of speed is not set
	 * for the switch() below to work */
	if ((spd & 1) || (dplx & ~1))
		goto err_inval;

	/* Fiber NICs only allow 1000 Mbps full duplex */
	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
	    spd != SPEED_1000 &&
	    dplx != DUPLEX_FULL)
		goto err_inval;

	switch (spd + dplx) {
	case SPEED_10 + DUPLEX_HALF:
		mac->forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	case SPEED_10 + DUPLEX_FULL:
		mac->forced_speed_duplex = ADVERTISE_10_FULL;
		break;
	case SPEED_100 + DUPLEX_HALF:
		mac->forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case SPEED_100 + DUPLEX_FULL:
		mac->forced_speed_duplex = ADVERTISE_100_FULL;
		break;
	case SPEED_1000 + DUPLEX_FULL:
		mac->autoneg = 1;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case SPEED_1000 + DUPLEX_HALF: /* not supported */
	default:
		goto err_inval;
	}
	return 0;

err_inval:
	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
	return -EINVAL;
}
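
/*
 * Usage note (illustrative): this path is typically reached from ethtool's
 * set-settings hook, e.g. forcing 100 Mbps full duplex on an assumed
 * interface name:
 *
 *	ethtool -s eth0 speed 100 duplex full autoneg off
 *
 * With SPEED_100 == 100 and DUPLEX_FULL == 1, spd + dplx == 101 selects the
 * ADVERTISE_100_FULL case above; the lsb/1-bit checks at the top are what
 * make this sum-based dispatch unambiguous.
 */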
static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl, status;
	u32 wufc = adapter->wol;
#ifdef CONFIG_PM
	int retval = 0;
#endif

	netif_device_detach(netdev);

	if (netif_running(netdev))
		igb_close(netdev);

	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PM
	retval = pci_save_state(pdev);
	if (retval)
		return retval;
#endif

	status = rd32(E1000_STATUS);
	if (status & E1000_STATUS_LU)
		wufc &= ~E1000_WUFC_LNKC;

	if (wufc) {
		igb_setup_rctl(adapter);
		igb_set_rx_mode(netdev);

		/* turn on all-multi mode if wake on multicast is enabled */
		if (wufc & E1000_WUFC_MC) {
			rctl = rd32(E1000_RCTL);
			rctl |= E1000_RCTL_MPE;
			wr32(E1000_RCTL, rctl);
		}

		ctrl = rd32(E1000_CTRL);
		/* advertise wake from D3Cold */
		#define E1000_CTRL_ADVD3WUC 0x00100000
		/* phy power management enable */
		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
		ctrl |= E1000_CTRL_ADVD3WUC;
		wr32(E1000_CTRL, ctrl);

		/* Allow time for pending master requests to run */
		igb_disable_pcie_master(hw);

		wr32(E1000_WUC, E1000_WUC_PME_EN);
		wr32(E1000_WUFC, wufc);
	} else {
		wr32(E1000_WUC, 0);
		wr32(E1000_WUFC, 0);
	}

	*enable_wake = wufc || adapter->en_mng_pt;
	if (!*enable_wake)
		igb_power_down_link(adapter);
	else
		igb_power_up_link(adapter);

	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	pci_disable_device(pdev);

	return 0;
}
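
/*
 * Usage note (illustrative): the wake-up filter control value (wufc)
 * consumed above comes from adapter->wol, which userspace configures
 * through ethtool's WoL hook; e.g. enabling magic-packet wake on an
 * assumed interface name:
 *
 *	ethtool -s eth0 wol g
 *
 * A link-up at shutdown time clears E1000_WUFC_LNKC above, since waking
 * on "link change" is pointless while the link is already up.
 */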
#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
{
	int retval;
	bool wake;

	retval = __igb_shutdown(pdev, &wake);
	if (retval)
		return retval;

	if (wake) {
		pci_prepare_to_sleep(pdev);
	} else {
		pci_wake_from_d3(pdev, false);
		pci_set_power_state(pdev, PCI_D3hot);
	}

	return 0;
}
static int igb_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int err;

	pci_set_power_state(pdev, PCI_D0);
	pci_restore_state(pdev);
	pci_save_state(pdev);

	err = pci_enable_device_mem(pdev);
	if (err) {
		dev_err(&pdev->dev,
			"igb: Cannot enable PCI device from suspend\n");
		return err;
	}
	pci_set_master(pdev);

	pci_enable_wake(pdev, PCI_D3hot, 0);
	pci_enable_wake(pdev, PCI_D3cold, 0);

	if (igb_init_interrupt_scheme(adapter)) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		return -ENOMEM;
	}

	igb_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);

	wr32(E1000_WUS, ~0);

	if (netif_running(netdev)) {
		err = igb_open(netdev);
		if (err)
			return err;
	}

	netif_device_attach(netdev);

	return 0;
}
#endif
static void igb_shutdown(struct pci_dev *pdev)
{
	bool wake;

	__igb_shutdown(pdev, &wake);

	if (system_state == SYSTEM_POWER_OFF) {
		pci_wake_from_d3(pdev, wake);
		pci_set_power_state(pdev, PCI_D3hot);
	}
}
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Polling 'interrupt' - used by things like netconsole to send skbs
 * without having to re-enable interrupts. It's not called while
 * the interrupt routine is executing.
 */
static void igb_netpoll(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int i;

	if (!adapter->msix_entries) {
		struct igb_q_vector *q_vector = adapter->q_vector[0];
		igb_irq_disable(adapter);
		napi_schedule(&q_vector->napi);
		return;
	}

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		wr32(E1000_EIMC, q_vector->eims_value);
		napi_schedule(&q_vector->napi);
	}
}
#endif /* CONFIG_NET_POLL_CONTROLLER */
/**
 * igb_io_error_detected - called when PCI error is detected
 * @pdev: Pointer to PCI device
 * @state: The current pci connection state
 *
 * This function is called after a PCI bus error affecting
 * this device has been detected.
 */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	netif_device_detach(netdev);

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	if (netif_running(netdev))
		igb_down(adapter);
	pci_disable_device(pdev);

	/* Request a slot reset. */
	return PCI_ERS_RESULT_NEED_RESET;
}
/**
 * igb_io_slot_reset - called after the pci bus has been reset.
 * @pdev: Pointer to PCI device
 *
 * Restart the card from scratch, as if from a cold-boot. Implementation
 * resembles the first half of the igb_resume routine.
 */
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	pci_ers_result_t result;
	int err;

	if (pci_enable_device_mem(pdev)) {
		dev_err(&pdev->dev,
			"Cannot re-enable PCI device after reset.\n");
		result = PCI_ERS_RESULT_DISCONNECT;
	} else {
		pci_set_master(pdev);
		pci_restore_state(pdev);
		pci_save_state(pdev);

		pci_enable_wake(pdev, PCI_D3hot, 0);
		pci_enable_wake(pdev, PCI_D3cold, 0);

		igb_reset(adapter);
		wr32(E1000_WUS, ~0);
		result = PCI_ERS_RESULT_RECOVERED;
	}

	err = pci_cleanup_aer_uncorrect_error_status(pdev);
	if (err) {
		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
			"failed 0x%x\n", err);
		/* non-fatal, continue */
	}

	return result;
}
/**
 * igb_io_resume - called when traffic can start flowing again.
 * @pdev: Pointer to PCI device
 *
 * This callback is called when the error recovery driver tells us that
 * it's OK to resume normal operation. Implementation resembles the
 * second half of the igb_resume routine.
 */
static void igb_io_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (netif_running(netdev)) {
		if (igb_up(adapter)) {
			dev_err(&pdev->dev, "igb_up failed after reset\n");
			return;
		}
	}

	netif_device_attach(netdev);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);
}
static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
			     u8 qsel)
{
	u32 rar_low, rar_high;
	struct e1000_hw *hw = &adapter->hw;

	/* HW expects these in little endian so we reverse the byte order
	 * from network order (big endian) to little endian
	 */
	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
		   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));

	/* Indicate to hardware the Address is Valid. */
	rar_high |= E1000_RAH_AV;

	if (hw->mac.type == e1000_82575)
		rar_high |= E1000_RAH_POOL_1 * qsel;
	else
		rar_high |= E1000_RAH_POOL_1 << qsel;

	wr32(E1000_RAL(index), rar_low);
	wrfl();
	wr32(E1000_RAH(index), rar_high);
	wrfl();
}
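
/*
 * Worked example of the packing above (values illustrative): for the MAC
 * address 00:1b:21:aa:bb:cc,
 *
 *	rar_low  = 0x00 | (0x1b << 8) | (0x21 << 16) | (0xaa << 24)
 *	         = 0xaa211b00;
 *	rar_high = 0xbb | (0xcc << 8) = 0x0000ccbb;	(plus E1000_RAH_AV)
 *
 * i.e. the first four octets land in RAL and the last two in RAH,
 * byte-reversed into the little-endian layout the hardware expects.
 */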
static int igb_set_vf_mac(struct igb_adapter *adapter,
			  int vf, unsigned char *mac_addr)
{
	struct e1000_hw *hw = &adapter->hw;
	/* VF MAC addresses start at end of receive addresses and move
	 * towards the first, as a result a collision should not be possible */
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);

	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);

	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);

	return 0;
}
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
		return -EINVAL;
	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
				      " change effective.\n");
	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
			 " but the PF device is not up.\n");
		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
			 " attempting to use the VF device.\n");
	}
	return igb_set_vf_mac(adapter, vf, mac);
}
static int igb_link_mbps(int internal_link_speed)
{
	switch (internal_link_speed) {
	case SPEED_100:
		return 100;
	case SPEED_1000:
		return 1000;
	default:
		return 0;
	}
}
static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
				  int link_speed)
{
	int rf_dec, rf_int;
	u32 bcnrc_val;

	if (tx_rate != 0) {
		/* Calculate the rate factor values to set */
		rf_int = link_speed / tx_rate;
		rf_dec = (link_speed - (rf_int * tx_rate));
		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;

		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
			       E1000_RTTBCNRC_RF_INT_MASK);
		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
	} else {
		bcnrc_val = 0;
	}

	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
	wr32(E1000_RTTBCNRC, bcnrc_val);
}
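
/*
 * Worked example of the rate-factor arithmetic above (assuming
 * E1000_RTTBCNRC_RF_INT_SHIFT == 14, i.e. a 14-bit fraction): limiting a
 * VF to 300 Mbps on a 1000 Mbps link gives
 *
 *	rf_int = 1000 / 300            = 3
 *	rf_dec = 1000 - (3 * 300)      = 100
 *	rf_dec = 100 * (1 << 14) / 300 = 5461
 *
 * so the programmed factor is 3 + 5461/16384, about 3.333, and the
 * hardware pacing of link_speed / 3.333 yields the requested ~300 Mbps.
 */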
static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
{
	int actual_link_speed, i;
	bool reset_rate = false;

	/* VF TX rate limit was not set or not supported */
	if ((adapter->vf_rate_link_speed == 0) ||
	    (adapter->hw.mac.type != e1000_82576))
		return;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if (actual_link_speed != adapter->vf_rate_link_speed) {
		reset_rate = true;
		adapter->vf_rate_link_speed = 0;
		dev_info(&adapter->pdev->dev,
			 "Link speed has been changed. VF Transmit "
			 "rate is disabled\n");
	}

	for (i = 0; i < adapter->vfs_allocated_count; i++) {
		if (reset_rate)
			adapter->vf_data[i].tx_rate = 0;

		igb_set_vf_rate_limit(&adapter->hw, i,
				      adapter->vf_data[i].tx_rate,
				      actual_link_speed);
	}
}
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int actual_link_speed;

	if (hw->mac.type != e1000_82576)
		return -EOPNOTSUPP;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if ((vf >= adapter->vfs_allocated_count) ||
	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
	    (tx_rate < 0) || (tx_rate > actual_link_speed))
		return -EINVAL;

	adapter->vf_rate_link_speed = actual_link_speed;
	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);

	return 0;
}
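
/*
 * Usage note (illustrative): this ndo hook is driven from iproute2, e.g.
 * capping VF 0 of an assumed PF interface at 300 Mbps:
 *
 *	ip link set dev eth0 vf 0 rate 300
 *
 * A tx_rate of 0 removes the limit (bcnrc_val is cleared in
 * igb_set_vf_rate_limit() above).
 */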
static int igb_ndo_get_vf_config(struct net_device *netdev,
				 int vf, struct ifla_vf_info *ivi)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	if (vf >= adapter->vfs_allocated_count)
		return -EINVAL;
	ivi->vf = vf;
	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
	ivi->vlan = adapter->vf_data[vf].pf_vlan;
	ivi->qos = adapter->vf_data[vf].pf_qos;
	return 0;
}
static void igb_vmm_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg;

	switch (hw->mac.type) {
	case e1000_82575:
	default:
		/* replication is not supported for 82575 */
		return;
	case e1000_82576:
		/* notify HW that the MAC is adding vlan tags */
		reg = rd32(E1000_DTXCTL);
		reg |= E1000_DTXCTL_VLAN_ADDED;
		wr32(E1000_DTXCTL, reg);
		/* fall through */
	case e1000_82580:
		/* enable replication vlan tag stripping */
		reg = rd32(E1000_RPLOLR);
		reg |= E1000_RPLOLR_STRVLAN;
		wr32(E1000_RPLOLR, reg);
		/* fall through */
	case e1000_i350:
		/* none of the above registers are supported by i350 */
		break;
	}

	if (adapter->vfs_allocated_count) {
		igb_vmdq_set_loopback_pf(hw, true);
		igb_vmdq_set_replication_pf(hw, true);
		igb_vmdq_set_anti_spoofing_pf(hw, true,
					      adapter->vfs_allocated_count);
	} else {
		igb_vmdq_set_loopback_pf(hw, false);
		igb_vmdq_set_replication_pf(hw, false);
	}
}