/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2011 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/

#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#include <linux/prefetch.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"

/* version components; 3.0.6-k is assumed for this snapshot of the driver */
#define MAJ 3
#define MIN 0
#define BUILD 6
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
	__stringify(BUILD) "-k"
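/* For illustration: with the values above, __stringify() expands
 * DRV_VERSION to the string "3.0.6-k". */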
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
				"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";

static const struct e1000_info *igb_info_tbl[] = {
	[board_82575] = &e1000_82575_info,
};

static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
	/* required last entry */
	{0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);

void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static void igb_init_hw_timer(struct igb_adapter *adapter);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
						 struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static bool igb_clean_tx_irq(struct igb_q_vector *);
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_rx_irq(struct igb_q_vector *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_mode(struct net_device *netdev, u32 features);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
				 struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);

#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
	.next		= NULL,
	.priority	= 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */

static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
		     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
	.error_detected = igb_io_error_detected,
	.slot_reset = igb_io_slot_reset,
	.resume = igb_io_resume,
};

static struct pci_driver igb_driver = {
	.name     = igb_driver_name,
	.id_table = igb_pci_tbl,
	.probe    = igb_probe,
	.remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
	/* Power Management Hooks */
	.suspend  = igb_suspend,
	.resume   = igb_resume,
#endif
	.shutdown = igb_shutdown,
	.err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);

struct igb_reg_info {
	u32 ofs;
	char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {

	/* General Registers */
	{E1000_CTRL, "CTRL"},
	{E1000_STATUS, "STATUS"},
	{E1000_CTRL_EXT, "CTRL_EXT"},

	/* Interrupt Registers */
	{E1000_ICR, "ICR"},

	/* RX Registers */
	{E1000_RCTL, "RCTL"},
	{E1000_RDLEN(0), "RDLEN"},
	{E1000_RDH(0), "RDH"},
	{E1000_RDT(0), "RDT"},
	{E1000_RXDCTL(0), "RXDCTL"},
	{E1000_RDBAL(0), "RDBAL"},
	{E1000_RDBAH(0), "RDBAH"},

	/* TX Registers */
	{E1000_TCTL, "TCTL"},
	{E1000_TDBAL(0), "TDBAL"},
	{E1000_TDBAH(0), "TDBAH"},
	{E1000_TDLEN(0), "TDLEN"},
	{E1000_TDH(0), "TDH"},
	{E1000_TDT(0), "TDT"},
	{E1000_TXDCTL(0), "TXDCTL"},
	{E1000_TDFH, "TDFH"},
	{E1000_TDFT, "TDFT"},
	{E1000_TDFHS, "TDFHS"},
	{E1000_TDFPC, "TDFPC"},

	/* List Terminator */
	{}
};

/*
 * igb_regdump - register printout routine
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
	int n = 0;
	char rname[16];
	u32 regs[8];

	switch (reginfo->ofs) {
	case E1000_RDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDLEN(n));
		break;
	case E1000_RDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDH(n));
		break;
	case E1000_RDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDT(n));
		break;
	case E1000_RXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RXDCTL(n));
		break;
	case E1000_RDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAL(n));
		break;
	case E1000_RDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAH(n));
		break;
	case E1000_TDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAL(n));
		break;
	case E1000_TDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAH(n));
		break;
	case E1000_TDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDLEN(n));
		break;
	case E1000_TDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDH(n));
		break;
	case E1000_TDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDT(n));
		break;
	case E1000_TXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TXDCTL(n));
		break;
	default:
		printk(KERN_INFO "%-15s %08x\n",
			reginfo->name, rd32(reginfo->ofs));
		return;
	}

	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
	printk(KERN_INFO "%-15s ", rname);
	for (n = 0; n < 4; n++)
		printk(KERN_CONT "%08x ", regs[n]);
	printk(KERN_CONT "\n");
}

/*
 * igb_dump - Print registers, tx-rings and rx-rings
 */
static void igb_dump(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	struct igb_reg_info *reginfo;
	int n = 0;
	struct igb_ring *tx_ring;
	union e1000_adv_tx_desc *tx_desc;
	struct my_u0 { u64 a; u64 b; } *u0;
	struct igb_buffer *buffer_info;
	struct igb_ring *rx_ring;
	union e1000_adv_rx_desc *rx_desc;
	u32 staterr;
	int i = 0;

	if (!netif_msg_hw(adapter))
		return;

	/* Print netdevice Info */
	if (netdev) {
		dev_info(&adapter->pdev->dev, "Net device Info\n");
		printk(KERN_INFO "Device Name     state            "
			"trans_start      last_rx\n");
		printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
			netdev->name,
			netdev->state,
			netdev->trans_start,
			netdev->last_rx);
	}

	/* Print Registers */
	dev_info(&adapter->pdev->dev, "Register Dump\n");
	printk(KERN_INFO " Register Name   Value\n");
	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
	     reginfo->name; reginfo++) {
		igb_regdump(hw, reginfo);
	}

	/* Print TX Ring Summary */
	if (!netdev || !netif_running(netdev))
		goto exit;

	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
	printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
		" leng ntw timestamp\n");
	for (n = 0; n < adapter->num_tx_queues; n++) {
		tx_ring = adapter->tx_ring[n];
		buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
		printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
			n, tx_ring->next_to_use, tx_ring->next_to_clean,
			(u64)buffer_info->dma,
			buffer_info->length,
			buffer_info->next_to_watch,
			(u64)buffer_info->time_stamp);
	}

	/* Print TX Rings */
	if (!netif_msg_tx_done(adapter))
		goto rx_ring_summary;

	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");

	/* Transmit Descriptor Formats
	 *
	 * Advanced Transmit Descriptor
	 *   +--------------------------------------------------------------+
	 * 0 |         Buffer Address [63:0]                                |
	 *   +--------------------------------------------------------------+
	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
	 *   +--------------------------------------------------------------+
	 *   63      46 45    40 39 38 36 35 32 31  24             15       0
	 */

	for (n = 0; n < adapter->num_tx_queues; n++) {
		tx_ring = adapter->tx_ring[n];
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "T [desc]     [address 63:0  ] "
			"[PlPOCIStDDM Ln] [bi->dma       ] "
			"leng  ntw timestamp        bi->skb\n");

		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
			tx_desc = IGB_TX_DESC(tx_ring, i);
			buffer_info = &tx_ring->buffer_info[i];
			u0 = (struct my_u0 *)tx_desc;
			printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
				" %04X  %3X %016llX %p", i,
				le64_to_cpu(u0->a),
				le64_to_cpu(u0->b),
				(u64)buffer_info->dma,
				buffer_info->length,
				buffer_info->next_to_watch,
				(u64)buffer_info->time_stamp,
				buffer_info->skb);
			if (i == tx_ring->next_to_use &&
			    i == tx_ring->next_to_clean)
				printk(KERN_CONT " NTC/U\n");
			else if (i == tx_ring->next_to_use)
				printk(KERN_CONT " NTU\n");
			else if (i == tx_ring->next_to_clean)
				printk(KERN_CONT " NTC\n");
			else
				printk(KERN_CONT "\n");

			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
				print_hex_dump(KERN_INFO, "",
					DUMP_PREFIX_ADDRESS,
					16, 1, phys_to_virt(buffer_info->dma),
					buffer_info->length, true);
		}
	}

	/* Print RX Rings Summary */
rx_ring_summary:
	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
	printk(KERN_INFO "Queue [NTU] [NTC]\n");
	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		printk(KERN_INFO " %5d %5X %5X\n", n,
			rx_ring->next_to_use, rx_ring->next_to_clean);
	}

	/* Print RX Rings */
	if (!netif_msg_rx_status(adapter))
		goto exit;

	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");

	/* Advanced Receive Descriptor (Read) Format
	 *    63                                           1        0
	 *    +-----------------------------------------------------+
	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
	 *    +----------------------------------------------+------+
	 *  8 |       Header Buffer Address [63:1]           |  DD  |
	 *    +-----------------------------------------------------+
	 *
	 *
	 * Advanced Receive Descriptor (Write-Back) Format
	 *
	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
	 *   +------------------------------------------------------+
	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS  |
	 *   | Checksum   Ident  |   |           |    | Type | Type  |
	 *   +------------------------------------------------------+
	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
	 *   +------------------------------------------------------+
	 *   63       48 47    32 31            20 19               0
	 */

	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
			"[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
			"<-- Adv Rx Read format\n");
		printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
			"[vl er S cks ln] ---------------- [bi->skb] "
			"<-- Adv Rx Write-Back format\n");

		for (i = 0; i < rx_ring->count; i++) {
			buffer_info = &rx_ring->buffer_info[i];
			rx_desc = IGB_RX_DESC(rx_ring, i);
			u0 = (struct my_u0 *)rx_desc;
			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
			if (staterr & E1000_RXD_STAT_DD) {
				/* Descriptor Done */
				printk(KERN_INFO "RWB[0x%03X]     %016llX "
					"%016llX ---------------- %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					buffer_info->skb);
			} else {
				printk(KERN_INFO "R  [0x%03X]     %016llX "
					"%016llX %016llX %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					(u64)buffer_info->dma,
					buffer_info->skb);

				if (netif_msg_pktdata(adapter)) {
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(buffer_info->dma),
						IGB_RX_HDR_LEN, true);
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(
						  buffer_info->page_dma +
						  buffer_info->page_offset),
						PAGE_SIZE/2, true);
				}
			}

			if (i == rx_ring->next_to_use)
				printk(KERN_CONT " NTU\n");
			else if (i == rx_ring->next_to_clean)
				printk(KERN_CONT " NTC\n");
			else
				printk(KERN_CONT "\n");
		}
	}

exit:
	return;
}

/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 **/
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
	struct igb_adapter *adapter =
		container_of(tc, struct igb_adapter, cycles);
	struct e1000_hw *hw = &adapter->hw;
	u64 stamp = 0;
	int shift = 0;

	/*
	 * The timestamp latches on lowest register read.  For the 82580
	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
	 */
	if (hw->mac.type == e1000_82580) {
		stamp = rd32(E1000_SYSTIMR) >> 8;
		shift = IGB_82580_TSYNC_SHIFT;
	}

	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
	return stamp;
}

/**
 * igb_get_hw_dev - return device
 * used by hardware layer to print debugging information
 **/
struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
{
	struct igb_adapter *adapter = hw->back;
	return adapter->netdev;
}

/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded.  All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
	int ret;
	printk(KERN_INFO "%s - version %s\n",
	       igb_driver_string, igb_driver_version);

	printk(KERN_INFO "%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
	dca_register_notify(&dca_notifier);
#endif
	ret = pci_register_driver(&igb_driver);
	return ret;
}

module_init(igb_init_module);

/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
	dca_unregister_notify(&dca_notifier);
#endif
	pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);

#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
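/*
 * For illustration: Q_IDX_82576 interleaves ring indices into the 82576
 * register layout, so i = 0, 1, 2, 3, ... maps to register offsets
 * 0, 8, 1, 9, ... keeping each VF's queue pair (n, n + 8) together.
 */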
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
	int i = 0, j = 0;
	u32 rbase_offset = adapter->vfs_allocated_count;

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		/* The queues are allocated for virtualization such that VF 0
		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
		 * In order to avoid collision we start at the first free queue
		 * and continue consuming queues in the same sequence
		 */
		if (adapter->vfs_allocated_count) {
			for (; i < adapter->rss_queues; i++)
				adapter->rx_ring[i]->reg_idx = rbase_offset +
							       Q_IDX_82576(i);
		}
		/* fall through for any remaining rings */
	case e1000_82575:
	case e1000_82580:
	case e1000_i350:
	default:
		for (; i < adapter->num_rx_queues; i++)
			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
		for (; j < adapter->num_tx_queues; j++)
			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
		break;
	}
}

static void igb_free_queues(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		kfree(adapter->tx_ring[i]);
		adapter->tx_ring[i] = NULL;
	}
	for (i = 0; i < adapter->num_rx_queues; i++) {
		kfree(adapter->rx_ring[i]);
		adapter->rx_ring[i] = NULL;
	}
	adapter->num_rx_queues = 0;
	adapter->num_tx_queues = 0;
}

/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
	struct igb_ring *ring;
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->tx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		/* For 82575, context index must be unique per ring. */
		if (adapter->hw.mac.type == e1000_82575)
			ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
		adapter->tx_ring[i] = ring;
	}

	for (i = 0; i < adapter->num_rx_queues; i++) {
		ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->rx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
		/* set flag indicating ring supports SCTP checksum offload */
		if (adapter->hw.mac.type >= e1000_82576)
			ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
		adapter->rx_ring[i] = ring;
	}

	igb_cache_ring_register(adapter);

	return 0;

err:
	igb_free_queues(adapter);

	return -ENOMEM;
}

#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	u32 msixbm = 0;
	u32 ivar, index;
	int rx_queue = IGB_N0_QUEUE;
	int tx_queue = IGB_N0_QUEUE;

	if (q_vector->rx_ring)
		rx_queue = q_vector->rx_ring->reg_idx;
	if (q_vector->tx_ring)
		tx_queue = q_vector->tx_ring->reg_idx;

	switch (hw->mac.type) {
	case e1000_82575:
		/* The 82575 assigns vectors using a bitmask, which matches the
		 * bitmask for the EICR/EIMS/EIMC registers.  To assign one
		 * or more queues to a vector, we write the appropriate bits
		 * into the MSIXBM register for that vector. */
		if (rx_queue > IGB_N0_QUEUE)
			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
		if (tx_queue > IGB_N0_QUEUE)
			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
		if (!adapter->msix_entries && msix_vector == 0)
			msixbm |= E1000_EIMS_OTHER;
		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
		q_vector->eims_value = msixbm;
		break;
	case e1000_82576:
		/* 82576 uses a table-based method for assigning vectors.
		 * Each queue has a single entry in the table to which we write
		 * a vector number along with a "valid" bit.  Sadly, the layout
		 * of the table is somewhat counterintuitive. */
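		/*
		 * Layout, as implemented below: entry IVAR0[queue & 0x7]
		 * packs four queues into one register - byte 0 holds RX
		 * queue n, byte 1 TX queue n, byte 2 RX queue n + 8 and
		 * byte 3 TX queue n + 8, so e.g. rx_queue 9 lands in byte 2
		 * of entry 1.
		 */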
		if (rx_queue > IGB_N0_QUEUE) {
			index = (rx_queue & 0x7);
			ivar = array_rd32(E1000_IVAR0, index);
			if (rx_queue < 8) {
				/* vector goes into low byte of register */
				ivar = ivar & 0xFFFFFF00;
				ivar |= msix_vector | E1000_IVAR_VALID;
			} else {
				/* vector goes into third byte of register */
				ivar = ivar & 0xFF00FFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		if (tx_queue > IGB_N0_QUEUE) {
			index = (tx_queue & 0x7);
			ivar = array_rd32(E1000_IVAR0, index);
			if (tx_queue < 8) {
				/* vector goes into second byte of register */
				ivar = ivar & 0xFFFF00FF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
			} else {
				/* vector goes into high byte of register */
				ivar = ivar & 0x00FFFFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		q_vector->eims_value = 1 << msix_vector;
		break;
	case e1000_82580:
	case e1000_i350:
		/* 82580 uses the same table-based approach as 82576 but has
		 * fewer entries; as a result we carry over for queues greater
		 * than 4. */
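		/*
		 * Layout, as implemented below: IVAR0[queue >> 1] packs two
		 * queues per entry - even queues use byte 0 (RX) and byte 1
		 * (TX), odd queues use byte 2 (RX) and byte 3 (TX).
		 */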
		if (rx_queue > IGB_N0_QUEUE) {
			index = (rx_queue >> 1);
			ivar = array_rd32(E1000_IVAR0, index);
			if (rx_queue & 0x1) {
				/* vector goes into third byte of register */
				ivar = ivar & 0xFF00FFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
			} else {
				/* vector goes into low byte of register */
				ivar = ivar & 0xFFFFFF00;
				ivar |= msix_vector | E1000_IVAR_VALID;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		if (tx_queue > IGB_N0_QUEUE) {
			index = (tx_queue >> 1);
			ivar = array_rd32(E1000_IVAR0, index);
			if (tx_queue & 0x1) {
				/* vector goes into high byte of register */
				ivar = ivar & 0x00FFFFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
			} else {
				/* vector goes into second byte of register */
				ivar = ivar & 0xFFFF00FF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		q_vector->eims_value = 1 << msix_vector;
		break;
	default:
		BUG();
		break;
	}

	/* add q_vector eims value to global eims_enable_mask */
	adapter->eims_enable_mask |= q_vector->eims_value;

	/* configure q_vector to set itr on first interrupt */
	q_vector->set_itr = 1;
}

/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
	u32 tmp;
	int i, vector = 0;
	struct e1000_hw *hw = &adapter->hw;

	adapter->eims_enable_mask = 0;

	/* set vector for other causes, i.e. link changes */
	switch (hw->mac.type) {
	case e1000_82575:
		tmp = rd32(E1000_CTRL_EXT);
		/* enable MSI-X PBA support */
		tmp |= E1000_CTRL_EXT_PBA_CLR;

		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;

		wr32(E1000_CTRL_EXT, tmp);

		/* enable msix_other interrupt */
		array_wr32(E1000_MSIXBM(0), vector++,
			   E1000_EIMS_OTHER);
		adapter->eims_other = E1000_EIMS_OTHER;

		break;

	case e1000_82576:
	case e1000_82580:
	case e1000_i350:
		/* Turn on MSI-X capability first, or our settings
		 * won't stick.  And it will take days to debug. */
		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
		     E1000_GPIE_PBA | E1000_GPIE_EIAME |
		     E1000_GPIE_NSICR);

		/* enable msix_other interrupt */
		adapter->eims_other = 1 << vector;
		tmp = (vector++ | E1000_IVAR_VALID) << 8;

		wr32(E1000_IVAR_MISC, tmp);
		break;
	default:
		/* do nothing, since nothing else supports MSI-X */
		break;
	} /* switch (hw->mac.type) */

	adapter->eims_enable_mask |= adapter->eims_other;

	for (i = 0; i < adapter->num_q_vectors; i++)
		igb_assign_vector(adapter->q_vector[i], vector++);

	wrfl();
}

/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	int i, err = 0, vector = 0;

	err = request_irq(adapter->msix_entries[vector].vector,
			  igb_msix_other, 0, netdev->name, adapter);
	if (err)
		goto out;
	vector++;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];

		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

		if (q_vector->rx_ring && q_vector->tx_ring)
			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
				q_vector->rx_ring->queue_index);
		else if (q_vector->tx_ring)
			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
				q_vector->tx_ring->queue_index);
		else if (q_vector->rx_ring)
			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
				q_vector->rx_ring->queue_index);
		else
			sprintf(q_vector->name, "%s-unused", netdev->name);

		err = request_irq(adapter->msix_entries[vector].vector,
				  igb_msix_ring, 0, q_vector->name,
				  q_vector);
		if (err)
			goto out;
		vector++;
	}

	igb_configure_msix(adapter);
	return 0;
out:
	return err;
}

static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
		pci_disable_msi(adapter->pdev);
	}
}

/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
		adapter->q_vector[v_idx] = NULL;
		if (!q_vector)
			continue;
		netif_napi_del(&q_vector->napi);
		kfree(q_vector);
	}
	adapter->num_q_vectors = 0;
}

/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}

/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
	int err;
	int numvecs, i;

	/* Number of supported queues. */
	adapter->num_rx_queues = adapter->rss_queues;
	if (adapter->vfs_allocated_count)
		adapter->num_tx_queues = 1;
	else
		adapter->num_tx_queues = adapter->rss_queues;

	/* start with one vector for every rx queue */
	numvecs = adapter->num_rx_queues;

	/* if tx handler is separate add 1 for every tx queue */
	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
		numvecs += adapter->num_tx_queues;

	/* store the number of vectors reserved for queues */
	adapter->num_q_vectors = numvecs;

	/* add 1 vector for link status interrupts */
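	/* For illustration: with rss_queues = 4 and IGB_FLAG_QUEUE_PAIRS set
	 * this requests 4 queue vectors plus the link vector below, i.e. 5
	 * MSI-X vectors in total; with unpaired queues it would be
	 * 4 + 4 + 1 = 9. */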
	numvecs++;
	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
					GFP_KERNEL);
	if (!adapter->msix_entries)
		goto msi_only;

	for (i = 0; i < numvecs; i++)
		adapter->msix_entries[i].entry = i;

	err = pci_enable_msix(adapter->pdev,
			      adapter->msix_entries,
			      numvecs);
	if (err == 0)
		goto out;

	igb_reset_interrupt_capability(adapter);

	/* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
	/* disable SR-IOV for non MSI-X configurations */
	if (adapter->vf_data) {
		struct e1000_hw *hw = &adapter->hw;
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(adapter->pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		wrfl();
		msleep(100);
		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
	}
#endif
	adapter->vfs_allocated_count = 0;
	adapter->rss_queues = 1;
	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_q_vectors = 1;
	if (!pci_enable_msi(adapter->pdev))
		adapter->flags |= IGB_FLAG_HAS_MSI;
out:
	/* Notify the stack of the (possibly) reduced queue counts. */
	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
	return netif_set_real_num_rx_queues(adapter->netdev,
					    adapter->num_rx_queues);
}

/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
	struct igb_q_vector *q_vector;
	struct e1000_hw *hw = &adapter->hw;
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
		if (!q_vector)
			goto err_out;
		q_vector->adapter = adapter;
		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
		q_vector->itr_val = IGB_START_ITR;
		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
		adapter->q_vector[v_idx] = q_vector;
	}

	return 0;

err_out:
	igb_free_q_vectors(adapter);
	return -ENOMEM;
}

static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
				      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->rx_ring = adapter->rx_ring[ring_idx];
	q_vector->rx_ring->q_vector = q_vector;
	q_vector->itr_val = adapter->rx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
				      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->tx_ring = adapter->tx_ring[ring_idx];
	q_vector->tx_ring->q_vector = q_vector;
	q_vector->itr_val = adapter->tx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
	int i;
	int v_idx = 0;

	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
	    (adapter->num_q_vectors < adapter->num_tx_queues))
		return -ENOMEM;

	if (adapter->num_q_vectors >=
	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
		for (i = 0; i < adapter->num_rx_queues; i++)
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		for (i = 0; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	} else {
		for (i = 0; i < adapter->num_rx_queues; i++) {
			if (i < adapter->num_tx_queues)
				igb_map_tx_ring_to_vector(adapter, i, v_idx);
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		}
		for (; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	}
	return 0;
}

/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int err;

	err = igb_set_interrupt_capability(adapter);
	if (err)
		return err;

	err = igb_alloc_q_vectors(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
		goto err_alloc_q_vectors;
	}

	err = igb_alloc_queues(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		goto err_alloc_queues;
	}

	err = igb_map_ring_to_vector(adapter);
	if (err) {
		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
		goto err_map_queues;
	}

	return 0;
err_map_queues:
	igb_free_queues(adapter);
err_alloc_queues:
	igb_free_q_vectors(adapter);
err_alloc_q_vectors:
	igb_reset_interrupt_capability(adapter);
	return err;
}

/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	int err = 0;

	if (adapter->msix_entries) {
		err = igb_request_msix(adapter);
		if (!err)
			goto request_done;
		/* fall back to MSI */
		igb_clear_interrupt_scheme(adapter);
		if (!pci_enable_msi(adapter->pdev))
			adapter->flags |= IGB_FLAG_HAS_MSI;
		igb_free_all_tx_resources(adapter);
		igb_free_all_rx_resources(adapter);
		adapter->num_tx_queues = 1;
		adapter->num_rx_queues = 1;
		adapter->num_q_vectors = 1;
		err = igb_alloc_q_vectors(adapter);
		if (err) {
			dev_err(&pdev->dev,
				"Unable to allocate memory for vectors\n");
			goto request_done;
		}
		err = igb_alloc_queues(adapter);
		if (err) {
			dev_err(&pdev->dev,
				"Unable to allocate memory for queues\n");
			igb_free_q_vectors(adapter);
			goto request_done;
		}
		igb_setup_all_tx_resources(adapter);
		igb_setup_all_rx_resources(adapter);
	} else {
		igb_assign_vector(adapter->q_vector[0], 0);
	}

	if (adapter->flags & IGB_FLAG_HAS_MSI) {
		err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
				  netdev->name, adapter);
		if (!err)
			goto request_done;

		/* fall back to legacy interrupts */
		igb_reset_interrupt_capability(adapter);
		adapter->flags &= ~IGB_FLAG_HAS_MSI;
	}

	err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
			  netdev->name, adapter);

	if (err)
		dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
			err);

request_done:
	return err;
}

static void igb_free_irq(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		int vector = 0, i;

		free_irq(adapter->msix_entries[vector++].vector, adapter);

		for (i = 0; i < adapter->num_q_vectors; i++) {
			struct igb_q_vector *q_vector = adapter->q_vector[i];
			free_irq(adapter->msix_entries[vector++].vector,
				 q_vector);
		}
	} else {
		free_irq(adapter->pdev->irq, adapter);
	}
}

/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	/*
	 * we need to be careful when disabling interrupts.  The VFs are also
	 * mapped into these registers and so clearing the bits can cause
	 * issues on the VF drivers so we only need to clear what we set
	 */
	if (adapter->msix_entries) {
		u32 regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
		wr32(E1000_EIMC, adapter->eims_enable_mask);
		regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
	}

	wr32(E1000_IAM, 0);
	wr32(E1000_IMC, ~0);
	wrfl();
	if (adapter->msix_entries) {
		int i;
		for (i = 0; i < adapter->num_q_vectors; i++)
			synchronize_irq(adapter->msix_entries[i].vector);
	} else {
		synchronize_irq(adapter->pdev->irq);
	}
}

/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (adapter->msix_entries) {
		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
		u32 regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
		regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
		wr32(E1000_EIMS, adapter->eims_enable_mask);
		if (adapter->vfs_allocated_count) {
			wr32(E1000_MBVFIMR, 0xFF);
			ims |= E1000_IMS_VMMB;
		}
		if (adapter->hw.mac.type == e1000_82580)
			ims |= E1000_IMS_DRSTA;

		wr32(E1000_IMS, ims);
	} else {
		wr32(E1000_IMS, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
		wr32(E1000_IAM, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
	}
}

static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 vid = adapter->hw.mng_cookie.vlan_id;
	u16 old_vid = adapter->mng_vlan_id;

	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
		/* add VID to filter table */
		igb_vfta_set(hw, vid, true);
		adapter->mng_vlan_id = vid;
	} else {
		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
	}

	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
	    (vid != old_vid) &&
	    !test_bit(old_vid, adapter->active_vlans)) {
		/* remove VID from filter table */
		igb_vfta_set(hw, old_vid, false);
	}
}

/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware take over control of h/w */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
	     ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware know the driver has taken over */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
	     ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int i;

	igb_get_hw_control(adapter);
	igb_set_rx_mode(netdev);

	igb_restore_vlan(adapter);

	igb_setup_tctl(adapter);
	igb_setup_mrqc(adapter);
	igb_setup_rctl(adapter);

	igb_configure_tx(adapter);
	igb_configure_rx(adapter);

	igb_rx_fifo_flush_82575(&adapter->hw);

	/* call igb_desc_unused which always leaves
	 * at least 1 descriptor unused to make sure
	 * next_to_use != next_to_clean */
	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igb_ring *ring = adapter->rx_ring[i];
		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
	}
}

/**
 * igb_power_up_link - Power up the phy/serdes link
 * @adapter: address of board private structure
 **/
void igb_power_up_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_up_phy_copper(&adapter->hw);
	else
		igb_power_up_serdes_link_82575(&adapter->hw);
}

/**
 * igb_power_down_link - Power down the phy/serdes link
 * @adapter: address of board private structure
 */
static void igb_power_down_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_down_phy_copper_82575(&adapter->hw);
	else
		igb_shutdown_serdes_link_82575(&adapter->hw);
}

/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* hardware has been reset, we need to reload some things */
	igb_configure(adapter);

	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_enable(&q_vector->napi);
	}
	if (adapter->msix_entries)
		igb_configure_msix(adapter);
	else
		igb_assign_vector(adapter->q_vector[0], 0);

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(adapter->netdev);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return 0;
}

void igb_down(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl, rctl;
	int i;

	/* signal that we're down so the interrupt handler does not
	 * reschedule our watchdog timer */
	set_bit(__IGB_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = rd32(E1000_RCTL);
	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

	netif_tx_stop_all_queues(netdev);

	/* disable transmits in the hardware */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_EN;
	wr32(E1000_TCTL, tctl);
	/* flush both disables and wait for them to finish */
	wrfl();
	msleep(10);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_disable(&q_vector->napi);
	}

	igb_irq_disable(adapter);

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	netif_carrier_off(netdev);

	/* record the stats before reset */
	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	spin_unlock(&adapter->stats64_lock);

	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	if (!pci_channel_offline(adapter->pdev))
		igb_reset(adapter);
	igb_clean_all_tx_rings(adapter);
	igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA
	/* since we reset the hardware DCA settings were cleared */
	igb_setup_dca(adapter);
#endif
}

void igb_reinit_locked(struct igb_adapter *adapter)
{
	WARN_ON(in_interrupt());
	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);
	igb_down(adapter);
	igb_up(adapter);
	clear_bit(__IGB_RESETTING, &adapter->state);
}

void igb_reset(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_mac_info *mac = &hw->mac;
	struct e1000_fc_info *fc = &hw->fc;
	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
	u16 hwm;

	/* Repartition Pba for greater than 9k mtu
	 * To take effect CTRL.RST is required.
	 */
	switch (mac->type) {
	case e1000_i350:
	case e1000_82580:
		pba = rd32(E1000_RXPBS);
		pba = igb_rxpbs_adjust_82580(pba);
		break;
	case e1000_82576:
		pba = rd32(E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;
	case e1000_82575:
	default:
		pba = E1000_PBA_34K;
		break;
	}

	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
	    (mac->type < e1000_82576)) {
		/* adjust PBA for jumbo frames */
		wr32(E1000_PBA, pba);

		/* To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
		 * expressed in KB. */
		pba = rd32(E1000_PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
		/* the tx fifo also stores 16 bytes of information about the tx
		 * but don't include ethernet FCS because hardware appends it */
		min_tx_space = (adapter->max_frame_size +
				sizeof(union e1000_adv_tx_desc) -
				ETH_FCS_LEN) * 2;
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;
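		/* For illustration, with a 1522-byte max frame (and 16-byte
		 * advanced descriptors): min_tx_space =
		 * ALIGN((1522 + 16 - 4) * 2, 1024) >> 10 = 3 KB and
		 * min_rx_space = ALIGN(1522, 1024) >> 10 = 2 KB. */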
		/* If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
		 * allocation, take space away from current Rx allocation */
		if (tx_space < min_tx_space &&
		    ((min_tx_space - tx_space) < pba)) {
			pba = pba - (min_tx_space - tx_space);

			/* if short on rx space, rx wins and must trump tx
			 * adjustment */
			if (pba < min_rx_space)
				pba = min_rx_space;
		}
		wr32(E1000_PBA, pba);
	}

	/* flow control settings */
	/* The high water mark must be low enough to fit one full frame
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
	 * - the full Rx FIFO size minus one full frame */
	hwm = min(((pba << 10) * 9 / 10),
		  ((pba << 10) - 2 * adapter->max_frame_size));

	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
	fc->low_water = fc->high_water - 16;
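	/* For illustration: with pba = 34 (E1000_PBA_34K) and a 1522-byte
	 * max frame, hwm = min(34816 * 9 / 10, 34816 - 3044) = 31334, so
	 * high_water = 31334 & 0xFFF0 = 31328 and low_water = 31312. */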
	fc->pause_time = 0xFFFF;
	fc->send_xon = 1;
	fc->current_mode = fc->requested_mode;

	/* disable receive for all VFs and wait one second */
	if (adapter->vfs_allocated_count) {
		int i;
		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;

		/* ping all the active vfs to let them know we are going down */
		igb_ping_all_vfs(adapter);

		/* disable transmits and receives */
		wr32(E1000_VFRE, 0);
		wr32(E1000_VFTE, 0);
	}

	/* Allow time for pending master requests to run */
	hw->mac.ops.reset_hw(hw);
	wr32(E1000_WUC, 0);

	if (hw->mac.ops.init_hw(hw))
		dev_err(&pdev->dev, "Hardware Error\n");
	if (hw->mac.type > e1000_82580) {
		if (adapter->flags & IGB_FLAG_DMAC) {
			u32 reg;

			/*
			 * DMA Coalescing high water mark needs to be higher
			 * than the Rx threshold.  The Rx threshold is
			 * currently pba - 6, so we should use a high water
			 * mark of pba - 4.
			 */
			hwm = (pba - 4) << 10;

			reg = (((pba - 6) << E1000_DMACR_DMACTHR_SHIFT)
			       & E1000_DMACR_DMACTHR_MASK);

			/* transition to L0x or L1 if available.. */
			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);

			/* watchdog timer = +-1000 usec in 32 usec intervals */
			reg |= (1000 >> 5);
			wr32(E1000_DMACR, reg);

			/* no lower threshold to disable coalescing (smart fifo)
			 * -UTRESH=0 */
			wr32(E1000_DMCRTRH, 0);

			/* set the flow control high water mark computed above
			 * (pba - 4 KB) */
			wr32(E1000_FCRTC, hwm);

			/*
			 * This sets the time to wait before requesting
			 * transition to low power state to the number of usecs
			 * needed to receive one 512-byte frame at gigabit
			 * line rate.
			 */
			reg = rd32(E1000_DMCTLX);
			reg |= IGB_DMCTLX_DCFLUSH_DIS;

			/* Delay 255 usec before entering Lx state. */
			reg |= 0xFF;
			wr32(E1000_DMCTLX, reg);

			/* free space in Tx packet buffer to wake from DMAC */
			wr32(E1000_DMCTXTH,
			     (IGB_MIN_TXPBSIZE -
			      (IGB_TX_BUF_4096 + adapter->max_frame_size))
			     >> 6);

			/* make low power state decision controlled by DMAC */
			reg = rd32(E1000_PCIEMISC);
			reg |= E1000_PCIEMISC_LX_DECISION;
			wr32(E1000_PCIEMISC, reg);
		} /* end if IGB_FLAG_DMAC set */
	}
	if (hw->mac.type == e1000_82580) {
		u32 reg = rd32(E1000_PCIEMISC);
		wr32(E1000_PCIEMISC,
		     reg & ~E1000_PCIEMISC_LX_DECISION);
	}
	if (!netif_running(adapter->netdev))
		igb_power_down_link(adapter);

	igb_update_mng_vlan(adapter);

	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

	igb_get_phy_info(hw);
}

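/*
 * For illustration: RX and TX VLAN acceleration are coupled below, so a
 * request such as "ethtool -K eth0 rxvlan off" will toggle txvlan as well,
 * since the hardware cannot change the two independently.
 */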
static u32 igb_fix_features(struct net_device *netdev, u32 features)
{
	/*
	 * Since there is no support for separate rx/tx vlan accel
	 * enable/disable make sure tx flag is always in same state as rx.
	 */
	if (features & NETIF_F_HW_VLAN_RX)
		features |= NETIF_F_HW_VLAN_TX;
	else
		features &= ~NETIF_F_HW_VLAN_TX;

	return features;
}

static int igb_set_features(struct net_device *netdev, u32 features)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	int i;
	u32 changed = netdev->features ^ features;

	for (i = 0; i < adapter->num_rx_queues; i++) {
		if (features & NETIF_F_RXCSUM)
			adapter->rx_ring[i]->flags |= IGB_RING_FLAG_RX_CSUM;
		else
			adapter->rx_ring[i]->flags &= ~IGB_RING_FLAG_RX_CSUM;
	}

	if (changed & NETIF_F_HW_VLAN_RX)
		igb_vlan_mode(netdev, features);

	return 0;
}

static const struct net_device_ops igb_netdev_ops = {
	.ndo_open		= igb_open,
	.ndo_stop		= igb_close,
	.ndo_start_xmit		= igb_xmit_frame,
	.ndo_get_stats64	= igb_get_stats64,
	.ndo_set_rx_mode	= igb_set_rx_mode,
	.ndo_set_mac_address	= igb_set_mac,
	.ndo_change_mtu		= igb_change_mtu,
	.ndo_do_ioctl		= igb_ioctl,
	.ndo_tx_timeout		= igb_tx_timeout,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
	.ndo_get_vf_config	= igb_ndo_get_vf_config,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= igb_netpoll,
#endif
	.ndo_fix_features	= igb_fix_features,
	.ndo_set_features	= igb_set_features,
};

/**
 * igb_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @ent: entry in igb_pci_tbl
 *
 * Returns 0 on success, negative on failure
 *
 * igb_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring of the adapter private structure,
 * and a hardware reset occur.
 **/
static int __devinit igb_probe(struct pci_dev *pdev,
			       const struct pci_device_id *ent)
{
	struct net_device *netdev;
	struct igb_adapter *adapter;
	struct e1000_hw *hw;
	u16 eeprom_data = 0;
	s32 ret_val;
	static int global_quad_port_a; /* global quad port a indication */
	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
	unsigned long mmio_start, mmio_len;
	int err, pci_using_dac;
	u16 eeprom_apme_mask = IGB_EEPROM_APME;
	u8 part_str[E1000_PBANUM_LENGTH];

	/* Catch broken hardware that put the wrong VF device ID in
	 * the PCIe SR-IOV capability.
	 */
	if (pdev->is_virtfn) {
		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
		     pci_name(pdev), pdev->vendor, pdev->device);
		return -EINVAL;
	}

	err = pci_enable_device_mem(pdev);
	if (err)
		return err;

	pci_using_dac = 0;
	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
	if (!err) {
		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
		if (!err)
			pci_using_dac = 1;
	} else {
		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
		if (err) {
			err = dma_set_coherent_mask(&pdev->dev,
						    DMA_BIT_MASK(32));
			if (err) {
				dev_err(&pdev->dev, "No usable DMA "
					"configuration, aborting\n");
				goto err_dma;
			}
		}
	}

	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
					   IORESOURCE_MEM),
					   igb_driver_name);
	if (err)
		goto err_pci_reg;

	pci_enable_pcie_error_reporting(pdev);

	pci_set_master(pdev);
	pci_save_state(pdev);

	err = -ENOMEM;
	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
				   IGB_MAX_TX_QUEUES);
	if (!netdev)
		goto err_alloc_etherdev;

	SET_NETDEV_DEV(netdev, &pdev->dev);

	pci_set_drvdata(pdev, netdev);
	adapter = netdev_priv(netdev);
	adapter->netdev = netdev;
	adapter->pdev = pdev;
	hw = &adapter->hw;
	hw->back = adapter;
	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;

	mmio_start = pci_resource_start(pdev, 0);
	mmio_len = pci_resource_len(pdev, 0);

	err = -EIO;
	hw->hw_addr = ioremap(mmio_start, mmio_len);
	if (!hw->hw_addr)
		goto err_ioremap;

	netdev->netdev_ops = &igb_netdev_ops;
	igb_set_ethtool_ops(netdev);
	netdev->watchdog_timeo = 5 * HZ;

	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);

	netdev->mem_start = mmio_start;
	netdev->mem_end = mmio_start + mmio_len;

	/* PCI config space info */
	hw->vendor_id = pdev->vendor;
	hw->device_id = pdev->device;
	hw->revision_id = pdev->revision;
	hw->subsystem_vendor_id = pdev->subsystem_vendor;
	hw->subsystem_device_id = pdev->subsystem_device;

	/* Copy the default MAC, PHY and NVM function pointers */
	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
	/* Initialize skew-specific constants */
	err = ei->get_invariants(hw);
	if (err)
		goto err_sw_init;

	/* setup the private structure */
	err = igb_sw_init(adapter);
	if (err)
		goto err_sw_init;

	igb_get_bus_info_pcie(hw);

	hw->phy.autoneg_wait_to_complete = false;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = false;
		hw->phy.ms_type = e1000_ms_hw_default;
	}

	if (igb_check_reset_block(hw))
		dev_info(&pdev->dev,
			 "PHY reset is blocked due to SOL/IDER session.\n");

	netdev->hw_features = NETIF_F_SG |
			      NETIF_F_IP_CSUM |
			      NETIF_F_IPV6_CSUM |
			      NETIF_F_TSO |
			      NETIF_F_TSO6 |
			      NETIF_F_RXCSUM |
			      NETIF_F_HW_VLAN_RX;

	netdev->features = netdev->hw_features |
			   NETIF_F_HW_VLAN_TX |
			   NETIF_F_HW_VLAN_FILTER;

	netdev->vlan_features |= NETIF_F_TSO;
	netdev->vlan_features |= NETIF_F_TSO6;
	netdev->vlan_features |= NETIF_F_IP_CSUM;
	netdev->vlan_features |= NETIF_F_IPV6_CSUM;
	netdev->vlan_features |= NETIF_F_SG;

	if (pci_using_dac) {
		netdev->features |= NETIF_F_HIGHDMA;
		netdev->vlan_features |= NETIF_F_HIGHDMA;
	}

	if (hw->mac.type >= e1000_82576) {
		netdev->hw_features |= NETIF_F_SCTP_CSUM;
		netdev->features |= NETIF_F_SCTP_CSUM;
	}

	netdev->priv_flags |= IFF_UNICAST_FLT;

	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);

	/* before reading the NVM, reset the controller to put the device in a
	 * known good starting state */
	hw->mac.ops.reset_hw(hw);

	/* make sure the NVM is good */
	if (hw->nvm.ops.validate(hw) < 0) {
		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
		err = -EIO;
		goto err_eeprom;
	}

	/* copy the MAC address out of the NVM */
	if (hw->mac.ops.read_mac_addr(hw))
		dev_err(&pdev->dev, "NVM Read Error\n");

	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);

	if (!is_valid_ether_addr(netdev->perm_addr)) {
		dev_err(&pdev->dev, "Invalid MAC Address\n");
		err = -EIO;
		goto err_eeprom;
	}

	setup_timer(&adapter->watchdog_timer, igb_watchdog,
		    (unsigned long) adapter);
	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
		    (unsigned long) adapter);

	INIT_WORK(&adapter->reset_task, igb_reset_task);
	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);

	/* Initialize link properties that are user-changeable */
	adapter->fc_autoneg = true;
	hw->mac.autoneg = true;
	hw->phy.autoneg_advertised = 0x2f;
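	/* 0x2f advertises 10/100 half and full duplex plus 1000 full duplex:
	 * the ADVERTISE_* bit mask 0x01 | 0x02 | 0x04 | 0x08 | 0x20. */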
2014 hw->fc.requested_mode = e1000_fc_default;
2015 hw->fc.current_mode = e1000_fc_default;
2017 igb_validate_mdi_setting(hw);
2019 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
2020 * enable the ACPI Magic Packet filter
2023 if (hw->bus.func == 0)
2024 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2025 else if (hw->mac.type >= e1000_82580)
2026 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2027 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2029 else if (hw->bus.func == 1)
2030 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2032 if (eeprom_data & eeprom_apme_mask)
2033 adapter->eeprom_wol |= E1000_WUFC_MAG;
2035 /* now that we have the eeprom settings, apply the special cases where
2036 * the eeprom may be wrong or the board simply won't support wake on
2037 * lan on a particular port */
2038 switch (pdev->device) {
2039 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2040 adapter->eeprom_wol = 0;
2042 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2043 case E1000_DEV_ID_82576_FIBER:
2044 case E1000_DEV_ID_82576_SERDES:
2045 /* Wake events only supported on port A for dual fiber
2046 * regardless of eeprom setting */
2047 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2048 adapter->eeprom_wol = 0;
2050 case E1000_DEV_ID_82576_QUAD_COPPER:
2051 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2052 /* if quad port adapter, disable WoL on all but port A */
2053 if (global_quad_port_a != 0)
2054 adapter->eeprom_wol = 0;
2056 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2057 /* Reset for multiple quad port adapters */
2058 if (++global_quad_port_a == 4)
2059 global_quad_port_a = 0;
2063 /* initialize the wol settings based on the eeprom settings */
2064 adapter->wol = adapter->eeprom_wol;
2065 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2067 /* reset the hardware with the new settings */
2070 /* let the f/w know that the h/w is now under the control of the driver */
2072 igb_get_hw_control(adapter);
2074 strcpy(netdev->name, "eth%d");
2075 err = register_netdev(netdev);
2079 igb_vlan_mode(netdev, netdev->features);
2081 /* carrier off reporting is important to ethtool even BEFORE open */
2082 netif_carrier_off(netdev);
2084 #ifdef CONFIG_IGB_DCA
2085 if (dca_add_requester(&pdev->dev) == 0) {
2086 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2087 dev_info(&pdev->dev, "DCA enabled\n");
2088 igb_setup_dca(adapter);
2092 /* do hw tstamp init after resetting */
2093 igb_init_hw_timer(adapter);
2095 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2096 /* print bus type/speed/width info */
2097 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2099 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2100 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2102 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2103 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2104 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2108 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2110 strcpy(part_str, "Unknown");
2111 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2112 dev_info(&pdev->dev,
2113 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2114 adapter->msix_entries ? "MSI-X" :
2115 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2116 adapter->num_rx_queues, adapter->num_tx_queues);
2117 switch (hw->mac.type) {
2119 igb_set_eee_i350(hw);
2127 igb_release_hw_control(adapter);
2129 if (!igb_check_reset_block(hw))
2132 if (hw->flash_address)
2133 iounmap(hw->flash_address);
2135 igb_clear_interrupt_scheme(adapter);
2136 iounmap(hw->hw_addr);
2138 free_netdev(netdev);
2140 pci_release_selected_regions(pdev,
2141 pci_select_bars(pdev, IORESOURCE_MEM));
2144 pci_disable_device(pdev);
2149 * igb_remove - Device Removal Routine
2150 * @pdev: PCI device information struct
2152 * igb_remove is called by the PCI subsystem to alert the driver
2153 that it should release a PCI device. This could be caused by a
2154 * Hot-Plug event, or because the driver is going to be removed from
2157 static void __devexit igb_remove(struct pci_dev *pdev)
2159 struct net_device *netdev = pci_get_drvdata(pdev);
2160 struct igb_adapter *adapter = netdev_priv(netdev);
2161 struct e1000_hw *hw = &adapter->hw;
2164 * The watchdog timer may be rescheduled, so explicitly
2165 * disable it from being rescheduled.
2167 set_bit(__IGB_DOWN, &adapter->state);
2168 del_timer_sync(&adapter->watchdog_timer);
2169 del_timer_sync(&adapter->phy_info_timer);
2171 cancel_work_sync(&adapter->reset_task);
2172 cancel_work_sync(&adapter->watchdog_task);
2174 #ifdef CONFIG_IGB_DCA
2175 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2176 dev_info(&pdev->dev, "DCA disabled\n");
2177 dca_remove_requester(&pdev->dev);
2178 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2179 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2183 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2184 * would have already happened in close and is redundant. */
2185 igb_release_hw_control(adapter);
2187 unregister_netdev(netdev);
2189 igb_clear_interrupt_scheme(adapter);
2191 #ifdef CONFIG_PCI_IOV
2192 /* reclaim resources allocated to VFs */
2193 if (adapter->vf_data) {
2194 /* disable iov and allow time for transactions to clear */
2195 pci_disable_sriov(pdev);
2198 kfree(adapter->vf_data);
2199 adapter->vf_data = NULL;
2200 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2203 dev_info(&pdev->dev, "IOV Disabled\n");
2207 iounmap(hw->hw_addr);
2208 if (hw->flash_address)
2209 iounmap(hw->flash_address);
2210 pci_release_selected_regions(pdev,
2211 pci_select_bars(pdev, IORESOURCE_MEM));
2213 free_netdev(netdev);
2215 pci_disable_pcie_error_reporting(pdev);
2217 pci_disable_device(pdev);
2221 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2222 * @adapter: board private structure to initialize
2224 * This function initializes the vf specific data storage and then attempts to
2225 allocate the VFs. The reason for this ordering is that it is much
2226 more expensive time-wise to disable SR-IOV than it is to allocate and free
2227 * the memory for the VFs.
2229 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2231 #ifdef CONFIG_PCI_IOV
2232 struct pci_dev *pdev = adapter->pdev;
2234 if (adapter->vfs_allocated_count) {
2235 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2236 sizeof(struct vf_data_storage),
2238 /* if allocation failed then we do not support SR-IOV */
2239 if (!adapter->vf_data) {
2240 adapter->vfs_allocated_count = 0;
2241 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2246 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2247 kfree(adapter->vf_data);
2248 adapter->vf_data = NULL;
2249 #endif /* CONFIG_PCI_IOV */
2250 adapter->vfs_allocated_count = 0;
2251 #ifdef CONFIG_PCI_IOV
2253 unsigned char mac_addr[ETH_ALEN];
2255 dev_info(&pdev->dev, "%d vfs allocated\n",
2256 adapter->vfs_allocated_count);
2257 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2258 random_ether_addr(mac_addr);
2259 igb_set_vf_mac(adapter, i, mac_addr);
2261 /* DMA Coalescing is not supported in IOV mode. */
2262 if (adapter->flags & IGB_FLAG_DMAC)
2263 adapter->flags &= ~IGB_FLAG_DMAC;
2265 #endif /* CONFIG_PCI_IOV */
2270 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2271 * @adapter: board private structure to initialize
2273 * igb_init_hw_timer initializes the function pointer and values for the hw
2274 * timer found in hardware.
2276 static void igb_init_hw_timer(struct igb_adapter *adapter)
2278 struct e1000_hw *hw = &adapter->hw;
2280 switch (hw->mac.type) {
2283 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2284 adapter->cycles.read = igb_read_clock;
2285 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2286 adapter->cycles.mult = 1;
2288 * The 82580 timesync logic advances the system timer by 8ns every 8ns
2289 * and the value cannot be shifted. Instead we need to shift
2290 * the registers to generate a 64bit timer value. As a result
2291 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2292 * 24 in order to generate a larger value for synchronization.
2294 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
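/* A sketch of how the shift cancels out, assuming igb_read_clock()
 * assembles the raw counter as (SYSTIMH << (24 + 32)) | (SYSTIML << 24)
 * with the SYSTIMR residue in the low bits: the cyclecounter's right
 * shift by IGB_82580_TSYNC_SHIFT (24) then undoes that left shift, so
 * the timecounter math still yields nanoseconds while keeping 64-bit
 * headroom for synchronization.
 */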
2295 /* disable system timer temporarily by setting bit 31 */
2296 wr32(E1000_TSAUXC, 0x80000000);
2299 /* Set registers so that rollover occurs soon to test this. */
2300 wr32(E1000_SYSTIMR, 0x00000000);
2301 wr32(E1000_SYSTIML, 0x80000000);
2302 wr32(E1000_SYSTIMH, 0x000000FF);
2305 /* enable system timer by clearing bit 31 */
2306 wr32(E1000_TSAUXC, 0x0);
2309 timecounter_init(&adapter->clock,
2311 ktime_to_ns(ktime_get_real()));
2313 * Synchronize our NIC clock against system wall clock. NIC
2314 * time stamp reading requires ~3us per sample, each sample
2315 * was pretty stable even under load => only require 10
2316 * samples for each offset comparison.
2318 memset(&adapter->compare, 0, sizeof(adapter->compare));
2319 adapter->compare.source = &adapter->clock;
2320 adapter->compare.target = ktime_get_real;
2321 adapter->compare.num_samples = 10;
2322 timecompare_update(&adapter->compare, 0);
2326 * Initialize hardware timer: we keep it running just in case
2327 * some program needs it later on.
2329 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2330 adapter->cycles.read = igb_read_clock;
2331 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2332 adapter->cycles.mult = 1;
2334 * Scale the NIC clock cycle by a large factor so that
2335 * relatively small clock corrections can be added or
2336 * subtracted at each clock tick. The drawbacks of a large
2337 * factor are a) that the clock register overflows more quickly
2338 * (not such a big deal) and b) that the increment per tick has
2339 * to fit into 24 bits. As a result we need to use a shift of
2340 * 19 so we can fit a value of 16 into the TIMINCA register.
2342 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2344 (1 << E1000_TIMINCA_16NS_SHIFT) |
2345 (16 << IGB_82576_TSYNC_SHIFT));
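/* Worked numbers for the write above, assuming IGB_82576_TSYNC_SHIFT is
 * the 19 described in the comment and E1000_TIMINCA_16NS_SHIFT is 24:
 * the increment field is 16 << 19 = 0x00800000, which just fits the
 * 24-bit field, and bit 24 selects the 16ns base period.
 */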
2347 /* Set registers so that rollover occurs soon to test this. */
2348 wr32(E1000_SYSTIML, 0x00000000);
2349 wr32(E1000_SYSTIMH, 0xFF800000);
2352 timecounter_init(&adapter->clock,
2354 ktime_to_ns(ktime_get_real()));
2356 * Synchronize our NIC clock against system wall clock. NIC
2357 * time stamp reading requires ~3us per sample, each sample
2358 * was pretty stable even under load => only require 10
2359 * samples for each offset comparison.
2361 memset(&adapter->compare, 0, sizeof(adapter->compare));
2362 adapter->compare.source = &adapter->clock;
2363 adapter->compare.target = ktime_get_real;
2364 adapter->compare.num_samples = 10;
2365 timecompare_update(&adapter->compare, 0);
2368 /* 82575 does not support timesync */
2376 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2377 * @adapter: board private structure to initialize
2379 * igb_sw_init initializes the Adapter private data structure.
2380 * Fields are initialized based on PCI device information and
2381 * OS network device settings (MTU size).
2383 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2385 struct e1000_hw *hw = &adapter->hw;
2386 struct net_device *netdev = adapter->netdev;
2387 struct pci_dev *pdev = adapter->pdev;
2389 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2391 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2392 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2393 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2394 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2396 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2398 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
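/* Example with the default 1500-byte MTU, assuming the trailing term
 * above is the usual 4-byte VLAN tag allowance: max_frame_size is
 * 1500 + 14 (ETH_HLEN) + 4 (ETH_FCS_LEN) + 4 = 1522 bytes, and
 * min_frame_size is 60 (ETH_ZLEN) + 4 = 64, the smallest legal
 * Ethernet frame on the wire.
 */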
2400 spin_lock_init(&adapter->stats64_lock);
2401 #ifdef CONFIG_PCI_IOV
2402 switch (hw->mac.type) {
2406 dev_warn(&pdev->dev,
2407 "Maximum of 7 VFs per PF, using max\n");
2408 adapter->vfs_allocated_count = 7;
2410 adapter->vfs_allocated_count = max_vfs;
2415 #endif /* CONFIG_PCI_IOV */
2416 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2417 /* i350 cannot do RSS and SR-IOV at the same time */
2418 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2419 adapter->rss_queues = 1;
2422 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2423 * then we should combine the queues into a queue pair in order to
2424 * conserve interrupts due to limited supply
2426 if ((adapter->rss_queues > 4) ||
2427 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2428 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
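/* With IGB_FLAG_QUEUE_PAIRS set, each q_vector (and thus each MSI-X
 * vector) services one Tx ring and one Rx ring together. For example,
 * 8 rss_queues then need roughly 8 queue interrupts instead of 16,
 * which matters on parts with a small MSI-X vector budget.
 */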
2430 /* This call may decrease the number of queues */
2431 if (igb_init_interrupt_scheme(adapter)) {
2432 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2436 igb_probe_vfs(adapter);
2438 /* Explicitly disable IRQ since the NIC can be in any state. */
2439 igb_irq_disable(adapter);
2441 if (hw->mac.type == e1000_i350)
2442 adapter->flags &= ~IGB_FLAG_DMAC;
2444 set_bit(__IGB_DOWN, &adapter->state);
2449 * igb_open - Called when a network interface is made active
2450 * @netdev: network interface device structure
2452 * Returns 0 on success, negative value on failure
2454 * The open entry point is called when a network interface is made
2455 * active by the system (IFF_UP). At this point all resources needed
2456 * for transmit and receive operations are allocated, the interrupt
2457 * handler is registered with the OS, the watchdog timer is started,
2458 * and the stack is notified that the interface is ready.
2460 static int igb_open(struct net_device *netdev)
2462 struct igb_adapter *adapter = netdev_priv(netdev);
2463 struct e1000_hw *hw = &adapter->hw;
2467 /* disallow open during test */
2468 if (test_bit(__IGB_TESTING, &adapter->state))
2471 netif_carrier_off(netdev);
2473 /* allocate transmit descriptors */
2474 err = igb_setup_all_tx_resources(adapter);
2478 /* allocate receive descriptors */
2479 err = igb_setup_all_rx_resources(adapter);
2483 igb_power_up_link(adapter);
2485 /* before we allocate an interrupt, we must be ready to handle it.
2486 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2487 * as soon as we call pci_request_irq, so we have to set up our
2488 * clean_rx handler before we do so. */
2489 igb_configure(adapter);
2491 err = igb_request_irq(adapter);
2495 /* From here on the code is the same as igb_up() */
2496 clear_bit(__IGB_DOWN, &adapter->state);
2498 for (i = 0; i < adapter->num_q_vectors; i++) {
2499 struct igb_q_vector *q_vector = adapter->q_vector[i];
2500 napi_enable(&q_vector->napi);
2503 /* Clear any pending interrupts. */
2506 igb_irq_enable(adapter);
2508 /* notify VFs that reset has been completed */
2509 if (adapter->vfs_allocated_count) {
2510 u32 reg_data = rd32(E1000_CTRL_EXT);
2511 reg_data |= E1000_CTRL_EXT_PFRSTD;
2512 wr32(E1000_CTRL_EXT, reg_data);
2515 netif_tx_start_all_queues(netdev);
2517 /* start the watchdog. */
2518 hw->mac.get_link_status = 1;
2519 schedule_work(&adapter->watchdog_task);
2524 igb_release_hw_control(adapter);
2525 igb_power_down_link(adapter);
2526 igb_free_all_rx_resources(adapter);
2528 igb_free_all_tx_resources(adapter);
2536 * igb_close - Disables a network interface
2537 * @netdev: network interface device structure
2539 * Returns 0, this is not allowed to fail
2541 * The close entry point is called when an interface is de-activated
2542 * by the OS. The hardware is still under the driver's control, but
2543 * needs to be disabled. A global MAC reset is issued to stop the
2544 * hardware, and all transmit and receive resources are freed.
2546 static int igb_close(struct net_device *netdev)
2548 struct igb_adapter *adapter = netdev_priv(netdev);
2550 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2553 igb_free_irq(adapter);
2555 igb_free_all_tx_resources(adapter);
2556 igb_free_all_rx_resources(adapter);
2562 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2563 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2565 * Return 0 on success, negative on failure
2567 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2569 struct device *dev = tx_ring->dev;
2572 size = sizeof(struct igb_buffer) * tx_ring->count;
2573 tx_ring->buffer_info = vzalloc(size);
2574 if (!tx_ring->buffer_info)
2577 /* round up to nearest 4K */
2578 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2579 tx_ring->size = ALIGN(tx_ring->size, 4096);
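/* Sizing example: union e1000_adv_tx_desc is 16 bytes, so a 256-entry
 * ring is exactly 4096 bytes and the ALIGN() is a no-op, while a
 * 320-entry ring (5120 bytes) would be rounded up to 8192.
 */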
2581 tx_ring->desc = dma_alloc_coherent(dev,
2589 tx_ring->next_to_use = 0;
2590 tx_ring->next_to_clean = 0;
2594 vfree(tx_ring->buffer_info);
2596 "Unable to allocate memory for the transmit descriptor ring\n");
2601 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2602 * (Descriptors) for all queues
2603 * @adapter: board private structure
2605 * Return 0 on success, negative on failure
2607 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2609 struct pci_dev *pdev = adapter->pdev;
2612 for (i = 0; i < adapter->num_tx_queues; i++) {
2613 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2616 "Allocation for Tx Queue %u failed\n", i);
2617 for (i--; i >= 0; i--)
2618 igb_free_tx_resources(adapter->tx_ring[i]);
2627 * igb_setup_tctl - configure the transmit control registers
2628 * @adapter: Board private structure
2630 void igb_setup_tctl(struct igb_adapter *adapter)
2632 struct e1000_hw *hw = &adapter->hw;
2635 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2636 wr32(E1000_TXDCTL(0), 0);
2638 /* Program the Transmit Control Register */
2639 tctl = rd32(E1000_TCTL);
2640 tctl &= ~E1000_TCTL_CT;
2641 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2642 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2644 igb_config_collision_dist(hw);
2646 /* Enable transmits */
2647 tctl |= E1000_TCTL_EN;
2649 wr32(E1000_TCTL, tctl);
2653 * igb_configure_tx_ring - Configure transmit ring after Reset
2654 * @adapter: board private structure
2655 * @ring: tx ring to configure
2657 * Configure a transmit ring after a reset.
2659 void igb_configure_tx_ring(struct igb_adapter *adapter,
2660 struct igb_ring *ring)
2662 struct e1000_hw *hw = &adapter->hw;
2664 u64 tdba = ring->dma;
2665 int reg_idx = ring->reg_idx;
2667 /* disable the queue */
2668 wr32(E1000_TXDCTL(reg_idx), 0);
2672 wr32(E1000_TDLEN(reg_idx),
2673 ring->count * sizeof(union e1000_adv_tx_desc));
2674 wr32(E1000_TDBAL(reg_idx),
2675 tdba & 0x00000000ffffffffULL);
2676 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2678 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2679 wr32(E1000_TDH(reg_idx), 0);
2680 writel(0, ring->tail);
2682 txdctl |= IGB_TX_PTHRESH;
2683 txdctl |= IGB_TX_HTHRESH << 8;
2684 txdctl |= IGB_TX_WTHRESH << 16;
2686 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2687 wr32(E1000_TXDCTL(reg_idx), txdctl);
2691 * igb_configure_tx - Configure transmit Unit after Reset
2692 * @adapter: board private structure
2694 * Configure the Tx unit of the MAC after a reset.
2696 static void igb_configure_tx(struct igb_adapter *adapter)
2700 for (i = 0; i < adapter->num_tx_queues; i++)
2701 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2705 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2706 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2708 * Returns 0 on success, negative on failure
2710 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2712 struct device *dev = rx_ring->dev;
2715 size = sizeof(struct igb_buffer) * rx_ring->count;
2716 rx_ring->buffer_info = vzalloc(size);
2717 if (!rx_ring->buffer_info)
2720 desc_len = sizeof(union e1000_adv_rx_desc);
2722 /* Round up to nearest 4K */
2723 rx_ring->size = rx_ring->count * desc_len;
2724 rx_ring->size = ALIGN(rx_ring->size, 4096);
2726 rx_ring->desc = dma_alloc_coherent(dev,
2734 rx_ring->next_to_clean = 0;
2735 rx_ring->next_to_use = 0;
2740 vfree(rx_ring->buffer_info);
2741 rx_ring->buffer_info = NULL;
2742 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2748 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2749 * (Descriptors) for all queues
2750 * @adapter: board private structure
2752 * Return 0 on success, negative on failure
2754 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2756 struct pci_dev *pdev = adapter->pdev;
2759 for (i = 0; i < adapter->num_rx_queues; i++) {
2760 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2763 "Allocation for Rx Queue %u failed\n", i);
2764 for (i--; i >= 0; i--)
2765 igb_free_rx_resources(adapter->rx_ring[i]);
2774 * igb_setup_mrqc - configure the multiple receive queue control registers
2775 * @adapter: Board private structure
2777 static void igb_setup_mrqc(struct igb_adapter *adapter)
2779 struct e1000_hw *hw = &adapter->hw;
2781 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2786 static const u8 rsshash[40] = {
2787 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2788 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2789 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2790 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2792 /* Fill out hash function seeds */
2793 for (j = 0; j < 10; j++) {
2794 u32 rsskey = rsshash[(j * 4)];
2795 rsskey |= rsshash[(j * 4) + 1] << 8;
2796 rsskey |= rsshash[(j * 4) + 2] << 16;
2797 rsskey |= rsshash[(j * 4) + 3] << 24;
2798 array_wr32(E1000_RSSRK(0), j, rsskey);
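/* Packing example for j == 0: the first four key bytes above
 * (0x6d, 0x5a, 0x56, 0xda) are assembled little-endian into
 * 0x6d | 0x5a<<8 | 0x56<<16 | 0xda<<24 = 0xda565a6d before being
 * written to RSSRK[0].
 */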
2801 num_rx_queues = adapter->rss_queues;
2803 if (adapter->vfs_allocated_count) {
2804 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2805 switch (hw->mac.type) {
2822 if (hw->mac.type == e1000_82575)
2826 for (j = 0; j < (32 * 4); j++) {
2827 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2829 reta.bytes[j & 3] |= num_rx_queues << shift2;
2831 wr32(E1000_RETA(j >> 2), reta.dword);
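/* Redirection-table example, assuming the common non-VF case where
 * shift stays 0 and num_rx_queues is 4: entry j simply holds queue
 * j % 4, so the 128 entries spread flows 0,1,2,3,0,1,... with four
 * packed one-byte entries per RETA register.
 */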
2835 * Disable raw packet checksumming so that RSS hash is placed in
2836 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2837 * offloads as they are enabled by default
2839 rxcsum = rd32(E1000_RXCSUM);
2840 rxcsum |= E1000_RXCSUM_PCSD;
2842 if (adapter->hw.mac.type >= e1000_82576)
2843 /* Enable Receive Checksum Offload for SCTP */
2844 rxcsum |= E1000_RXCSUM_CRCOFL;
2846 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2847 wr32(E1000_RXCSUM, rxcsum);
2849 /* If VMDq is enabled then we set the appropriate mode for that, else
2850 * we default to RSS so that an RSS hash is calculated per packet even
2851 * if we are only using one queue */
2852 if (adapter->vfs_allocated_count) {
2853 if (hw->mac.type > e1000_82575) {
2854 /* Set the default pool for the PF's first queue */
2855 u32 vtctl = rd32(E1000_VT_CTL);
2856 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2857 E1000_VT_CTL_DISABLE_DEF_POOL);
2858 vtctl |= adapter->vfs_allocated_count <<
2859 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2860 wr32(E1000_VT_CTL, vtctl);
2862 if (adapter->rss_queues > 1)
2863 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2865 mrqc = E1000_MRQC_ENABLE_VMDQ;
2867 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2869 igb_vmm_control(adapter);
2872 * Generate RSS hash based on TCP port numbers and/or
2873 * IPv4/v6 src and dst addresses since UDP cannot be
2874 * hashed reliably due to IP fragmentation
2876 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2877 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2878 E1000_MRQC_RSS_FIELD_IPV6 |
2879 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2880 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2882 wr32(E1000_MRQC, mrqc);
2886 * igb_setup_rctl - configure the receive control registers
2887 * @adapter: Board private structure
2889 void igb_setup_rctl(struct igb_adapter *adapter)
2891 struct e1000_hw *hw = &adapter->hw;
2894 rctl = rd32(E1000_RCTL);
2896 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2897 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2899 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2900 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2903 * enable stripping of CRC. It's unlikely this will break BMC
2904 * redirection as it did with e1000. Newer features require
2905 * that the HW strips the CRC.
2907 rctl |= E1000_RCTL_SECRC;
2909 /* disable store bad packets and clear size bits. */
2910 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2912 /* enable LPE to prevent packets larger than max_frame_size */
2913 rctl |= E1000_RCTL_LPE;
2915 /* disable queue 0 to prevent tail write w/o re-config */
2916 wr32(E1000_RXDCTL(0), 0);
2918 /* Attention!!! For SR-IOV PF driver operations you must enable
2919 * queue drop for all VF and PF queues to prevent head of line blocking
2920 * if an un-trusted VF does not provide descriptors to hardware.
2922 if (adapter->vfs_allocated_count) {
2923 /* set all queue drop enable bits */
2924 wr32(E1000_QDE, ALL_QUEUES);
2927 wr32(E1000_RCTL, rctl);
2930 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2933 struct e1000_hw *hw = &adapter->hw;
2936 /* if it isn't the PF check to see if VFs are enabled and
2937 * increase the size to support vlan tags */
2938 if (vfn < adapter->vfs_allocated_count &&
2939 adapter->vf_data[vfn].vlans_enabled)
2940 size += VLAN_TAG_SIZE;
2942 vmolr = rd32(E1000_VMOLR(vfn));
2943 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2944 vmolr |= size | E1000_VMOLR_LPE;
2945 wr32(E1000_VMOLR(vfn), vmolr);
2951 * igb_rlpml_set - set maximum receive packet size
2952 * @adapter: board private structure
2954 * Configure maximum receivable packet size.
2956 static void igb_rlpml_set(struct igb_adapter *adapter)
2958 u32 max_frame_size = adapter->max_frame_size;
2959 struct e1000_hw *hw = &adapter->hw;
2960 u16 pf_id = adapter->vfs_allocated_count;
2963 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2965 * If we're in VMDQ or SR-IOV mode, then set global RLPML
2966 * to our max jumbo frame size, in case we need to enable
2967 * jumbo frames on one of the rings later.
2968 * This will not pass over-length frames into the default
2969 * queue because it's gated by the VMOLR.RLPML.
2971 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2974 wr32(E1000_RLPML, max_frame_size);
2977 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2980 struct e1000_hw *hw = &adapter->hw;
2984 * This register exists only on 82576 and newer, so on older parts
2985 * just exit and do nothing
2987 if (hw->mac.type < e1000_82576)
2990 vmolr = rd32(E1000_VMOLR(vfn));
2991 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
2993 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
2995 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2997 /* clear all bits that might not be set */
2998 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3000 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3001 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3003 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3006 if (vfn <= adapter->vfs_allocated_count)
3007 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3009 wr32(E1000_VMOLR(vfn), vmolr);
3013 * igb_configure_rx_ring - Configure a receive ring after Reset
3014 * @adapter: board private structure
3015 * @ring: receive ring to be configured
3017 * Configure the Rx unit of the MAC after a reset.
3019 void igb_configure_rx_ring(struct igb_adapter *adapter,
3020 struct igb_ring *ring)
3022 struct e1000_hw *hw = &adapter->hw;
3023 u64 rdba = ring->dma;
3024 int reg_idx = ring->reg_idx;
3025 u32 srrctl = 0, rxdctl = 0;
3027 /* disable the queue */
3028 wr32(E1000_RXDCTL(reg_idx), 0);
3030 /* Set DMA base address registers */
3031 wr32(E1000_RDBAL(reg_idx),
3032 rdba & 0x00000000ffffffffULL);
3033 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3034 wr32(E1000_RDLEN(reg_idx),
3035 ring->count * sizeof(union e1000_adv_rx_desc));
3037 /* initialize head and tail */
3038 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3039 wr32(E1000_RDH(reg_idx), 0);
3040 writel(0, ring->tail);
3042 /* set descriptor configuration */
3043 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3044 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3045 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3047 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3049 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
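/* Buffer-size arithmetic for the common 4 KiB page case above: the
 * half-page packet buffer is 2048 bytes, and assuming
 * E1000_SRRCTL_BSIZEPKT_SHIFT is 10 (1 KiB granularity) the field
 * value written is 2048 >> 10 = 2.
 */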
3050 if (hw->mac.type == e1000_82580)
3051 srrctl |= E1000_SRRCTL_TIMESTAMP;
3052 /* Only set Drop Enable if we are supporting multiple queues */
3053 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3054 srrctl |= E1000_SRRCTL_DROP_EN;
3056 wr32(E1000_SRRCTL(reg_idx), srrctl);
3058 /* set filtering for VMDQ pools */
3059 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3061 rxdctl |= IGB_RX_PTHRESH;
3062 rxdctl |= IGB_RX_HTHRESH << 8;
3063 rxdctl |= IGB_RX_WTHRESH << 16;
3065 /* enable receive descriptor fetching */
3066 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3067 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3071 * igb_configure_rx - Configure receive Unit after Reset
3072 * @adapter: board private structure
3074 * Configure the Rx unit of the MAC after a reset.
3076 static void igb_configure_rx(struct igb_adapter *adapter)
3080 /* set UTA to appropriate mode */
3081 igb_set_uta(adapter);
3083 /* set the correct pool for the PF default MAC address in entry 0 */
3084 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3085 adapter->vfs_allocated_count);
3087 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3088 * the Base and Length of the Rx Descriptor Ring */
3089 for (i = 0; i < adapter->num_rx_queues; i++)
3090 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3094 * igb_free_tx_resources - Free Tx Resources per Queue
3095 * @tx_ring: Tx descriptor ring for a specific queue
3097 * Free all transmit software resources
3099 void igb_free_tx_resources(struct igb_ring *tx_ring)
3101 igb_clean_tx_ring(tx_ring);
3103 vfree(tx_ring->buffer_info);
3104 tx_ring->buffer_info = NULL;
3106 /* if not set, then don't free */
3110 dma_free_coherent(tx_ring->dev, tx_ring->size,
3111 tx_ring->desc, tx_ring->dma);
3113 tx_ring->desc = NULL;
3117 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3118 * @adapter: board private structure
3120 * Free all transmit software resources
3122 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3126 for (i = 0; i < adapter->num_tx_queues; i++)
3127 igb_free_tx_resources(adapter->tx_ring[i]);
3130 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3131 struct igb_buffer *buffer_info)
3133 if (buffer_info->dma) {
3134 if (buffer_info->mapped_as_page)
3135 dma_unmap_page(tx_ring->dev,
3137 buffer_info->length,
3140 dma_unmap_single(tx_ring->dev,
3142 buffer_info->length,
3144 buffer_info->dma = 0;
3146 if (buffer_info->skb) {
3147 dev_kfree_skb_any(buffer_info->skb);
3148 buffer_info->skb = NULL;
3150 buffer_info->time_stamp = 0;
3151 buffer_info->length = 0;
3152 buffer_info->next_to_watch = 0;
3153 buffer_info->mapped_as_page = false;
3157 * igb_clean_tx_ring - Free Tx Buffers
3158 * @tx_ring: ring to be cleaned
3160 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3162 struct igb_buffer *buffer_info;
3166 if (!tx_ring->buffer_info)
3168 /* Free all the Tx ring sk_buffs */
3170 for (i = 0; i < tx_ring->count; i++) {
3171 buffer_info = &tx_ring->buffer_info[i];
3172 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3175 size = sizeof(struct igb_buffer) * tx_ring->count;
3176 memset(tx_ring->buffer_info, 0, size);
3178 /* Zero out the descriptor ring */
3179 memset(tx_ring->desc, 0, tx_ring->size);
3181 tx_ring->next_to_use = 0;
3182 tx_ring->next_to_clean = 0;
3186 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3187 * @adapter: board private structure
3189 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3193 for (i = 0; i < adapter->num_tx_queues; i++)
3194 igb_clean_tx_ring(adapter->tx_ring[i]);
3198 * igb_free_rx_resources - Free Rx Resources
3199 * @rx_ring: ring to clean the resources from
3201 * Free all receive software resources
3203 void igb_free_rx_resources(struct igb_ring *rx_ring)
3205 igb_clean_rx_ring(rx_ring);
3207 vfree(rx_ring->buffer_info);
3208 rx_ring->buffer_info = NULL;
3210 /* if not set, then don't free */
3214 dma_free_coherent(rx_ring->dev, rx_ring->size,
3215 rx_ring->desc, rx_ring->dma);
3217 rx_ring->desc = NULL;
3221 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3222 * @adapter: board private structure
3224 * Free all receive software resources
3226 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3230 for (i = 0; i < adapter->num_rx_queues; i++)
3231 igb_free_rx_resources(adapter->rx_ring[i]);
3235 * igb_clean_rx_ring - Free Rx Buffers per Queue
3236 * @rx_ring: ring to free buffers from
3238 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3243 if (!rx_ring->buffer_info)
3246 /* Free all the Rx ring sk_buffs */
3247 for (i = 0; i < rx_ring->count; i++) {
3248 struct igb_buffer *buffer_info = &rx_ring->buffer_info[i];
3249 if (buffer_info->dma) {
3250 dma_unmap_single(rx_ring->dev,
3254 buffer_info->dma = 0;
3257 if (buffer_info->skb) {
3258 dev_kfree_skb(buffer_info->skb);
3259 buffer_info->skb = NULL;
3261 if (buffer_info->page_dma) {
3262 dma_unmap_page(rx_ring->dev,
3263 buffer_info->page_dma,
3266 buffer_info->page_dma = 0;
3268 if (buffer_info->page) {
3269 put_page(buffer_info->page);
3270 buffer_info->page = NULL;
3271 buffer_info->page_offset = 0;
3275 size = sizeof(struct igb_buffer) * rx_ring->count;
3276 memset(rx_ring->buffer_info, 0, size);
3278 /* Zero out the descriptor ring */
3279 memset(rx_ring->desc, 0, rx_ring->size);
3281 rx_ring->next_to_clean = 0;
3282 rx_ring->next_to_use = 0;
3286 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3287 * @adapter: board private structure
3289 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3293 for (i = 0; i < adapter->num_rx_queues; i++)
3294 igb_clean_rx_ring(adapter->rx_ring[i]);
3298 * igb_set_mac - Change the Ethernet Address of the NIC
3299 * @netdev: network interface device structure
3300 * @p: pointer to an address structure
3302 * Returns 0 on success, negative on failure
3304 static int igb_set_mac(struct net_device *netdev, void *p)
3306 struct igb_adapter *adapter = netdev_priv(netdev);
3307 struct e1000_hw *hw = &adapter->hw;
3308 struct sockaddr *addr = p;
3310 if (!is_valid_ether_addr(addr->sa_data))
3311 return -EADDRNOTAVAIL;
3313 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3314 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3316 /* set the correct pool for the new PF MAC address in entry 0 */
3317 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3318 adapter->vfs_allocated_count);
3324 * igb_write_mc_addr_list - write multicast addresses to MTA
3325 * @netdev: network interface device structure
3327 * Writes multicast address list to the MTA hash table.
3328 * Returns: -ENOMEM on failure
3329 * 0 on no addresses written
3330 * X on writing X addresses to MTA
3332 static int igb_write_mc_addr_list(struct net_device *netdev)
3334 struct igb_adapter *adapter = netdev_priv(netdev);
3335 struct e1000_hw *hw = &adapter->hw;
3336 struct netdev_hw_addr *ha;
3340 if (netdev_mc_empty(netdev)) {
3341 /* nothing to program, so clear mc list */
3342 igb_update_mc_addr_list(hw, NULL, 0);
3343 igb_restore_vf_multicasts(adapter);
3347 mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3351 /* The shared function expects a packed array of only addresses. */
3353 netdev_for_each_mc_addr(ha, netdev)
3354 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3356 igb_update_mc_addr_list(hw, mta_list, i);
3359 return netdev_mc_count(netdev);
3363 * igb_write_uc_addr_list - write unicast addresses to RAR table
3364 * @netdev: network interface device structure
3366 * Writes unicast address list to the RAR table.
3367 * Returns: -ENOMEM on failure/insufficient address space
3368 * 0 on no addresses written
3369 * X on writing X addresses to the RAR table
3371 static int igb_write_uc_addr_list(struct net_device *netdev)
3373 struct igb_adapter *adapter = netdev_priv(netdev);
3374 struct e1000_hw *hw = &adapter->hw;
3375 unsigned int vfn = adapter->vfs_allocated_count;
3376 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
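/* Example budget: on a part exposing 24 RAR entries with 7 VFs
 * enabled, vfn + 1 = 8 slots are reserved (one default MAC per VF
 * plus entry 0 for the PF), leaving 24 - 8 = 16 entries for extra
 * unicast addresses from the list below.
 */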
3379 /* return ENOMEM indicating insufficient memory for addresses */
3380 if (netdev_uc_count(netdev) > rar_entries)
3383 if (!netdev_uc_empty(netdev) && rar_entries) {
3384 struct netdev_hw_addr *ha;
3386 netdev_for_each_uc_addr(ha, netdev) {
3389 igb_rar_set_qsel(adapter, ha->addr,
3395 /* write the addresses in reverse order to avoid write combining */
3396 for (; rar_entries > 0 ; rar_entries--) {
3397 wr32(E1000_RAH(rar_entries), 0);
3398 wr32(E1000_RAL(rar_entries), 0);
3406 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3407 * @netdev: network interface device structure
3409 * The set_rx_mode entry point is called whenever the unicast or multicast
3410 * address lists or the network interface flags are updated. This routine is
3411 * responsible for configuring the hardware for proper unicast, multicast,
3412 * promiscuous mode, and all-multi behavior.
3414 static void igb_set_rx_mode(struct net_device *netdev)
3416 struct igb_adapter *adapter = netdev_priv(netdev);
3417 struct e1000_hw *hw = &adapter->hw;
3418 unsigned int vfn = adapter->vfs_allocated_count;
3419 u32 rctl, vmolr = 0;
3422 /* Check for Promiscuous and All Multicast modes */
3423 rctl = rd32(E1000_RCTL);
3425 /* clear the affected bits */
3426 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3428 if (netdev->flags & IFF_PROMISC) {
3429 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3430 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3432 if (netdev->flags & IFF_ALLMULTI) {
3433 rctl |= E1000_RCTL_MPE;
3434 vmolr |= E1000_VMOLR_MPME;
3437 * Write addresses to the MTA, if the attempt fails
3438 * then we should just turn on promiscuous mode so
3439 * that we can at least receive multicast traffic
3441 count = igb_write_mc_addr_list(netdev);
3443 rctl |= E1000_RCTL_MPE;
3444 vmolr |= E1000_VMOLR_MPME;
3446 vmolr |= E1000_VMOLR_ROMPE;
3450 * Write addresses to available RAR registers, if there is not
3451 * sufficient space to store all the addresses then enable
3452 * unicast promiscuous mode
3454 count = igb_write_uc_addr_list(netdev);
3456 rctl |= E1000_RCTL_UPE;
3457 vmolr |= E1000_VMOLR_ROPE;
3459 rctl |= E1000_RCTL_VFE;
3461 wr32(E1000_RCTL, rctl);
3464 * In order to support SR-IOV and eventually VMDq it is necessary to set
3465 * the VMOLR to enable the appropriate modes. Without this workaround
3466 * we will have issues with VLAN tags not being stripped from frames
3467 * that arrive only because we are the default pool
3469 if (hw->mac.type < e1000_82576)
3472 vmolr |= rd32(E1000_VMOLR(vfn)) &
3473 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3474 wr32(E1000_VMOLR(vfn), vmolr);
3475 igb_restore_vf_multicasts(adapter);
3478 static void igb_check_wvbr(struct igb_adapter *adapter)
3480 struct e1000_hw *hw = &adapter->hw;
3483 switch (hw->mac.type) {
3486 if (!(wvbr = rd32(E1000_WVBR)))
3493 adapter->wvbr |= wvbr;
3496 #define IGB_STAGGERED_QUEUE_OFFSET 8
3498 static void igb_spoof_check(struct igb_adapter *adapter)
3505 for(j = 0; j < adapter->vfs_allocated_count; j++) {
3506 if (adapter->wvbr & (1 << j) ||
3507 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3508 dev_warn(&adapter->pdev->dev,
3509 "Spoof event(s) detected on VF %d\n", j);
3512 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3517 /* Need to wait a few seconds after link up to get diagnostic information from the phy */
3519 static void igb_update_phy_info(unsigned long data)
3521 struct igb_adapter *adapter = (struct igb_adapter *) data;
3522 igb_get_phy_info(&adapter->hw);
3526 * igb_has_link - check shared code for link and determine up/down
3527 * @adapter: pointer to driver private info
3529 bool igb_has_link(struct igb_adapter *adapter)
3531 struct e1000_hw *hw = &adapter->hw;
3532 bool link_active = false;
3535 /* get_link_status is set on LSC (link status) interrupt or
3536 * rx sequence error interrupt. get_link_status will stay
3537 * set until e1000_check_for_link establishes link
3538 * for copper adapters ONLY
3540 switch (hw->phy.media_type) {
3541 case e1000_media_type_copper:
3542 if (hw->mac.get_link_status) {
3543 ret_val = hw->mac.ops.check_for_link(hw);
3544 link_active = !hw->mac.get_link_status;
3549 case e1000_media_type_internal_serdes:
3550 ret_val = hw->mac.ops.check_for_link(hw);
3551 link_active = hw->mac.serdes_has_link;
3554 case e1000_media_type_unknown:
3561 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3564 u32 ctrl_ext, thstat;
3566 /* check for thermal sensor event on i350, copper only */
3567 if (hw->mac.type == e1000_i350) {
3568 thstat = rd32(E1000_THSTAT);
3569 ctrl_ext = rd32(E1000_CTRL_EXT);
3571 if ((hw->phy.media_type == e1000_media_type_copper) &&
3572 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3573 ret = !!(thstat & event);
3581 * igb_watchdog - Timer Call-back
3582 * @data: pointer to adapter cast into an unsigned long
3584 static void igb_watchdog(unsigned long data)
3586 struct igb_adapter *adapter = (struct igb_adapter *)data;
3587 /* Do the rest outside of interrupt context */
3588 schedule_work(&adapter->watchdog_task);
3591 static void igb_watchdog_task(struct work_struct *work)
3593 struct igb_adapter *adapter = container_of(work,
3596 struct e1000_hw *hw = &adapter->hw;
3597 struct net_device *netdev = adapter->netdev;
3601 link = igb_has_link(adapter);
3603 if (!netif_carrier_ok(netdev)) {
3605 hw->mac.ops.get_speed_and_duplex(hw,
3606 &adapter->link_speed,
3607 &adapter->link_duplex);
3609 ctrl = rd32(E1000_CTRL);
3610 /* Links status message must follow this format */
3611 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3612 "Flow Control: %s\n",
3614 adapter->link_speed,
3615 adapter->link_duplex == FULL_DUPLEX ?
3616 "Full Duplex" : "Half Duplex",
3617 ((ctrl & E1000_CTRL_TFCE) &&
3618 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3619 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3620 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3622 /* check for thermal sensor event */
3623 if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3624 printk(KERN_INFO "igb: %s The network adapter "
3625 "link speed was downshifted "
3626 "because it overheated.\n",
3630 /* adjust timeout factor according to speed/duplex */
3631 adapter->tx_timeout_factor = 1;
3632 switch (adapter->link_speed) {
3634 adapter->tx_timeout_factor = 14;
3637 /* maybe add some timeout factor ? */
3641 netif_carrier_on(netdev);
3643 igb_ping_all_vfs(adapter);
3644 igb_check_vf_rate_limit(adapter);
3646 /* link state has changed, schedule phy info update */
3647 if (!test_bit(__IGB_DOWN, &adapter->state))
3648 mod_timer(&adapter->phy_info_timer,
3649 round_jiffies(jiffies + 2 * HZ));
3652 if (netif_carrier_ok(netdev)) {
3653 adapter->link_speed = 0;
3654 adapter->link_duplex = 0;
3656 /* check for thermal sensor event */
3657 if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3658 printk(KERN_ERR "igb: %s The network adapter "
3659 "was stopped because it "
3664 /* Links status message must follow this format */
3665 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3667 netif_carrier_off(netdev);
3669 igb_ping_all_vfs(adapter);
3671 /* link state has changed, schedule phy info update */
3672 if (!test_bit(__IGB_DOWN, &adapter->state))
3673 mod_timer(&adapter->phy_info_timer,
3674 round_jiffies(jiffies + 2 * HZ));
3678 spin_lock(&adapter->stats64_lock);
3679 igb_update_stats(adapter, &adapter->stats64);
3680 spin_unlock(&adapter->stats64_lock);
3682 for (i = 0; i < adapter->num_tx_queues; i++) {
3683 struct igb_ring *tx_ring = adapter->tx_ring[i];
3684 if (!netif_carrier_ok(netdev)) {
3685 /* We've lost link, so the controller stops DMA,
3686 * but we've got queued Tx work that's never going
3687 * to get done, so reset controller to flush Tx.
3688 * (Do the reset outside of interrupt context). */
3689 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3690 adapter->tx_timeout_count++;
3691 schedule_work(&adapter->reset_task);
3692 /* return immediately since reset is imminent */
3697 /* Force detection of hung controller every watchdog period */
3698 tx_ring->detect_tx_hung = true;
3701 /* Cause software interrupt to ensure rx ring is cleaned */
3702 if (adapter->msix_entries) {
3704 for (i = 0; i < adapter->num_q_vectors; i++) {
3705 struct igb_q_vector *q_vector = adapter->q_vector[i];
3706 eics |= q_vector->eims_value;
3708 wr32(E1000_EICS, eics);
3710 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3713 igb_spoof_check(adapter);
3715 /* Reset the timer */
3716 if (!test_bit(__IGB_DOWN, &adapter->state))
3717 mod_timer(&adapter->watchdog_timer,
3718 round_jiffies(jiffies + 2 * HZ));
3721 enum latency_range {
3725 latency_invalid = 255
3729 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3731 * Stores a new ITR value based strictly on packet size. This
3732 * algorithm is less sophisticated than that used in igb_update_itr,
3733 * due to the difficulty of synchronizing statistics across multiple
3734 * receive rings. The divisors and thresholds used by this function
3735 * were determined based on theoretical maximum wire speed and testing
3736 * data, in order to minimize response time while increasing bulk throughput.
3738 * This functionality is controlled by the InterruptThrottleRate module
3739 * parameter (see igb_param.c)
3740 * NOTE: This function is called only when operating in a multiqueue
3741 * receive environment.
3742 * @q_vector: pointer to q_vector
3744 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3746 int new_val = q_vector->itr_val;
3747 int avg_wire_size = 0;
3748 struct igb_adapter *adapter = q_vector->adapter;
3749 struct igb_ring *ring;
3750 unsigned int packets;
3752 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3753 * ints/sec - ITR timer value of 120 ticks.
3755 if (adapter->link_speed != SPEED_1000) {
3760 ring = q_vector->rx_ring;
3762 packets = ACCESS_ONCE(ring->total_packets);
3765 avg_wire_size = ring->total_bytes / packets;
3768 ring = q_vector->tx_ring;
3770 packets = ACCESS_ONCE(ring->total_packets);
3773 avg_wire_size = max_t(u32, avg_wire_size,
3774 ring->total_bytes / packets);
3777 /* if avg_wire_size isn't set no work was done */
3781 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3782 avg_wire_size += 24;
3784 /* Don't starve jumbo frames */
3785 avg_wire_size = min(avg_wire_size, 3000);
3787 /* Give a little boost to mid-size frames */
3788 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3789 new_val = avg_wire_size / 3;
3791 new_val = avg_wire_size / 2;
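/* The 24 bytes added above are the per-frame wire overhead: 4-byte
 * CRC + 8-byte preamble/SFD + 12-byte inter-frame gap. Worked
 * examples of the resulting mapping: 64-byte frames give 88/2 = 44
 * (aggressive, low-latency ITR), 600-byte frames fall in the
 * mid-size boost and give 624/3 = 208, and full 1518-byte frames
 * give 1542/2 = 771 (bulk throughput).
 */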
3793 /* when in itr mode 3 do not exceed 20K ints/sec */
3794 if (adapter->rx_itr_setting == 3 && new_val < 196)
3798 if (new_val != q_vector->itr_val) {
3799 q_vector->itr_val = new_val;
3800 q_vector->set_itr = 1;
3803 if (q_vector->rx_ring) {
3804 q_vector->rx_ring->total_bytes = 0;
3805 q_vector->rx_ring->total_packets = 0;
3807 if (q_vector->tx_ring) {
3808 q_vector->tx_ring->total_bytes = 0;
3809 q_vector->tx_ring->total_packets = 0;
3814 * igb_update_itr - update the dynamic ITR value based on statistics
3815 * Stores a new ITR value based on packet and byte
3816 * counts during the last interrupt. The advantage of per interrupt
3817 * computation is faster updates and more accurate ITR for the current
3818 * traffic pattern. Constants in this function were computed
3819 * based on theoretical maximum wire speed and thresholds were set based
3820 * on testing data as well as attempting to minimize response time
3821 * while increasing bulk throughput.
3822 * This functionality is controlled by the InterruptThrottleRate module
3823 * parameter (see igb_param.c)
3824 * NOTE: These calculations are only valid when operating in a single-
3825 * queue environment.
3826 * @adapter: pointer to adapter
3827 * @itr_setting: current q_vector->itr_val
3828 * @packets: the number of packets during this measurement interval
3829 * @bytes: the number of bytes during this measurement interval
3831 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3832 int packets, int bytes)
3834 unsigned int retval = itr_setting;
3837 goto update_itr_done;
3839 switch (itr_setting) {
3840 case lowest_latency:
3841 /* handle TSO and jumbo frames */
3842 if (bytes/packets > 8000)
3843 retval = bulk_latency;
3844 else if ((packets < 5) && (bytes > 512))
3845 retval = low_latency;
3847 case low_latency: /* 50 usec aka 20000 ints/s */
3848 if (bytes > 10000) {
3849 /* this if handles the TSO accounting */
3850 if (bytes/packets > 8000) {
3851 retval = bulk_latency;
3852 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3853 retval = bulk_latency;
3854 } else if ((packets > 35)) {
3855 retval = lowest_latency;
3857 } else if (bytes/packets > 2000) {
3858 retval = bulk_latency;
3859 } else if (packets <= 2 && bytes < 512) {
3860 retval = lowest_latency;
3863 case bulk_latency: /* 250 usec aka 4000 ints/s */
3864 if (bytes > 25000) {
3866 retval = low_latency;
3867 } else if (bytes < 1500) {
3868 retval = low_latency;
3877 static void igb_set_itr(struct igb_adapter *adapter)
3879 struct igb_q_vector *q_vector = adapter->q_vector[0];
3881 u32 new_itr = q_vector->itr_val;
3883 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3884 if (adapter->link_speed != SPEED_1000) {
3890 adapter->rx_itr = igb_update_itr(adapter,
3892 q_vector->rx_ring->total_packets,
3893 q_vector->rx_ring->total_bytes);
3895 adapter->tx_itr = igb_update_itr(adapter,
3897 q_vector->tx_ring->total_packets,
3898 q_vector->tx_ring->total_bytes);
3899 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3901 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3902 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3903 current_itr = low_latency;
3905 switch (current_itr) {
3906 /* counts and packets in update_itr are dependent on these numbers */
3907 case lowest_latency:
3908 new_itr = 56; /* aka 70,000 ints/sec */
3911 new_itr = 196; /* aka 20,000 ints/sec */
3914 new_itr = 980; /* aka 4,000 ints/sec */
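/* Sanity check on the constants above, assuming each ITR count is
 * ~250ns: 56 * 250ns = 14us (~70k ints/s), 196 * 250ns = 49us
 * (~20k ints/s), and 980 * 250ns = 245us (~4k ints/s), matching the
 * usec figures quoted in the latency_range cases.
 */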
3921 q_vector->rx_ring->total_bytes = 0;
3922 q_vector->rx_ring->total_packets = 0;
3923 q_vector->tx_ring->total_bytes = 0;
3924 q_vector->tx_ring->total_packets = 0;
3926 if (new_itr != q_vector->itr_val) {
3927 /* this attempts to bias the interrupt rate towards Bulk
3928 * by adding intermediate steps when interrupt rate is increasing */
3930 new_itr = new_itr > q_vector->itr_val ?
3931 max((new_itr * q_vector->itr_val) /
3932 (new_itr + (q_vector->itr_val >> 2)),
3935 /* Don't write the value here; it resets the adapter's
3936 * internal timer, and causes us to delay far longer than
3937 * we should between interrupts. Instead, we write the ITR
3938 * value at the beginning of the next interrupt so the timing
3939 * ends up being correct.
3941 q_vector->itr_val = new_itr;
3942 q_vector->set_itr = 1;
3946 #define IGB_TX_FLAGS_CSUM 0x00000001
3947 #define IGB_TX_FLAGS_VLAN 0x00000002
3948 #define IGB_TX_FLAGS_TSO 0x00000004
3949 #define IGB_TX_FLAGS_IPV4 0x00000008
3950 #define IGB_TX_FLAGS_TSTAMP 0x00000010
3951 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
3952 #define IGB_TX_FLAGS_VLAN_SHIFT 16
3954 static inline int igb_tso(struct igb_ring *tx_ring,
3955 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3957 struct e1000_adv_tx_context_desc *context_desc;
3960 struct igb_buffer *buffer_info;
3961 u32 info = 0, tu_cmd = 0;
3965 if (skb_header_cloned(skb)) {
3966 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3971 l4len = tcp_hdrlen(skb);
3974 if (skb->protocol == htons(ETH_P_IP)) {
3975 struct iphdr *iph = ip_hdr(skb);
3978 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3982 } else if (skb_is_gso_v6(skb)) {
3983 ipv6_hdr(skb)->payload_len = 0;
3984 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3985 &ipv6_hdr(skb)->daddr,
3989 i = tx_ring->next_to_use;
3991 buffer_info = &tx_ring->buffer_info[i];
3992 context_desc = IGB_TX_CTXTDESC(tx_ring, i);
3993 /* VLAN MACLEN IPLEN */
3994 if (tx_flags & IGB_TX_FLAGS_VLAN)
3995 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3996 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3997 *hdr_len += skb_network_offset(skb);
3998 info |= skb_network_header_len(skb);
3999 *hdr_len += skb_network_header_len(skb);
4000 context_desc->vlan_macip_lens = cpu_to_le32(info);
4002 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4003 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4005 if (skb->protocol == htons(ETH_P_IP))
4006 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4007 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4009 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4012 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
4013 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
4015 /* For 82575, context index must be unique per ring. */
4016 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4017 mss_l4len_idx |= tx_ring->reg_idx << 4;
4019 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4020 context_desc->seqnum_seed = 0;
4022 buffer_info->time_stamp = jiffies;
4023 buffer_info->next_to_watch = i;
4024 buffer_info->dma = 0;
4026 if (i == tx_ring->count)
4029 tx_ring->next_to_use = i;
4034 static inline bool igb_tx_csum(struct igb_ring *tx_ring,
4035 struct sk_buff *skb, u32 tx_flags)
4037 struct e1000_adv_tx_context_desc *context_desc;
4038 struct device *dev = tx_ring->dev;
4039 struct igb_buffer *buffer_info;
4040 u32 info = 0, tu_cmd = 0;
4043 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4044 (tx_flags & IGB_TX_FLAGS_VLAN)) {
4045 i = tx_ring->next_to_use;
4046 buffer_info = &tx_ring->buffer_info[i];
4047 context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4049 if (tx_flags & IGB_TX_FLAGS_VLAN)
4050 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4052 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4053 if (skb->ip_summed == CHECKSUM_PARTIAL)
4054 info |= skb_network_header_len(skb);
4056 context_desc->vlan_macip_lens = cpu_to_le32(info);
4058 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4060 if (skb->ip_summed == CHECKSUM_PARTIAL) {
4063 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4064 const struct vlan_ethhdr *vhdr =
4065 (const struct vlan_ethhdr*)skb->data;
4067 protocol = vhdr->h_vlan_encapsulated_proto;
4069 protocol = skb->protocol;
4073 case cpu_to_be16(ETH_P_IP):
4074 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4075 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4076 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4077 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4078 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4080 case cpu_to_be16(ETH_P_IPV6):
4081 /* XXX what about other V6 headers?? */
4082 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4083 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4084 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4085 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4088 if (unlikely(net_ratelimit()))
4090 "partial checksum but proto=%x!\n",
4096 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4097 context_desc->seqnum_seed = 0;
4098 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4099 context_desc->mss_l4len_idx =
4100 cpu_to_le32(tx_ring->reg_idx << 4);
4102 buffer_info->time_stamp = jiffies;
4103 buffer_info->next_to_watch = i;
4104 buffer_info->dma = 0;
4107 if (i == tx_ring->count)
4109 tx_ring->next_to_use = i;
4116 #define IGB_MAX_TXD_PWR 16
4117 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
4119 static inline int igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb,
4122 struct igb_buffer *buffer_info;
4123 struct device *dev = tx_ring->dev;
4124 unsigned int hlen = skb_headlen(skb);
4125 unsigned int count = 0, i;
4127 u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4129 i = tx_ring->next_to_use;
4131 buffer_info = &tx_ring->buffer_info[i];
4132 BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4133 buffer_info->length = hlen;
4134 /* set time_stamp *before* dma to help avoid a possible race */
4135 buffer_info->time_stamp = jiffies;
4136 buffer_info->next_to_watch = i;
4137 buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4139 if (dma_mapping_error(dev, buffer_info->dma))
4142 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4143 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4144 unsigned int len = frag->size;
4148 if (i == tx_ring->count)
4151 buffer_info = &tx_ring->buffer_info[i];
4152 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4153 buffer_info->length = len;
4154 buffer_info->time_stamp = jiffies;
4155 buffer_info->next_to_watch = i;
4156 buffer_info->mapped_as_page = true;
4157 buffer_info->dma = skb_frag_dma_map(dev, frag, 0, len,
4159 if (dma_mapping_error(dev, buffer_info->dma))
4164 tx_ring->buffer_info[i].skb = skb;
4165 tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4166 /* multiply data chunks by size of headers */
4167 tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4168 tx_ring->buffer_info[i].gso_segs = gso_segs;
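/* bytecount example, assuming the linear area holds exactly the
 * 54 bytes of eth/IP/TCP headers (hlen): with gso_segs = 4 and
 * skb->len = 5894, the headers hit the wire three extra times, so
 * bytecount = (4 - 1) * 54 + 5894 = 6056.
 */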
4169 tx_ring->buffer_info[first].next_to_watch = i;
4174 dev_err(dev, "TX DMA map failed\n");
4176 /* clear timestamp and dma mappings for failed buffer_info mapping */
4177 buffer_info->dma = 0;
4178 buffer_info->time_stamp = 0;
4179 buffer_info->length = 0;
4180 buffer_info->next_to_watch = 0;
4181 buffer_info->mapped_as_page = false;
4183 /* clear timestamp and dma mappings for remaining portion of packet */
4188 buffer_info = &tx_ring->buffer_info[i];
4189 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4195 static inline void igb_tx_queue(struct igb_ring *tx_ring,
4196 u32 tx_flags, int count, u32 paylen,
4199 union e1000_adv_tx_desc *tx_desc;
4200 struct igb_buffer *buffer_info;
4201 u32 olinfo_status = 0, cmd_type_len;
4202 unsigned int i = tx_ring->next_to_use;
4204 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4205 E1000_ADVTXD_DCMD_DEXT);
4207 if (tx_flags & IGB_TX_FLAGS_VLAN)
4208 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4210 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4211 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4213 if (tx_flags & IGB_TX_FLAGS_TSO) {
4214 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4216 /* insert tcp checksum */
4217 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4219 /* insert ip checksum */
4220 if (tx_flags & IGB_TX_FLAGS_IPV4)
4221 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4223 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4224 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4227 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4228 (tx_flags & (IGB_TX_FLAGS_CSUM |
4230 IGB_TX_FLAGS_VLAN)))
4231 olinfo_status |= tx_ring->reg_idx << 4;
4233 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4235 do {
4236 buffer_info = &tx_ring->buffer_info[i];
4237 tx_desc = IGB_TX_DESC(tx_ring, i);
4238 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4239 tx_desc->read.cmd_type_len =
4240 cpu_to_le32(cmd_type_len | buffer_info->length);
4241 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4242 count--;
4243 i++;
4244 if (i == tx_ring->count)
4245 i = 0;
4246 } while (count > 0);
4248 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4249 /* Force memory writes to complete before letting h/w
4250 * know there are new descriptors to fetch. (Only
4251 * applicable for weak-ordered memory model archs,
4252 * such as IA-64). */
4253 wmb();
4255 tx_ring->next_to_use = i;
4256 writel(i, tx_ring->tail);
4257 /* we need this if more than one processor can write to our tail
4258 * at a time, it synchronizes IO on IA64/Altix systems */
4259 mmiowb();
4260 }
4262 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4264 struct net_device *netdev = tx_ring->netdev;
4266 netif_stop_subqueue(netdev, tx_ring->queue_index);
4268 /* Herbert's original patch had:
4269 * smp_mb__after_netif_stop_queue();
4270 * but since that doesn't exist yet, just open code it. */
4271 smp_mb();
4273 /* We need to check again in a case another CPU has just
4274 * made room available. */
4275 if (igb_desc_unused(tx_ring) < size)
4276 return -EBUSY;
4278 /* A reprieve! */
4279 netif_wake_subqueue(netdev, tx_ring->queue_index);
4281 u64_stats_update_begin(&tx_ring->tx_syncp2);
4282 tx_ring->tx_stats.restart_queue2++;
4283 u64_stats_update_end(&tx_ring->tx_syncp2);
4285 return 0;
4286 }
4288 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4289 {
4290 if (igb_desc_unused(tx_ring) >= size)
4291 return 0;
4292 return __igb_maybe_stop_tx(tx_ring, size);
4293 }
4295 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4296 struct igb_ring *tx_ring)
4297 {
4298 int tso = 0, count;
4299 u32 tx_flags = 0;
4300 u16 first;
4301 u8 hdr_len = 0;
4303 /* need: 1 descriptor per page,
4304 * + 2 desc gap to keep tail from touching head,
4305 * + 1 desc for skb->data,
4306 * + 1 desc for context descriptor,
4307 * otherwise try next time */
4308 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4309 /* this is a hard error */
4310 return NETDEV_TX_BUSY;
4311 }
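/* Editor's note: nr_frags + 4 above is the worst case spelled out in
 * the preceding comment: one descriptor per page fragment, plus one
 * for skb->data, one for the context descriptor and a two-descriptor
 * gap that keeps tail from touching head. */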
4313 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4314 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4315 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4316 }
4318 if (vlan_tx_tag_present(skb)) {
4319 tx_flags |= IGB_TX_FLAGS_VLAN;
4320 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4321 }
4323 if (skb->protocol == htons(ETH_P_IP))
4324 tx_flags |= IGB_TX_FLAGS_IPV4;
4326 first = tx_ring->next_to_use;
4327 if (skb_is_gso(skb)) {
4328 tso = igb_tso(tx_ring, skb, tx_flags, &hdr_len);
4330 if (tso < 0) {
4331 dev_kfree_skb_any(skb);
4332 return NETDEV_TX_OK;
4333 }
4334 }
4336 if (tso)
4337 tx_flags |= IGB_TX_FLAGS_TSO;
4338 else if (igb_tx_csum(tx_ring, skb, tx_flags) &&
4339 (skb->ip_summed == CHECKSUM_PARTIAL))
4340 tx_flags |= IGB_TX_FLAGS_CSUM;
4342 /*
4343 * count reflects descriptors mapped, if 0 or less then mapping error
4344 * has occurred and we need to rewind the descriptor queue
4345 */
4346 count = igb_tx_map(tx_ring, skb, first);
4347 if (!count) {
4348 dev_kfree_skb_any(skb);
4349 tx_ring->buffer_info[first].time_stamp = 0;
4350 tx_ring->next_to_use = first;
4351 return NETDEV_TX_OK;
4352 }
4354 igb_tx_queue(tx_ring, tx_flags, count, skb->len, hdr_len);
4356 /* Make sure there is space in the ring for the next send. */
4357 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4359 return NETDEV_TX_OK;
4360 }
4362 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4363 struct sk_buff *skb)
4365 unsigned int r_idx = skb->queue_mapping;
4367 if (r_idx >= adapter->num_tx_queues)
4368 r_idx = r_idx % adapter->num_tx_queues;
4370 return adapter->tx_ring[r_idx];
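/* Editor's note: the range check above makes the modulo a slow-path
 * fallback; in the common case the queue_mapping picked by the stack
 * is already a valid tx ring index and is used unchanged. */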
4373 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4374 struct net_device *netdev)
4376 struct igb_adapter *adapter = netdev_priv(netdev);
4378 if (test_bit(__IGB_DOWN, &adapter->state)) {
4379 dev_kfree_skb_any(skb);
4380 return NETDEV_TX_OK;
4383 if (skb->len <= 0) {
4384 dev_kfree_skb_any(skb);
4385 return NETDEV_TX_OK;
4389 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4390 * in order to meet this minimum size requirement.
4392 if (skb->len < 17) {
4393 if (skb_padto(skb, 17))
4394 return NETDEV_TX_OK;
4395 skb->len = 17;
4396 }
4398 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4399 }
4402 * igb_tx_timeout - Respond to a Tx Hang
4403 * @netdev: network interface device structure
4405 static void igb_tx_timeout(struct net_device *netdev)
4407 struct igb_adapter *adapter = netdev_priv(netdev);
4408 struct e1000_hw *hw = &adapter->hw;
4410 /* Do the reset outside of interrupt context */
4411 adapter->tx_timeout_count++;
4413 if (hw->mac.type == e1000_82580)
4414 hw->dev_spec._82575.global_device_reset = true;
4416 schedule_work(&adapter->reset_task);
4417 wr32(E1000_EICS,
4418 (adapter->eims_enable_mask & ~adapter->eims_other));
4419 }
4421 static void igb_reset_task(struct work_struct *work)
4423 struct igb_adapter *adapter;
4424 adapter = container_of(work, struct igb_adapter, reset_task);
4427 netdev_err(adapter->netdev, "Reset adapter\n");
4428 igb_reinit_locked(adapter);
4429 }
4431 /**
4432 * igb_get_stats64 - Get System Network Statistics
4433 * @netdev: network interface device structure
4434 * @stats: rtnl_link_stats64 pointer
4437 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4438 struct rtnl_link_stats64 *stats)
4440 struct igb_adapter *adapter = netdev_priv(netdev);
4442 spin_lock(&adapter->stats64_lock);
4443 igb_update_stats(adapter, &adapter->stats64);
4444 memcpy(stats, &adapter->stats64, sizeof(*stats));
4445 spin_unlock(&adapter->stats64_lock);
4447 return stats;
4448 }
4451 * igb_change_mtu - Change the Maximum Transfer Unit
4452 * @netdev: network interface device structure
4453 * @new_mtu: new value for maximum frame size
4455 * Returns 0 on success, negative on failure
4457 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4459 struct igb_adapter *adapter = netdev_priv(netdev);
4460 struct pci_dev *pdev = adapter->pdev;
4461 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4463 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4464 dev_err(&pdev->dev, "Invalid MTU setting\n");
4468 #define MAX_STD_JUMBO_FRAME_SIZE 9238
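/* Editor's note: 9238 = 9216 (largest supported jumbo MTU) +
 * ETH_HLEN (14) + ETH_FCS_LEN (4) + VLAN_HLEN (4), so this frame-size
 * check is consistent with the "MTU > 9216" message printed below. */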
4469 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4470 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4474 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4475 msleep(1);
4477 /* igb_down has a dependency on max_frame_size */
4478 adapter->max_frame_size = max_frame;
4480 if (netif_running(netdev))
4481 igb_down(adapter);
4483 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4484 netdev->mtu, new_mtu);
4485 netdev->mtu = new_mtu;
4487 if (netif_running(netdev))
4488 igb_up(adapter);
4489 else
4490 igb_reset(adapter);
4492 clear_bit(__IGB_RESETTING, &adapter->state);
4494 return 0;
4495 }
4498 * igb_update_stats - Update the board statistics counters
4499 * @adapter: board private structure
4502 void igb_update_stats(struct igb_adapter *adapter,
4503 struct rtnl_link_stats64 *net_stats)
4505 struct e1000_hw *hw = &adapter->hw;
4506 struct pci_dev *pdev = adapter->pdev;
4507 u32 reg, mpc;
4508 u16 phy_tmp;
4509 int i;
4510 u64 bytes, packets;
4511 unsigned int start;
4512 u64 _bytes, _packets;
4514 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4517 * Prevent stats update while adapter is being reset, or if the pci
4518 * connection is down.
4520 if (adapter->link_speed == 0)
4521 return;
4522 if (pci_channel_offline(pdev))
4523 return;
4525 bytes = 0;
4526 packets = 0;
4527 for (i = 0; i < adapter->num_rx_queues; i++) {
4528 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4529 struct igb_ring *ring = adapter->rx_ring[i];
4531 ring->rx_stats.drops += rqdpc_tmp;
4532 net_stats->rx_fifo_errors += rqdpc_tmp;
4534 do {
4535 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4536 _bytes = ring->rx_stats.bytes;
4537 _packets = ring->rx_stats.packets;
4538 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4539 bytes += _bytes;
4540 packets += _packets;
4541 }
4543 net_stats->rx_bytes = bytes;
4544 net_stats->rx_packets = packets;
4546 bytes = 0;
4547 packets = 0;
4548 for (i = 0; i < adapter->num_tx_queues; i++) {
4549 struct igb_ring *ring = adapter->tx_ring[i];
4550 do {
4551 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4552 _bytes = ring->tx_stats.bytes;
4553 _packets = ring->tx_stats.packets;
4554 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4555 bytes += _bytes;
4556 packets += _packets;
4557 }
4558 net_stats->tx_bytes = bytes;
4559 net_stats->tx_packets = packets;
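/* Editor's note: the u64_stats_fetch_begin_bh/retry pairs in the two
 * loops above give a consistent 64-bit snapshot of each ring's
 * byte/packet counters on 32-bit hosts without locking the hot path;
 * the read is simply retried if a writer raced with it. */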
4561 /* read stats registers */
4562 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4563 adapter->stats.gprc += rd32(E1000_GPRC);
4564 adapter->stats.gorc += rd32(E1000_GORCL);
4565 rd32(E1000_GORCH); /* clear GORCL */
4566 adapter->stats.bprc += rd32(E1000_BPRC);
4567 adapter->stats.mprc += rd32(E1000_MPRC);
4568 adapter->stats.roc += rd32(E1000_ROC);
4570 adapter->stats.prc64 += rd32(E1000_PRC64);
4571 adapter->stats.prc127 += rd32(E1000_PRC127);
4572 adapter->stats.prc255 += rd32(E1000_PRC255);
4573 adapter->stats.prc511 += rd32(E1000_PRC511);
4574 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4575 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4576 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4577 adapter->stats.sec += rd32(E1000_SEC);
4579 mpc = rd32(E1000_MPC);
4580 adapter->stats.mpc += mpc;
4581 net_stats->rx_fifo_errors += mpc;
4582 adapter->stats.scc += rd32(E1000_SCC);
4583 adapter->stats.ecol += rd32(E1000_ECOL);
4584 adapter->stats.mcc += rd32(E1000_MCC);
4585 adapter->stats.latecol += rd32(E1000_LATECOL);
4586 adapter->stats.dc += rd32(E1000_DC);
4587 adapter->stats.rlec += rd32(E1000_RLEC);
4588 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4589 adapter->stats.xontxc += rd32(E1000_XONTXC);
4590 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4591 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4592 adapter->stats.fcruc += rd32(E1000_FCRUC);
4593 adapter->stats.gptc += rd32(E1000_GPTC);
4594 adapter->stats.gotc += rd32(E1000_GOTCL);
4595 rd32(E1000_GOTCH); /* clear GOTCL */
4596 adapter->stats.rnbc += rd32(E1000_RNBC);
4597 adapter->stats.ruc += rd32(E1000_RUC);
4598 adapter->stats.rfc += rd32(E1000_RFC);
4599 adapter->stats.rjc += rd32(E1000_RJC);
4600 adapter->stats.tor += rd32(E1000_TORH);
4601 adapter->stats.tot += rd32(E1000_TOTH);
4602 adapter->stats.tpr += rd32(E1000_TPR);
4604 adapter->stats.ptc64 += rd32(E1000_PTC64);
4605 adapter->stats.ptc127 += rd32(E1000_PTC127);
4606 adapter->stats.ptc255 += rd32(E1000_PTC255);
4607 adapter->stats.ptc511 += rd32(E1000_PTC511);
4608 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4609 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4611 adapter->stats.mptc += rd32(E1000_MPTC);
4612 adapter->stats.bptc += rd32(E1000_BPTC);
4614 adapter->stats.tpt += rd32(E1000_TPT);
4615 adapter->stats.colc += rd32(E1000_COLC);
4617 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4618 /* read internal phy specific stats */
4619 reg = rd32(E1000_CTRL_EXT);
4620 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4621 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4622 adapter->stats.tncrs += rd32(E1000_TNCRS);
4625 adapter->stats.tsctc += rd32(E1000_TSCTC);
4626 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4628 adapter->stats.iac += rd32(E1000_IAC);
4629 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4630 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4631 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4632 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4633 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4634 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4635 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4636 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4638 /* Fill out the OS statistics structure */
4639 net_stats->multicast = adapter->stats.mprc;
4640 net_stats->collisions = adapter->stats.colc;
4644 /* RLEC on some newer hardware can be incorrect so build
4645 * our own version based on RUC and ROC */
4646 net_stats->rx_errors = adapter->stats.rxerrc +
4647 adapter->stats.crcerrs + adapter->stats.algnerrc +
4648 adapter->stats.ruc + adapter->stats.roc +
4649 adapter->stats.cexterr;
4650 net_stats->rx_length_errors = adapter->stats.ruc +
4651 adapter->stats.roc;
4652 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4653 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4654 net_stats->rx_missed_errors = adapter->stats.mpc;
4657 net_stats->tx_errors = adapter->stats.ecol +
4658 adapter->stats.latecol;
4659 net_stats->tx_aborted_errors = adapter->stats.ecol;
4660 net_stats->tx_window_errors = adapter->stats.latecol;
4661 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4663 /* Tx Dropped needs to be maintained elsewhere */
4666 if (hw->phy.media_type == e1000_media_type_copper) {
4667 if ((adapter->link_speed == SPEED_1000) &&
4668 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4669 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4670 adapter->phy_stats.idle_errors += phy_tmp;
4671 }
4672 }
4674 /* Management Stats */
4675 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4676 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4677 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4680 reg = rd32(E1000_MANC);
4681 if (reg & E1000_MANC_EN_BMC2OS) {
4682 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4683 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4684 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4685 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4686 }
4687 }
4689 static irqreturn_t igb_msix_other(int irq, void *data)
4691 struct igb_adapter *adapter = data;
4692 struct e1000_hw *hw = &adapter->hw;
4693 u32 icr = rd32(E1000_ICR);
4694 /* reading ICR causes bit 31 of EICR to be cleared */
4696 if (icr & E1000_ICR_DRSTA)
4697 schedule_work(&adapter->reset_task);
4699 if (icr & E1000_ICR_DOUTSYNC) {
4700 /* HW is reporting DMA is out of sync */
4701 adapter->stats.doosync++;
4702 /* The DMA Out of Sync is also indication of a spoof event
4703 * in IOV mode. Check the Wrong VM Behavior register to
4704 * see if it is really a spoof event. */
4705 igb_check_wvbr(adapter);
4708 /* Check for a mailbox event */
4709 if (icr & E1000_ICR_VMMB)
4710 igb_msg_task(adapter);
4712 if (icr & E1000_ICR_LSC) {
4713 hw->mac.get_link_status = 1;
4714 /* guard against interrupt when we're going down */
4715 if (!test_bit(__IGB_DOWN, &adapter->state))
4716 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4717 }
4719 if (adapter->vfs_allocated_count)
4720 wr32(E1000_IMS, E1000_IMS_LSC |
4721 E1000_IMS_VMMB |
4722 E1000_IMS_DOUTSYNC);
4723 else
4724 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4725 wr32(E1000_EIMS, adapter->eims_other);
4727 return IRQ_HANDLED;
4728 }
4730 static void igb_write_itr(struct igb_q_vector *q_vector)
4732 struct igb_adapter *adapter = q_vector->adapter;
4733 u32 itr_val = q_vector->itr_val & 0x7FFC;
4735 if (!q_vector->set_itr)
4736 return;
4738 if (!itr_val)
4739 itr_val = 0x4;
4741 if (adapter->hw.mac.type == e1000_82575)
4742 itr_val |= itr_val << 16;
4743 else
4744 itr_val |= 0x8000000;
4746 writel(itr_val, q_vector->itr_register);
4747 q_vector->set_itr = 0;
4750 static irqreturn_t igb_msix_ring(int irq, void *data)
4752 struct igb_q_vector *q_vector = data;
4754 /* Write the ITR value calculated from the previous interrupt. */
4755 igb_write_itr(q_vector);
4757 napi_schedule(&q_vector->napi);
4759 return IRQ_HANDLED;
4760 }
4762 #ifdef CONFIG_IGB_DCA
4763 static void igb_update_dca(struct igb_q_vector *q_vector)
4765 struct igb_adapter *adapter = q_vector->adapter;
4766 struct e1000_hw *hw = &adapter->hw;
4767 int cpu = get_cpu();
4769 if (q_vector->cpu == cpu)
4772 if (q_vector->tx_ring) {
4773 int q = q_vector->tx_ring->reg_idx;
4774 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4775 if (hw->mac.type == e1000_82575) {
4776 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4777 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4779 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4780 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4781 E1000_DCA_TXCTRL_CPUID_SHIFT;
4783 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4784 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4786 if (q_vector->rx_ring) {
4787 int q = q_vector->rx_ring->reg_idx;
4788 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4789 if (hw->mac.type == e1000_82575) {
4790 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4791 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4793 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4794 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4795 E1000_DCA_RXCTRL_CPUID_SHIFT;
4797 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4798 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4799 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4800 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4802 q_vector->cpu = cpu;
4807 static void igb_setup_dca(struct igb_adapter *adapter)
4809 struct e1000_hw *hw = &adapter->hw;
4812 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4815 /* Always use CB2 mode, difference is masked in the CB driver. */
4816 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4818 for (i = 0; i < adapter->num_q_vectors; i++) {
4819 adapter->q_vector[i]->cpu = -1;
4820 igb_update_dca(adapter->q_vector[i]);
4824 static int __igb_notify_dca(struct device *dev, void *data)
4826 struct net_device *netdev = dev_get_drvdata(dev);
4827 struct igb_adapter *adapter = netdev_priv(netdev);
4828 struct pci_dev *pdev = adapter->pdev;
4829 struct e1000_hw *hw = &adapter->hw;
4830 unsigned long event = *(unsigned long *)data;
4833 case DCA_PROVIDER_ADD:
4834 /* if already enabled, don't do it again */
4835 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4837 if (dca_add_requester(dev) == 0) {
4838 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4839 dev_info(&pdev->dev, "DCA enabled\n");
4840 igb_setup_dca(adapter);
4843 /* Fall Through since DCA is disabled. */
4844 case DCA_PROVIDER_REMOVE:
4845 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4846 /* without this a class_device is left
4847 * hanging around in the sysfs model */
4848 dca_remove_requester(dev);
4849 dev_info(&pdev->dev, "DCA disabled\n");
4850 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4851 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4859 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4864 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4867 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4869 #endif /* CONFIG_IGB_DCA */
4871 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4873 struct e1000_hw *hw = &adapter->hw;
4877 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4878 ping = E1000_PF_CONTROL_MSG;
4879 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4880 ping |= E1000_VT_MSGTYPE_CTS;
4881 igb_write_mbx(hw, &ping, 1, i);
4885 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4887 struct e1000_hw *hw = &adapter->hw;
4888 u32 vmolr = rd32(E1000_VMOLR(vf));
4889 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4891 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4892 IGB_VF_FLAG_MULTI_PROMISC);
4893 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4895 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4896 vmolr |= E1000_VMOLR_MPME;
4897 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4898 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4901 * if we have hashes and we are clearing a multicast promisc
4902 * flag we need to write the hashes to the MTA as this step
4903 * was previously skipped
4905 if (vf_data->num_vf_mc_hashes > 30) {
4906 vmolr |= E1000_VMOLR_MPME;
4907 } else if (vf_data->num_vf_mc_hashes) {
4909 vmolr |= E1000_VMOLR_ROMPE;
4910 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4911 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4915 wr32(E1000_VMOLR(vf), vmolr);
4917 /* there are flags left unprocessed, likely not supported */
4918 if (*msgbuf & E1000_VT_MSGINFO_MASK)
4919 return -EINVAL;
4921 return 0;
4922 }
4925 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4926 u32 *msgbuf, u32 vf)
4928 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4929 u16 *hash_list = (u16 *)&msgbuf[1];
4930 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4933 /* salt away the number of multicast addresses assigned
4934 * to this VF for later use to restore when the PF multicast
4935 * filters are written back by igb_restore_vf_multicasts()
4936 */
4937 vf_data->num_vf_mc_hashes = n;
4939 /* only up to 30 hash values supported */
4940 if (n > 30)
4941 n = 30;
4943 /* store the hashes for later use */
4944 for (i = 0; i < n; i++)
4945 vf_data->vf_mc_hashes[i] = hash_list[i];
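/* Editor's note: the mailbox message is laid out as a 32-bit header
 * in msgbuf[0] (count carried in the E1000_VT_MSGINFO field) followed
 * by up to 30 packed 16-bit MTA hash values starting at msgbuf[1]. */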
4947 /* Flush and reset the mta with the new values */
4948 igb_set_rx_mode(adapter->netdev);
4950 return 0;
4951 }
4953 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4955 struct e1000_hw *hw = &adapter->hw;
4956 struct vf_data_storage *vf_data;
4959 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4960 u32 vmolr = rd32(E1000_VMOLR(i));
4961 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4963 vf_data = &adapter->vf_data[i];
4965 if ((vf_data->num_vf_mc_hashes > 30) ||
4966 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4967 vmolr |= E1000_VMOLR_MPME;
4968 } else if (vf_data->num_vf_mc_hashes) {
4969 vmolr |= E1000_VMOLR_ROMPE;
4970 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4971 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4973 wr32(E1000_VMOLR(i), vmolr);
4974 }
4975 }
4977 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4979 struct e1000_hw *hw = &adapter->hw;
4980 u32 pool_mask, reg, vid;
4983 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4985 /* Find the vlan filter for this id */
4986 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4987 reg = rd32(E1000_VLVF(i));
4989 /* remove the vf from the pool */
4990 reg &= ~pool_mask;
4992 /* if pool is empty then remove entry from vfta */
4993 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4994 (reg & E1000_VLVF_VLANID_ENABLE)) {
4995 reg = 0;
4996 vid = reg & E1000_VLVF_VLANID_MASK;
4997 igb_vfta_set(hw, vid, false);
4998 }
5000 wr32(E1000_VLVF(i), reg);
5001 }
5003 adapter->vf_data[vf].vlans_enabled = 0;
5004 }
5006 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5007 {
5008 struct e1000_hw *hw = &adapter->hw;
5009 u32 reg, i;
5011 /* The vlvf table only exists on 82576 hardware and newer */
5012 if (hw->mac.type < e1000_82576)
5013 return -1;
5015 /* we only need to do this if VMDq is enabled */
5016 if (!adapter->vfs_allocated_count)
5017 return -1;
5019 /* Find the vlan filter for this id */
5020 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5021 reg = rd32(E1000_VLVF(i));
5022 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5023 vid == (reg & E1000_VLVF_VLANID_MASK))
5024 break;
5025 }
5027 if (add) {
5028 if (i == E1000_VLVF_ARRAY_SIZE) {
5029 /* Did not find a matching VLAN ID entry that was
5030 * enabled. Search for a free filter entry, i.e.
5031 * one without the enable bit set
5033 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5034 reg = rd32(E1000_VLVF(i));
5035 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5036 break;
5037 }
5038 }
5039 if (i < E1000_VLVF_ARRAY_SIZE) {
5040 /* Found an enabled/available entry */
5041 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5043 /* if !enabled we need to set this up in vfta */
5044 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5045 /* add VID to filter table */
5046 igb_vfta_set(hw, vid, true);
5047 reg |= E1000_VLVF_VLANID_ENABLE;
5049 reg &= ~E1000_VLVF_VLANID_MASK;
5050 reg |= vid;
5051 wr32(E1000_VLVF(i), reg);
5053 /* do not modify RLPML for PF devices */
5054 if (vf >= adapter->vfs_allocated_count)
5055 return 0;
5057 if (!adapter->vf_data[vf].vlans_enabled) {
5058 u32 size;
5059 reg = rd32(E1000_VMOLR(vf));
5060 size = reg & E1000_VMOLR_RLPML_MASK;
5061 size += 4;
5062 reg &= ~E1000_VMOLR_RLPML_MASK;
5063 reg |= size;
5064 wr32(E1000_VMOLR(vf), reg);
5065 }
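/* Editor's note: RLPML is the per-pool "receive long packet maximum
 * length"; growing it by 4 bytes here (and shrinking it by 4 in the
 * removal path below) accounts for the VLAN tag once the VF holds at
 * least one VLAN filter. */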
5067 adapter->vf_data[vf].vlans_enabled++;
5068 }
5069 } else {
5071 if (i < E1000_VLVF_ARRAY_SIZE) {
5072 /* remove vf from the pool */
5073 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5074 /* if pool is empty then remove entry from vfta */
5075 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5076 reg = 0;
5077 igb_vfta_set(hw, vid, false);
5078 }
5079 wr32(E1000_VLVF(i), reg);
5081 /* do not modify RLPML for PF devices */
5082 if (vf >= adapter->vfs_allocated_count)
5083 return 0;
5085 adapter->vf_data[vf].vlans_enabled--;
5086 if (!adapter->vf_data[vf].vlans_enabled) {
5087 u32 size;
5088 reg = rd32(E1000_VMOLR(vf));
5089 size = reg & E1000_VMOLR_RLPML_MASK;
5090 size -= 4;
5091 reg &= ~E1000_VMOLR_RLPML_MASK;
5092 reg |= size;
5093 wr32(E1000_VMOLR(vf), reg);
5094 }
5095 }
5096 }
5097 return 0;
5098 }
5100 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5101 {
5102 struct e1000_hw *hw = &adapter->hw;
5104 if (vid)
5105 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5106 else
5107 wr32(E1000_VMVIR(vf), 0);
5108 }
5110 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5111 int vf, u16 vlan, u8 qos)
5114 struct igb_adapter *adapter = netdev_priv(netdev);
5116 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5119 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5122 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5123 igb_set_vmolr(adapter, vf, !vlan);
5124 adapter->vf_data[vf].pf_vlan = vlan;
5125 adapter->vf_data[vf].pf_qos = qos;
5126 dev_info(&adapter->pdev->dev,
5127 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5128 if (test_bit(__IGB_DOWN, &adapter->state)) {
5129 dev_warn(&adapter->pdev->dev,
5130 "The VF VLAN has been set,"
5131 " but the PF device is not up.\n");
5132 dev_warn(&adapter->pdev->dev,
5133 "Bring the PF device up before"
5134 " attempting to use the VF device.\n");
5137 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5138 false, vf);
5139 igb_set_vmvir(adapter, vlan, vf);
5140 igb_set_vmolr(adapter, vf, true);
5141 adapter->vf_data[vf].pf_vlan = 0;
5142 adapter->vf_data[vf].pf_qos = 0;
5148 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5150 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5151 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5153 return igb_vlvf_set(adapter, vid, add, vf);
5156 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5158 /* clear flags - except flag that indicates PF has set the MAC */
5159 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5160 adapter->vf_data[vf].last_nack = jiffies;
5162 /* reset offloads to defaults */
5163 igb_set_vmolr(adapter, vf, true);
5165 /* reset vlans for device */
5166 igb_clear_vf_vfta(adapter, vf);
5167 if (adapter->vf_data[vf].pf_vlan)
5168 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5169 adapter->vf_data[vf].pf_vlan,
5170 adapter->vf_data[vf].pf_qos);
5171 else
5172 igb_clear_vf_vfta(adapter, vf);
5174 /* reset multicast table array for vf */
5175 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5177 /* Flush and reset the mta with the new values */
5178 igb_set_rx_mode(adapter->netdev);
5181 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5183 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5185 /* generate a new mac address as we were hotplug removed/added */
5186 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5187 random_ether_addr(vf_mac);
5189 /* process remaining reset events */
5190 igb_vf_reset(adapter, vf);
5193 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5195 struct e1000_hw *hw = &adapter->hw;
5196 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5197 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5199 u8 *addr = (u8 *)(&msgbuf[1]);
5201 /* process all the same items cleared in a function level reset */
5202 igb_vf_reset(adapter, vf);
5204 /* set vf mac address */
5205 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5207 /* enable transmit and receive for vf */
5208 reg = rd32(E1000_VFTE);
5209 wr32(E1000_VFTE, reg | (1 << vf));
5210 reg = rd32(E1000_VFRE);
5211 wr32(E1000_VFRE, reg | (1 << vf));
5213 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5215 /* reply to reset with ack and vf mac address */
5216 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5217 memcpy(addr, vf_mac, 6);
5218 igb_write_mbx(hw, msgbuf, 3, vf);
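/* Editor's note: the 3-word reply written above is the E1000_VF_RESET
 * header with the ACK bit set in msgbuf[0], followed by the VF's
 * 6-byte MAC address packed into msgbuf[1] and msgbuf[2]. */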
5221 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5222 {
5223 /*
5224 * The VF MAC Address is stored in a packed array of bytes
5225 * starting at the second 32 bit word of the msg array
5226 */
5227 unsigned char *addr = (char *)&msg[1];
5228 int err = -1;
5230 if (is_valid_ether_addr(addr))
5231 err = igb_set_vf_mac(adapter, vf, addr);
5233 return err;
5234 }
5236 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5238 struct e1000_hw *hw = &adapter->hw;
5239 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5240 u32 msg = E1000_VT_MSGTYPE_NACK;
5242 /* if device isn't clear to send it shouldn't be reading either */
5243 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5244 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5245 igb_write_mbx(hw, &msg, 1, vf);
5246 vf_data->last_nack = jiffies;
5250 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5252 struct pci_dev *pdev = adapter->pdev;
5253 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5254 struct e1000_hw *hw = &adapter->hw;
5255 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5258 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5261 /* if receive failed revoke VF CTS stats and restart init */
5262 dev_err(&pdev->dev, "Error receiving message from VF\n");
5263 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5264 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5269 /* this is a message we already processed, do nothing */
5270 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5274 * until the vf completes a reset it should not be
5275 * allowed to start any configuration.
5278 if (msgbuf[0] == E1000_VF_RESET) {
5279 igb_vf_reset_msg(adapter, vf);
5283 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5284 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5290 switch ((msgbuf[0] & 0xFFFF)) {
5291 case E1000_VF_SET_MAC_ADDR:
5293 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5294 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5296 dev_warn(&pdev->dev,
5297 "VF %d attempted to override administratively "
5298 "set MAC address\nReload the VF driver to "
5299 "resume operations\n", vf);
5301 case E1000_VF_SET_PROMISC:
5302 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5304 case E1000_VF_SET_MULTICAST:
5305 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5307 case E1000_VF_SET_LPE:
5308 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5310 case E1000_VF_SET_VLAN:
5312 if (vf_data->pf_vlan)
5313 dev_warn(&pdev->dev,
5314 "VF %d attempted to override administratively "
5315 "set VLAN tag\nReload the VF driver to "
5316 "resume operations\n", vf);
5318 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5321 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5326 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5328 /* notify the VF of the results of what it sent us */
5330 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5332 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5334 igb_write_mbx(hw, msgbuf, 1, vf);
5337 static void igb_msg_task(struct igb_adapter *adapter)
5339 struct e1000_hw *hw = &adapter->hw;
5342 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5343 /* process any reset requests */
5344 if (!igb_check_for_rst(hw, vf))
5345 igb_vf_reset_event(adapter, vf);
5347 /* process any messages pending */
5348 if (!igb_check_for_msg(hw, vf))
5349 igb_rcv_msg_from_vf(adapter, vf);
5351 /* process any acks */
5352 if (!igb_check_for_ack(hw, vf))
5353 igb_rcv_ack_from_vf(adapter, vf);
5354 }
5355 }
5357 /**
5358 * igb_set_uta - Set unicast filter table address
5359 * @adapter: board private structure
5361 * The unicast table address is a register array of 32-bit registers.
5362 * The table is meant to be used in a way similar to how the MTA is used
5363 * however due to certain limitations in the hardware it is necessary to
5364 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5365 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5367 static void igb_set_uta(struct igb_adapter *adapter)
5369 struct e1000_hw *hw = &adapter->hw;
5372 /* The UTA table only exists on 82576 hardware and newer */
5373 if (hw->mac.type < e1000_82576)
5376 /* we only need to do this if VMDq is enabled */
5377 if (!adapter->vfs_allocated_count)
5380 for (i = 0; i < hw->mac.uta_reg_count; i++)
5381 array_wr32(E1000_UTA, i, ~0);
5385 * igb_intr_msi - Interrupt Handler
5386 * @irq: interrupt number
5387 * @data: pointer to a network interface device structure
5389 static irqreturn_t igb_intr_msi(int irq, void *data)
5391 struct igb_adapter *adapter = data;
5392 struct igb_q_vector *q_vector = adapter->q_vector[0];
5393 struct e1000_hw *hw = &adapter->hw;
5394 /* read ICR disables interrupts using IAM */
5395 u32 icr = rd32(E1000_ICR);
5397 igb_write_itr(q_vector);
5399 if (icr & E1000_ICR_DRSTA)
5400 schedule_work(&adapter->reset_task);
5402 if (icr & E1000_ICR_DOUTSYNC) {
5403 /* HW is reporting DMA is out of sync */
5404 adapter->stats.doosync++;
5407 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5408 hw->mac.get_link_status = 1;
5409 if (!test_bit(__IGB_DOWN, &adapter->state))
5410 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5413 napi_schedule(&q_vector->napi);
5415 return IRQ_HANDLED;
5416 }
5418 /**
5419 * igb_intr - Legacy Interrupt Handler
5420 * @irq: interrupt number
5421 * @data: pointer to a network interface device structure
5423 static irqreturn_t igb_intr(int irq, void *data)
5425 struct igb_adapter *adapter = data;
5426 struct igb_q_vector *q_vector = adapter->q_vector[0];
5427 struct e1000_hw *hw = &adapter->hw;
5428 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5429 * need for the IMC write */
5430 u32 icr = rd32(E1000_ICR);
5431 if (!icr)
5432 return IRQ_NONE; /* Not our interrupt */
5434 igb_write_itr(q_vector);
5436 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5437 * not set, then the adapter didn't send an interrupt */
5438 if (!(icr & E1000_ICR_INT_ASSERTED))
5439 return IRQ_NONE;
5441 if (icr & E1000_ICR_DRSTA)
5442 schedule_work(&adapter->reset_task);
5444 if (icr & E1000_ICR_DOUTSYNC) {
5445 /* HW is reporting DMA is out of sync */
5446 adapter->stats.doosync++;
5449 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5450 hw->mac.get_link_status = 1;
5451 /* guard against interrupt when we're going down */
5452 if (!test_bit(__IGB_DOWN, &adapter->state))
5453 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5456 napi_schedule(&q_vector->napi);
5458 return IRQ_HANDLED;
5459 }
5461 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5463 struct igb_adapter *adapter = q_vector->adapter;
5464 struct e1000_hw *hw = &adapter->hw;
5466 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5467 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5468 if (!adapter->msix_entries)
5469 igb_set_itr(adapter);
5471 igb_update_ring_itr(q_vector);
5474 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5475 if (adapter->msix_entries)
5476 wr32(E1000_EIMS, q_vector->eims_value);
5477 else
5478 igb_irq_enable(adapter);
5479 }
5480 }
5483 * igb_poll - NAPI Rx polling callback
5484 * @napi: napi polling structure
5485 * @budget: count of how many packets we should handle
5487 static int igb_poll(struct napi_struct *napi, int budget)
5489 struct igb_q_vector *q_vector = container_of(napi,
5490 struct igb_q_vector,
5491 napi);
5492 bool clean_complete = true;
5494 #ifdef CONFIG_IGB_DCA
5495 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5496 igb_update_dca(q_vector);
5498 if (q_vector->tx_ring)
5499 clean_complete = !!igb_clean_tx_irq(q_vector);
5501 if (q_vector->rx_ring)
5502 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5504 /* If all work not completed, return budget and keep polling */
5505 if (!clean_complete)
5506 return budget;
5508 /* If not enough Rx work done, exit the polling mode */
5509 napi_complete(napi);
5510 igb_ring_irq_enable(q_vector);
5512 return 0;
5513 }
5515 /**
5516 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5517 * @adapter: board private structure
5518 * @shhwtstamps: timestamp structure to update
5519 * @regval: unsigned 64bit system time value.
5521 * We need to convert the system time value stored in the RX/TXSTMP registers
5522 * into a hwtstamp which can be used by the upper level timestamping functions
5524 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5525 struct skb_shared_hwtstamps *shhwtstamps,
5526 u64 regval)
5527 {
5528 u64 ns;
5530 /*
5531 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5532 * 24 to match clock shift we setup earlier.
5534 if (adapter->hw.mac.type == e1000_82580)
5535 regval <<= IGB_82580_TSYNC_SHIFT;
5537 ns = timecounter_cyc2time(&adapter->clock, regval);
5538 timecompare_update(&adapter->compare, ns);
5539 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5540 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5541 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5542 }
5545 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5546 * @q_vector: pointer to q_vector containing needed info
5547 * @buffer: pointer to igb_buffer structure
5549 * If we were asked to do hardware stamping and such a time stamp is
5550 * available, then it must have been for this skb here because we
5551 * allow only one such packet into the queue.
5553 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5555 struct igb_adapter *adapter = q_vector->adapter;
5556 struct e1000_hw *hw = &adapter->hw;
5557 struct skb_shared_hwtstamps shhwtstamps;
5558 u64 regval;
5560 /* if skb does not support hw timestamp or TX stamp not valid exit */
5561 if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5562 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5563 return;
5565 regval = rd32(E1000_TXSTMPL);
5566 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5568 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5569 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5570 }
5572 /**
5573 * igb_clean_tx_irq - Reclaim resources after transmit completes
5574 * @q_vector: pointer to q_vector containing needed info
5575 * returns true if ring is completely cleaned
5577 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5579 struct igb_adapter *adapter = q_vector->adapter;
5580 struct igb_ring *tx_ring = q_vector->tx_ring;
5581 struct net_device *netdev = tx_ring->netdev;
5582 struct e1000_hw *hw = &adapter->hw;
5583 struct igb_buffer *buffer_info;
5584 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5585 unsigned int total_bytes = 0, total_packets = 0;
5586 unsigned int i, eop, count = 0;
5587 bool cleaned = false;
5589 i = tx_ring->next_to_clean;
5590 eop = tx_ring->buffer_info[i].next_to_watch;
5591 eop_desc = IGB_TX_DESC(tx_ring, eop);
5593 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5594 (count < tx_ring->count)) {
5595 rmb(); /* read buffer_info after eop_desc status */
5596 for (cleaned = false; !cleaned; count++) {
5597 tx_desc = IGB_TX_DESC(tx_ring, i);
5598 buffer_info = &tx_ring->buffer_info[i];
5599 cleaned = (i == eop);
5601 if (buffer_info->skb) {
5602 total_bytes += buffer_info->bytecount;
5603 /* gso_segs is currently only valid for tcp */
5604 total_packets += buffer_info->gso_segs;
5605 igb_tx_hwtstamp(q_vector, buffer_info);
5606 }
5608 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5609 tx_desc->wb.status = 0;
5611 i++;
5612 if (i == tx_ring->count)
5613 i = 0;
5614 }
5615 eop = tx_ring->buffer_info[i].next_to_watch;
5616 eop_desc = IGB_TX_DESC(tx_ring, eop);
5617 }
5619 tx_ring->next_to_clean = i;
5621 if (unlikely(count &&
5622 netif_carrier_ok(netdev) &&
5623 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5624 /* Make sure that anybody stopping the queue after this
5625 * sees the new next_to_clean.
5628 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5629 !(test_bit(__IGB_DOWN, &adapter->state))) {
5630 netif_wake_subqueue(netdev, tx_ring->queue_index);
5632 u64_stats_update_begin(&tx_ring->tx_syncp);
5633 tx_ring->tx_stats.restart_queue++;
5634 u64_stats_update_end(&tx_ring->tx_syncp);
5638 if (tx_ring->detect_tx_hung) {
5639 /* Detect a transmit hang in hardware, this serializes the
5640 * check with the clearing of time_stamp and movement of i */
5641 tx_ring->detect_tx_hung = false;
5642 if (tx_ring->buffer_info[i].time_stamp &&
5643 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5644 (adapter->tx_timeout_factor * HZ)) &&
5645 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5647 /* detected Tx unit hang */
5648 dev_err(tx_ring->dev,
5649 "Detected Tx Unit Hang\n"
5653 " next_to_use <%x>\n"
5654 " next_to_clean <%x>\n"
5655 "buffer_info[next_to_clean]\n"
5656 " time_stamp <%lx>\n"
5657 " next_to_watch <%x>\n"
5659 " desc.status <%x>\n",
5660 tx_ring->queue_index,
5661 rd32(E1000_TDH(tx_ring->reg_idx)),
5662 readl(tx_ring->tail),
5663 tx_ring->next_to_use,
5664 tx_ring->next_to_clean,
5665 tx_ring->buffer_info[eop].time_stamp,
5666 eop,
5667 jiffies,
5668 eop_desc->wb.status);
5669 netif_stop_subqueue(netdev, tx_ring->queue_index);
5672 tx_ring->total_bytes += total_bytes;
5673 tx_ring->total_packets += total_packets;
5674 u64_stats_update_begin(&tx_ring->tx_syncp);
5675 tx_ring->tx_stats.bytes += total_bytes;
5676 tx_ring->tx_stats.packets += total_packets;
5677 u64_stats_update_end(&tx_ring->tx_syncp);
5678 return count < tx_ring->count;
5679 }
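/* Editor's note: returning true ("ring fully cleaned") only when
 * fewer than tx_ring->count descriptors were processed lets igb_poll()
 * keep NAPI scheduled while completions are still streaming in. */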
5681 static inline void igb_rx_checksum(struct igb_ring *ring,
5682 u32 status_err, struct sk_buff *skb)
5684 skb_checksum_none_assert(skb);
5686 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5687 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5688 (status_err & E1000_RXD_STAT_IXSM))
5689 return;
5691 /* TCP/UDP checksum error bit is set */
5692 if (status_err &
5693 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5694 /*
5695 * work around errata with sctp packets where the TCPE aka
5696 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5697 * packets, (aka let the stack check the crc32c)
5699 if ((skb->len == 60) &&
5700 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5701 u64_stats_update_begin(&ring->rx_syncp);
5702 ring->rx_stats.csum_err++;
5703 u64_stats_update_end(&ring->rx_syncp);
5704 }
5705 /* let the stack verify checksum errors */
5706 return;
5707 }
5708 /* It must be a TCP or UDP packet with a valid checksum */
5709 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5710 skb->ip_summed = CHECKSUM_UNNECESSARY;
5712 dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5713 }
5715 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5716 struct sk_buff *skb)
5718 struct igb_adapter *adapter = q_vector->adapter;
5719 struct e1000_hw *hw = &adapter->hw;
5720 u64 regval;
5722 /*
5723 * If this bit is set, then the RX registers contain the time stamp. No
5724 * other packet will be time stamped until we read these registers, so
5725 * read the registers to make them available again. Because only one
5726 * packet can be time stamped at a time, we know that the register
5727 * values must belong to this one here and therefore we don't need to
5728 * compare any of the additional attributes stored for it.
5730 * If nothing went wrong, then it should have a shared tx_flags that we
5731 * can turn into a skb_shared_hwtstamps.
5733 if (staterr & E1000_RXDADV_STAT_TSIP) {
5734 u32 *stamp = (u32 *)skb->data;
5735 regval = le32_to_cpu(*(stamp + 2));
5736 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5737 skb_pull(skb, IGB_TS_HDR_LEN);
5738 } else {
5739 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5740 return;
5742 regval = rd32(E1000_RXSTMPL);
5743 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5744 }
5746 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5747 }
5748 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5750 /* HW will not DMA in data larger than the given buffer, even if it
5751 * parses the (NFS, of course) header to be larger. In that case, it
5752 * fills the header buffer and spills the rest into the page.
5754 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5755 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5756 if (hlen > IGB_RX_HDR_LEN)
5757 hlen = IGB_RX_HDR_LEN;
5758 return hlen;
5759 }
5761 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5763 struct igb_ring *rx_ring = q_vector->rx_ring;
5764 union e1000_adv_rx_desc *rx_desc;
5765 const int current_node = numa_node_id();
5766 unsigned int total_bytes = 0, total_packets = 0;
5767 u32 staterr;
5768 u16 cleaned_count = igb_desc_unused(rx_ring);
5769 u16 i = rx_ring->next_to_clean;
5771 rx_desc = IGB_RX_DESC(rx_ring, i);
5772 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5774 while (staterr & E1000_RXD_STAT_DD) {
5775 struct igb_buffer *buffer_info = &rx_ring->buffer_info[i];
5776 struct sk_buff *skb = buffer_info->skb;
5777 union e1000_adv_rx_desc *next_rxd;
5779 buffer_info->skb = NULL;
5780 prefetch(skb->data);
5782 i++;
5783 if (i == rx_ring->count)
5784 i = 0;
5786 next_rxd = IGB_RX_DESC(rx_ring, i);
5787 prefetch(next_rxd);
5789 /*
5790 * This memory barrier is needed to keep us from reading
5791 * any other fields out of the rx_desc until we know the
5792 * RXD_STAT_DD bit is set
5793 */
5794 rmb();
5796 if (!skb_is_nonlinear(skb)) {
5797 __skb_put(skb, igb_get_hlen(rx_desc));
5798 dma_unmap_single(rx_ring->dev, buffer_info->dma,
5799 IGB_RX_HDR_LEN,
5800 DMA_FROM_DEVICE);
5801 buffer_info->dma = 0;
5802 }
5804 if (rx_desc->wb.upper.length) {
5805 u16 length = le16_to_cpu(rx_desc->wb.upper.length);
5807 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5808 buffer_info->page,
5809 buffer_info->page_offset,
5810 length);
5812 skb->len += length;
5813 skb->data_len += length;
5814 skb->truesize += length;
5816 if ((page_count(buffer_info->page) != 1) ||
5817 (page_to_nid(buffer_info->page) != current_node))
5818 buffer_info->page = NULL;
5819 else
5820 get_page(buffer_info->page);
5822 dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
5823 PAGE_SIZE / 2, DMA_FROM_DEVICE);
5824 buffer_info->page_dma = 0;
5825 }
5827 if (!(staterr & E1000_RXD_STAT_EOP)) {
5828 struct igb_buffer *next_buffer;
5829 next_buffer = &rx_ring->buffer_info[i];
5830 buffer_info->skb = next_buffer->skb;
5831 buffer_info->dma = next_buffer->dma;
5832 next_buffer->skb = skb;
5833 next_buffer->dma = 0;
5834 goto next_desc;
5835 }
5837 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5838 dev_kfree_skb_any(skb);
5839 goto next_desc;
5840 }
5842 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5843 igb_rx_hwtstamp(q_vector, staterr, skb);
5844 total_bytes += skb->len;
5845 total_packets++;
5847 igb_rx_checksum(rx_ring, staterr, skb);
5849 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
5851 if (staterr & E1000_RXD_STAT_VP) {
5852 u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5854 __vlan_hwaccel_put_tag(skb, vid);
5855 }
5856 napi_gro_receive(&q_vector->napi, skb);
5858 budget--;
5859 next_desc:
5860 if (!budget)
5861 break;
5863 cleaned_count++;
5864 /* return some buffers to hardware, one at a time is too slow */
5865 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5866 igb_alloc_rx_buffers(rx_ring, cleaned_count);
5867 cleaned_count = 0;
5868 }
5870 /* use prefetched values */
5871 rx_desc = next_rxd;
5872 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5873 }
5875 rx_ring->next_to_clean = i;
5876 u64_stats_update_begin(&rx_ring->rx_syncp);
5877 rx_ring->rx_stats.packets += total_packets;
5878 rx_ring->rx_stats.bytes += total_bytes;
5879 u64_stats_update_end(&rx_ring->rx_syncp);
5880 rx_ring->total_packets += total_packets;
5881 rx_ring->total_bytes += total_bytes;
5883 if (cleaned_count)
5884 igb_alloc_rx_buffers(rx_ring, cleaned_count);
5886 return !!budget;
5887 }
5889 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
5890 struct igb_buffer *bi)
5892 struct sk_buff *skb = bi->skb;
5893 dma_addr_t dma = bi->dma;
5895 if (dma)
5896 return true;
5898 if (likely(!skb)) {
5899 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
5900 IGB_RX_HDR_LEN);
5901 bi->skb = skb;
5902 if (!skb) {
5903 rx_ring->rx_stats.alloc_failed++;
5904 return false;
5905 }
5907 /* initialize skb for ring */
5908 skb_record_rx_queue(skb, rx_ring->queue_index);
5909 }
5911 dma = dma_map_single(rx_ring->dev, skb->data,
5912 IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
5914 if (dma_mapping_error(rx_ring->dev, dma)) {
5915 rx_ring->rx_stats.alloc_failed++;
5916 return false;
5917 }
5919 bi->dma = dma;
5920 return true;
5921 }
5923 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
5924 struct igb_buffer *bi)
5926 struct page *page = bi->page;
5927 dma_addr_t page_dma = bi->page_dma;
5928 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
5930 if (page_dma)
5931 return true;
5933 if (!page) {
5934 page = netdev_alloc_page(rx_ring->netdev);
5935 bi->page = page;
5936 if (unlikely(!page)) {
5937 rx_ring->rx_stats.alloc_failed++;
5938 return false;
5939 }
5940 }
5942 page_dma = dma_map_page(rx_ring->dev, page,
5943 page_offset, PAGE_SIZE / 2,
5946 if (dma_mapping_error(rx_ring->dev, page_dma)) {
5947 rx_ring->rx_stats.alloc_failed++;
5948 return false;
5949 }
5951 bi->page_dma = page_dma;
5952 bi->page_offset = page_offset;
5954 return true;
5955 }
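/* Editor's note: page_offset is toggled with an XOR of PAGE_SIZE / 2
 * (see the top of this function), so each allocated page backs two
 * half-page receive buffers before it has to be replaced. */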
5957 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
5958 * @adapter: address of board private structure
5960 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
5962 union e1000_adv_rx_desc *rx_desc;
5963 struct igb_buffer *bi;
5964 u16 i = rx_ring->next_to_use;
5966 rx_desc = IGB_RX_DESC(rx_ring, i);
5967 bi = &rx_ring->buffer_info[i];
5968 i -= rx_ring->count;
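/* Editor's note: i is biased by -count (relying on u16 wrap-around)
 * so the refill loop below can detect the ring boundary with a cheap
 * !i test, reset the descriptor and buffer_info pointers, and
 * re-apply the bias. */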
5970 while (cleaned_count--) {
5971 if (!igb_alloc_mapped_skb(rx_ring, bi))
5972 break;
5974 /* Refresh the desc even if buffer_addrs didn't change
5975 * because each write-back erases this info. */
5976 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
5978 if (!igb_alloc_mapped_page(rx_ring, bi))
5979 break;
5981 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
5983 rx_desc++;
5984 bi++;
5985 i++;
5986 if (unlikely(!i)) {
5987 rx_desc = IGB_RX_DESC(rx_ring, 0);
5988 bi = rx_ring->buffer_info;
5989 i -= rx_ring->count;
5990 }
5992 /* clear the hdr_addr for the next_to_use descriptor */
5993 rx_desc->read.hdr_addr = 0;
5994 }
5996 i += rx_ring->count;
5998 if (rx_ring->next_to_use != i) {
5999 rx_ring->next_to_use = i;
6001 /* Force memory writes to complete before letting h/w
6002 * know there are new descriptors to fetch. (Only
6003 * applicable for weak-ordered memory model archs,
6004 * such as IA-64). */
6006 writel(i, rx_ring->tail);
6016 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6018 struct igb_adapter *adapter = netdev_priv(netdev);
6019 struct mii_ioctl_data *data = if_mii(ifr);
6021 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6022 return -EOPNOTSUPP;
6024 switch (cmd) {
6025 case SIOCGMIIPHY:
6026 data->phy_id = adapter->hw.phy.addr;
6027 break;
6028 case SIOCGMIIREG:
6029 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6030 &data->val_out))
6031 return -EIO;
6032 break;
6033 default:
6034 return -EOPNOTSUPP;
6035 }
6036 return 0;
6037 }
6041 * igb_hwtstamp_ioctl - control hardware time stamping
6046 * Outgoing time stamping can be enabled and disabled. Play nice and
6047 * disable it when requested, although it shouldn't cause any overhead
6048 * when no packet needs it. At most one packet in the queue may be
6049 * marked for time stamping, otherwise it would be impossible to tell
6050 * for sure to which packet the hardware time stamp belongs.
6052 * Incoming time stamping has to be configured via the hardware
6053 * filters. Not all combinations are supported, in particular event
6054 * type has to be specified. Matching the kind of event packet is
6055 * not supported, with the exception of "all V2 events regardless of
6059 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6060 struct ifreq *ifr, int cmd)
6062 struct igb_adapter *adapter = netdev_priv(netdev);
6063 struct e1000_hw *hw = &adapter->hw;
6064 struct hwtstamp_config config;
6065 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6066 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6067 u32 tsync_rx_cfg = 0;
6068 bool is_l4 = false;
6069 bool is_l2 = false;
6070 u32 regval;
6072 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6073 return -EFAULT;
6075 /* reserved for future extensions */
6076 if (config.flags)
6077 return -EINVAL;
6079 switch (config.tx_type) {
6080 case HWTSTAMP_TX_OFF:
6081 tsync_tx_ctl = 0;
6082 case HWTSTAMP_TX_ON:
6083 break;
6084 default:
6085 return -ERANGE;
6086 }
6088 switch (config.rx_filter) {
6089 case HWTSTAMP_FILTER_NONE:
6090 tsync_rx_ctl = 0;
6091 break;
6092 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6093 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6094 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6095 case HWTSTAMP_FILTER_ALL:
6096 /*
6097 * register TSYNCRXCFG must be set, therefore it is not
6098 * possible to time stamp both Sync and Delay_Req messages
6099 * => fall back to time stamping all packets
6101 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6102 config.rx_filter = HWTSTAMP_FILTER_ALL;
6103 break;
6104 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6105 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6106 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6107 is_l4 = true;
6108 break;
6109 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6110 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6111 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6112 is_l4 = true;
6113 break;
6114 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6115 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6116 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6117 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6118 is_l2 = true;
6119 is_l4 = true;
6120 config.rx_filter = HWTSTAMP_FILTER_SOME;
6121 break;
6122 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6123 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6124 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6125 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6126 is_l2 = true;
6127 is_l4 = true;
6128 config.rx_filter = HWTSTAMP_FILTER_SOME;
6129 break;
6130 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6131 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6132 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6133 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6134 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6135 is_l2 = true;
6136 is_l4 = true;
6137 break;
6138 default:
6139 return -ERANGE;
6140 }
6141 if (hw->mac.type == e1000_82575) {
6142 if (tsync_rx_ctl | tsync_tx_ctl)
6143 return -EINVAL;
6144 return 0;
6145 }
6147 /*
6148 * Per-packet timestamping only works if all packets are
6149 * timestamped, so enable timestamping in all packets as
6150 * long as one rx filter was configured.
6152 if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6153 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6154 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6157 /* enable/disable TX */
6158 regval = rd32(E1000_TSYNCTXCTL);
6159 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6160 regval |= tsync_tx_ctl;
6161 wr32(E1000_TSYNCTXCTL, regval);
6163 /* enable/disable RX */
6164 regval = rd32(E1000_TSYNCRXCTL);
6165 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6166 regval |= tsync_rx_ctl;
6167 wr32(E1000_TSYNCRXCTL, regval);
6169 /* define which PTP packets are time stamped */
6170 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6172 /* define ethertype filter for timestamped packets */
6173 if (is_l2)
6174 wr32(E1000_ETQF(3),
6175 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6176 E1000_ETQF_1588 | /* enable timestamping */
6177 ETH_P_1588)); /* 1588 eth protocol type */
6178 else
6179 wr32(E1000_ETQF(3), 0);
6181 #define PTP_PORT 319
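/* Editor's note: 319 is the IEEE 1588 event-message UDP port (Sync,
 * Delay_Req); general messages use port 320 and are not time stamped
 * by this filter. */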
6182 /* L4 Queue Filter[3]: filter by destination port and protocol */
6183 if (is_l4) {
6184 u32 ftqf = (IPPROTO_UDP /* UDP */
6185 | E1000_FTQF_VF_BP /* VF not compared */
6186 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6187 | E1000_FTQF_MASK); /* mask all inputs */
6188 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6190 wr32(E1000_IMIR(3), htons(PTP_PORT));
6191 wr32(E1000_IMIREXT(3),
6192 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6193 if (hw->mac.type == e1000_82576) {
6194 /* enable source port check */
6195 wr32(E1000_SPQF(3), htons(PTP_PORT));
6196 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6198 wr32(E1000_FTQF(3), ftqf);
6199 } else {
6200 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6201 }
6204 adapter->hwtstamp_config = config;
6206 /* clear TX/RX time stamp registers, just to be sure */
6207 regval = rd32(E1000_TXSTMPH);
6208 regval = rd32(E1000_RXSTMPH);
6210 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6211 -EFAULT : 0;
6212 }
6220 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6221 {
6222 switch (cmd) {
6223 case SIOCGMIIPHY:
6224 case SIOCGMIIREG:
6225 case SIOCSMIIREG:
6226 return igb_mii_ioctl(netdev, ifr, cmd);
6227 case SIOCSHWTSTAMP:
6228 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6229 default:
6230 return -EOPNOTSUPP;
6231 }
6232 }
6234 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6235 {
6236 struct igb_adapter *adapter = hw->back;
6237 u16 cap_offset;
6239 cap_offset = adapter->pdev->pcie_cap;
6240 if (!cap_offset)
6241 return -E1000_ERR_CONFIG;
6243 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6245 return 0;
6246 }
6248 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6249 {
6250 struct igb_adapter *adapter = hw->back;
6251 u16 cap_offset;
6253 cap_offset = adapter->pdev->pcie_cap;
6254 if (!cap_offset)
6255 return -E1000_ERR_CONFIG;
6257 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6259 return 0;
6260 }
6262 static void igb_vlan_mode(struct net_device *netdev, u32 features)
6263 {
6264 struct igb_adapter *adapter = netdev_priv(netdev);
6265 struct e1000_hw *hw = &adapter->hw;
6266 u32 ctrl, rctl;
6268 igb_irq_disable(adapter);
6270 if (features & NETIF_F_HW_VLAN_RX) {
6271 /* enable VLAN tag insert/strip */
6272 ctrl = rd32(E1000_CTRL);
6273 ctrl |= E1000_CTRL_VME;
6274 wr32(E1000_CTRL, ctrl);
6276 /* Disable CFI check */
6277 rctl = rd32(E1000_RCTL);
6278 rctl &= ~E1000_RCTL_CFIEN;
6279 wr32(E1000_RCTL, rctl);
6280 } else {
6281 /* disable VLAN tag insert/strip */
6282 ctrl = rd32(E1000_CTRL);
6283 ctrl &= ~E1000_CTRL_VME;
6284 wr32(E1000_CTRL, ctrl);
6285 }
6287 igb_rlpml_set(adapter);
6289 if (!test_bit(__IGB_DOWN, &adapter->state))
6290 igb_irq_enable(adapter);
6291 }
6293 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6295 struct igb_adapter *adapter = netdev_priv(netdev);
6296 struct e1000_hw *hw = &adapter->hw;
6297 int pf_id = adapter->vfs_allocated_count;
6299 /* attempt to add filter to vlvf array */
6300 igb_vlvf_set(adapter, vid, true, pf_id);
6302 /* add the filter since PF can receive vlans w/o entry in vlvf */
6303 igb_vfta_set(hw, vid, true);
6305 set_bit(vid, adapter->active_vlans);
6308 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6310 struct igb_adapter *adapter = netdev_priv(netdev);
6311 struct e1000_hw *hw = &adapter->hw;
6312 int pf_id = adapter->vfs_allocated_count;
6313 s32 err;
6315 igb_irq_disable(adapter);
6317 if (!test_bit(__IGB_DOWN, &adapter->state))
6318 igb_irq_enable(adapter);
6320 /* remove vlan from VLVF table array */
6321 err = igb_vlvf_set(adapter, vid, false, pf_id);
6323 /* if vid was not present in VLVF just remove it from table */
6324 if (err)
6325 igb_vfta_set(hw, vid, false);
6327 clear_bit(vid, adapter->active_vlans);
6330 static void igb_restore_vlan(struct igb_adapter *adapter)
6334 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6335 igb_vlan_rx_add_vid(adapter->netdev, vid);
6338 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6340 struct pci_dev *pdev = adapter->pdev;
6341 struct e1000_mac_info *mac = &adapter->hw.mac;
6345 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6346 * for the switch() below to work */
6347 if ((spd & 1) || (dplx & ~1))
6348 return -EINVAL;
6350 /* Fiber NICs only allow 1000 Mbps full duplex */
6351 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6352 spd != SPEED_1000 &&
6353 dplx != DUPLEX_FULL)
6354 return -EINVAL;
6356 switch (spd + dplx) {
6357 case SPEED_10 + DUPLEX_HALF:
6358 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6360 case SPEED_10 + DUPLEX_FULL:
6361 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6363 case SPEED_100 + DUPLEX_HALF:
6364 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6366 case SPEED_100 + DUPLEX_FULL:
6367 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6369 case SPEED_1000 + DUPLEX_FULL:
6370 mac->autoneg = 1;
6371 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6372 break;
6373 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6380 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl, status;
	u32 wufc = adapter->wol;
#ifdef CONFIG_PM
	int retval = 0;
#endif

	netif_device_detach(netdev);

	if (netif_running(netdev))
		igb_close(netdev);

	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PM
	retval = pci_save_state(pdev);
	if (retval)
		return retval;
#endif

	status = rd32(E1000_STATUS);
	if (status & E1000_STATUS_LU)
		wufc &= ~E1000_WUFC_LNKC;

	if (wufc) {
		igb_setup_rctl(adapter);
		igb_set_rx_mode(netdev);

		/* turn on all-multi mode if wake on multicast is enabled */
		if (wufc & E1000_WUFC_MC) {
			rctl = rd32(E1000_RCTL);
			rctl |= E1000_RCTL_MPE;
			wr32(E1000_RCTL, rctl);
		}

		ctrl = rd32(E1000_CTRL);
		/* advertise wake from D3Cold */
		#define E1000_CTRL_ADVD3WUC 0x00100000
		/* phy power management enable */
		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
		ctrl |= E1000_CTRL_ADVD3WUC;
		wr32(E1000_CTRL, ctrl);

		/* Allow time for pending master requests to run */
		igb_disable_pcie_master(hw);

		wr32(E1000_WUC, E1000_WUC_PME_EN);
		wr32(E1000_WUFC, wufc);
	} else {
		wr32(E1000_WUC, 0);
		wr32(E1000_WUFC, 0);
	}

	*enable_wake = wufc || adapter->en_mng_pt;
	if (!*enable_wake)
		igb_power_down_link(adapter);
	else
		igb_power_up_link(adapter);

	/* Release control of h/w to f/w. If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	pci_disable_device(pdev);

	return 0;
}

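/* __igb_shutdown() is shared by igb_suspend() and igb_shutdown() below.
 * Through *enable_wake it reports whether Wake-on-LAN filters or
 * manageability (adapter->en_mng_pt) need the part left wake-capable,
 * which the callers translate into pci_prepare_to_sleep() versus a
 * plain transition to D3hot.
 */
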
#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
{
	int retval;
	bool wake;

	retval = __igb_shutdown(pdev, &wake);
	if (retval)
		return retval;

	if (wake) {
		pci_prepare_to_sleep(pdev);
	} else {
		pci_wake_from_d3(pdev, false);
		pci_set_power_state(pdev, PCI_D3hot);
	}

	return 0;
}

static int igb_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int err;

	pci_set_power_state(pdev, PCI_D0);
	pci_restore_state(pdev);
	pci_save_state(pdev);

	err = pci_enable_device_mem(pdev);
	if (err) {
		dev_err(&pdev->dev,
			"igb: Cannot enable PCI device from suspend\n");
		return err;
	}
	pci_set_master(pdev);

	pci_enable_wake(pdev, PCI_D3hot, 0);
	pci_enable_wake(pdev, PCI_D3cold, 0);

	if (igb_init_interrupt_scheme(adapter)) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		return -ENOMEM;
	}

	igb_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);
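
	/* WUS is write-1-to-clear: writing all ones discards any wake-up
	 * status that was latched while the adapter was powered down. */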
	wr32(E1000_WUS, ~0);

	if (netif_running(netdev)) {
		err = igb_open(netdev);
		if (err)
			return err;
	}

	netif_device_attach(netdev);

	return 0;
}
#endif /* CONFIG_PM */

static void igb_shutdown(struct pci_dev *pdev)
{
	bool wake;

	__igb_shutdown(pdev, &wake);

	if (system_state == SYSTEM_POWER_OFF) {
		pci_wake_from_d3(pdev, wake);
		pci_set_power_state(pdev, PCI_D3hot);
	}
}

#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Polling 'interrupt' - used by things like netconsole to send skbs
 * without having to re-enable interrupts. It's not called while
 * the interrupt routine is executing.
 */
static void igb_netpoll(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int i;

	if (!adapter->msix_entries) {
		struct igb_q_vector *q_vector = adapter->q_vector[0];
		igb_irq_disable(adapter);
		napi_schedule(&q_vector->napi);
		return;
	}

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		wr32(E1000_EIMC, q_vector->eims_value);
		napi_schedule(&q_vector->napi);
	}
}
#endif /* CONFIG_NET_POLL_CONTROLLER */

/**
 * igb_io_error_detected - called when PCI error is detected
 * @pdev: Pointer to PCI device
 * @state: The current pci connection state
 *
 * This function is called after a PCI bus error affecting
 * this device has been detected.
 */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	netif_device_detach(netdev);

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	if (netif_running(netdev))
		igb_down(adapter);
	pci_disable_device(pdev);

	/* Request a slot reset. */
	return PCI_ERS_RESULT_NEED_RESET;
}

/**
 * igb_io_slot_reset - called after the pci bus has been reset.
 * @pdev: Pointer to PCI device
 *
 * Restart the card from scratch, as if from a cold-boot. Implementation
 * resembles the first-half of the igb_resume routine.
 */
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	pci_ers_result_t result;
	int err;

	if (pci_enable_device_mem(pdev)) {
		dev_err(&pdev->dev,
			"Cannot re-enable PCI device after reset.\n");
		result = PCI_ERS_RESULT_DISCONNECT;
	} else {
		pci_set_master(pdev);
		pci_restore_state(pdev);
		pci_save_state(pdev);

		pci_enable_wake(pdev, PCI_D3hot, 0);
		pci_enable_wake(pdev, PCI_D3cold, 0);

		igb_reset(adapter);
		wr32(E1000_WUS, ~0);
		result = PCI_ERS_RESULT_RECOVERED;
	}

	err = pci_cleanup_aer_uncorrect_error_status(pdev);
	if (err) {
		dev_err(&pdev->dev,
			"pci_cleanup_aer_uncorrect_error_status failed 0x%x\n",
			err);
		/* non-fatal, continue */
	}

	return result;
}

/**
 * igb_io_resume - called when traffic can start flowing again.
 * @pdev: Pointer to PCI device
 *
 * This callback is called when the error recovery driver tells us that
 * it's OK to resume normal operation. Implementation resembles the
 * second-half of the igb_resume routine.
 */
static void igb_io_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (netif_running(netdev)) {
		if (igb_up(adapter)) {
			dev_err(&pdev->dev, "igb_up failed after reset\n");
			return;
		}
	}

	netif_device_attach(netdev);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);
}

static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
			     u8 qsel)
{
	u32 rar_low, rar_high;
	struct e1000_hw *hw = &adapter->hw;

	/* HW expects these in little endian so we reverse the byte order
	 * from network order (big endian) to little endian
	 */
	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
		   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));

	/* Indicate to hardware the Address is Valid. */
	rar_high |= E1000_RAH_AV;

	if (hw->mac.type == e1000_82575)
		rar_high |= E1000_RAH_POOL_1 * qsel;
	else
		rar_high |= E1000_RAH_POOL_1 << qsel;

	wr32(E1000_RAL(index), rar_low);
	wrfl();
	wr32(E1000_RAH(index), rar_high);
	wrfl();
}

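/* The multiply-versus-shift split above reflects a MAC difference: on
 * 82575 the RAH pool-select field appears to encode the pool number
 * directly, while 82576 and later use a one-hot bitmask with one bit
 * per pool, so the base pool bit is shifted into position.
 */
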
static int igb_set_vf_mac(struct igb_adapter *adapter,
			  int vf, unsigned char *mac_addr)
{
	struct e1000_hw *hw = &adapter->hw;
	/* VF MAC addresses start at the end of the receive addresses and
	 * move towards the first; as a result a collision should not be
	 * possible */
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);

	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);

	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);

	return 0;
}

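/* For example, 82576 parts expose 24 receive-address registers, so
 * VF 0 takes RAR[23], VF 1 RAR[22], and so on downward, while the PF's
 * own unicast entries are allocated upward from RAR[0].
 */
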
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
		return -EINVAL;
	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
	dev_info(&adapter->pdev->dev,
		 "Reload the VF driver to make this change effective.\n");
	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_warn(&adapter->pdev->dev,
			 "The VF MAC address has been set, but the PF device is not up.\n");
		dev_warn(&adapter->pdev->dev,
			 "Bring the PF device up before attempting to use the VF device.\n");
	}
	return igb_set_vf_mac(adapter, vf, mac);
}

static int igb_link_mbps(int internal_link_speed)
{
	switch (internal_link_speed) {
	case SPEED_100:
		return 100;
	case SPEED_1000:
		return 1000;
	default:
		return 0;
	}
}

static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
				  int link_speed)
{
	int rf_dec, rf_int;
	u32 bcnrc_val;

	if (tx_rate != 0) {
		/* Calculate the rate factor values to set */
		rf_int = link_speed / tx_rate;
		rf_dec = (link_speed - (rf_int * tx_rate));
		rf_dec = (rf_dec * (1 << E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;

		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
		bcnrc_val |= ((rf_int << E1000_RTTBCNRC_RF_INT_SHIFT) &
			      E1000_RTTBCNRC_RF_INT_MASK);
		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
	} else {
		bcnrc_val = 0;
	}
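
	/* Worked example (illustrative numbers): at link_speed = 1000 and
	 * tx_rate = 300, rf_int = 1000/300 = 3 and, with the driver's
	 * RF_INT_SHIFT of 14, rf_dec = (100 << 14) / 300 = 5461, so the
	 * programmed divisor is 3 + 5461/16384 ~= 3.333 =
	 * link_speed/tx_rate.
	 */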
	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
	wr32(E1000_RTTBCNRC, bcnrc_val);
}

static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
{
	int actual_link_speed, i;
	bool reset_rate = false;

	/* VF TX rate limit was not set or not supported */
	if ((adapter->vf_rate_link_speed == 0) ||
	    (adapter->hw.mac.type != e1000_82576))
		return;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if (actual_link_speed != adapter->vf_rate_link_speed) {
		reset_rate = true;
		adapter->vf_rate_link_speed = 0;
		dev_info(&adapter->pdev->dev,
			 "Link speed has been changed. VF Transmit rate is disabled\n");
	}

	for (i = 0; i < adapter->vfs_allocated_count; i++) {
		if (reset_rate)
			adapter->vf_data[i].tx_rate = 0;

		igb_set_vf_rate_limit(&adapter->hw, i,
				      adapter->vf_data[i].tx_rate,
				      actual_link_speed);
	}
}

static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int actual_link_speed;

	if (hw->mac.type != e1000_82576)
		return -EOPNOTSUPP;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if ((vf >= adapter->vfs_allocated_count) ||
	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
	    (tx_rate < 0) || (tx_rate > actual_link_speed))
		return -EINVAL;

	adapter->vf_rate_link_speed = actual_link_speed;
	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);

	return 0;
}

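/* igb_ndo_set_vf_bw() implements the ndo_set_vf_tx_rate hook; from
 * userspace the limit is typically applied with iproute2, e.g.
 * "ip link set dev eth0 vf 0 rate 300" (the device name and the
 * 300 Mbps value are illustrative only).
 */
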
static int igb_ndo_get_vf_config(struct net_device *netdev,
				 int vf, struct ifla_vf_info *ivi)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	if (vf >= adapter->vfs_allocated_count)
		return -EINVAL;
	ivi->vf = vf;
	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
	ivi->vlan = adapter->vf_data[vf].pf_vlan;
	ivi->qos = adapter->vf_data[vf].pf_qos;
	return 0;
}

static void igb_vmm_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg;

	switch (hw->mac.type) {
	case e1000_82575:
	default:
		/* replication is not supported for 82575 */
		return;
	case e1000_82576:
		/* notify HW that the MAC is adding vlan tags */
		reg = rd32(E1000_DTXCTL);
		reg |= E1000_DTXCTL_VLAN_ADDED;
		wr32(E1000_DTXCTL, reg);
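		/* Deliberate fall-through: 82576 also needs the VLAN
		 * replication/stripping setup performed for 82580 below. */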
	case e1000_82580:
		/* enable replication vlan tag stripping */
		reg = rd32(E1000_RPLOLR);
		reg |= E1000_RPLOLR_STRVLAN;
		wr32(E1000_RPLOLR, reg);
	case e1000_i350:
		/* none of the above registers are supported by i350 */
		break;
	}

	if (adapter->vfs_allocated_count) {
		igb_vmdq_set_loopback_pf(hw, true);
		igb_vmdq_set_replication_pf(hw, true);
		igb_vmdq_set_anti_spoofing_pf(hw, true,
					      adapter->vfs_allocated_count);
	} else {
		igb_vmdq_set_loopback_pf(hw, false);
		igb_vmdq_set_replication_pf(hw, false);
	}
}