igb: use packet buffer sizes from RXPBS register
[pandora-kernel.git] drivers/net/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
52 #define DRV_VERSION "1.3.16-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56                                 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60         [board_82575] = &e1000_82575_info,
61 };
62
63 static struct pci_device_id igb_pci_tbl[] = {
64         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
74         /* required last entry */
75         {0, }
76 };
77
78 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
79
80 void igb_reset(struct igb_adapter *);
81 static int igb_setup_all_tx_resources(struct igb_adapter *);
82 static int igb_setup_all_rx_resources(struct igb_adapter *);
83 static void igb_free_all_tx_resources(struct igb_adapter *);
84 static void igb_free_all_rx_resources(struct igb_adapter *);
85 static void igb_setup_mrqc(struct igb_adapter *);
86 void igb_update_stats(struct igb_adapter *);
87 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
88 static void __devexit igb_remove(struct pci_dev *pdev);
89 static int igb_sw_init(struct igb_adapter *);
90 static int igb_open(struct net_device *);
91 static int igb_close(struct net_device *);
92 static void igb_configure_tx(struct igb_adapter *);
93 static void igb_configure_rx(struct igb_adapter *);
94 static void igb_clean_all_tx_rings(struct igb_adapter *);
95 static void igb_clean_all_rx_rings(struct igb_adapter *);
96 static void igb_clean_tx_ring(struct igb_ring *);
97 static void igb_clean_rx_ring(struct igb_ring *);
98 static void igb_set_rx_mode(struct net_device *);
99 static void igb_update_phy_info(unsigned long);
100 static void igb_watchdog(unsigned long);
101 static void igb_watchdog_task(struct work_struct *);
102 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
103 static struct net_device_stats *igb_get_stats(struct net_device *);
104 static int igb_change_mtu(struct net_device *, int);
105 static int igb_set_mac(struct net_device *, void *);
106 static void igb_set_uta(struct igb_adapter *adapter);
107 static irqreturn_t igb_intr(int irq, void *);
108 static irqreturn_t igb_intr_msi(int irq, void *);
109 static irqreturn_t igb_msix_other(int irq, void *);
110 static irqreturn_t igb_msix_ring(int irq, void *);
111 #ifdef CONFIG_IGB_DCA
112 static void igb_update_dca(struct igb_q_vector *);
113 static void igb_setup_dca(struct igb_adapter *);
114 #endif /* CONFIG_IGB_DCA */
115 static bool igb_clean_tx_irq(struct igb_q_vector *);
116 static int igb_poll(struct napi_struct *, int);
117 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
118 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
119 static void igb_tx_timeout(struct net_device *);
120 static void igb_reset_task(struct work_struct *);
121 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
122 static void igb_vlan_rx_add_vid(struct net_device *, u16);
123 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
124 static void igb_restore_vlan(struct igb_adapter *);
125 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
126 static void igb_ping_all_vfs(struct igb_adapter *);
127 static void igb_msg_task(struct igb_adapter *);
128 static int igb_rcv_msg_from_vf(struct igb_adapter *, u32);
129 static void igb_vmm_control(struct igb_adapter *);
130 static int igb_set_vf_mac(struct igb_adapter *adapter, int, unsigned char *);
131 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
132
133 static inline void igb_set_vmolr(struct e1000_hw *hw, int vfn)
134 {
135         u32 reg_data;
136
137         reg_data = rd32(E1000_VMOLR(vfn));
138         reg_data |= E1000_VMOLR_BAM |    /* Accept broadcast */
139                     E1000_VMOLR_ROMPE |  /* Accept packets matched in MTA */
140                     E1000_VMOLR_AUPE |   /* Accept untagged packets */
141                     E1000_VMOLR_STRVLAN; /* Strip vlan tags */
142         wr32(E1000_VMOLR(vfn), reg_data);
143 }
144
145 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
146                                  int vfn)
147 {
148         struct e1000_hw *hw = &adapter->hw;
149         u32 vmolr;
150
151         /* if it isn't the PF, check whether VFs are enabled and
152          * increase the size to support VLAN tags */
153         if (vfn < adapter->vfs_allocated_count &&
154             adapter->vf_data[vfn].vlans_enabled)
155                 size += VLAN_TAG_SIZE;
156
157         vmolr = rd32(E1000_VMOLR(vfn));
158         vmolr &= ~E1000_VMOLR_RLPML_MASK;
159         vmolr |= size | E1000_VMOLR_LPE;
160         wr32(E1000_VMOLR(vfn), vmolr);
161
162         return 0;
163 }
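igb_set_vf_rlpml() above folds the per-VF long packet maximum into the low bits of VMOLR, padding the size by one VLAN tag when that VF has VLANs enabled. A minimal standalone sketch of the same composition; the mask and flag values below are placeholders for illustration, not the real register definitions:

#include <stdint.h>

#define RLPML_MASK    0x3fff      /* stand-in for E1000_VMOLR_RLPML_MASK */
#define LPE_FLAG      (1u << 16)  /* stand-in for E1000_VMOLR_LPE */
#define VLAN_TAG_SIZE 4

/* Rebuild a VMOLR value with a new long-packet maximum length. */
static uint32_t vmolr_with_rlpml(uint32_t vmolr, uint32_t size, int vlans_enabled)
{
        if (vlans_enabled)
                size += VLAN_TAG_SIZE;  /* leave room for the 802.1Q tag */
        vmolr &= ~RLPML_MASK;
        return vmolr | size | LPE_FLAG;
}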
164
165 #ifdef CONFIG_PM
166 static int igb_suspend(struct pci_dev *, pm_message_t);
167 static int igb_resume(struct pci_dev *);
168 #endif
169 static void igb_shutdown(struct pci_dev *);
170 #ifdef CONFIG_IGB_DCA
171 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
172 static struct notifier_block dca_notifier = {
173         .notifier_call  = igb_notify_dca,
174         .next           = NULL,
175         .priority       = 0
176 };
177 #endif
178 #ifdef CONFIG_NET_POLL_CONTROLLER
179 /* for netdump / net console */
180 static void igb_netpoll(struct net_device *);
181 #endif
182 #ifdef CONFIG_PCI_IOV
183 static unsigned int max_vfs = 0;
184 module_param(max_vfs, uint, 0);
185 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
186                  "per physical function");
187 #endif /* CONFIG_PCI_IOV */
188
189 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
190                      pci_channel_state_t);
191 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
192 static void igb_io_resume(struct pci_dev *);
193
194 static struct pci_error_handlers igb_err_handler = {
195         .error_detected = igb_io_error_detected,
196         .slot_reset = igb_io_slot_reset,
197         .resume = igb_io_resume,
198 };
199
200
201 static struct pci_driver igb_driver = {
202         .name     = igb_driver_name,
203         .id_table = igb_pci_tbl,
204         .probe    = igb_probe,
205         .remove   = __devexit_p(igb_remove),
206 #ifdef CONFIG_PM
207         /* Power Management Hooks */
208         .suspend  = igb_suspend,
209         .resume   = igb_resume,
210 #endif
211         .shutdown = igb_shutdown,
212         .err_handler = &igb_err_handler
213 };
214
215 static int global_quad_port_a; /* global quad port a indication */
216
217 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
218 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
219 MODULE_LICENSE("GPL");
220 MODULE_VERSION(DRV_VERSION);
221
222 /**
223  * igb_read_clock - read raw cycle counter (to be used by time counter)
224  */
225 static cycle_t igb_read_clock(const struct cyclecounter *tc)
226 {
227         struct igb_adapter *adapter =
228                 container_of(tc, struct igb_adapter, cycles);
229         struct e1000_hw *hw = &adapter->hw;
230         u64 stamp = 0;
231         int shift = 0;
232
233         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
234         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
235         return stamp;
236 }
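igb_read_clock() assembles the raw 64-bit cycle count from two 32-bit reads, SYSTIML for the low half and SYSTIMH for the high half. A minimal sketch of that combination, with plain integer arguments standing in for the register reads:

#include <stdint.h>

/* Combine the low and high halves of the SYSTIM counter the way
 * igb_read_clock() does; lo/hi stand in for rd32(E1000_SYSTIML/H). */
static uint64_t combine_systim(uint32_t lo, uint32_t hi)
{
        return (uint64_t)lo | ((uint64_t)hi << 32);
}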
237
238 #ifdef DEBUG
239 /**
240  * igb_get_hw_dev_name - return device name string
241  * used by hardware layer to print debugging information
242  **/
243 char *igb_get_hw_dev_name(struct e1000_hw *hw)
244 {
245         struct igb_adapter *adapter = hw->back;
246         return adapter->netdev->name;
247 }
248
249 /**
250  * igb_get_time_str - format current NIC and system time as string
251  */
252 static char *igb_get_time_str(struct igb_adapter *adapter,
253                               char buffer[160])
254 {
255         cycle_t hw = adapter->cycles.read(&adapter->cycles);
256         struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
257         struct timespec sys;
258         struct timespec delta;
259         getnstimeofday(&sys);
260
261         delta = timespec_sub(nic, sys);
262
263         sprintf(buffer,
264                 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
265                 hw,
266                 (long)nic.tv_sec, nic.tv_nsec,
267                 (long)sys.tv_sec, sys.tv_nsec,
268                 (long)delta.tv_sec, delta.tv_nsec);
269
270         return buffer;
271 }
272 #endif
273
274 /**
275  * igb_init_module - Driver Registration Routine
276  *
277  * igb_init_module is the first routine called when the driver is
278  * loaded. All it does is register with the PCI subsystem.
279  **/
280 static int __init igb_init_module(void)
281 {
282         int ret;
283         printk(KERN_INFO "%s - version %s\n",
284                igb_driver_string, igb_driver_version);
285
286         printk(KERN_INFO "%s\n", igb_copyright);
287
288         global_quad_port_a = 0;
289
290 #ifdef CONFIG_IGB_DCA
291         dca_register_notify(&dca_notifier);
292 #endif
293
294         ret = pci_register_driver(&igb_driver);
295         return ret;
296 }
297
298 module_init(igb_init_module);
299
300 /**
301  * igb_exit_module - Driver Exit Cleanup Routine
302  *
303  * igb_exit_module is called just before the driver is removed
304  * from memory.
305  **/
306 static void __exit igb_exit_module(void)
307 {
308 #ifdef CONFIG_IGB_DCA
309         dca_unregister_notify(&dca_notifier);
310 #endif
311         pci_unregister_driver(&igb_driver);
312 }
313
314 module_exit(igb_exit_module);
315
316 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
317 /**
318  * igb_cache_ring_register - Descriptor ring to register mapping
319  * @adapter: board private structure to initialize
320  *
321  * Once we know the feature-set enabled for the device, we'll cache
322  * the register offset the descriptor ring is assigned to.
323  **/
324 static void igb_cache_ring_register(struct igb_adapter *adapter)
325 {
326         int i;
327         u32 rbase_offset = adapter->vfs_allocated_count;
328
329         switch (adapter->hw.mac.type) {
330         case e1000_82576:
331                 /* The queues are allocated for virtualization such that VF 0
332                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
333                  * In order to avoid collision we start at the first free queue
334                  * and continue consuming queues in the same sequence
335                  */
336                 for (i = 0; i < adapter->num_rx_queues; i++)
337                         adapter->rx_ring[i].reg_idx = rbase_offset +
338                                                       Q_IDX_82576(i);
339                 for (i = 0; i < adapter->num_tx_queues; i++)
340                         adapter->tx_ring[i].reg_idx = rbase_offset +
341                                                       Q_IDX_82576(i);
342                 break;
343         case e1000_82575:
344         default:
345                 for (i = 0; i < adapter->num_rx_queues; i++)
346                         adapter->rx_ring[i].reg_idx = i;
347                 for (i = 0; i < adapter->num_tx_queues; i++)
348                         adapter->tx_ring[i].reg_idx = i;
349                 break;
350         }
351 }
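In the 82576 case above, Q_IDX_82576() interleaves ring indices so that consecutive software rings land on hardware queues 0, 8, 1, 9, and so on, keeping the PF clear of the queue pairs reserved for VFs. A standalone sketch that only prints this mapping (illustration, not driver code):

#include <stdio.h>

#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))

int main(void)
{
        int i;

        /* software rings 0..7 map to hardware queues 0, 8, 1, 9, 2, 10, 3, 11 */
        for (i = 0; i < 8; i++)
                printf("ring %d -> hw queue %d\n", i, Q_IDX_82576(i));
        return 0;
}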
352
353 static void igb_free_queues(struct igb_adapter *adapter)
354 {
355         kfree(adapter->tx_ring);
356         kfree(adapter->rx_ring);
357
358         adapter->tx_ring = NULL;
359         adapter->rx_ring = NULL;
360
361         adapter->num_rx_queues = 0;
362         adapter->num_tx_queues = 0;
363 }
364
365 /**
366  * igb_alloc_queues - Allocate memory for all rings
367  * @adapter: board private structure to initialize
368  *
369  * We allocate one ring per queue at run-time since we don't know the
370  * number of queues at compile-time.
371  **/
372 static int igb_alloc_queues(struct igb_adapter *adapter)
373 {
374         int i;
375
376         adapter->tx_ring = kcalloc(adapter->num_tx_queues,
377                                    sizeof(struct igb_ring), GFP_KERNEL);
378         if (!adapter->tx_ring)
379                 goto err;
380
381         adapter->rx_ring = kcalloc(adapter->num_rx_queues,
382                                    sizeof(struct igb_ring), GFP_KERNEL);
383         if (!adapter->rx_ring)
384                 goto err;
385
386         for (i = 0; i < adapter->num_tx_queues; i++) {
387                 struct igb_ring *ring = &(adapter->tx_ring[i]);
388                 ring->count = adapter->tx_ring_count;
389                 ring->queue_index = i;
390                 ring->pdev = adapter->pdev;
391                 ring->netdev = adapter->netdev;
392                 /* For 82575, context index must be unique per ring. */
393                 if (adapter->hw.mac.type == e1000_82575)
394                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
395         }
396
397         for (i = 0; i < adapter->num_rx_queues; i++) {
398                 struct igb_ring *ring = &(adapter->rx_ring[i]);
399                 ring->count = adapter->rx_ring_count;
400                 ring->queue_index = i;
401                 ring->pdev = adapter->pdev;
402                 ring->netdev = adapter->netdev;
403                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
404                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
405                 /* set flag indicating ring supports SCTP checksum offload */
406                 if (adapter->hw.mac.type >= e1000_82576)
407                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
408         }
409
410         igb_cache_ring_register(adapter);
411
412         return 0;
413
414 err:
415         igb_free_queues(adapter);
416
417         return -ENOMEM;
418 }
419
420 #define IGB_N0_QUEUE -1
421 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
422 {
423         u32 msixbm = 0;
424         struct igb_adapter *adapter = q_vector->adapter;
425         struct e1000_hw *hw = &adapter->hw;
426         u32 ivar, index;
427         int rx_queue = IGB_N0_QUEUE;
428         int tx_queue = IGB_N0_QUEUE;
429
430         if (q_vector->rx_ring)
431                 rx_queue = q_vector->rx_ring->reg_idx;
432         if (q_vector->tx_ring)
433                 tx_queue = q_vector->tx_ring->reg_idx;
434
435         switch (hw->mac.type) {
436         case e1000_82575:
437                 /* The 82575 assigns vectors using a bitmask, which matches the
438                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
439                    or more queues to a vector, we write the appropriate bits
440                    into the MSIXBM register for that vector. */
441                 if (rx_queue > IGB_N0_QUEUE)
442                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
443                 if (tx_queue > IGB_N0_QUEUE)
444                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
445                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
446                 q_vector->eims_value = msixbm;
447                 break;
448         case e1000_82576:
449                 /* 82576 uses a table-based method for assigning vectors.
450                    Each queue has a single entry in the table to which we write
451                    a vector number along with a "valid" bit.  Sadly, the layout
452                    of the table is somewhat counterintuitive. */
453                 if (rx_queue > IGB_N0_QUEUE) {
454                         index = (rx_queue & 0x7);
455                         ivar = array_rd32(E1000_IVAR0, index);
456                         if (rx_queue < 8) {
457                                 /* vector goes into low byte of register */
458                                 ivar = ivar & 0xFFFFFF00;
459                                 ivar |= msix_vector | E1000_IVAR_VALID;
460                         } else {
461                                 /* vector goes into third byte of register */
462                                 ivar = ivar & 0xFF00FFFF;
463                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
464                         }
465                         array_wr32(E1000_IVAR0, index, ivar);
466                 }
467                 if (tx_queue > IGB_N0_QUEUE) {
468                         index = (tx_queue & 0x7);
469                         ivar = array_rd32(E1000_IVAR0, index);
470                         if (tx_queue < 8) {
471                                 /* vector goes into second byte of register */
472                                 ivar = ivar & 0xFFFF00FF;
473                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
474                         } else {
475                                 /* vector goes into high byte of register */
476                                 ivar = ivar & 0x00FFFFFF;
477                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
478                         }
479                         array_wr32(E1000_IVAR0, index, ivar);
480                 }
481                 q_vector->eims_value = 1 << msix_vector;
482                 break;
483         default:
484                 BUG();
485                 break;
486         }
487 }
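For the 82576 path above, each 32-bit IVAR entry carries four vector assignments: Rx queues 0-7 use byte 0, Tx queues 0-7 byte 1, Rx queues 8-15 byte 2 and Tx queues 8-15 byte 3. A hedged sketch of just the byte placement; the valid-bit value is a placeholder, not the real E1000_IVAR_VALID definition:

#include <stdint.h>

#define IVAR_VALID 0x80  /* placeholder valid bit for illustration */

/* Write 'vector' into the IVAR byte the 82576 uses for the given queue;
 * 'is_tx' selects the Tx columns, queues 8-15 use the upper half-word. */
static uint32_t ivar_set(uint32_t ivar, int queue, int is_tx, uint8_t vector)
{
        int shift = (is_tx ? 8 : 0) + ((queue & 0x8) ? 16 : 0);

        ivar &= ~(0xffu << shift);
        ivar |= (uint32_t)(vector | IVAR_VALID) << shift;
        return ivar;
}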
488
489 /**
490  * igb_configure_msix - Configure MSI-X hardware
491  *
492  * igb_configure_msix sets up the hardware to properly
493  * generate MSI-X interrupts.
494  **/
495 static void igb_configure_msix(struct igb_adapter *adapter)
496 {
497         u32 tmp;
498         int i, vector = 0;
499         struct e1000_hw *hw = &adapter->hw;
500
501         adapter->eims_enable_mask = 0;
502
503         /* set vector for other causes, i.e. link changes */
504         switch (hw->mac.type) {
505         case e1000_82575:
506                 tmp = rd32(E1000_CTRL_EXT);
507                 /* enable MSI-X PBA support */
508                 tmp |= E1000_CTRL_EXT_PBA_CLR;
509
510                 /* Auto-Mask interrupts upon ICR read. */
511                 tmp |= E1000_CTRL_EXT_EIAME;
512                 tmp |= E1000_CTRL_EXT_IRCA;
513
514                 wr32(E1000_CTRL_EXT, tmp);
515
516                 /* enable msix_other interrupt */
517                 array_wr32(E1000_MSIXBM(0), vector++,
518                                       E1000_EIMS_OTHER);
519                 adapter->eims_other = E1000_EIMS_OTHER;
520
521                 break;
522
523         case e1000_82576:
524                 /* Turn on MSI-X capability first, or our settings
525                  * won't stick.  And it will take days to debug. */
526                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
527                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
528                                 E1000_GPIE_NSICR);
529
530                 /* enable msix_other interrupt */
531                 adapter->eims_other = 1 << vector;
532                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
533
534                 wr32(E1000_IVAR_MISC, tmp);
535                 break;
536         default:
537                 /* do nothing, since nothing else supports MSI-X */
538                 break;
539         } /* switch (hw->mac.type) */
540
541         adapter->eims_enable_mask |= adapter->eims_other;
542
543         for (i = 0; i < adapter->num_q_vectors; i++) {
544                 struct igb_q_vector *q_vector = adapter->q_vector[i];
545                 igb_assign_vector(q_vector, vector++);
546                 adapter->eims_enable_mask |= q_vector->eims_value;
547         }
548
549         wrfl();
550 }
551
552 /**
553  * igb_request_msix - Initialize MSI-X interrupts
554  *
555  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
556  * kernel.
557  **/
558 static int igb_request_msix(struct igb_adapter *adapter)
559 {
560         struct net_device *netdev = adapter->netdev;
561         struct e1000_hw *hw = &adapter->hw;
562         int i, err = 0, vector = 0;
563
564         err = request_irq(adapter->msix_entries[vector].vector,
565                           &igb_msix_other, 0, netdev->name, adapter);
566         if (err)
567                 goto out;
568         vector++;
569
570         for (i = 0; i < adapter->num_q_vectors; i++) {
571                 struct igb_q_vector *q_vector = adapter->q_vector[i];
572
573                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
574
575                 if (q_vector->rx_ring && q_vector->tx_ring)
576                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
577                                 q_vector->rx_ring->queue_index);
578                 else if (q_vector->tx_ring)
579                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
580                                 q_vector->tx_ring->queue_index);
581                 else if (q_vector->rx_ring)
582                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
583                                 q_vector->rx_ring->queue_index);
584                 else
585                         sprintf(q_vector->name, "%s-unused", netdev->name);
586
587                 err = request_irq(adapter->msix_entries[vector].vector,
588                                   &igb_msix_ring, 0, q_vector->name,
589                                   q_vector);
590                 if (err)
591                         goto out;
592                 vector++;
593         }
594
595         igb_configure_msix(adapter);
596         return 0;
597 out:
598         return err;
599 }
600
601 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
602 {
603         if (adapter->msix_entries) {
604                 pci_disable_msix(adapter->pdev);
605                 kfree(adapter->msix_entries);
606                 adapter->msix_entries = NULL;
607         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
608                 pci_disable_msi(adapter->pdev);
609         }
610 }
611
612 /**
613  * igb_free_q_vectors - Free memory allocated for interrupt vectors
614  * @adapter: board private structure to initialize
615  *
616  * This function frees the memory allocated to the q_vectors.  In addition if
617  * NAPI is enabled it will delete any references to the NAPI struct prior
618  * to freeing the q_vector.
619  **/
620 static void igb_free_q_vectors(struct igb_adapter *adapter)
621 {
622         int v_idx;
623
624         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
625                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
626                 adapter->q_vector[v_idx] = NULL;
627                 netif_napi_del(&q_vector->napi);
628                 kfree(q_vector);
629         }
630         adapter->num_q_vectors = 0;
631 }
632
633 /**
634  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
635  *
636  * This function resets the device so that it has 0 rx queues, tx queues, and
637  * MSI-X interrupts allocated.
638  */
639 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
640 {
641         igb_free_queues(adapter);
642         igb_free_q_vectors(adapter);
643         igb_reset_interrupt_capability(adapter);
644 }
645
646 /**
647  * igb_set_interrupt_capability - set MSI or MSI-X if supported
648  *
649  * Attempt to configure interrupts using the best available
650  * capabilities of the hardware and kernel.
651  **/
652 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
653 {
654         int err;
655         int numvecs, i;
656
657         /* Number of supported queues. */
658         adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
659         adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus());
660
661         /* start with one vector for every rx queue */
662         numvecs = adapter->num_rx_queues;
663
664         /* if the tx handler is separate, add 1 for every tx queue */
665         numvecs += adapter->num_tx_queues;
666
667         /* store the number of vectors reserved for queues */
668         adapter->num_q_vectors = numvecs;
669
670         /* add 1 vector for link status interrupts */
671         numvecs++;
672         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
673                                         GFP_KERNEL);
674         if (!adapter->msix_entries)
675                 goto msi_only;
676
677         for (i = 0; i < numvecs; i++)
678                 adapter->msix_entries[i].entry = i;
679
680         err = pci_enable_msix(adapter->pdev,
681                               adapter->msix_entries,
682                               numvecs);
683         if (err == 0)
684                 goto out;
685
686         igb_reset_interrupt_capability(adapter);
687
688         /* If we can't do MSI-X, try MSI */
689 msi_only:
690 #ifdef CONFIG_PCI_IOV
691         /* disable SR-IOV for non MSI-X configurations */
692         if (adapter->vf_data) {
693                 struct e1000_hw *hw = &adapter->hw;
694                 /* disable iov and allow time for transactions to clear */
695                 pci_disable_sriov(adapter->pdev);
696                 msleep(500);
697
698                 kfree(adapter->vf_data);
699                 adapter->vf_data = NULL;
700                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
701                 msleep(100);
702                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
703         }
704 #endif
705         adapter->vfs_allocated_count = 0;
706         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
707         adapter->num_rx_queues = 1;
708         adapter->num_tx_queues = 1;
709         adapter->num_q_vectors = 1;
710         if (!pci_enable_msi(adapter->pdev))
711                 adapter->flags |= IGB_FLAG_HAS_MSI;
712 out:
713         /* Notify the stack of the (possibly) reduced Tx Queue count. */
714         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
715         return;
716 }
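The vector budget requested above is simply one MSI-X vector per Rx queue, one per Tx queue while Tx is handled separately, plus one for link and other causes. A trivial sketch of that count, with assumed example queue numbers:

#include <stdio.h>

int main(void)
{
        int rx_queues = 4, tx_queues = 4;     /* assumed example counts */
        int numvecs = rx_queues + tx_queues;  /* one vector per queue */

        numvecs++;                            /* link status / other causes */
        printf("requesting %d MSI-X vectors\n", numvecs);
        return 0;
}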
717
718 /**
719  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
720  * @adapter: board private structure to initialize
721  *
722  * We allocate one q_vector per queue interrupt.  If allocation fails we
723  * return -ENOMEM.
724  **/
725 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
726 {
727         struct igb_q_vector *q_vector;
728         struct e1000_hw *hw = &adapter->hw;
729         int v_idx;
730
731         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
732                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
733                 if (!q_vector)
734                         goto err_out;
735                 q_vector->adapter = adapter;
736                 q_vector->itr_shift = (hw->mac.type == e1000_82575) ? 16 : 0;
737                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
738                 q_vector->itr_val = IGB_START_ITR;
739                 q_vector->set_itr = 1;
740                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
741                 adapter->q_vector[v_idx] = q_vector;
742         }
743         return 0;
744
745 err_out:
746         while (v_idx) {
747                 v_idx--;
748                 q_vector = adapter->q_vector[v_idx];
749                 netif_napi_del(&q_vector->napi);
750                 kfree(q_vector);
751                 adapter->q_vector[v_idx] = NULL;
752         }
753         return -ENOMEM;
754 }
755
756 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
757                                       int ring_idx, int v_idx)
758 {
759         struct igb_q_vector *q_vector;
760
761         q_vector = adapter->q_vector[v_idx];
762         q_vector->rx_ring = &adapter->rx_ring[ring_idx];
763         q_vector->rx_ring->q_vector = q_vector;
764         q_vector->itr_val = adapter->rx_itr_setting;
765         if (q_vector->itr_val && q_vector->itr_val <= 3)
766                 q_vector->itr_val = IGB_START_ITR;
767 }
768
769 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
770                                       int ring_idx, int v_idx)
771 {
772         struct igb_q_vector *q_vector;
773
774         q_vector = adapter->q_vector[v_idx];
775         q_vector->tx_ring = &adapter->tx_ring[ring_idx];
776         q_vector->tx_ring->q_vector = q_vector;
777         q_vector->itr_val = adapter->tx_itr_setting;
778         if (q_vector->itr_val && q_vector->itr_val <= 3)
779                 q_vector->itr_val = IGB_START_ITR;
780 }
781
782 /**
783  * igb_map_ring_to_vector - maps allocated queues to vectors
784  *
785  * This function maps the recently allocated queues to vectors.
786  **/
787 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
788 {
789         int i;
790         int v_idx = 0;
791
792         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
793             (adapter->num_q_vectors < adapter->num_tx_queues))
794                 return -ENOMEM;
795
796         if (adapter->num_q_vectors >=
797             (adapter->num_rx_queues + adapter->num_tx_queues)) {
798                 for (i = 0; i < adapter->num_rx_queues; i++)
799                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
800                 for (i = 0; i < adapter->num_tx_queues; i++)
801                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
802         } else {
803                 for (i = 0; i < adapter->num_rx_queues; i++) {
804                         if (i < adapter->num_tx_queues)
805                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
806                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
807                 }
808                 for (; i < adapter->num_tx_queues; i++)
809                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
810         }
811         return 0;
812 }
813
814 /**
815  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
816  *
817  * This function initializes the interrupts and allocates all of the queues.
818  **/
819 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
820 {
821         struct pci_dev *pdev = adapter->pdev;
822         int err;
823
824         igb_set_interrupt_capability(adapter);
825
826         err = igb_alloc_q_vectors(adapter);
827         if (err) {
828                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
829                 goto err_alloc_q_vectors;
830         }
831
832         err = igb_alloc_queues(adapter);
833         if (err) {
834                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
835                 goto err_alloc_queues;
836         }
837
838         err = igb_map_ring_to_vector(adapter);
839         if (err) {
840                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
841                 goto err_map_queues;
842         }
843
844
845         return 0;
846 err_map_queues:
847         igb_free_queues(adapter);
848 err_alloc_queues:
849         igb_free_q_vectors(adapter);
850 err_alloc_q_vectors:
851         igb_reset_interrupt_capability(adapter);
852         return err;
853 }
854
855 /**
856  * igb_request_irq - initialize interrupts
857  *
858  * Attempts to configure interrupts using the best available
859  * capabilities of the hardware and kernel.
860  **/
861 static int igb_request_irq(struct igb_adapter *adapter)
862 {
863         struct net_device *netdev = adapter->netdev;
864         struct pci_dev *pdev = adapter->pdev;
865         struct e1000_hw *hw = &adapter->hw;
866         int err = 0;
867
868         if (adapter->msix_entries) {
869                 err = igb_request_msix(adapter);
870                 if (!err)
871                         goto request_done;
872                 /* fall back to MSI */
873                 igb_clear_interrupt_scheme(adapter);
874                 if (!pci_enable_msi(adapter->pdev))
875                         adapter->flags |= IGB_FLAG_HAS_MSI;
876                 igb_free_all_tx_resources(adapter);
877                 igb_free_all_rx_resources(adapter);
878                 adapter->num_tx_queues = 1;
879                 adapter->num_rx_queues = 1;
880                 adapter->num_q_vectors = 1;
881                 err = igb_alloc_q_vectors(adapter);
882                 if (err) {
883                         dev_err(&pdev->dev,
884                                 "Unable to allocate memory for vectors\n");
885                         goto request_done;
886                 }
887                 err = igb_alloc_queues(adapter);
888                 if (err) {
889                         dev_err(&pdev->dev,
890                                 "Unable to allocate memory for queues\n");
891                         igb_free_q_vectors(adapter);
892                         goto request_done;
893                 }
894                 igb_setup_all_tx_resources(adapter);
895                 igb_setup_all_rx_resources(adapter);
896         } else {
897                 switch (hw->mac.type) {
898                 case e1000_82575:
899                         wr32(E1000_MSIXBM(0),
900                              (E1000_EICR_RX_QUEUE0 |
901                               E1000_EICR_TX_QUEUE0 |
902                               E1000_EIMS_OTHER));
903                         break;
904                 case e1000_82576:
905                         wr32(E1000_IVAR0, E1000_IVAR_VALID);
906                         break;
907                 default:
908                         break;
909                 }
910         }
911
912         if (adapter->flags & IGB_FLAG_HAS_MSI) {
913                 err = request_irq(adapter->pdev->irq, &igb_intr_msi, 0,
914                                   netdev->name, adapter);
915                 if (!err)
916                         goto request_done;
917
918                 /* fall back to legacy interrupts */
919                 igb_reset_interrupt_capability(adapter);
920                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
921         }
922
923         err = request_irq(adapter->pdev->irq, &igb_intr, IRQF_SHARED,
924                           netdev->name, adapter);
925
926         if (err)
927                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
928                         err);
929
930 request_done:
931         return err;
932 }
933
934 static void igb_free_irq(struct igb_adapter *adapter)
935 {
936         if (adapter->msix_entries) {
937                 int vector = 0, i;
938
939                 free_irq(adapter->msix_entries[vector++].vector, adapter);
940
941                 for (i = 0; i < adapter->num_q_vectors; i++) {
942                         struct igb_q_vector *q_vector = adapter->q_vector[i];
943                         free_irq(adapter->msix_entries[vector++].vector,
944                                  q_vector);
945                 }
946         } else {
947                 free_irq(adapter->pdev->irq, adapter);
948         }
949 }
950
951 /**
952  * igb_irq_disable - Mask off interrupt generation on the NIC
953  * @adapter: board private structure
954  **/
955 static void igb_irq_disable(struct igb_adapter *adapter)
956 {
957         struct e1000_hw *hw = &adapter->hw;
958
959         if (adapter->msix_entries) {
960                 u32 regval = rd32(E1000_EIAM);
961                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
962                 wr32(E1000_EIMC, adapter->eims_enable_mask);
963                 regval = rd32(E1000_EIAC);
964                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
965         }
966
967         wr32(E1000_IAM, 0);
968         wr32(E1000_IMC, ~0);
969         wrfl();
970         synchronize_irq(adapter->pdev->irq);
971 }
972
973 /**
974  * igb_irq_enable - Enable default interrupt generation settings
975  * @adapter: board private structure
976  **/
977 static void igb_irq_enable(struct igb_adapter *adapter)
978 {
979         struct e1000_hw *hw = &adapter->hw;
980
981         if (adapter->msix_entries) {
982                 u32 regval = rd32(E1000_EIAC);
983                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
984                 regval = rd32(E1000_EIAM);
985                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
986                 wr32(E1000_EIMS, adapter->eims_enable_mask);
987                 if (adapter->vfs_allocated_count)
988                         wr32(E1000_MBVFIMR, 0xFF);
989                 wr32(E1000_IMS, (E1000_IMS_LSC | E1000_IMS_VMMB |
990                                  E1000_IMS_DOUTSYNC));
991         } else {
992                 wr32(E1000_IMS, IMS_ENABLE_MASK);
993                 wr32(E1000_IAM, IMS_ENABLE_MASK);
994         }
995 }
996
997 static void igb_update_mng_vlan(struct igb_adapter *adapter)
998 {
999         struct net_device *netdev = adapter->netdev;
1000         u16 vid = adapter->hw.mng_cookie.vlan_id;
1001         u16 old_vid = adapter->mng_vlan_id;
1002         if (adapter->vlgrp) {
1003                 if (!vlan_group_get_device(adapter->vlgrp, vid)) {
1004                         if (adapter->hw.mng_cookie.status &
1005                                 E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1006                                 igb_vlan_rx_add_vid(netdev, vid);
1007                                 adapter->mng_vlan_id = vid;
1008                         } else
1009                                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1010
1011                         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1012                                         (vid != old_vid) &&
1013                             !vlan_group_get_device(adapter->vlgrp, old_vid))
1014                                 igb_vlan_rx_kill_vid(netdev, old_vid);
1015                 } else
1016                         adapter->mng_vlan_id = vid;
1017         }
1018 }
1019
1020 /**
1021  * igb_release_hw_control - release control of the h/w to f/w
1022  * @adapter: address of board private structure
1023  *
1024  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1025  * For ASF and Pass Through versions of f/w this means that the
1026  * driver is no longer loaded.
1027  *
1028  **/
1029 static void igb_release_hw_control(struct igb_adapter *adapter)
1030 {
1031         struct e1000_hw *hw = &adapter->hw;
1032         u32 ctrl_ext;
1033
1034         /* Let firmware take over control of h/w */
1035         ctrl_ext = rd32(E1000_CTRL_EXT);
1036         wr32(E1000_CTRL_EXT,
1037                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1038 }
1039
1040
1041 /**
1042  * igb_get_hw_control - get control of the h/w from f/w
1043  * @adapter: address of board private structure
1044  *
1045  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1046  * For ASF and Pass Through versions of f/w this means that
1047  * the driver is loaded.
1048  *
1049  **/
1050 static void igb_get_hw_control(struct igb_adapter *adapter)
1051 {
1052         struct e1000_hw *hw = &adapter->hw;
1053         u32 ctrl_ext;
1054
1055         /* Let firmware know the driver has taken over */
1056         ctrl_ext = rd32(E1000_CTRL_EXT);
1057         wr32(E1000_CTRL_EXT,
1058                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1059 }
1060
1061 /**
1062  * igb_configure - configure the hardware for RX and TX
1063  * @adapter: private board structure
1064  **/
1065 static void igb_configure(struct igb_adapter *adapter)
1066 {
1067         struct net_device *netdev = adapter->netdev;
1068         int i;
1069
1070         igb_get_hw_control(adapter);
1071         igb_set_rx_mode(netdev);
1072
1073         igb_restore_vlan(adapter);
1074
1075         igb_setup_tctl(adapter);
1076         igb_setup_mrqc(adapter);
1077         igb_setup_rctl(adapter);
1078
1079         igb_configure_tx(adapter);
1080         igb_configure_rx(adapter);
1081
1082         igb_rx_fifo_flush_82575(&adapter->hw);
1083
1084         /* call igb_desc_unused which always leaves
1085          * at least 1 descriptor unused to make sure
1086          * next_to_use != next_to_clean */
1087         for (i = 0; i < adapter->num_rx_queues; i++) {
1088                 struct igb_ring *ring = &adapter->rx_ring[i];
1089                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1090         }
1091
1092
1093         adapter->tx_queue_len = netdev->tx_queue_len;
1094 }
1095
1096
1097 /**
1098  * igb_up - Open the interface and prepare it to handle traffic
1099  * @adapter: board private structure
1100  **/
1101
1102 int igb_up(struct igb_adapter *adapter)
1103 {
1104         struct e1000_hw *hw = &adapter->hw;
1105         int i;
1106
1107         /* hardware has been reset, we need to reload some things */
1108         igb_configure(adapter);
1109
1110         clear_bit(__IGB_DOWN, &adapter->state);
1111
1112         for (i = 0; i < adapter->num_q_vectors; i++) {
1113                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1114                 napi_enable(&q_vector->napi);
1115         }
1116         if (adapter->msix_entries)
1117                 igb_configure_msix(adapter);
1118
1119         igb_set_vmolr(hw, adapter->vfs_allocated_count);
1120
1121         /* Clear any pending interrupts. */
1122         rd32(E1000_ICR);
1123         igb_irq_enable(adapter);
1124
1125         /* notify VFs that reset has been completed */
1126         if (adapter->vfs_allocated_count) {
1127                 u32 reg_data = rd32(E1000_CTRL_EXT);
1128                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1129                 wr32(E1000_CTRL_EXT, reg_data);
1130         }
1131
1132         netif_tx_start_all_queues(adapter->netdev);
1133
1134         /* Fire a link change interrupt to start the watchdog. */
1135         wr32(E1000_ICS, E1000_ICS_LSC);
1136         return 0;
1137 }
1138
1139 void igb_down(struct igb_adapter *adapter)
1140 {
1141         struct e1000_hw *hw = &adapter->hw;
1142         struct net_device *netdev = adapter->netdev;
1143         u32 tctl, rctl;
1144         int i;
1145
1146         /* signal that we're down so the interrupt handler does not
1147          * reschedule our watchdog timer */
1148         set_bit(__IGB_DOWN, &adapter->state);
1149
1150         /* disable receives in the hardware */
1151         rctl = rd32(E1000_RCTL);
1152         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1153         /* flush and sleep below */
1154
1155         netif_tx_stop_all_queues(netdev);
1156
1157         /* disable transmits in the hardware */
1158         tctl = rd32(E1000_TCTL);
1159         tctl &= ~E1000_TCTL_EN;
1160         wr32(E1000_TCTL, tctl);
1161         /* flush both disables and wait for them to finish */
1162         wrfl();
1163         msleep(10);
1164
1165         for (i = 0; i < adapter->num_q_vectors; i++) {
1166                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1167                 napi_disable(&q_vector->napi);
1168         }
1169
1170         igb_irq_disable(adapter);
1171
1172         del_timer_sync(&adapter->watchdog_timer);
1173         del_timer_sync(&adapter->phy_info_timer);
1174
1175         netdev->tx_queue_len = adapter->tx_queue_len;
1176         netif_carrier_off(netdev);
1177
1178         /* record the stats before reset */
1179         igb_update_stats(adapter);
1180
1181         adapter->link_speed = 0;
1182         adapter->link_duplex = 0;
1183
1184         if (!pci_channel_offline(adapter->pdev))
1185                 igb_reset(adapter);
1186         igb_clean_all_tx_rings(adapter);
1187         igb_clean_all_rx_rings(adapter);
1188 #ifdef CONFIG_IGB_DCA
1189
1190         /* since we reset the hardware, DCA settings were cleared */
1191         igb_setup_dca(adapter);
1192 #endif
1193 }
1194
1195 void igb_reinit_locked(struct igb_adapter *adapter)
1196 {
1197         WARN_ON(in_interrupt());
1198         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1199                 msleep(1);
1200         igb_down(adapter);
1201         igb_up(adapter);
1202         clear_bit(__IGB_RESETTING, &adapter->state);
1203 }
1204
1205 void igb_reset(struct igb_adapter *adapter)
1206 {
1207         struct e1000_hw *hw = &adapter->hw;
1208         struct e1000_mac_info *mac = &hw->mac;
1209         struct e1000_fc_info *fc = &hw->fc;
1210         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1211         u16 hwm;
1212
1213         /* Repartition the PBA for MTUs greater than 9k.
1214          * CTRL.RST is required for the change to take effect.
1215          */
1216         switch (mac->type) {
1217         case e1000_82576:
1218                 pba = rd32(E1000_RXPBS);
1219                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1220                 break;
1221         case e1000_82575:
1222         default:
1223                 pba = E1000_PBA_34K;
1224                 break;
1225         }
1226
1227         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1228             (mac->type < e1000_82576)) {
1229                 /* adjust PBA for jumbo frames */
1230                 wr32(E1000_PBA, pba);
1231
1232                 /* To maintain wire speed transmits, the Tx FIFO should be
1233                  * large enough to accommodate two full transmit packets,
1234                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1235                  * the Rx FIFO should be large enough to accommodate at least
1236                  * one full receive packet and is similarly rounded up and
1237                  * expressed in KB. */
1238                 pba = rd32(E1000_PBA);
1239                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1240                 tx_space = pba >> 16;
1241                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1242                 pba &= 0xffff;
1243                 /* the tx fifo also stores 16 bytes of information about the tx
1244                  * packet, but needs no room for the ethernet FCS because hardware appends it */
1245                 min_tx_space = (adapter->max_frame_size +
1246                                 sizeof(union e1000_adv_tx_desc) -
1247                                 ETH_FCS_LEN) * 2;
1248                 min_tx_space = ALIGN(min_tx_space, 1024);
1249                 min_tx_space >>= 10;
1250                 /* software strips receive CRC, so leave room for it */
1251                 min_rx_space = adapter->max_frame_size;
1252                 min_rx_space = ALIGN(min_rx_space, 1024);
1253                 min_rx_space >>= 10;
1254
1255                 /* If current Tx allocation is less than the min Tx FIFO size,
1256                  * and the min Tx FIFO size is less than the current Rx FIFO
1257                  * allocation, take space away from current Rx allocation */
1258                 if (tx_space < min_tx_space &&
1259                     ((min_tx_space - tx_space) < pba)) {
1260                         pba = pba - (min_tx_space - tx_space);
1261
1262                         /* if short on rx space, rx wins and must trump tx
1263                          * adjustment */
1264                         if (pba < min_rx_space)
1265                                 pba = min_rx_space;
1266                 }
1267                 wr32(E1000_PBA, pba);
1268         }
1269
1270         /* flow control settings */
1271         /* The high water mark must be low enough to fit one full frame
1272          * (or the size used for early receive) above it in the Rx FIFO.
1273          * Set it to the lower of:
1274          * - 90% of the Rx FIFO size, or
1275          * - the full Rx FIFO size minus one full frame */
1276         hwm = min(((pba << 10) * 9 / 10),
1277                         ((pba << 10) - 2 * adapter->max_frame_size));
1278
1279         if (mac->type < e1000_82576) {
1280                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
1281                 fc->low_water = fc->high_water - 8;
1282         } else {
1283                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1284                 fc->low_water = fc->high_water - 16;
1285         }
1286         fc->pause_time = 0xFFFF;
1287         fc->send_xon = 1;
1288         fc->current_mode = fc->requested_mode;
1289
1290         /* disable receive for all VFs and wait one second */
1291         if (adapter->vfs_allocated_count) {
1292                 int i;
1293                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1294                         adapter->vf_data[i].clear_to_send = false;
1295
1296                 /* ping all the active vfs to let them know we are going down */
1297                 igb_ping_all_vfs(adapter);
1298
1299                 /* disable transmits and receives */
1300                 wr32(E1000_VFRE, 0);
1301                 wr32(E1000_VFTE, 0);
1302         }
1303
1304         /* Allow time for pending master requests to run */
1305         adapter->hw.mac.ops.reset_hw(&adapter->hw);
1306         wr32(E1000_WUC, 0);
1307
1308         if (adapter->hw.mac.ops.init_hw(&adapter->hw))
1309                 dev_err(&adapter->pdev->dev, "Hardware Error\n");
1310
1311         igb_update_mng_vlan(adapter);
1312
1313         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1314         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1315
1316         igb_reset_adaptive(&adapter->hw);
1317         igb_get_phy_info(&adapter->hw);
1318 }
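The PBA repartitioning in igb_reset() sizes the Tx FIFO for two maximum-sized frames plus descriptor overhead, minus the FCS that hardware appends, rounded up to 1KB units, and then picks a flow-control high-water mark as the lower of 90% of the Rx buffer and the buffer minus two full frames. A standalone sketch of that arithmetic with assumed example numbers (9018-byte jumbo frame, 34KB buffer); the values are illustrative only:

#include <stdio.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) / (a) * (a))

int main(void)
{
        unsigned int max_frame = 9018;  /* assumed jumbo frame incl. headers */
        unsigned int desc_size = 16;    /* advanced Tx descriptor size */
        unsigned int fcs = 4;           /* Ethernet FCS, appended by hardware */
        unsigned int pba_kb = 34;       /* e.g. an E1000_PBA_34K sized buffer */
        unsigned int min_tx_kb, min_rx_kb, hwm, cap;

        min_tx_kb = ALIGN_UP((max_frame + desc_size - fcs) * 2, 1024) >> 10;
        min_rx_kb = ALIGN_UP(max_frame, 1024) >> 10;
        hwm = (pba_kb << 10) * 9 / 10;
        cap = (pba_kb << 10) - 2 * max_frame;
        if (cap < hwm)
                hwm = cap;

        printf("min tx %u KB, min rx %u KB, high water %u bytes\n",
               min_tx_kb, min_rx_kb, hwm);
        return 0;
}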
1319
1320 static const struct net_device_ops igb_netdev_ops = {
1321         .ndo_open               = igb_open,
1322         .ndo_stop               = igb_close,
1323         .ndo_start_xmit         = igb_xmit_frame_adv,
1324         .ndo_get_stats          = igb_get_stats,
1325         .ndo_set_rx_mode        = igb_set_rx_mode,
1326         .ndo_set_multicast_list = igb_set_rx_mode,
1327         .ndo_set_mac_address    = igb_set_mac,
1328         .ndo_change_mtu         = igb_change_mtu,
1329         .ndo_do_ioctl           = igb_ioctl,
1330         .ndo_tx_timeout         = igb_tx_timeout,
1331         .ndo_validate_addr      = eth_validate_addr,
1332         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1333         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1334         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1335 #ifdef CONFIG_NET_POLL_CONTROLLER
1336         .ndo_poll_controller    = igb_netpoll,
1337 #endif
1338 };
1339
1340 /**
1341  * igb_probe - Device Initialization Routine
1342  * @pdev: PCI device information struct
1343  * @ent: entry in igb_pci_tbl
1344  *
1345  * Returns 0 on success, negative on failure
1346  *
1347  * igb_probe initializes an adapter identified by a pci_dev structure.
1348  * The OS initialization, configuring of the adapter private structure,
1349  * and a hardware reset occur.
1350  **/
1351 static int __devinit igb_probe(struct pci_dev *pdev,
1352                                const struct pci_device_id *ent)
1353 {
1354         struct net_device *netdev;
1355         struct igb_adapter *adapter;
1356         struct e1000_hw *hw;
1357         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1358         unsigned long mmio_start, mmio_len;
1359         int err, pci_using_dac;
1360         u16 eeprom_data = 0;
1361         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1362         u32 part_num;
1363
1364         err = pci_enable_device_mem(pdev);
1365         if (err)
1366                 return err;
1367
1368         pci_using_dac = 0;
1369         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1370         if (!err) {
1371                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1372                 if (!err)
1373                         pci_using_dac = 1;
1374         } else {
1375                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1376                 if (err) {
1377                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1378                         if (err) {
1379                                 dev_err(&pdev->dev, "No usable DMA "
1380                                         "configuration, aborting\n");
1381                                 goto err_dma;
1382                         }
1383                 }
1384         }
1385
1386         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1387                                            IORESOURCE_MEM),
1388                                            igb_driver_name);
1389         if (err)
1390                 goto err_pci_reg;
1391
1392         pci_enable_pcie_error_reporting(pdev);
1393
1394         pci_set_master(pdev);
1395         pci_save_state(pdev);
1396
1397         err = -ENOMEM;
1398         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1399                                    IGB_ABS_MAX_TX_QUEUES);
1400         if (!netdev)
1401                 goto err_alloc_etherdev;
1402
1403         SET_NETDEV_DEV(netdev, &pdev->dev);
1404
1405         pci_set_drvdata(pdev, netdev);
1406         adapter = netdev_priv(netdev);
1407         adapter->netdev = netdev;
1408         adapter->pdev = pdev;
1409         hw = &adapter->hw;
1410         hw->back = adapter;
1411         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1412
1413         mmio_start = pci_resource_start(pdev, 0);
1414         mmio_len = pci_resource_len(pdev, 0);
1415
1416         err = -EIO;
1417         hw->hw_addr = ioremap(mmio_start, mmio_len);
1418         if (!hw->hw_addr)
1419                 goto err_ioremap;
1420
1421         netdev->netdev_ops = &igb_netdev_ops;
1422         igb_set_ethtool_ops(netdev);
1423         netdev->watchdog_timeo = 5 * HZ;
1424
1425         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1426
1427         netdev->mem_start = mmio_start;
1428         netdev->mem_end = mmio_start + mmio_len;
1429
1430         /* PCI config space info */
1431         hw->vendor_id = pdev->vendor;
1432         hw->device_id = pdev->device;
1433         hw->revision_id = pdev->revision;
1434         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1435         hw->subsystem_device_id = pdev->subsystem_device;
1436
1437         /* setup the private structure */
1438         hw->back = adapter;
1439         /* Copy the default MAC, PHY and NVM function pointers */
1440         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1441         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1442         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1443         /* Initialize skew-specific constants */
1444         err = ei->get_invariants(hw);
1445         if (err)
1446                 goto err_sw_init;
1447
1448 #ifdef CONFIG_PCI_IOV
1449         /* since iov functionality isn't critical to base device function we
1450          * can accept failure.  If it fails we don't allow iov to be enabled */
1451         if (hw->mac.type == e1000_82576) {
1452                 /* 82576 supports a maximum of 7 VFs in addition to the PF */
1453                 unsigned int num_vfs = (max_vfs > 7) ? 7 : max_vfs;
1454                 int i;
1455                 unsigned char mac_addr[ETH_ALEN];
1456
1457                 if (num_vfs) {
1458                         adapter->vf_data = kcalloc(num_vfs,
1459                                                 sizeof(struct vf_data_storage),
1460                                                 GFP_KERNEL);
1461                         if (!adapter->vf_data) {
1462                                 dev_err(&pdev->dev,
1463                                         "Could not allocate VF private data - "
1464                                         "IOV enable failed\n");
1465                         } else {
1466                                 err = pci_enable_sriov(pdev, num_vfs);
1467                                 if (!err) {
1468                                         adapter->vfs_allocated_count = num_vfs;
1469                                         dev_info(&pdev->dev,
1470                                                  "%d vfs allocated\n",
1471                                                  num_vfs);
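                                        /* seed each VF with a random,
                                         * locally administered MAC address */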
1472                                         for (i = 0;
1473                                              i < adapter->vfs_allocated_count;
1474                                              i++) {
1475                                                 random_ether_addr(mac_addr);
1476                                                 igb_set_vf_mac(adapter, i,
1477                                                                mac_addr);
1478                                         }
1479                                 } else {
1480                                         kfree(adapter->vf_data);
1481                                         adapter->vf_data = NULL;
1482                                 }
1483                         }
1484                 }
1485         }
1486
1487 #endif
1488         /* setup the private structure */
1489         err = igb_sw_init(adapter);
1490         if (err)
1491                 goto err_sw_init;
1492
1493         igb_get_bus_info_pcie(hw);
1494
1495         hw->phy.autoneg_wait_to_complete = false;
1496         hw->mac.adaptive_ifs = true;
1497
1498         /* Copper options */
1499         if (hw->phy.media_type == e1000_media_type_copper) {
1500                 hw->phy.mdix = AUTO_ALL_MODES;
1501                 hw->phy.disable_polarity_correction = false;
1502                 hw->phy.ms_type = e1000_ms_hw_default;
1503         }
1504
1505         if (igb_check_reset_block(hw))
1506                 dev_info(&pdev->dev,
1507                         "PHY reset is blocked due to SOL/IDER session.\n");
1508
1509         netdev->features = NETIF_F_SG |
1510                            NETIF_F_IP_CSUM |
1511                            NETIF_F_HW_VLAN_TX |
1512                            NETIF_F_HW_VLAN_RX |
1513                            NETIF_F_HW_VLAN_FILTER;
1514
1515         netdev->features |= NETIF_F_IPV6_CSUM;
1516         netdev->features |= NETIF_F_TSO;
1517         netdev->features |= NETIF_F_TSO6;
1518
1519         netdev->features |= NETIF_F_GRO;
1520
1521         netdev->vlan_features |= NETIF_F_TSO;
1522         netdev->vlan_features |= NETIF_F_TSO6;
1523         netdev->vlan_features |= NETIF_F_IP_CSUM;
1524         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1525         netdev->vlan_features |= NETIF_F_SG;
1526
1527         if (pci_using_dac)
1528                 netdev->features |= NETIF_F_HIGHDMA;
1529
1530         if (adapter->hw.mac.type == e1000_82576)
1531                 netdev->features |= NETIF_F_SCTP_CSUM;
1532
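        /* remember whether manageability (BMC) pass-through is enabled */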
1533         adapter->en_mng_pt = igb_enable_mng_pass_thru(&adapter->hw);
1534
1535         /* before reading the NVM, reset the controller to put the device in a
1536          * known good starting state */
1537         hw->mac.ops.reset_hw(hw);
1538
1539         /* make sure the NVM is good */
1540         if (igb_validate_nvm_checksum(hw) < 0) {
1541                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1542                 err = -EIO;
1543                 goto err_eeprom;
1544         }
1545
1546         /* copy the MAC address out of the NVM */
1547         if (hw->mac.ops.read_mac_addr(hw))
1548                 dev_err(&pdev->dev, "NVM Read Error\n");
1549
1550         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1551         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1552
1553         if (!is_valid_ether_addr(netdev->perm_addr)) {
1554                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1555                 err = -EIO;
1556                 goto err_eeprom;
1557         }
1558
1559         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1560                     (unsigned long) adapter);
1561         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1562                     (unsigned long) adapter);
1563
1564         INIT_WORK(&adapter->reset_task, igb_reset_task);
1565         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1566
1567         /* Initialize link properties that are user-changeable */
1568         adapter->fc_autoneg = true;
1569         hw->mac.autoneg = true;
1570         hw->phy.autoneg_advertised = 0x2f;
1571
1572         hw->fc.requested_mode = e1000_fc_default;
1573         hw->fc.current_mode = e1000_fc_default;
1574
1575         igb_validate_mdi_setting(hw);
1576
1577         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1578          * enable the ACPI Magic Packet filter
1579          */
1580
1581         if (hw->bus.func == 0)
1582                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1583         else if (hw->bus.func == 1)
1584                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1585
1586         if (eeprom_data & eeprom_apme_mask)
1587                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1588
1589         /* now that we have the eeprom settings, apply the special cases where
1590          * the eeprom may be wrong or the board simply won't support wake on
1591          * lan on a particular port */
1592         switch (pdev->device) {
1593         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1594                 adapter->eeprom_wol = 0;
1595                 break;
1596         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1597         case E1000_DEV_ID_82576_FIBER:
1598         case E1000_DEV_ID_82576_SERDES:
1599                 /* Wake events only supported on port A for dual fiber
1600                  * regardless of eeprom setting */
1601                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1602                         adapter->eeprom_wol = 0;
1603                 break;
1604         case E1000_DEV_ID_82576_QUAD_COPPER:
1605                 /* if quad port adapter, disable WoL on all but port A */
1606                 if (global_quad_port_a != 0)
1607                         adapter->eeprom_wol = 0;
1608                 else
1609                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1610                 /* Reset for multiple quad port adapters */
1611                 if (++global_quad_port_a == 4)
1612                         global_quad_port_a = 0;
1613                 break;
1614         }
1615
1616         /* initialize the wol settings based on the eeprom settings */
1617         adapter->wol = adapter->eeprom_wol;
1618         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1619
1620         /* reset the hardware with the new settings */
1621         igb_reset(adapter);
1622
1623         /* let the f/w know that the h/w is now under the control of the
1624          * driver. */
1625         igb_get_hw_control(adapter);
1626
1627         strcpy(netdev->name, "eth%d");
1628         err = register_netdev(netdev);
1629         if (err)
1630                 goto err_register;
1631
1632         /* carrier off reporting is important to ethtool even BEFORE open */
1633         netif_carrier_off(netdev);
1634
1635 #ifdef CONFIG_IGB_DCA
1636         if (dca_add_requester(&pdev->dev) == 0) {
1637                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1638                 dev_info(&pdev->dev, "DCA enabled\n");
1639                 igb_setup_dca(adapter);
1640         }
1641
1642 #endif
1643
1644         switch (hw->mac.type) {
1645         case e1000_82576:
1646                 /*
1647                  * Initialize hardware timer: we keep it running just in case
1648                  * some program needs it later on.
1649                  */
1650                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1651                 adapter->cycles.read = igb_read_clock;
1652                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1653                 adapter->cycles.mult = 1;
1654                 /*
1655                  * Scale the NIC clock cycle by a large factor so that
1656                  * relatively small clock corrections can be added or
1657                  * subtracted at each clock tick. The drawbacks of a large
1658                  * factor are a) that the clock register overflows more quickly
1659                  * (not such a big deal) and b) that the increment per tick has
1660                  * to fit into 24 bits.  As a result we need to use a shift of
1661                  * 19 so we can fit a value of 16 into the TIMINCA register.
1662                  */
1663                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1664                 wr32(E1000_TIMINCA,
1665                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1666                                 (16 << IGB_82576_TSYNC_SHIFT));
1667
1668                 /* Set registers so that rollover occurs soon to test this. */
1669                 wr32(E1000_SYSTIML, 0x00000000);
1670                 wr32(E1000_SYSTIMH, 0xFF800000);
1671                 wrfl();
1672
1673                 timecounter_init(&adapter->clock,
1674                                  &adapter->cycles,
1675                                  ktime_to_ns(ktime_get_real()));
1676                 /*
1677                  * Synchronize our NIC clock against system wall clock. NIC
1678                  * time stamp reading requires ~3us per sample and each sample
1679                  * was pretty stable even under load, so only 10 samples are
1680                  * needed for each offset comparison.
1681                  */
1682                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1683                 adapter->compare.source = &adapter->clock;
1684                 adapter->compare.target = ktime_get_real;
1685                 adapter->compare.num_samples = 10;
1686                 timecompare_update(&adapter->compare, 0);
1687                 break;
1688         case e1000_82575:
1689                 /* 82575 does not support timesync */
1690         default:
1691                 break;
1692         }
1693
1694         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1695         /* print bus type/speed/width info */
1696         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1697                  netdev->name,
1698                  ((hw->bus.speed == e1000_bus_speed_2500)
1699                   ? "2.5Gb/s" : "unknown"),
1700                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1701                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1702                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1703                    "unknown"),
1704                  netdev->dev_addr);
1705
1706         igb_read_part_num(hw, &part_num);
1707         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1708                 (part_num >> 8), (part_num & 0xff));
1709
1710         dev_info(&pdev->dev,
1711                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1712                 adapter->msix_entries ? "MSI-X" :
1713                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1714                 adapter->num_rx_queues, adapter->num_tx_queues);
1715
1716         return 0;
1717
1718 err_register:
1719         igb_release_hw_control(adapter);
1720 err_eeprom:
1721         if (!igb_check_reset_block(hw))
1722                 igb_reset_phy(hw);
1723
1724         if (hw->flash_address)
1725                 iounmap(hw->flash_address);
1726 err_sw_init:
1727         igb_clear_interrupt_scheme(adapter);
1728         iounmap(hw->hw_addr);
1729 err_ioremap:
1730         free_netdev(netdev);
1731 err_alloc_etherdev:
1732         pci_release_selected_regions(pdev, pci_select_bars(pdev,
1733                                      IORESOURCE_MEM));
1734 err_pci_reg:
1735 err_dma:
1736         pci_disable_device(pdev);
1737         return err;
1738 }
1739
1740 /**
1741  * igb_remove - Device Removal Routine
1742  * @pdev: PCI device information struct
1743  *
1744  * igb_remove is called by the PCI subsystem to alert the driver
1745  * that it should release a PCI device.  This could be caused by a
1746  * Hot-Plug event, or because the driver is going to be removed from
1747  * memory.
1748  **/
1749 static void __devexit igb_remove(struct pci_dev *pdev)
1750 {
1751         struct net_device *netdev = pci_get_drvdata(pdev);
1752         struct igb_adapter *adapter = netdev_priv(netdev);
1753         struct e1000_hw *hw = &adapter->hw;
1754
1755         /* flush_scheduled_work() may reschedule our watchdog task, so
1756          * explicitly disable watchdog tasks from being rescheduled */
1757         set_bit(__IGB_DOWN, &adapter->state);
1758         del_timer_sync(&adapter->watchdog_timer);
1759         del_timer_sync(&adapter->phy_info_timer);
1760
1761         flush_scheduled_work();
1762
1763 #ifdef CONFIG_IGB_DCA
1764         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1765                 dev_info(&pdev->dev, "DCA disabled\n");
1766                 dca_remove_requester(&pdev->dev);
1767                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1768                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1769         }
1770 #endif
1771
1772         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1773          * would have already happened in close and is redundant. */
1774         igb_release_hw_control(adapter);
1775
1776         unregister_netdev(netdev);
1777
1778         if (!igb_check_reset_block(&adapter->hw))
1779                 igb_reset_phy(&adapter->hw);
1780
1781         igb_clear_interrupt_scheme(adapter);
1782
1783 #ifdef CONFIG_PCI_IOV
1784         /* reclaim resources allocated to VFs */
1785         if (adapter->vf_data) {
1786                 /* disable iov and allow time for transactions to clear */
1787                 pci_disable_sriov(pdev);
1788                 msleep(500);
1789
1790                 kfree(adapter->vf_data);
1791                 adapter->vf_data = NULL;
1792                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1793                 msleep(100);
1794                 dev_info(&pdev->dev, "IOV Disabled\n");
1795         }
1796 #endif
1797         iounmap(hw->hw_addr);
1798         if (hw->flash_address)
1799                 iounmap(hw->flash_address);
1800         pci_release_selected_regions(pdev, pci_select_bars(pdev,
1801                                      IORESOURCE_MEM));
1802
1803         free_netdev(netdev);
1804
1805         pci_disable_pcie_error_reporting(pdev);
1806
1807         pci_disable_device(pdev);
1808 }
1809
1810 /**
1811  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1812  * @adapter: board private structure to initialize
1813  *
1814  * igb_sw_init initializes the Adapter private data structure.
1815  * Fields are initialized based on PCI device information and
1816  * OS network device settings (MTU size).
1817  **/
1818 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1819 {
1820         struct e1000_hw *hw = &adapter->hw;
1821         struct net_device *netdev = adapter->netdev;
1822         struct pci_dev *pdev = adapter->pdev;
1823
1824         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1825
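        /* software defaults: descriptor ring sizes and interrupt
         * throttle (ITR) settings */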
1826         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1827         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1828         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1829         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1830
1831         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1832         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1833
1834         /* This call may decrease the number of queues depending on
1835          * interrupt mode. */
1836         if (igb_init_interrupt_scheme(adapter)) {
1837                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1838                 return -ENOMEM;
1839         }
1840
1841         /* Explicitly disable IRQ since the NIC can be in any state. */
1842         igb_irq_disable(adapter);
1843
1844         set_bit(__IGB_DOWN, &adapter->state);
1845         return 0;
1846 }
1847
1848 /**
1849  * igb_open - Called when a network interface is made active
1850  * @netdev: network interface device structure
1851  *
1852  * Returns 0 on success, negative value on failure
1853  *
1854  * The open entry point is called when a network interface is made
1855  * active by the system (IFF_UP).  At this point all resources needed
1856  * for transmit and receive operations are allocated, the interrupt
1857  * handler is registered with the OS, the watchdog timer is started,
1858  * and the stack is notified that the interface is ready.
1859  **/
1860 static int igb_open(struct net_device *netdev)
1861 {
1862         struct igb_adapter *adapter = netdev_priv(netdev);
1863         struct e1000_hw *hw = &adapter->hw;
1864         int err;
1865         int i;
1866
1867         /* disallow open during test */
1868         if (test_bit(__IGB_TESTING, &adapter->state))
1869                 return -EBUSY;
1870
1871         netif_carrier_off(netdev);
1872
1873         /* allocate transmit descriptors */
1874         err = igb_setup_all_tx_resources(adapter);
1875         if (err)
1876                 goto err_setup_tx;
1877
1878         /* allocate receive descriptors */
1879         err = igb_setup_all_rx_resources(adapter);
1880         if (err)
1881                 goto err_setup_rx;
1882
1883         /* e1000_power_up_phy(adapter); */
1884
1885         adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1886         if ((adapter->hw.mng_cookie.status &
1887              E1000_MNG_DHCP_COOKIE_STATUS_VLAN))
1888                 igb_update_mng_vlan(adapter);
1889
1890         /* before we allocate an interrupt, we must be ready to handle it.
1891          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
1892          * as soon as we request the irq, so we have to set up our
1893          * clean_rx handler before we do so.  */
1894         igb_configure(adapter);
1895
1896         igb_set_vmolr(hw, adapter->vfs_allocated_count);
1897
1898         err = igb_request_irq(adapter);
1899         if (err)
1900                 goto err_req_irq;
1901
1902         /* From here on the code is the same as igb_up() */
1903         clear_bit(__IGB_DOWN, &adapter->state);
1904
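        /* enable NAPI polling on every queue vector before interrupts
         * are turned back on */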
1905         for (i = 0; i < adapter->num_q_vectors; i++) {
1906                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1907                 napi_enable(&q_vector->napi);
1908         }
1909
1910         /* Clear any pending interrupts. */
1911         rd32(E1000_ICR);
1912
1913         igb_irq_enable(adapter);
1914
1915         /* notify VFs that reset has been completed */
1916         if (adapter->vfs_allocated_count) {
1917                 u32 reg_data = rd32(E1000_CTRL_EXT);
1918                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1919                 wr32(E1000_CTRL_EXT, reg_data);
1920         }
1921
1922         netif_tx_start_all_queues(netdev);
1923
1924         /* Fire a link status change interrupt to start the watchdog. */
1925         wr32(E1000_ICS, E1000_ICS_LSC);
1926
1927         return 0;
1928
1929 err_req_irq:
1930         igb_release_hw_control(adapter);
1931         /* e1000_power_down_phy(adapter); */
1932         igb_free_all_rx_resources(adapter);
1933 err_setup_rx:
1934         igb_free_all_tx_resources(adapter);
1935 err_setup_tx:
1936         igb_reset(adapter);
1937
1938         return err;
1939 }
1940
1941 /**
1942  * igb_close - Disables a network interface
1943  * @netdev: network interface device structure
1944  *
1945  * Returns 0, this is not allowed to fail
1946  *
1947  * The close entry point is called when an interface is de-activated
1948  * by the OS.  The hardware is still under the driver's control, but
1949  * needs to be disabled.  A global MAC reset is issued to stop the
1950  * hardware, and all transmit and receive resources are freed.
1951  **/
1952 static int igb_close(struct net_device *netdev)
1953 {
1954         struct igb_adapter *adapter = netdev_priv(netdev);
1955
1956         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
1957         igb_down(adapter);
1958
1959         igb_free_irq(adapter);
1960
1961         igb_free_all_tx_resources(adapter);
1962         igb_free_all_rx_resources(adapter);
1963
1964         /* kill manageability vlan ID if supported, but not if a vlan with
1965          * the same ID is registered on the host OS (let 8021q kill it) */
1966         if ((adapter->hw.mng_cookie.status &
1967                           E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
1968              !(adapter->vlgrp &&
1969                vlan_group_get_device(adapter->vlgrp, adapter->mng_vlan_id)))
1970                 igb_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
1971
1972         return 0;
1973 }
1974
1975 /**
1976  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
1977  * @tx_ring: tx descriptor ring (for a specific queue) to setup
1978  *
1979  * Return 0 on success, negative on failure
1980  **/
1981 int igb_setup_tx_resources(struct igb_ring *tx_ring)
1982 {
1983         struct pci_dev *pdev = tx_ring->pdev;
1984         int size;
1985
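        /* buffer_info tracks the software state (skb, dma mapping,
         * timestamp) of every descriptor in the ring */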
1986         size = sizeof(struct igb_buffer) * tx_ring->count;
1987         tx_ring->buffer_info = vmalloc(size);
1988         if (!tx_ring->buffer_info)
1989                 goto err;
1990         memset(tx_ring->buffer_info, 0, size);
1991
1992         /* round up to nearest 4K */
1993         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
1994         tx_ring->size = ALIGN(tx_ring->size, 4096);
1995
1996         tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size,
1997                                              &tx_ring->dma);
1998
1999         if (!tx_ring->desc)
2000                 goto err;
2001
2002         tx_ring->next_to_use = 0;
2003         tx_ring->next_to_clean = 0;
2004         return 0;
2005
2006 err:
2007         vfree(tx_ring->buffer_info);
2008         dev_err(&pdev->dev,
2009                 "Unable to allocate memory for the transmit descriptor ring\n");
2010         return -ENOMEM;
2011 }
2012
2013 /**
2014  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2015  *                                (Descriptors) for all queues
2016  * @adapter: board private structure
2017  *
2018  * Return 0 on success, negative on failure
2019  **/
2020 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2021 {
2022         int i, err = 0;
2023         int r_idx;
2024
2025         for (i = 0; i < adapter->num_tx_queues; i++) {
2026                 err = igb_setup_tx_resources(&adapter->tx_ring[i]);
2027                 if (err) {
2028                         dev_err(&adapter->pdev->dev,
2029                                 "Allocation for Tx Queue %u failed\n", i);
2030                         for (i--; i >= 0; i--)
2031                                 igb_free_tx_resources(&adapter->tx_ring[i]);
2032                         break;
2033                 }
2034         }
2035
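        /* map every possible stack tx queue index onto one of the
         * allocated rings, wrapping when there are fewer rings than
         * IGB_MAX_TX_QUEUES */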
2036         for (i = 0; i < IGB_MAX_TX_QUEUES; i++) {
2037                 r_idx = i % adapter->num_tx_queues;
2038                 adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
2039         }
2040         return err;
2041 }
2042
2043 /**
2044  * igb_setup_tctl - configure the transmit control registers
2045  * @adapter: Board private structure
2046  **/
2047 void igb_setup_tctl(struct igb_adapter *adapter)
2048 {
2049         struct e1000_hw *hw = &adapter->hw;
2050         u32 tctl;
2051
2052         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2053         wr32(E1000_TXDCTL(0), 0);
2054
2055         /* Program the Transmit Control Register */
2056         tctl = rd32(E1000_TCTL);
2057         tctl &= ~E1000_TCTL_CT;
2058         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2059                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2060
2061         igb_config_collision_dist(hw);
2062
2063         /* Enable transmits */
2064         tctl |= E1000_TCTL_EN;
2065
2066         wr32(E1000_TCTL, tctl);
2067 }
2068
2069 /**
2070  * igb_configure_tx_ring - Configure transmit ring after Reset
2071  * @adapter: board private structure
2072  * @ring: tx ring to configure
2073  *
2074  * Configure a transmit ring after a reset.
2075  **/
2076 void igb_configure_tx_ring(struct igb_adapter *adapter,
2077                            struct igb_ring *ring)
2078 {
2079         struct e1000_hw *hw = &adapter->hw;
2080         u32 txdctl;
2081         u64 tdba = ring->dma;
2082         int reg_idx = ring->reg_idx;
2083
2084         /* disable the queue */
2085         txdctl = rd32(E1000_TXDCTL(reg_idx));
2086         wr32(E1000_TXDCTL(reg_idx),
2087                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2088         wrfl();
2089         mdelay(10);
2090
2091         wr32(E1000_TDLEN(reg_idx),
2092                         ring->count * sizeof(union e1000_adv_tx_desc));
2093         wr32(E1000_TDBAL(reg_idx),
2094                         tdba & 0x00000000ffffffffULL);
2095         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2096
2097         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2098         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2099         writel(0, ring->head);
2100         writel(0, ring->tail);
2101
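        /* program prefetch, host and write-back thresholds, then
         * re-enable the queue */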
2102         txdctl |= IGB_TX_PTHRESH;
2103         txdctl |= IGB_TX_HTHRESH << 8;
2104         txdctl |= IGB_TX_WTHRESH << 16;
2105
2106         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2107         wr32(E1000_TXDCTL(reg_idx), txdctl);
2108 }
2109
2110 /**
2111  * igb_configure_tx - Configure transmit Unit after Reset
2112  * @adapter: board private structure
2113  *
2114  * Configure the Tx unit of the MAC after a reset.
2115  **/
2116 static void igb_configure_tx(struct igb_adapter *adapter)
2117 {
2118         int i;
2119
2120         for (i = 0; i < adapter->num_tx_queues; i++)
2121                 igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
2122 }
2123
2124 /**
2125  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2126  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2127  *
2128  * Returns 0 on success, negative on failure
2129  **/
2130 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2131 {
2132         struct pci_dev *pdev = rx_ring->pdev;
2133         int size, desc_len;
2134
2135         size = sizeof(struct igb_buffer) * rx_ring->count;
2136         rx_ring->buffer_info = vmalloc(size);
2137         if (!rx_ring->buffer_info)
2138                 goto err;
2139         memset(rx_ring->buffer_info, 0, size);
2140
2141         desc_len = sizeof(union e1000_adv_rx_desc);
2142
2143         /* Round up to nearest 4K */
2144         rx_ring->size = rx_ring->count * desc_len;
2145         rx_ring->size = ALIGN(rx_ring->size, 4096);
2146
2147         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2148                                              &rx_ring->dma);
2149
2150         if (!rx_ring->desc)
2151                 goto err;
2152
2153         rx_ring->next_to_clean = 0;
2154         rx_ring->next_to_use = 0;
2155
2156         return 0;
2157
2158 err:
2159         vfree(rx_ring->buffer_info);
2160         dev_err(&pdev->dev, "Unable to allocate memory for "
2161                 "the receive descriptor ring\n");
2162         return -ENOMEM;
2163 }
2164
2165 /**
2166  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2167  *                                (Descriptors) for all queues
2168  * @adapter: board private structure
2169  *
2170  * Return 0 on success, negative on failure
2171  **/
2172 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2173 {
2174         int i, err = 0;
2175
2176         for (i = 0; i < adapter->num_rx_queues; i++) {
2177                 err = igb_setup_rx_resources(&adapter->rx_ring[i]);
2178                 if (err) {
2179                         dev_err(&adapter->pdev->dev,
2180                                 "Allocation for Rx Queue %u failed\n", i);
2181                         for (i--; i >= 0; i--)
2182                                 igb_free_rx_resources(&adapter->rx_ring[i]);
2183                         break;
2184                 }
2185         }
2186
2187         return err;
2188 }
2189
2190 /**
2191  * igb_setup_mrqc - configure the multiple receive queue control registers
2192  * @adapter: Board private structure
2193  **/
2194 static void igb_setup_mrqc(struct igb_adapter *adapter)
2195 {
2196         struct e1000_hw *hw = &adapter->hw;
2197         u32 mrqc, rxcsum;
2198         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2199         union e1000_reta {
2200                 u32 dword;
2201                 u8  bytes[4];
2202         } reta;
2203         static const u8 rsshash[40] = {
2204                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2205                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2206                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2207                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2208
2209         /* Fill out hash function seeds */
2210         for (j = 0; j < 10; j++) {
2211                 u32 rsskey = rsshash[(j * 4)];
2212                 rsskey |= rsshash[(j * 4) + 1] << 8;
2213                 rsskey |= rsshash[(j * 4) + 2] << 16;
2214                 rsskey |= rsshash[(j * 4) + 3] << 24;
2215                 array_wr32(E1000_RSSRK(0), j, rsskey);
2216         }
2217
2218         num_rx_queues = adapter->num_rx_queues;
2219
2220         if (adapter->vfs_allocated_count) {
2221                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2222                 switch (hw->mac.type) {
2223                 case e1000_82576:
2224                         shift = 3;
2225                         num_rx_queues = 2;
2226                         break;
2227                 case e1000_82575:
2228                         shift = 2;
2229                         shift2 = 6;
2230                 default:
2231                         break;
2232                 }
2233         } else {
2234                 if (hw->mac.type == e1000_82575)
2235                         shift = 6;
2236         }
2237
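        /* fill the 128-entry redirection table, distributing flows
         * across the active rx queues; entries are written one dword
         * (four table entries) at a time */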
2238         for (j = 0; j < (32 * 4); j++) {
2239                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2240                 if (shift2)
2241                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2242                 if ((j & 3) == 3)
2243                         wr32(E1000_RETA(j >> 2), reta.dword);
2244         }
2245
2246         /*
2247          * Disable raw packet checksumming so that RSS hash is placed in
2248          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2249          * offloads as they are enabled by default
2250          */
2251         rxcsum = rd32(E1000_RXCSUM);
2252         rxcsum |= E1000_RXCSUM_PCSD;
2253
2254         if (adapter->hw.mac.type >= e1000_82576)
2255                 /* Enable Receive Checksum Offload for SCTP */
2256                 rxcsum |= E1000_RXCSUM_CRCOFL;
2257
2258         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2259         wr32(E1000_RXCSUM, rxcsum);
2260
2261         /* If VMDq is enabled then we set the appropriate mode for that, else
2262          * we default to RSS so that an RSS hash is calculated per packet even
2263          * if we are only using one queue */
2264         if (adapter->vfs_allocated_count) {
2265                 if (hw->mac.type > e1000_82575) {
2266                         /* Set the default pool for the PF's first queue */
2267                         u32 vtctl = rd32(E1000_VT_CTL);
2268                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2269                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2270                         vtctl |= adapter->vfs_allocated_count <<
2271                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2272                         wr32(E1000_VT_CTL, vtctl);
2273                 }
2274                 if (adapter->num_rx_queues > 1)
2275                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2276                 else
2277                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2278         } else {
2279                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2280         }
2281         igb_vmm_control(adapter);
2282
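        /* hash on IPv4/IPv6 headers as well as TCP and UDP port numbers */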
2283         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2284                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2285         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2286                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2287         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2288                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2289         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2290                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2291
2292         wr32(E1000_MRQC, mrqc);
2293 }
2294
2295 /**
2296  * igb_setup_rctl - configure the receive control registers
2297  * @adapter: Board private structure
2298  **/
2299 void igb_setup_rctl(struct igb_adapter *adapter)
2300 {
2301         struct e1000_hw *hw = &adapter->hw;
2302         u32 rctl;
2303
2304         rctl = rd32(E1000_RCTL);
2305
2306         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2307         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2308
2309         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2310                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2311
2312         /*
2313          * enable stripping of CRC. It's unlikely this will break BMC
2314          * redirection as it did with e1000. Newer features require
2315          * that the HW strips the CRC.
2316          */
2317         rctl |= E1000_RCTL_SECRC;
2318
2319         /*
2320          * disable store bad packets and clear size bits.
2321          */
2322         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2323
2324         /* enable LPE to prevent packets larger than max_frame_size */
2325         rctl |= E1000_RCTL_LPE;
2326
2327         /* disable queue 0 to prevent tail write w/o re-config */
2328         wr32(E1000_RXDCTL(0), 0);
2329
2330         /* Attention!!!  For SR-IOV PF driver operations you must enable
2331          * queue drop for all VF and PF queues to prevent head of line blocking
2332          * if an untrusted VF does not provide descriptors to hardware.
2333          */
2334         if (adapter->vfs_allocated_count) {
2335                 u32 vmolr;
2336
2337                 /* set all queue drop enable bits */
2338                 wr32(E1000_QDE, ALL_QUEUES);
2339
2340                 vmolr = rd32(E1000_VMOLR(adapter->vfs_allocated_count));
2341                 if (rctl & E1000_RCTL_LPE)
2342                         vmolr |= E1000_VMOLR_LPE;
2343                 if (adapter->num_rx_queues > 1)
2344                         vmolr |= E1000_VMOLR_RSSE;
2345                 wr32(E1000_VMOLR(adapter->vfs_allocated_count), vmolr);
2346         }
2347
2348         wr32(E1000_RCTL, rctl);
2349 }
2350
2351 /**
2352  * igb_rlpml_set - set maximum receive packet size
2353  * @adapter: board private structure
2354  *
2355  * Configure maximum receivable packet size.
2356  **/
2357 static void igb_rlpml_set(struct igb_adapter *adapter)
2358 {
2359         u32 max_frame_size = adapter->max_frame_size;
2360         struct e1000_hw *hw = &adapter->hw;
2361         u16 pf_id = adapter->vfs_allocated_count;
2362
2363         if (adapter->vlgrp)
2364                 max_frame_size += VLAN_TAG_SIZE;
2365
2366         /* if vfs are enabled we set RLPML to the largest possible request
2367          * size and set the VMOLR RLPML to the size we need */
2368         if (pf_id) {
2369                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2370                 max_frame_size = MAX_STD_JUMBO_FRAME_SIZE + VLAN_TAG_SIZE;
2371         }
2372
2373         wr32(E1000_RLPML, max_frame_size);
2374 }
2375
2376 /**
2377  * igb_configure_rx_ring - Configure a receive ring after Reset
2378  * @adapter: board private structure
2379  * @ring: receive ring to be configured
2380  *
2381  * Configure the Rx unit of the MAC after a reset.
2382  **/
2383 void igb_configure_rx_ring(struct igb_adapter *adapter,
2384                            struct igb_ring *ring)
2385 {
2386         struct e1000_hw *hw = &adapter->hw;
2387         u64 rdba = ring->dma;
2388         int reg_idx = ring->reg_idx;
2389         u32 srrctl, rxdctl;
2390
2391         /* disable the queue */
2392         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2393         wr32(E1000_RXDCTL(reg_idx),
2394                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2395
2396         /* Set DMA base address registers */
2397         wr32(E1000_RDBAL(reg_idx),
2398              rdba & 0x00000000ffffffffULL);
2399         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2400         wr32(E1000_RDLEN(reg_idx),
2401                        ring->count * sizeof(union e1000_adv_rx_desc));
2402
2403         /* initialize head and tail */
2404         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2405         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2406         writel(0, ring->head);
2407         writel(0, ring->tail);
2408
2409         /* set descriptor configuration */
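        /* small buffers (< 1K) use header split with the payload in half a
         * page (capped at 16K); larger buffers use a single advanced
         * one-buffer descriptor */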
2410         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2411                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2412                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2413 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2414                 srrctl |= IGB_RXBUFFER_16384 >>
2415                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2416 #else
2417                 srrctl |= (PAGE_SIZE / 2) >>
2418                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2419 #endif
2420                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2421         } else {
2422                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2423                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2424                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2425         }
2426
2427         wr32(E1000_SRRCTL(reg_idx), srrctl);
2428
2429         /* enable receive descriptor fetching */
2430         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2431         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2432         rxdctl &= 0xFFF00000;
2433         rxdctl |= IGB_RX_PTHRESH;
2434         rxdctl |= IGB_RX_HTHRESH << 8;
2435         rxdctl |= IGB_RX_WTHRESH << 16;
2436         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2437 }
2438
2439 /**
2440  * igb_configure_rx - Configure receive Unit after Reset
2441  * @adapter: board private structure
2442  *
2443  * Configure the Rx unit of the MAC after a reset.
2444  **/
2445 static void igb_configure_rx(struct igb_adapter *adapter)
2446 {
2447         int i;
2448
2449         /* set UTA to appropriate mode */
2450         igb_set_uta(adapter);
2451
2452         /* set the correct pool for the PF default MAC address in entry 0 */
2453         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2454                          adapter->vfs_allocated_count);
2455
2456         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2457          * the Base and Length of the Rx Descriptor Ring */
2458         for (i = 0; i < adapter->num_rx_queues; i++)
2459                 igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
2460 }
2461
2462 /**
2463  * igb_free_tx_resources - Free Tx Resources per Queue
2464  * @tx_ring: Tx descriptor ring for a specific queue
2465  *
2466  * Free all transmit software resources
2467  **/
2468 void igb_free_tx_resources(struct igb_ring *tx_ring)
2469 {
2470         igb_clean_tx_ring(tx_ring);
2471
2472         vfree(tx_ring->buffer_info);
2473         tx_ring->buffer_info = NULL;
2474
2475         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2476                             tx_ring->desc, tx_ring->dma);
2477
2478         tx_ring->desc = NULL;
2479 }
2480
2481 /**
2482  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2483  * @adapter: board private structure
2484  *
2485  * Free all transmit software resources
2486  **/
2487 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2488 {
2489         int i;
2490
2491         for (i = 0; i < adapter->num_tx_queues; i++)
2492                 igb_free_tx_resources(&adapter->tx_ring[i]);
2493 }
2494
2495 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2496                                     struct igb_buffer *buffer_info)
2497 {
2498         buffer_info->dma = 0;
2499         if (buffer_info->skb) {
2500                 skb_dma_unmap(&tx_ring->pdev->dev,
2501                               buffer_info->skb,
2502                               DMA_TO_DEVICE);
2503                 dev_kfree_skb_any(buffer_info->skb);
2504                 buffer_info->skb = NULL;
2505         }
2506         buffer_info->time_stamp = 0;
2507         /* buffer_info must be completely set up in the transmit path */
2508 }
2509
2510 /**
2511  * igb_clean_tx_ring - Free Tx Buffers
2512  * @tx_ring: ring to be cleaned
2513  **/
2514 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2515 {
2516         struct igb_buffer *buffer_info;
2517         unsigned long size;
2518         unsigned int i;
2519
2520         if (!tx_ring->buffer_info)
2521                 return;
2522         /* Free all the Tx ring sk_buffs */
2523
2524         for (i = 0; i < tx_ring->count; i++) {
2525                 buffer_info = &tx_ring->buffer_info[i];
2526                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2527         }
2528
2529         size = sizeof(struct igb_buffer) * tx_ring->count;
2530         memset(tx_ring->buffer_info, 0, size);
2531
2532         /* Zero out the descriptor ring */
2533
2534         memset(tx_ring->desc, 0, tx_ring->size);
2535
2536         tx_ring->next_to_use = 0;
2537         tx_ring->next_to_clean = 0;
2538
2539         writel(0, tx_ring->head);
2540         writel(0, tx_ring->tail);
2541 }
2542
2543 /**
2544  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2545  * @adapter: board private structure
2546  **/
2547 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2548 {
2549         int i;
2550
2551         for (i = 0; i < adapter->num_tx_queues; i++)
2552                 igb_clean_tx_ring(&adapter->tx_ring[i]);
2553 }
2554
2555 /**
2556  * igb_free_rx_resources - Free Rx Resources
2557  * @rx_ring: ring to clean the resources from
2558  *
2559  * Free all receive software resources
2560  **/
2561 void igb_free_rx_resources(struct igb_ring *rx_ring)
2562 {
2563         igb_clean_rx_ring(rx_ring);
2564
2565         vfree(rx_ring->buffer_info);
2566         rx_ring->buffer_info = NULL;
2567
2568         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2569                             rx_ring->desc, rx_ring->dma);
2570
2571         rx_ring->desc = NULL;
2572 }
2573
2574 /**
2575  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2576  * @adapter: board private structure
2577  *
2578  * Free all receive software resources
2579  **/
2580 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2581 {
2582         int i;
2583
2584         for (i = 0; i < adapter->num_rx_queues; i++)
2585                 igb_free_rx_resources(&adapter->rx_ring[i]);
2586 }
2587
2588 /**
2589  * igb_clean_rx_ring - Free Rx Buffers per Queue
2590  * @rx_ring: ring to free buffers from
2591  **/
2592 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2593 {
2594         struct igb_buffer *buffer_info;
2595         unsigned long size;
2596         unsigned int i;
2597
2598         if (!rx_ring->buffer_info)
2599                 return;
2600         /* Free all the Rx ring sk_buffs */
2601         for (i = 0; i < rx_ring->count; i++) {
2602                 buffer_info = &rx_ring->buffer_info[i];
2603                 if (buffer_info->dma) {
2604                         pci_unmap_single(rx_ring->pdev,
2605                                          buffer_info->dma,
2606                                          rx_ring->rx_buffer_len,
2607                                          PCI_DMA_FROMDEVICE);
2608                         buffer_info->dma = 0;
2609                 }
2610
2611                 if (buffer_info->skb) {
2612                         dev_kfree_skb(buffer_info->skb);
2613                         buffer_info->skb = NULL;
2614                 }
2615                 if (buffer_info->page_dma) {
2616                         pci_unmap_page(rx_ring->pdev,
2617                                        buffer_info->page_dma,
2618                                        PAGE_SIZE / 2,
2619                                        PCI_DMA_FROMDEVICE);
2620                         buffer_info->page_dma = 0;
2621                 }
2622                 if (buffer_info->page) {
2623                         put_page(buffer_info->page);
2624                         buffer_info->page = NULL;
2625                         buffer_info->page_offset = 0;
2626                 }
2627         }
2628
2629         size = sizeof(struct igb_buffer) * rx_ring->count;
2630         memset(rx_ring->buffer_info, 0, size);
2631
2632         /* Zero out the descriptor ring */
2633         memset(rx_ring->desc, 0, rx_ring->size);
2634
2635         rx_ring->next_to_clean = 0;
2636         rx_ring->next_to_use = 0;
2637
2638         writel(0, rx_ring->head);
2639         writel(0, rx_ring->tail);
2640 }
2641
2642 /**
2643  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2644  * @adapter: board private structure
2645  **/
2646 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2647 {
2648         int i;
2649
2650         for (i = 0; i < adapter->num_rx_queues; i++)
2651                 igb_clean_rx_ring(&adapter->rx_ring[i]);
2652 }
2653
2654 /**
2655  * igb_set_mac - Change the Ethernet Address of the NIC
2656  * @netdev: network interface device structure
2657  * @p: pointer to an address structure
2658  *
2659  * Returns 0 on success, negative on failure
2660  **/
2661 static int igb_set_mac(struct net_device *netdev, void *p)
2662 {
2663         struct igb_adapter *adapter = netdev_priv(netdev);
2664         struct e1000_hw *hw = &adapter->hw;
2665         struct sockaddr *addr = p;
2666
2667         if (!is_valid_ether_addr(addr->sa_data))
2668                 return -EADDRNOTAVAIL;
2669
2670         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2671         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2672
2673         /* set the correct pool for the new PF MAC address in entry 0 */
2674         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2675                          adapter->vfs_allocated_count);
2676
2677         return 0;
2678 }
2679
2680 /**
2681  * igb_write_mc_addr_list - write multicast addresses to MTA
2682  * @netdev: network interface device structure
2683  *
2684  * Writes multicast address list to the MTA hash table.
2685  * Returns: -ENOMEM on failure
2686  *                0 on no addresses written
2687  *                X on writing X addresses to MTA
2688  **/
2689 static int igb_write_mc_addr_list(struct net_device *netdev)
2690 {
2691         struct igb_adapter *adapter = netdev_priv(netdev);
2692         struct e1000_hw *hw = &adapter->hw;
2693         struct dev_mc_list *mc_ptr = netdev->mc_list;
2694         u8  *mta_list;
2695         u32 vmolr = 0;
2696         int i;
2697
2698         if (!netdev->mc_count) {
2699                 /* nothing to program, so clear mc list */
2700                 igb_update_mc_addr_list(hw, NULL, 0);
2701                 igb_restore_vf_multicasts(adapter);
2702                 return 0;
2703         }
2704
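        /* packed list of addresses, 6 bytes (ETH_ALEN) each */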
2705         mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC);
2706         if (!mta_list)
2707                 return -ENOMEM;
2708
2709         /* set vmolr receive overflow multicast bit */
2710         vmolr |= E1000_VMOLR_ROMPE;
2711
2712         /* The shared function expects a packed array of only addresses. */
2713         mc_ptr = netdev->mc_list;
2714
2715         for (i = 0; i < netdev->mc_count; i++) {
2716                 if (!mc_ptr)
2717                         break;
2718                 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2719                 mc_ptr = mc_ptr->next;
2720         }
2721         igb_update_mc_addr_list(hw, mta_list, i);
2722         kfree(mta_list);
2723
2724         return netdev->mc_count;
2725 }
2726
2727 /**
2728  * igb_write_uc_addr_list - write unicast addresses to RAR table
2729  * @netdev: network interface device structure
2730  *
2731  * Writes unicast address list to the RAR table.
2732  * Returns: -ENOMEM on failure/insufficient address space
2733  *                0 on no addresses written
2734  *                X on writing X addresses to the RAR table
2735  **/
2736 static int igb_write_uc_addr_list(struct net_device *netdev)
2737 {
2738         struct igb_adapter *adapter = netdev_priv(netdev);
2739         struct e1000_hw *hw = &adapter->hw;
2740         unsigned int vfn = adapter->vfs_allocated_count;
2741         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2742         int count = 0;
2743
2744         /* return ENOMEM indicating insufficient memory for addresses */
2745         if (netdev->uc.count > rar_entries)
2746                 return -ENOMEM;
2747
2748         if (netdev->uc.count && rar_entries) {
2749                 struct netdev_hw_addr *ha;
2750                 list_for_each_entry(ha, &netdev->uc.list, list) {
2751                         if (!rar_entries)
2752                                 break;
2753                         igb_rar_set_qsel(adapter, ha->addr,
2754                                          rar_entries--,
2755                                          vfn);
2756                         count++;
2757                 }
2758         }
2759         /* write the addresses in reverse order to avoid write combining */
2760         for (; rar_entries > 0 ; rar_entries--) {
2761                 wr32(E1000_RAH(rar_entries), 0);
2762                 wr32(E1000_RAL(rar_entries), 0);
2763         }
2764         wrfl();
2765
2766         return count;
2767 }
2768
2769 /**
2770  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2771  * @netdev: network interface device structure
2772  *
2773  * The set_rx_mode entry point is called whenever the unicast or multicast
2774  * address lists or the network interface flags are updated.  This routine is
2775  * responsible for configuring the hardware for proper unicast, multicast,
2776  * promiscuous mode, and all-multi behavior.
2777  **/
2778 static void igb_set_rx_mode(struct net_device *netdev)
2779 {
2780         struct igb_adapter *adapter = netdev_priv(netdev);
2781         struct e1000_hw *hw = &adapter->hw;
2782         unsigned int vfn = adapter->vfs_allocated_count;
2783         u32 rctl, vmolr = 0;
2784         int count;
2785
2786         /* Check for Promiscuous and All Multicast modes */
2787         rctl = rd32(E1000_RCTL);
2788
2789         /* clear the affected bits */
2790         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2791
2792         if (netdev->flags & IFF_PROMISC) {
2793                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2794                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2795         } else {
2796                 if (netdev->flags & IFF_ALLMULTI) {
2797                         rctl |= E1000_RCTL_MPE;
2798                         vmolr |= E1000_VMOLR_MPME;
2799                 } else {
2800                         /*
2801                          * Write addresses to the MTA, if the attempt fails
2802                          * then we should just turn on promiscuous mode so
2803                          * that we can at least receive multicast traffic
2804                          */
2805                         count = igb_write_mc_addr_list(netdev);
2806                         if (count < 0) {
2807                                 rctl |= E1000_RCTL_MPE;
2808                                 vmolr |= E1000_VMOLR_MPME;
2809                         } else if (count) {
2810                                 vmolr |= E1000_VMOLR_ROMPE;
2811                         }
2812                 }
2813                 /*
2814                  * Write addresses to available RAR registers, if there is not
2815                  * sufficient space to store all the addresses then enable
2816                  * unicast promiscuous mode
2817                  */
2818                 count = igb_write_uc_addr_list(netdev);
2819                 if (count < 0) {
2820                         rctl |= E1000_RCTL_UPE;
2821                         vmolr |= E1000_VMOLR_ROPE;
2822                 }
2823                 rctl |= E1000_RCTL_VFE;
2824         }
2825         wr32(E1000_RCTL, rctl);
2826
2827         /*
2828          * In order to support SR-IOV and eventually VMDq it is necessary to set
2829          * the VMOLR to enable the appropriate modes.  Without this workaround
2830          * we will have issues with VLAN tag stripping not being done for frames
2831          * that are only arriving because we are the default pool
2832          */
2833         if (hw->mac.type < e1000_82576)
2834                 return;
2835
2836         vmolr |= rd32(E1000_VMOLR(vfn)) &
2837                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
2838         wr32(E1000_VMOLR(vfn), vmolr);
2839         igb_restore_vf_multicasts(adapter);
2840 }
2841
2842 /* Need to wait a few seconds after link up to get diagnostic information from
2843  * the phy */
2844 static void igb_update_phy_info(unsigned long data)
2845 {
2846         struct igb_adapter *adapter = (struct igb_adapter *) data;
2847         igb_get_phy_info(&adapter->hw);
2848 }
2849
2850 /**
2851  * igb_has_link - check shared code for link and determine up/down
2852  * @adapter: pointer to driver private info
2853  **/
2854 static bool igb_has_link(struct igb_adapter *adapter)
2855 {
2856         struct e1000_hw *hw = &adapter->hw;
2857         bool link_active = false;
2858         s32 ret_val = 0;
2859
2860         /* get_link_status is set on LSC (link status) interrupt or
2861          * rx sequence error interrupt.  get_link_status will stay
2862          * false until the e1000_check_for_link establishes link
2863          * for copper adapters ONLY
2864          */
2865         switch (hw->phy.media_type) {
2866         case e1000_media_type_copper:
2867                 if (hw->mac.get_link_status) {
2868                         ret_val = hw->mac.ops.check_for_link(hw);
2869                         link_active = !hw->mac.get_link_status;
2870                 } else {
2871                         link_active = true;
2872                 }
2873                 break;
2874         case e1000_media_type_internal_serdes:
2875                 ret_val = hw->mac.ops.check_for_link(hw);
2876                 link_active = hw->mac.serdes_has_link;
2877                 break;
2878         default:
2879         case e1000_media_type_unknown:
2880                 break;
2881         }
2882
2883         return link_active;
2884 }
2885
2886 /**
2887  * igb_watchdog - Timer Call-back
2888  * @data: pointer to adapter cast into an unsigned long
2889  **/
2890 static void igb_watchdog(unsigned long data)
2891 {
2892         struct igb_adapter *adapter = (struct igb_adapter *)data;
2893         /* Do the rest outside of interrupt context */
2894         schedule_work(&adapter->watchdog_task);
2895 }
2896
2897 static void igb_watchdog_task(struct work_struct *work)
2898 {
2899         struct igb_adapter *adapter = container_of(work,
2900                                         struct igb_adapter, watchdog_task);
2901         struct e1000_hw *hw = &adapter->hw;
2902         struct net_device *netdev = adapter->netdev;
2903         struct igb_ring *tx_ring = adapter->tx_ring;
2904         u32 link;
2905         int i;
2906
2907         link = igb_has_link(adapter);
2908         if ((netif_carrier_ok(netdev)) && link)
2909                 goto link_up;
2910
2911         if (link) {
2912                 if (!netif_carrier_ok(netdev)) {
2913                         u32 ctrl;
2914                         hw->mac.ops.get_speed_and_duplex(&adapter->hw,
2915                                                    &adapter->link_speed,
2916                                                    &adapter->link_duplex);
2917
2918                         ctrl = rd32(E1000_CTRL);
2919                         /* Links status message must follow this format */
2920                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
2921                                  "Flow Control: %s\n",
2922                                  netdev->name,
2923                                  adapter->link_speed,
2924                                  adapter->link_duplex == FULL_DUPLEX ?
2925                                  "Full Duplex" : "Half Duplex",
2926                                  ((ctrl & E1000_CTRL_TFCE) && (ctrl &
2927                                  E1000_CTRL_RFCE)) ? "RX/TX" : ((ctrl &
2928                                  E1000_CTRL_RFCE) ? "RX" : ((ctrl &
2929                                  E1000_CTRL_TFCE) ? "TX" : "None")));
2930
2931                         /* tweak tx_queue_len according to speed/duplex and
2932                          * adjust the timeout factor */
2933                         netdev->tx_queue_len = adapter->tx_queue_len;
2934                         adapter->tx_timeout_factor = 1;
2935                         switch (adapter->link_speed) {
2936                         case SPEED_10:
2937                                 netdev->tx_queue_len = 10;
2938                                 adapter->tx_timeout_factor = 14;
2939                                 break;
2940                         case SPEED_100:
2941                                 netdev->tx_queue_len = 100;
2942                                 /* maybe add some timeout factor ? */
2943                                 break;
2944                         }
2945
2946                         netif_carrier_on(netdev);
2947
2948                         igb_ping_all_vfs(adapter);
2949
2950                         /* link state has changed, schedule phy info update */
2951                         if (!test_bit(__IGB_DOWN, &adapter->state))
2952                                 mod_timer(&adapter->phy_info_timer,
2953                                           round_jiffies(jiffies + 2 * HZ));
2954                 }
2955         } else {
2956                 if (netif_carrier_ok(netdev)) {
2957                         adapter->link_speed = 0;
2958                         adapter->link_duplex = 0;
2959                         /* Links status message must follow this format */
2960                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
2961                                netdev->name);
2962                         netif_carrier_off(netdev);
2963
2964                         igb_ping_all_vfs(adapter);
2965
2966                         /* link state has changed, schedule phy info update */
2967                         if (!test_bit(__IGB_DOWN, &adapter->state))
2968                                 mod_timer(&adapter->phy_info_timer,
2969                                           round_jiffies(jiffies + 2 * HZ));
2970                 }
2971         }
2972
2973 link_up:
2974         igb_update_stats(adapter);
2975
2976         hw->mac.tx_packet_delta = adapter->stats.tpt - adapter->tpt_old;
2977         adapter->tpt_old = adapter->stats.tpt;
2978         hw->mac.collision_delta = adapter->stats.colc - adapter->colc_old;
2979         adapter->colc_old = adapter->stats.colc;
2980
2981         adapter->gorc = adapter->stats.gorc - adapter->gorc_old;
2982         adapter->gorc_old = adapter->stats.gorc;
2983         adapter->gotc = adapter->stats.gotc - adapter->gotc_old;
2984         adapter->gotc_old = adapter->stats.gotc;
2985
2986         igb_update_adaptive(&adapter->hw);
2987
2988         if (!netif_carrier_ok(netdev)) {
2989                 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
2990                         /* We've lost link, so the controller stops DMA,
2991                          * but we've got queued Tx work that's never going
2992                          * to get done, so reset controller to flush Tx.
2993                          * (Do the reset outside of interrupt context). */
2994                         adapter->tx_timeout_count++;
2995                         schedule_work(&adapter->reset_task);
2996                         /* return immediately since reset is imminent */
2997                         return;
2998                 }
2999         }
3000
3001         /* Cause software interrupt to ensure rx ring is cleaned */
3002         if (adapter->msix_entries) {
3003                 u32 eics = 0;
3004                 for (i = 0; i < adapter->num_q_vectors; i++) {
3005                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3006                         eics |= q_vector->eims_value;
3007                 }
3008                 wr32(E1000_EICS, eics);
3009         } else {
3010                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3011         }
3012
3013         /* Force detection of hung controller every watchdog period */
3014         tx_ring->detect_tx_hung = true;
3015
3016         /* Reset the timer */
3017         if (!test_bit(__IGB_DOWN, &adapter->state))
3018                 mod_timer(&adapter->watchdog_timer,
3019                           round_jiffies(jiffies + 2 * HZ));
3020 }
3021
3022 enum latency_range {
3023         lowest_latency = 0,
3024         low_latency = 1,
3025         bulk_latency = 2,
3026         latency_invalid = 255
3027 };
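/* Note: as used by igb_set_itr() below, these ranges correspond to roughly
 * 70,000, 20,000 and 4,000 interrupts per second respectively.
 */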
3028
3029 /**
3030  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3031  *
3032  *      Stores a new ITR value based strictly on packet size.  This
3033  *      algorithm is less sophisticated than that used in igb_update_itr,
3034  *      due to the difficulty of synchronizing statistics across multiple
3035  *      receive rings.  The divisors and thresholds used by this function
3036  *      were determined based on theoretical maximum wire speed and testing
3037  *      data, in order to minimize response time while increasing bulk
3038  *      throughput.
3039  *      This functionality is controlled by the InterruptThrottleRate module
3040  *      parameter (see igb_param.c)
3041  *      NOTE:  This function is called only when operating in a multiqueue
3042  *             receive environment.
3043  * @q_vector: pointer to q_vector
3044  **/
3045 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3046 {
3047         int new_val = q_vector->itr_val;
3048         int avg_wire_size = 0;
3049         struct igb_adapter *adapter = q_vector->adapter;
3050
3051         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3052          * ints/sec - an ITR value of 976 (in ~0.25 usec units).
3053          */
3054         if (adapter->link_speed != SPEED_1000) {
3055                 new_val = 976;
3056                 goto set_itr_val;
3057         }
3058
3059         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3060                 struct igb_ring *ring = q_vector->rx_ring;
3061                 avg_wire_size = ring->total_bytes / ring->total_packets;
3062         }
3063
3064         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3065                 struct igb_ring *ring = q_vector->tx_ring;
3066                 avg_wire_size = max_t(u32, avg_wire_size,
3067                                       (ring->total_bytes /
3068                                        ring->total_packets));
3069         }
3070
3071         /* if avg_wire_size isn't set no work was done */
3072         if (!avg_wire_size)
3073                 goto clear_counts;
3074
3075         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3076         avg_wire_size += 24;
3077
3078         /* Don't starve jumbo frames */
3079         avg_wire_size = min(avg_wire_size, 3000);
3080
3081         /* Give a little boost to mid-size frames */
3082         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3083                 new_val = avg_wire_size / 3;
3084         else
3085                 new_val = avg_wire_size / 2;
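        /* Worked example (assuming the ~0.25 usec ITR granularity used by
         * igb_set_itr below): 1500-byte frames give avg_wire_size ~1524 and
         * new_val 762, roughly 5,000 ints/sec; 400-byte frames give
         * new_val 141, roughly 27,000 ints/sec.
         */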
3086
3087 set_itr_val:
3088         if (new_val != q_vector->itr_val) {
3089                 q_vector->itr_val = new_val;
3090                 q_vector->set_itr = 1;
3091         }
3092 clear_counts:
3093         if (q_vector->rx_ring) {
3094                 q_vector->rx_ring->total_bytes = 0;
3095                 q_vector->rx_ring->total_packets = 0;
3096         }
3097         if (q_vector->tx_ring) {
3098                 q_vector->tx_ring->total_bytes = 0;
3099                 q_vector->tx_ring->total_packets = 0;
3100         }
3101 }
3102
3103 /**
3104  * igb_update_itr - update the dynamic ITR value based on statistics
3105  *      Stores a new ITR value based on packets and byte
3106  *      counts during the last interrupt.  The advantage of per interrupt
3107  *      computation is faster updates and more accurate ITR for the current
3108  *      traffic pattern.  Constants in this function were computed
3109  *      based on theoretical maximum wire speed and thresholds were set based
3110  *      on testing data as well as attempting to minimize response time
3111  *      while increasing bulk throughput.
3112  *      this functionality is controlled by the InterruptThrottleRate module
3113  *      parameter (see igb_param.c)
3114  *      NOTE:  These calculations are only valid when operating in a single-
3115  *             queue environment.
3116  * @adapter: pointer to adapter
3117  * @itr_setting: current q_vector->itr_val
3118  * @packets: the number of packets during this measurement interval
3119  * @bytes: the number of bytes during this measurement interval
3120  **/
3121 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3122                                    int packets, int bytes)
3123 {
3124         unsigned int retval = itr_setting;
3125
3126         if (packets == 0)
3127                 goto update_itr_done;
3128
3129         switch (itr_setting) {
3130         case lowest_latency:
3131                 /* handle TSO and jumbo frames */
3132                 if (bytes/packets > 8000)
3133                         retval = bulk_latency;
3134                 else if ((packets < 5) && (bytes > 512))
3135                         retval = low_latency;
3136                 break;
3137         case low_latency:  /* 50 usec aka 20000 ints/s */
3138                 if (bytes > 10000) {
3139                         /* this if handles the TSO accounting */
3140                         if (bytes/packets > 8000) {
3141                                 retval = bulk_latency;
3142                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3143                                 retval = bulk_latency;
3144                         } else if ((packets > 35)) {
3145                                 retval = lowest_latency;
3146                         }
3147                 } else if (bytes/packets > 2000) {
3148                         retval = bulk_latency;
3149                 } else if (packets <= 2 && bytes < 512) {
3150                         retval = lowest_latency;
3151                 }
3152                 break;
3153         case bulk_latency: /* 250 usec aka 4000 ints/s */
3154                 if (bytes > 25000) {
3155                         if (packets > 35)
3156                                 retval = low_latency;
3157                 } else if (bytes < 1500) {
3158                         retval = low_latency;
3159                 }
3160                 break;
3161         }
3162
3163 update_itr_done:
3164         return retval;
3165 }
3166
3167 static void igb_set_itr(struct igb_adapter *adapter)
3168 {
3169         struct igb_q_vector *q_vector = adapter->q_vector[0];
3170         u16 current_itr;
3171         u32 new_itr = q_vector->itr_val;
3172
3173         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3174         if (adapter->link_speed != SPEED_1000) {
3175                 current_itr = 0;
3176                 new_itr = 4000;
3177                 goto set_itr_now;
3178         }
3179
3180         adapter->rx_itr = igb_update_itr(adapter,
3181                                     adapter->rx_itr,
3182                                     adapter->rx_ring->total_packets,
3183                                     adapter->rx_ring->total_bytes);
3184
3185         adapter->tx_itr = igb_update_itr(adapter,
3186                                     adapter->tx_itr,
3187                                     adapter->tx_ring->total_packets,
3188                                     adapter->tx_ring->total_bytes);
3189         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3190
3191         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3192         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3193                 current_itr = low_latency;
3194
3195         switch (current_itr) {
3196         /* counts and packets in update_itr are dependent on these numbers */
3197         case lowest_latency:
3198                 new_itr = 56;  /* aka 70,000 ints/sec */
3199                 break;
3200         case low_latency:
3201                 new_itr = 196; /* aka 20,000 ints/sec */
3202                 break;
3203         case bulk_latency:
3204                 new_itr = 980; /* aka 4,000 ints/sec */
3205                 break;
3206         default:
3207                 break;
3208         }
3209
3210 set_itr_now:
3211         adapter->rx_ring->total_bytes = 0;
3212         adapter->rx_ring->total_packets = 0;
3213         adapter->tx_ring->total_bytes = 0;
3214         adapter->tx_ring->total_packets = 0;
3215
3216         if (new_itr != q_vector->itr_val) {
3217                 /* this attempts to bias the interrupt rate towards Bulk
3218                  * by adding intermediate steps when interrupt rate is
3219                  * increasing */
3220                 new_itr = new_itr > q_vector->itr_val ?
3221                              max((new_itr * q_vector->itr_val) /
3222                                  (new_itr + (q_vector->itr_val >> 2)),
3223                                  new_itr) :
3224                              new_itr;
3225                 /* Don't write the value here; it resets the adapter's
3226                  * internal timer, and causes us to delay far longer than
3227                  * we should between interrupts.  Instead, we write the ITR
3228                  * value at the beginning of the next interrupt so the timing
3229                  * ends up being correct.
3230                  */
3231                 q_vector->itr_val = new_itr;
3232                 q_vector->set_itr = 1;
3233         }
3234
3235         return;
3236 }
3237
3238 #define IGB_TX_FLAGS_CSUM               0x00000001
3239 #define IGB_TX_FLAGS_VLAN               0x00000002
3240 #define IGB_TX_FLAGS_TSO                0x00000004
3241 #define IGB_TX_FLAGS_IPV4               0x00000008
3242 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3243 #define IGB_TX_FLAGS_VLAN_MASK  0xffff0000
3244 #define IGB_TX_FLAGS_VLAN_SHIFT 16
3245
3246 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3247                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3248 {
3249         struct e1000_adv_tx_context_desc *context_desc;
3250         unsigned int i;
3251         int err;
3252         struct igb_buffer *buffer_info;
3253         u32 info = 0, tu_cmd = 0;
3254         u32 mss_l4len_idx, l4len;
3255         *hdr_len = 0;
3256
3257         if (skb_header_cloned(skb)) {
3258                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3259                 if (err)
3260                         return err;
3261         }
3262
3263         l4len = tcp_hdrlen(skb);
3264         *hdr_len += l4len;
3265
3266         if (skb->protocol == htons(ETH_P_IP)) {
3267                 struct iphdr *iph = ip_hdr(skb);
3268                 iph->tot_len = 0;
3269                 iph->check = 0;
3270                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3271                                                          iph->daddr, 0,
3272                                                          IPPROTO_TCP,
3273                                                          0);
3274         } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
3275                 ipv6_hdr(skb)->payload_len = 0;
3276                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3277                                                        &ipv6_hdr(skb)->daddr,
3278                                                        0, IPPROTO_TCP, 0);
3279         }
3280
3281         i = tx_ring->next_to_use;
3282
3283         buffer_info = &tx_ring->buffer_info[i];
3284         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3285         /* VLAN MACLEN IPLEN */
3286         if (tx_flags & IGB_TX_FLAGS_VLAN)
3287                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3288         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3289         *hdr_len += skb_network_offset(skb);
3290         info |= skb_network_header_len(skb);
3291         *hdr_len += skb_network_header_len(skb);
3292         context_desc->vlan_macip_lens = cpu_to_le32(info);
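        /* info now packs the context descriptor's VLAN/MACLEN/IPLEN fields:
         * the VLAN tag in the upper 16 bits, the MAC header length in the
         * MACLEN field and the IP header length in the low bits.
         */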
3293
3294         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3295         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3296
3297         if (skb->protocol == htons(ETH_P_IP))
3298                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3299         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3300
3301         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3302
3303         /* MSS L4LEN IDX */
3304         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3305         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3306
3307         /* For 82575, context index must be unique per ring. */
3308         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3309                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3310
3311         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3312         context_desc->seqnum_seed = 0;
3313
3314         buffer_info->time_stamp = jiffies;
3315         buffer_info->next_to_watch = i;
3316         buffer_info->dma = 0;
3317         i++;
3318         if (i == tx_ring->count)
3319                 i = 0;
3320
3321         tx_ring->next_to_use = i;
3322
3323         return true;
3324 }
3325
3326 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3327                                    struct sk_buff *skb, u32 tx_flags)
3328 {
3329         struct e1000_adv_tx_context_desc *context_desc;
3330         struct pci_dev *pdev = tx_ring->pdev;
3331         struct igb_buffer *buffer_info;
3332         u32 info = 0, tu_cmd = 0;
3333         unsigned int i;
3334
3335         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3336             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3337                 i = tx_ring->next_to_use;
3338                 buffer_info = &tx_ring->buffer_info[i];
3339                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3340
3341                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3342                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3343                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3344                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3345                         info |= skb_network_header_len(skb);
3346
3347                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3348
3349                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3350
3351                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3352                         __be16 protocol;
3353
3354                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3355                                 const struct vlan_ethhdr *vhdr =
3356                                           (const struct vlan_ethhdr*)skb->data;
3357
3358                                 protocol = vhdr->h_vlan_encapsulated_proto;
3359                         } else {
3360                                 protocol = skb->protocol;
3361                         }
3362
3363                         switch (protocol) {
3364                         case cpu_to_be16(ETH_P_IP):
3365                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3366                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3367                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3368                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3369                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3370                                 break;
3371                         case cpu_to_be16(ETH_P_IPV6):
3372                                 /* XXX what about other V6 headers?? */
3373                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3374                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3375                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3376                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3377                                 break;
3378                         default:
3379                                 if (unlikely(net_ratelimit()))
3380                                         dev_warn(&pdev->dev,
3381                                             "partial checksum but proto=%x!\n",
3382                                             skb->protocol);
3383                                 break;
3384                         }
3385                 }
3386
3387                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3388                 context_desc->seqnum_seed = 0;
3389                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3390                         context_desc->mss_l4len_idx =
3391                                 cpu_to_le32(tx_ring->reg_idx << 4);
3392
3393                 buffer_info->time_stamp = jiffies;
3394                 buffer_info->next_to_watch = i;
3395                 buffer_info->dma = 0;
3396
3397                 i++;
3398                 if (i == tx_ring->count)
3399                         i = 0;
3400                 tx_ring->next_to_use = i;
3401
3402                 return true;
3403         }
3404         return false;
3405 }
3406
3407 #define IGB_MAX_TXD_PWR 16
3408 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
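/* i.e. each data descriptor may carry at most 64 KB of data */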
3409
3410 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3411                                  unsigned int first)
3412 {
3413         struct igb_buffer *buffer_info;
3414         struct pci_dev *pdev = tx_ring->pdev;
3415         unsigned int len = skb_headlen(skb);
3416         unsigned int count = 0, i;
3417         unsigned int f;
3418         dma_addr_t *map;
3419
3420         i = tx_ring->next_to_use;
3421
3422         if (skb_dma_map(&pdev->dev, skb, DMA_TO_DEVICE)) {
3423                 dev_err(&pdev->dev, "TX DMA map failed\n");
3424                 return 0;
3425         }
3426
3427         map = skb_shinfo(skb)->dma_maps;
3428
3429         buffer_info = &tx_ring->buffer_info[i];
3430         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3431         buffer_info->length = len;
3432         /* set time_stamp *before* dma to help avoid a possible race */
3433         buffer_info->time_stamp = jiffies;
3434         buffer_info->next_to_watch = i;
3435         buffer_info->dma = skb_shinfo(skb)->dma_head;
3436
3437         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3438                 struct skb_frag_struct *frag;
3439
3440                 i++;
3441                 if (i == tx_ring->count)
3442                         i = 0;
3443
3444                 frag = &skb_shinfo(skb)->frags[f];
3445                 len = frag->size;
3446
3447                 buffer_info = &tx_ring->buffer_info[i];
3448                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3449                 buffer_info->length = len;
3450                 buffer_info->time_stamp = jiffies;
3451                 buffer_info->next_to_watch = i;
3452                 buffer_info->dma = map[count];
3453                 count++;
3454         }
3455
3456         tx_ring->buffer_info[i].skb = skb;
3457         tx_ring->buffer_info[first].next_to_watch = i;
3458
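        /* count was bumped once per page fragment; the extra 1 accounts for
         * the linear skb->data buffer mapped via dma_head above. */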
3459         return count + 1;
3460 }
3461
3462 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3463                                     int tx_flags, int count, u32 paylen,
3464                                     u8 hdr_len)
3465 {
3466         union e1000_adv_tx_desc *tx_desc = NULL;
3467         struct igb_buffer *buffer_info;
3468         u32 olinfo_status = 0, cmd_type_len;
3469         unsigned int i;
3470
3471         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3472                         E1000_ADVTXD_DCMD_DEXT);
3473
3474         if (tx_flags & IGB_TX_FLAGS_VLAN)
3475                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3476
3477         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3478                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3479
3480         if (tx_flags & IGB_TX_FLAGS_TSO) {
3481                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3482
3483                 /* insert tcp checksum */
3484                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3485
3486                 /* insert ip checksum */
3487                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3488                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3489
3490         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3491                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3492         }
3493
3494         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3495             (tx_flags & (IGB_TX_FLAGS_CSUM |
3496                          IGB_TX_FLAGS_TSO |
3497                          IGB_TX_FLAGS_VLAN)))
3498                 olinfo_status |= tx_ring->reg_idx << 4;
3499
3500         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
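        /* paylen is skb->len at the call site and hdr_len is non-zero only
         * when TSO was set up, so PAYLEN ends up being the full skb length
         * for normal packets and the TCP payload length for TSO packets. */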
3501
3502         i = tx_ring->next_to_use;
3503         while (count--) {
3504                 buffer_info = &tx_ring->buffer_info[i];
3505                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3506                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3507                 tx_desc->read.cmd_type_len =
3508                         cpu_to_le32(cmd_type_len | buffer_info->length);
3509                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3510                 i++;
3511                 if (i == tx_ring->count)
3512                         i = 0;
3513         }
3514
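        /* only the last descriptor written above gets the final command bits
         * bundled in IGB_ADVTXD_DCMD (see the driver headers); the earlier
         * descriptors in the chain keep just the per-buffer cmd_type_len */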
3515         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3516         /* Force memory writes to complete before letting h/w
3517          * know there are new descriptors to fetch.  (Only
3518          * applicable for weak-ordered memory model archs,
3519          * such as IA-64). */
3520         wmb();
3521
3522         tx_ring->next_to_use = i;
3523         writel(i, tx_ring->tail);
3524         /* we need this if more than one processor can write to our tail
3525          * at a time; it synchronizes IO on IA64/Altix systems */
3526         mmiowb();
3527 }
3528
3529 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3530 {
3531         struct net_device *netdev = tx_ring->netdev;
3532
3533         netif_stop_subqueue(netdev, tx_ring->queue_index);
3534
3535         /* Herbert's original patch had:
3536          *  smp_mb__after_netif_stop_queue();
3537          * but since that doesn't exist yet, just open code it. */
3538         smp_mb();
3539
3540         /* We need to check again in a case another CPU has just
3541          * made room available. */
3542         if (igb_desc_unused(tx_ring) < size)
3543                 return -EBUSY;
3544
3545         /* A reprieve! */
3546         netif_wake_subqueue(netdev, tx_ring->queue_index);
3547         tx_ring->tx_stats.restart_queue++;
3548         return 0;
3549 }
3550
3551 static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3552 {
3553         if (igb_desc_unused(tx_ring) >= size)
3554                 return 0;
3555         return __igb_maybe_stop_tx(tx_ring, size);
3556 }
3557
3558 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3559                                     struct igb_ring *tx_ring)
3560 {
3561         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3562         unsigned int first;
3563         unsigned int tx_flags = 0;
3564         u8 hdr_len = 0;
3565         int count = 0;
3566         int tso = 0;
3567         union skb_shared_tx *shtx = skb_tx(skb);
3568
3569         /* need: 1 descriptor per page,
3570          *       + 2 desc gap to keep tail from touching head,
3571          *       + 1 desc for skb->data,
3572          *       + 1 desc for context descriptor,
3573          * otherwise try next time */
3574         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3575                 /* this is a hard error */
3576                 return NETDEV_TX_BUSY;
3577         }
3578
3579         if (unlikely(shtx->hardware)) {
3580                 shtx->in_progress = 1;
3581                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3582         }
3583
3584         if (adapter->vlgrp && vlan_tx_tag_present(skb)) {
3585                 tx_flags |= IGB_TX_FLAGS_VLAN;
3586                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3587         }
3588
3589         if (skb->protocol == htons(ETH_P_IP))
3590                 tx_flags |= IGB_TX_FLAGS_IPV4;
3591
3592         first = tx_ring->next_to_use;
3593         if (skb_is_gso(skb)) {
3594                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3595                 if (tso < 0) {
3596                         dev_kfree_skb_any(skb);
3597                         return NETDEV_TX_OK;
3598                 }
3599         }
3600
3601         if (tso)
3602                 tx_flags |= IGB_TX_FLAGS_TSO;
3603         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3604                  (skb->ip_summed == CHECKSUM_PARTIAL))
3605                 tx_flags |= IGB_TX_FLAGS_CSUM;
3606
3607         /*
3608          * count reflects descriptors mapped, if 0 then mapping error
3609          * has occurred and we need to rewind the descriptor queue
3610          */
3611         count = igb_tx_map_adv(tx_ring, skb, first);
3612
3613         if (!count) {
3614                 dev_kfree_skb_any(skb);
3615                 tx_ring->buffer_info[first].time_stamp = 0;
3616                 tx_ring->next_to_use = first;
3617                 return NETDEV_TX_OK;
3618         }
3619
3620         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3621
3622         /* Make sure there is space in the ring for the next send. */
3623         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3624
3625         return NETDEV_TX_OK;
3626 }
3627
3628 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3629                                       struct net_device *netdev)
3630 {
3631         struct igb_adapter *adapter = netdev_priv(netdev);
3632         struct igb_ring *tx_ring;
3633         int r_idx = 0;
3634
3635         if (test_bit(__IGB_DOWN, &adapter->state)) {
3636                 dev_kfree_skb_any(skb);
3637                 return NETDEV_TX_OK;
3638         }
3639
3640         if (skb->len <= 0) {
3641                 dev_kfree_skb_any(skb);
3642                 return NETDEV_TX_OK;
3643         }
3644
3645         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3646         tx_ring = adapter->multi_tx_table[r_idx];
3647
3648         /* This goes back to the question of how to logically map a tx queue
3649          * to a flow.  Right now, performance is impacted slightly negatively
3650          * if using multiple tx queues.  If the stack breaks away from a
3651          * single qdisc implementation, we can look at this again. */
3652         return igb_xmit_frame_ring_adv(skb, tx_ring);
3653 }
3654
3655 /**
3656  * igb_tx_timeout - Respond to a Tx Hang
3657  * @netdev: network interface device structure
3658  **/
3659 static void igb_tx_timeout(struct net_device *netdev)
3660 {
3661         struct igb_adapter *adapter = netdev_priv(netdev);
3662         struct e1000_hw *hw = &adapter->hw;
3663
3664         /* Do the reset outside of interrupt context */
3665         adapter->tx_timeout_count++;
3666         schedule_work(&adapter->reset_task);
3667         wr32(E1000_EICS,
3668              (adapter->eims_enable_mask & ~adapter->eims_other));
3669 }
3670
3671 static void igb_reset_task(struct work_struct *work)
3672 {
3673         struct igb_adapter *adapter;
3674         adapter = container_of(work, struct igb_adapter, reset_task);
3675
3676         igb_reinit_locked(adapter);
3677 }
3678
3679 /**
3680  * igb_get_stats - Get System Network Statistics
3681  * @netdev: network interface device structure
3682  *
3683  * Returns the address of the device statistics structure.
3684  * The statistics are actually updated from the timer callback.
3685  **/
3686 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3687 {
3688         /* only return the current stats */
3689         return &netdev->stats;
3690 }
3691
3692 /**
3693  * igb_change_mtu - Change the Maximum Transfer Unit
3694  * @netdev: network interface device structure
3695  * @new_mtu: new value for maximum frame size
3696  *
3697  * Returns 0 on success, negative on failure
3698  **/
3699 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3700 {
3701         struct igb_adapter *adapter = netdev_priv(netdev);
3702         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3703         u32 rx_buffer_len, i;
3704
3705         if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) ||
3706             (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3707                 dev_err(&adapter->pdev->dev, "Invalid MTU setting\n");
3708                 return -EINVAL;
3709         }
3710
3711         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3712                 dev_err(&adapter->pdev->dev, "MTU > 9216 not supported.\n");
3713                 return -EINVAL;
3714         }
3715
3716         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3717                 msleep(1);
3718
3719         /* igb_down has a dependency on max_frame_size */
3720         adapter->max_frame_size = max_frame;
3721         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3722          * means we reserve 2 more; this pushes us to allocate from the next
3723          * larger slab size.
3724          * i.e. RXBUFFER_2048 --> size-4096 slab
3725          */
3726
3727         if (max_frame <= IGB_RXBUFFER_1024)
3728                 rx_buffer_len = IGB_RXBUFFER_1024;
3729         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3730                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3731         else
3732                 rx_buffer_len = IGB_RXBUFFER_128;
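        /* only a small 128-byte buffer for frames beyond the VLAN size;
         * presumably the receive path places just the header here and the
         * rest of the payload in page (packet-split) buffers */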
3733
3734         if (netif_running(netdev))
3735                 igb_down(adapter);
3736
3737         dev_info(&adapter->pdev->dev, "changing MTU from %d to %d\n",
3738                  netdev->mtu, new_mtu);
3739         netdev->mtu = new_mtu;
3740
3741         for (i = 0; i < adapter->num_rx_queues; i++)
3742                 adapter->rx_ring[i].rx_buffer_len = rx_buffer_len;
3743
3744         if (netif_running(netdev))
3745                 igb_up(adapter);
3746         else
3747                 igb_reset(adapter);
3748
3749         clear_bit(__IGB_RESETTING, &adapter->state);
3750
3751         return 0;
3752 }
3753
3754 /**
3755  * igb_update_stats - Update the board statistics counters
3756  * @adapter: board private structure
3757  **/
3758
3759 void igb_update_stats(struct igb_adapter *adapter)
3760 {
3761         struct net_device *netdev = adapter->netdev;
3762         struct e1000_hw *hw = &adapter->hw;
3763         struct pci_dev *pdev = adapter->pdev;
3764         u16 phy_tmp;
3765
3766 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3767
3768         /*
3769          * Prevent stats update while adapter is being reset, or if the pci
3770          * connection is down.
3771          */
3772         if (adapter->link_speed == 0)
3773                 return;
3774         if (pci_channel_offline(pdev))
3775                 return;
3776
3777         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
3778         adapter->stats.gprc += rd32(E1000_GPRC);
3779         adapter->stats.gorc += rd32(E1000_GORCL);
3780         rd32(E1000_GORCH); /* clear GORCL */
3781         adapter->stats.bprc += rd32(E1000_BPRC);
3782         adapter->stats.mprc += rd32(E1000_MPRC);
3783         adapter->stats.roc += rd32(E1000_ROC);
3784
3785         adapter->stats.prc64 += rd32(E1000_PRC64);
3786         adapter->stats.prc127 += rd32(E1000_PRC127);
3787         adapter->stats.prc255 += rd32(E1000_PRC255);
3788         adapter->stats.prc511 += rd32(E1000_PRC511);
3789         adapter->stats.prc1023 += rd32(E1000_PRC1023);
3790         adapter->stats.prc1522 += rd32(E1000_PRC1522);
3791         adapter->stats.symerrs += rd32(E1000_SYMERRS);
3792         adapter->stats.sec += rd32(E1000_SEC);
3793
3794         adapter->stats.mpc += rd32(E1000_MPC);
3795         adapter->stats.scc += rd32(E1000_SCC);
3796         adapter->stats.ecol += rd32(E1000_ECOL);
3797         adapter->stats.mcc += rd32(E1000_MCC);
3798         adapter->stats.latecol += rd32(E1000_LATECOL);
3799         adapter->stats.dc += rd32(E1000_DC);
3800         adapter->stats.rlec += rd32(E1000_RLEC);
3801         adapter->stats.xonrxc += rd32(E1000_XONRXC);
3802         adapter->stats.xontxc += rd32(E1000_XONTXC);
3803         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
3804         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
3805         adapter->stats.fcruc += rd32(E1000_FCRUC);
3806         adapter->stats.gptc += rd32(E1000_GPTC);
3807         adapter->stats.gotc += rd32(E1000_GOTCL);
3808         rd32(E1000_GOTCH); /* clear GOTCL */
3809         adapter->stats.rnbc += rd32(E1000_RNBC);
3810         adapter->stats.ruc += rd32(E1000_RUC);
3811         adapter->stats.rfc += rd32(E1000_RFC);
3812         adapter->stats.rjc += rd32(E1000_RJC);
3813         adapter->stats.tor += rd32(E1000_TORH);
3814         adapter->stats.tot += rd32(E1000_TOTH);
3815         adapter->stats.tpr += rd32(E1000_TPR);
3816
3817         adapter->stats.ptc64 += rd32(E1000_PTC64);
3818         adapter->stats.ptc127 += rd32(E1000_PTC127);
3819         adapter->stats.ptc255 += rd32(E1000_PTC255);
3820         adapter->stats.ptc511 += rd32(E1000_PTC511);
3821         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
3822         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
3823
3824         adapter->stats.mptc += rd32(E1000_MPTC);
3825         adapter->stats.bptc += rd32(E1000_BPTC);
3826
3827         /* used for adaptive IFS */
3828
3829         hw->mac.tx_packet_delta = rd32(E1000_TPT);
3830         adapter->stats.tpt += hw->mac.tx_packet_delta;
3831         hw->mac.collision_delta = rd32(E1000_COLC);
3832         adapter->stats.colc += hw->mac.collision_delta;
3833
3834         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
3835         adapter->stats.rxerrc += rd32(E1000_RXERRC);
3836         adapter->stats.tncrs += rd32(E1000_TNCRS);
3837         adapter->stats.tsctc += rd32(E1000_TSCTC);
3838         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
3839
3840         adapter->stats.iac += rd32(E1000_IAC);
3841         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
3842         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
3843         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
3844         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
3845         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
3846         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
3847         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
3848         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
3849
3850         /* Fill out the OS statistics structure */
3851         netdev->stats.multicast = adapter->stats.mprc;
3852         netdev->stats.collisions = adapter->stats.colc;
3853
3854         /* Rx Errors */
3855
3856         if (hw->mac.type != e1000_82575) {
3857                 u32 rqdpc_tmp;
3858                 u64 rqdpc_total = 0;
3859                 int i;
3860                 /* Read out drop stats per RX queue.  Note that the RQDPC
3861                  * (Receive Queue Drop Packet Count) stat only gets incremented
3862                  * if the DROP_EN bit is set (in the SRRCTL register for that
3863                  * queue).  If the DROP_EN bit is NOT set, then a somewhat
3864                  * equivalent count is stored in RNBC (not on a per-queue basis).
3865                  * Also note the drop count is due to a lack of available
3866                  * descriptors.
3867                  */
3868                 for (i = 0; i < adapter->num_rx_queues; i++) {
3869                         rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0xFFF;
3870                         adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
3871                         rqdpc_total += adapter->rx_ring[i].rx_stats.drops;
3872                 }
3873                 netdev->stats.rx_fifo_errors = rqdpc_total;
3874         }
3875
3876         /* Note RNBC (Receive No Buffers Count) is not an exact
3877          * drop count, as the hardware FIFO might save the day.  That's
3878          * one of the reasons for saving it in rx_fifo_errors, as it is
3879          * potentially not a true drop.
3880          */
3881         netdev->stats.rx_fifo_errors += adapter->stats.rnbc;
3882
3883         /* RLEC on some newer hardware can be incorrect so build
3884          * our own version based on RUC and ROC */
3885         netdev->stats.rx_errors = adapter->stats.rxerrc +
3886                 adapter->stats.crcerrs + adapter->stats.algnerrc +
3887                 adapter->stats.ruc + adapter->stats.roc +
3888                 adapter->stats.cexterr;
3889         netdev->stats.rx_length_errors = adapter->stats.ruc +
3890                                               adapter->stats.roc;
3891         netdev->stats.rx_crc_errors = adapter->stats.crcerrs;
3892         netdev->stats.rx_frame_errors = adapter->stats.algnerrc;
3893         netdev->stats.rx_missed_errors = adapter->stats.mpc;
3894
3895         /* Tx Errors */
3896         netdev->stats.tx_errors = adapter->stats.ecol +
3897                                        adapter->stats.latecol;
3898         netdev->stats.tx_aborted_errors = adapter->stats.ecol;
3899         netdev->stats.tx_window_errors = adapter->stats.latecol;
3900         netdev->stats.tx_carrier_errors = adapter->stats.tncrs;
3901
3902         /* Tx Dropped needs to be maintained elsewhere */
3903
3904         /* Phy Stats */
3905         if (hw->phy.media_type == e1000_media_type_copper) {
3906                 if ((adapter->link_speed == SPEED_1000) &&
3907                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
3908                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
3909                         adapter->phy_stats.idle_errors += phy_tmp;
3910                 }
3911         }
3912
3913         /* Management Stats */
3914         adapter->stats.mgptc += rd32(E1000_MGTPTC);
3915         adapter->stats.mgprc += rd32(E1000_MGTPRC);
3916         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
3917 }
3918
3919 static irqreturn_t igb_msix_other(int irq, void *data)
3920 {
3921         struct igb_adapter *adapter = data;
3922         struct e1000_hw *hw = &adapter->hw;
3923         u32 icr = rd32(E1000_ICR);
3924         /* reading ICR causes bit 31 of EICR to be cleared */
3925
3926         if (icr & E1000_ICR_DOUTSYNC) {
3927                 /* HW is reporting DMA is out of sync */
3928                 adapter->stats.doosync++;
3929         }
3930
3931         /* Check for a mailbox event */
3932         if (icr & E1000_ICR_VMMB)
3933                 igb_msg_task(adapter);
3934
3935         if (icr & E1000_ICR_LSC) {
3936                 hw->mac.get_link_status = 1;
3937                 /* guard against interrupt when we're going down */
3938                 if (!test_bit(__IGB_DOWN, &adapter->state))
3939                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
3940         }
3941
3942         wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_VMMB);
3943         wr32(E1000_EIMS, adapter->eims_other);
3944
3945         return IRQ_HANDLED;
3946 }
3947
3948 static void igb_write_itr(struct igb_q_vector *q_vector)
3949 {
3950         u32 itr_val = q_vector->itr_val & 0x7FFC;
3951
3952         if (!q_vector->set_itr)
3953                 return;
3954
3955         if (!itr_val)
3956                 itr_val = 0x4;
3957
3958         if (q_vector->itr_shift)
3959                 itr_val |= itr_val << q_vector->itr_shift;
3960         else
3961                 itr_val |= 0x8000000;
3962
3963         writel(itr_val, q_vector->itr_register);
3964         q_vector->set_itr = 0;
3965 }
3966
3967 static irqreturn_t igb_msix_ring(int irq, void *data)
3968 {
3969         struct igb_q_vector *q_vector = data;
3970
3971         /* Write the ITR value calculated from the previous interrupt. */
3972         igb_write_itr(q_vector);
3973
3974         napi_schedule(&q_vector->napi);
3975
3976         return IRQ_HANDLED;
3977 }
3978
3979 #ifdef CONFIG_IGB_DCA
3980 static void igb_update_dca(struct igb_q_vector *q_vector)
3981 {
3982         struct igb_adapter *adapter = q_vector->adapter;
3983         struct e1000_hw *hw = &adapter->hw;
3984         int cpu = get_cpu();
3985
3986         if (q_vector->cpu == cpu)
3987                 goto out_no_update;
3988
3989         if (q_vector->tx_ring) {
3990                 int q = q_vector->tx_ring->reg_idx;
3991                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
3992                 if (hw->mac.type == e1000_82575) {
3993                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
3994                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
3995                 } else {
3996                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
3997                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
3998                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
3999                 }
4000                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4001                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4002         }
4003         if (q_vector->rx_ring) {
4004                 int q = q_vector->rx_ring->reg_idx;
4005                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4006                 if (hw->mac.type == e1000_82575) {
4007                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4008                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4009                 } else {
4010                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4011                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4012                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4013                 }
4014                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4015                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4016                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4017                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4018         }
4019         q_vector->cpu = cpu;
4020 out_no_update:
4021         put_cpu();
4022 }
4023
4024 static void igb_setup_dca(struct igb_adapter *adapter)
4025 {
4026         struct e1000_hw *hw = &adapter->hw;
4027         int i;
4028
4029         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4030                 return;
4031
4032         /* Always use CB2 mode, difference is masked in the CB driver. */
4033         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4034
4035         for (i = 0; i < adapter->num_q_vectors; i++) {
4036                 struct igb_q_vector *q_vector = adapter->q_vector[i];
4037                 q_vector->cpu = -1;
4038                 igb_update_dca(q_vector);
4039         }
4040 }
4041
4042 static int __igb_notify_dca(struct device *dev, void *data)
4043 {
4044         struct net_device *netdev = dev_get_drvdata(dev);
4045         struct igb_adapter *adapter = netdev_priv(netdev);
4046         struct e1000_hw *hw = &adapter->hw;
4047         unsigned long event = *(unsigned long *)data;
4048
4049         switch (event) {
4050         case DCA_PROVIDER_ADD:
4051                 /* if already enabled, don't do it again */
4052                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4053                         break;
4054                 /* Always use CB2 mode, difference is masked
4055                  * in the CB driver. */
4056                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4057                 if (dca_add_requester(dev) == 0) {
4058                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4059                         dev_info(&adapter->pdev->dev, "DCA enabled\n");
4060                         igb_setup_dca(adapter);
4061                         break;
4062                 }
4063                 /* Fall Through since DCA is disabled. */
4064         case DCA_PROVIDER_REMOVE:
4065                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4066                         /* without this a class_device is left
4067                          * hanging around in the sysfs model */
4068                         dca_remove_requester(dev);
4069                         dev_info(&adapter->pdev->dev, "DCA disabled\n");
4070                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4071                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4072                 }
4073                 break;
4074         }
4075
4076         return 0;
4077 }
4078
4079 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4080                           void *p)
4081 {
4082         int ret_val;
4083
4084         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4085                                          __igb_notify_dca);
4086
4087         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4088 }
4089 #endif /* CONFIG_IGB_DCA */
4090
4091 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4092 {
4093         struct e1000_hw *hw = &adapter->hw;
4094         u32 ping;
4095         int i;
4096
4097         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4098                 ping = E1000_PF_CONTROL_MSG;
4099                 if (adapter->vf_data[i].clear_to_send)
4100                         ping |= E1000_VT_MSGTYPE_CTS;
4101                 igb_write_mbx(hw, &ping, 1, i);
4102         }
4103 }
4104
4105 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4106                                   u32 *msgbuf, u32 vf)
4107 {
4108         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4109         u16 *hash_list = (u16 *)&msgbuf[1];
4110         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4111         int i;
4112
4113         /* only up to 30 hash values supported */
4114         if (n > 30)
4115                 n = 30;
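        /* (the hash list arrives via the 16-dword PF/VF mailbox, which leaves
         *  room for at most 30 16-bit hash entries after the command word) */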
4116
4117         /* salt away the number of multicast addresses assigned
4118          * to this VF for later use to restore when the PF multicast
4119          * list changes
4120          */
4121         vf_data->num_vf_mc_hashes = n;
4122
4123         /* VFs are limited to using the MTA hash table for their multicast
4124          * addresses */
4125         for (i = 0; i < n; i++)
4126                 vf_data->vf_mc_hashes[i] = hash_list[i];
4127
4128         /* Flush and reset the mta with the new values */
4129         igb_set_rx_mode(adapter->netdev);
4130
4131         return 0;
4132 }
4133
4134 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4135 {
4136         struct e1000_hw *hw = &adapter->hw;
4137         struct vf_data_storage *vf_data;
4138         int i, j;
4139
4140         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4141                 vf_data = &adapter->vf_data[i];
4142                 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4143                         igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4144         }
4145 }
4146
4147 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4148 {
4149         struct e1000_hw *hw = &adapter->hw;
4150         u32 pool_mask, reg, vid;
4151         int i;
4152
4153         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4154
4155         /* Find the vlan filter for this id */
4156         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4157                 reg = rd32(E1000_VLVF(i));
4158
4159                 /* remove the vf from the pool */
4160                 reg &= ~pool_mask;
4161
4162                 /* if pool is empty then remove entry from vfta */
4163                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4164                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4165                         vid = reg & E1000_VLVF_VLANID_MASK;
4166                         igb_vfta_set(hw, vid, false);
4167                         reg = 0;
4168                 }
4169
4170                 wr32(E1000_VLVF(i), reg);
4171         }
4172
4173         adapter->vf_data[vf].vlans_enabled = 0;
4174 }
4175
4176 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4177 {
4178         struct e1000_hw *hw = &adapter->hw;
4179         u32 reg, i;
4180
4181         /* It is an error to call this function when VFs are not enabled */
4182         if (!adapter->vfs_allocated_count)
4183                 return -1;
4184
4185         /* Find the vlan filter for this id */
4186         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4187                 reg = rd32(E1000_VLVF(i));
4188                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4189                     vid == (reg & E1000_VLVF_VLANID_MASK))
4190                         break;
4191         }
4192
4193         if (add) {
4194                 if (i == E1000_VLVF_ARRAY_SIZE) {
4195                         /* Did not find a matching VLAN ID entry that was
4196                          * enabled.  Search for a free filter entry, i.e.
4197                          * one without the enable bit set
4198                          */
4199                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4200                                 reg = rd32(E1000_VLVF(i));
4201                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4202                                         break;
4203                         }
4204                 }
4205                 if (i < E1000_VLVF_ARRAY_SIZE) {
4206                         /* Found an enabled/available entry */
4207                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4208
4209                         /* if !enabled we need to set this up in vfta */
4210                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4211                                 /* add VID to filter table; if the bit is already
4212                                  * set, the PF must have added it outside the table */
4213                                 if (igb_vfta_set(hw, vid, true))
4214                                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT +
4215                                                 adapter->vfs_allocated_count);
4216                                 reg |= E1000_VLVF_VLANID_ENABLE;
4217                         }
4218                         reg &= ~E1000_VLVF_VLANID_MASK;
4219                         reg |= vid;
4220
4221                         wr32(E1000_VLVF(i), reg);
4222
4223                         /* do not modify RLPML for PF devices */
4224                         if (vf >= adapter->vfs_allocated_count)
4225                                 return 0;
4226
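                             /* on the first VLAN enabled for this VF, grow its
                              * max frame size (RLPML) by 4 bytes to make room
                              * for the VLAN tag */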
4227                         if (!adapter->vf_data[vf].vlans_enabled) {
4228                                 u32 size;
4229                                 reg = rd32(E1000_VMOLR(vf));
4230                                 size = reg & E1000_VMOLR_RLPML_MASK;
4231                                 size += 4;
4232                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4233                                 reg |= size;
4234                                 wr32(E1000_VMOLR(vf), reg);
4235                         }
4236                         adapter->vf_data[vf].vlans_enabled++;
4237
4238                         return 0;
4239                 }
4240         } else {
4241                 if (i < E1000_VLVF_ARRAY_SIZE) {
4242                         /* remove vf from the pool */
4243                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4244                         /* if pool is empty then remove entry from vfta */
4245                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4246                                 reg = 0;
4247                                 igb_vfta_set(hw, vid, false);
4248                         }
4249                         wr32(E1000_VLVF(i), reg);
4250
4251                         /* do not modify RLPML for PF devices */
4252                         if (vf >= adapter->vfs_allocated_count)
4253                                 return 0;
4254
4255                         adapter->vf_data[vf].vlans_enabled--;
4256                         if (!adapter->vf_data[vf].vlans_enabled) {
4257                                 u32 size;
4258                                 reg = rd32(E1000_VMOLR(vf));
4259                                 size = reg & E1000_VMOLR_RLPML_MASK;
4260                                 size -= 4;
4261                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4262                                 reg |= size;
4263                                 wr32(E1000_VMOLR(vf), reg);
4264                         }
4265                         return 0;
4266                 }
4267         }
4268         return -1;
4269 }
4270
4271 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4272 {
4273         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4274         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4275
4276         return igb_vlvf_set(adapter, vid, add, vf);
4277 }
4278
4279 static inline void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4280 {
4281         struct e1000_hw *hw = &adapter->hw;
4282
4283         /* disable mailbox functionality for vf */
4284         adapter->vf_data[vf].clear_to_send = false;
4285
4286         /* reset offloads to defaults */
4287         igb_set_vmolr(hw, vf);
4288
4289         /* reset vlans for device */
4290         igb_clear_vf_vfta(adapter, vf);
4291
4292         /* reset multicast table array for vf */
4293         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4294
4295         /* Flush and reset the mta with the new values */
4296         igb_set_rx_mode(adapter->netdev);
4297 }
4298
4299 static inline void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4300 {
4301         struct e1000_hw *hw = &adapter->hw;
4302         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
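         /* VF MAC addresses are kept at the end of the RAR table and work
          * downwards, so VF n uses entry rar_entry_count - (n + 1) */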
4303         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4304         u32 reg, msgbuf[3];
4305         u8 *addr = (u8 *)(&msgbuf[1]);
4306
4307         /* process all the same items cleared in a function level reset */
4308         igb_vf_reset_event(adapter, vf);
4309
4310         /* set vf mac address */
4311         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4312
4313         /* enable transmit and receive for vf */
4314         reg = rd32(E1000_VFTE);
4315         wr32(E1000_VFTE, reg | (1 << vf));
4316         reg = rd32(E1000_VFRE);
4317         wr32(E1000_VFRE, reg | (1 << vf));
4318
4319         /* enable mailbox functionality for vf */
4320         adapter->vf_data[vf].clear_to_send = true;
4321
4322         /* reply to reset with ack and vf mac address */
4323         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4324         memcpy(addr, vf_mac, 6);
4325         igb_write_mbx(hw, msgbuf, 3, vf);
4326 }
4327
4328 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4329 {
4330         unsigned char *addr = (unsigned char *)&msg[1];
4331         int err = -1;
4332
4333         if (is_valid_ether_addr(addr))
4334                 err = igb_set_vf_mac(adapter, vf, addr);
4335
4336         return err;
4337
4338 }
4339
4340 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4341 {
4342         struct e1000_hw *hw = &adapter->hw;
4343         u32 msg = E1000_VT_MSGTYPE_NACK;
4344
4345         /* if device isn't clear to send it shouldn't be reading either */
4346         if (!adapter->vf_data[vf].clear_to_send)
4347                 igb_write_mbx(hw, &msg, 1, vf);
4348 }
4349
4350
4351 static void igb_msg_task(struct igb_adapter *adapter)
4352 {
4353         struct e1000_hw *hw = &adapter->hw;
4354         u32 vf;
4355
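         /* the igb_check_for_* mailbox helpers return 0 when the corresponding
          * event (reset request, message or ack) is pending for this VF */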
4356         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4357                 /* process any reset requests */
4358                 if (!igb_check_for_rst(hw, vf)) {
4359                         adapter->vf_data[vf].clear_to_send = false;
4360                         igb_vf_reset_event(adapter, vf);
4361                 }
4362
4363                 /* process any messages pending */
4364                 if (!igb_check_for_msg(hw, vf))
4365                         igb_rcv_msg_from_vf(adapter, vf);
4366
4367                 /* process any acks */
4368                 if (!igb_check_for_ack(hw, vf))
4369                         igb_rcv_ack_from_vf(adapter, vf);
4370
4371         }
4372 }
4373
4374 static int igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4375 {
4376         u32 mbx_size = E1000_VFMAILBOX_SIZE;
4377         u32 msgbuf[mbx_size];
4378         struct e1000_hw *hw = &adapter->hw;
4379         s32 retval;
4380
4381         retval = igb_read_mbx(hw, msgbuf, mbx_size, vf);
4382
4383         if (retval)
4384                 dev_err(&adapter->pdev->dev,
4385                         "Error receiving message from VF\n");
4386
4387         /* this is a message we already processed, do nothing */
4388         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4389                 return retval;
4390
4391         /*
4392          * until the vf completes a reset it should not be
4393          * allowed to start any configuration.
4394          */
4395
4396         if (msgbuf[0] == E1000_VF_RESET) {
4397                 igb_vf_reset_msg(adapter, vf);
4398
4399                 return retval;
4400         }
4401
4402         if (!adapter->vf_data[vf].clear_to_send) {
4403                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4404                 igb_write_mbx(hw, msgbuf, 1, vf);
4405                 return retval;
4406         }
4407
4408         switch ((msgbuf[0] & 0xFFFF)) {
4409         case E1000_VF_SET_MAC_ADDR:
4410                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4411                 break;
4412         case E1000_VF_SET_MULTICAST:
4413                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4414                 break;
4415         case E1000_VF_SET_LPE:
4416                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4417                 break;
4418         case E1000_VF_SET_VLAN:
4419                 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4420                 break;
4421         default:
4422                 dev_err(&adapter->pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4423                 retval = -1;
4424                 break;
4425         }
4426
4427         /* notify the VF of the results of what it sent us */
4428         if (retval)
4429                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4430         else
4431                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4432
4433         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4434
4435         igb_write_mbx(hw, msgbuf, 1, vf);
4436
4437         return retval;
4438 }
4439
4440 /**
4441  *  igb_set_uta - Set unicast filter table address
4442  *  @adapter: board private structure
4443  *
4444  *  The unicast table address is a register array of 32-bit registers.
4445  *  The table is meant to be used in a way similar to how the MTA is used;
4446  *  however, due to certain limitations in the hardware it is necessary to
4447  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4448  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
4449  **/
4450 static void igb_set_uta(struct igb_adapter *adapter)
4451 {
4452         struct e1000_hw *hw = &adapter->hw;
4453         int i;
4454
4455         /* The UTA table only exists on 82576 hardware and newer */
4456         if (hw->mac.type < e1000_82576)
4457                 return;
4458
4459         /* we only need to do this if VMDq is enabled */
4460         if (!adapter->vfs_allocated_count)
4461                 return;
4462
4463         for (i = 0; i < hw->mac.uta_reg_count; i++)
4464                 array_wr32(E1000_UTA, i, ~0);
4465 }
4466
4467 /**
4468  * igb_intr_msi - Interrupt Handler
4469  * @irq: interrupt number
4470  * @data: pointer to a network interface device structure
4471  **/
4472 static irqreturn_t igb_intr_msi(int irq, void *data)
4473 {
4474         struct igb_adapter *adapter = data;
4475         struct igb_q_vector *q_vector = adapter->q_vector[0];
4476         struct e1000_hw *hw = &adapter->hw;
4477         /* read ICR disables interrupts using IAM */
4478         u32 icr = rd32(E1000_ICR);
4479
4480         igb_write_itr(q_vector);
4481
4482         if (icr & E1000_ICR_DOUTSYNC) {
4483                 /* HW is reporting DMA is out of sync */
4484                 adapter->stats.doosync++;
4485         }
4486
4487         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4488                 hw->mac.get_link_status = 1;
4489                 if (!test_bit(__IGB_DOWN, &adapter->state))
4490                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4491         }
4492
4493         napi_schedule(&q_vector->napi);
4494
4495         return IRQ_HANDLED;
4496 }
4497
4498 /**
4499  * igb_intr - Legacy Interrupt Handler
4500  * @irq: interrupt number
4501  * @data: pointer to a network interface device structure
4502  **/
4503 static irqreturn_t igb_intr(int irq, void *data)
4504 {
4505         struct igb_adapter *adapter = data;
4506         struct igb_q_vector *q_vector = adapter->q_vector[0];
4507         struct e1000_hw *hw = &adapter->hw;
4508         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4509          * need for the IMC write */
4510         u32 icr = rd32(E1000_ICR);
4511         if (!icr)
4512                 return IRQ_NONE;  /* Not our interrupt */
4513
4514         igb_write_itr(q_vector);
4515
4516         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4517          * not set, then the adapter didn't send an interrupt */
4518         if (!(icr & E1000_ICR_INT_ASSERTED))
4519                 return IRQ_NONE;
4520
4521         if (icr & E1000_ICR_DOUTSYNC) {
4522                 /* HW is reporting DMA is out of sync */
4523                 adapter->stats.doosync++;
4524         }
4525
4526         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4527                 hw->mac.get_link_status = 1;
4528                 /* guard against interrupt when we're going down */
4529                 if (!test_bit(__IGB_DOWN, &adapter->state))
4530                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4531         }
4532
4533         napi_schedule(&q_vector->napi);
4534
4535         return IRQ_HANDLED;
4536 }
4537
4538 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4539 {
4540         struct igb_adapter *adapter = q_vector->adapter;
4541         struct e1000_hw *hw = &adapter->hw;
4542
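         /* ITR is only recalculated here when a dynamic (adaptive) moderation
          * setting is in use for the traffic this vector services */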
4543         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4544             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4545                 if (!adapter->msix_entries)
4546                         igb_set_itr(adapter);
4547                 else
4548                         igb_update_ring_itr(q_vector);
4549         }
4550
4551         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4552                 if (adapter->msix_entries)
4553                         wr32(E1000_EIMS, q_vector->eims_value);
4554                 else
4555                         igb_irq_enable(adapter);
4556         }
4557 }
4558
4559 /**
4560  * igb_poll - NAPI Rx polling callback
4561  * @napi: napi polling structure
4562  * @budget: count of how many packets we should handle
4563  **/
4564 static int igb_poll(struct napi_struct *napi, int budget)
4565 {
4566         struct igb_q_vector *q_vector = container_of(napi,
4567                                                      struct igb_q_vector,
4568                                                      napi);
4569         int tx_clean_complete = 1, work_done = 0;
4570
4571 #ifdef CONFIG_IGB_DCA
4572         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4573                 igb_update_dca(q_vector);
4574 #endif
4575         if (q_vector->tx_ring)
4576                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4577
4578         if (q_vector->rx_ring)
4579                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4580
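         /* if Tx cleanup did not finish within its quota, claim the full
          * budget so NAPI keeps polling instead of re-enabling the interrupt */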
4581         if (!tx_clean_complete)
4582                 work_done = budget;
4583
4584         /* If not enough Rx work done, exit the polling mode */
4585         if (work_done < budget) {
4586                 napi_complete(napi);
4587                 igb_ring_irq_enable(q_vector);
4588         }
4589
4590         return work_done;
4591 }
4592
4593 /**
4594  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4595  * @adapter: board private structure
4596  * @shhwtstamps: timestamp structure to update
4597  * @regval: unsigned 64bit system time value.
4598  *
4599  * We need to convert the system time value stored in the RX/TXSTMP registers
4600  * into a hwtstamp which can be used by the upper level timestamping functions
4601  */
4602 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4603                                    struct skb_shared_hwtstamps *shhwtstamps,
4604                                    u64 regval)
4605 {
4606         u64 ns;
4607
4608         ns = timecounter_cyc2time(&adapter->clock, regval);
4609         timecompare_update(&adapter->compare, ns);
4610         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4611         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4612         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4613 }
4614
4615 /**
4616  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4617  * @q_vector: pointer to q_vector containing needed info
4618  * @skb: packet that was just sent
4619  *
4620  * If we were asked to do hardware stamping and such a time stamp is
4621  * available, then it must have been for this skb here because we only
4622  * allow one such packet into the queue.
4623  */
4624 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4625 {
4626         struct igb_adapter *adapter = q_vector->adapter;
4627         union skb_shared_tx *shtx = skb_tx(skb);
4628         struct e1000_hw *hw = &adapter->hw;
4629         struct skb_shared_hwtstamps shhwtstamps;
4630         u64 regval;
4631
4632         /* exit if the skb did not request a hw timestamp or no TX stamp is valid */
4633         if (likely(!shtx->hardware) ||
4634             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4635                 return;
4636
4637         regval = rd32(E1000_TXSTMPL);
4638         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4639
4640         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4641         skb_tstamp_tx(skb, &shhwtstamps);
4642 }
4643
4644 /**
4645  * igb_clean_tx_irq - Reclaim resources after transmit completes
4646  * @q_vector: pointer to q_vector containing needed info
4647  * returns true if ring is completely cleaned
4648  **/
4649 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4650 {
4651         struct igb_adapter *adapter = q_vector->adapter;
4652         struct igb_ring *tx_ring = q_vector->tx_ring;
4653         struct net_device *netdev = tx_ring->netdev;
4654         struct e1000_hw *hw = &adapter->hw;
4655         struct igb_buffer *buffer_info;
4656         struct sk_buff *skb;
4657         union e1000_adv_tx_desc *tx_desc, *eop_desc;
4658         unsigned int total_bytes = 0, total_packets = 0;
4659         unsigned int i, eop, count = 0;
4660         bool cleaned = false;
4661
4662         i = tx_ring->next_to_clean;
4663         eop = tx_ring->buffer_info[i].next_to_watch;
4664         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4665
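         /* the DD bit in the end-of-packet descriptor signals that the
          * hardware is done with the whole packet and its buffers can be
          * reclaimed */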
4666         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
4667                (count < tx_ring->count)) {
4668                 for (cleaned = false; !cleaned; count++) {
4669                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4670                         buffer_info = &tx_ring->buffer_info[i];
4671                         cleaned = (i == eop);
4672                         skb = buffer_info->skb;
4673
4674                         if (skb) {
4675                                 unsigned int segs, bytecount;
4676                                 /* gso_segs is currently only valid for tcp */
4677                                 segs = skb_shinfo(skb)->gso_segs ?: 1;
4678                                 /* multiply data chunks by size of headers */
4679                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
4680                                             skb->len;
4681                                 total_packets += segs;
4682                                 total_bytes += bytecount;
4683
4684                                 igb_tx_hwtstamp(q_vector, skb);
4685                         }
4686
4687                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4688                         tx_desc->wb.status = 0;
4689
4690                         i++;
4691                         if (i == tx_ring->count)
4692                                 i = 0;
4693                 }
4694                 eop = tx_ring->buffer_info[i].next_to_watch;
4695                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4696         }
4697
4698         tx_ring->next_to_clean = i;
4699
4700         if (unlikely(count &&
4701                      netif_carrier_ok(netdev) &&
4702                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
4703                 /* Make sure that anybody stopping the queue after this
4704                  * sees the new next_to_clean.
4705                  */
4706                 smp_mb();
4707                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
4708                     !(test_bit(__IGB_DOWN, &adapter->state))) {
4709                         netif_wake_subqueue(netdev, tx_ring->queue_index);
4710                         tx_ring->tx_stats.restart_queue++;
4711                 }
4712         }
4713
4714         if (tx_ring->detect_tx_hung) {
4715                 /* Detect a transmit hang in hardware, this serializes the
4716                  * check with the clearing of time_stamp and movement of i */
4717                 tx_ring->detect_tx_hung = false;
4718                 if (tx_ring->buffer_info[i].time_stamp &&
4719                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
4720                                (adapter->tx_timeout_factor * HZ))
4721                     && !(rd32(E1000_STATUS) &
4722                          E1000_STATUS_TXOFF)) {
4723
4724                         /* detected Tx unit hang */
4725                         dev_err(&tx_ring->pdev->dev,
4726                                 "Detected Tx Unit Hang\n"
4727                                 "  Tx Queue             <%d>\n"
4728                                 "  TDH                  <%x>\n"
4729                                 "  TDT                  <%x>\n"
4730                                 "  next_to_use          <%x>\n"
4731                                 "  next_to_clean        <%x>\n"
4732                                 "buffer_info[next_to_clean]\n"
4733                                 "  time_stamp           <%lx>\n"
4734                                 "  next_to_watch        <%x>\n"
4735                                 "  jiffies              <%lx>\n"
4736                                 "  desc.status          <%x>\n",
4737                                 tx_ring->queue_index,
4738                                 readl(tx_ring->head),
4739                                 readl(tx_ring->tail),
4740                                 tx_ring->next_to_use,
4741                                 tx_ring->next_to_clean,
4742                                 tx_ring->buffer_info[i].time_stamp,
4743                                 eop,
4744                                 jiffies,
4745                                 eop_desc->wb.status);
4746                         netif_stop_subqueue(netdev, tx_ring->queue_index);
4747                 }
4748         }
4749         tx_ring->total_bytes += total_bytes;
4750         tx_ring->total_packets += total_packets;
4751         tx_ring->tx_stats.bytes += total_bytes;
4752         tx_ring->tx_stats.packets += total_packets;
4753         netdev->stats.tx_bytes += total_bytes;
4754         netdev->stats.tx_packets += total_packets;
4755         return (count < tx_ring->count);
4756 }
4757
4758 /**
4759  * igb_receive_skb - helper function to handle rx indications
4760  * @q_vector: structure containing interrupt and ring information
4761  * @skb: packet to send up
4762  * @vlan_tag: vlan tag for packet
4763  **/
4764 static void igb_receive_skb(struct igb_q_vector *q_vector,
4765                             struct sk_buff *skb,
4766                             u16 vlan_tag)
4767 {
4768         struct igb_adapter *adapter = q_vector->adapter;
4769
4770         if (vlan_tag)
4771                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
4772                                  vlan_tag, skb);
4773         else
4774                 napi_gro_receive(&q_vector->napi, skb);
4775 }
4776
4777 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
4778                                        u32 status_err, struct sk_buff *skb)
4779 {
4780         skb->ip_summed = CHECKSUM_NONE;
4781
4782         /* skip if the Ignore Checksum bit is set or Rx checksumming is disabled via ethtool */
4783         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
4784              (status_err & E1000_RXD_STAT_IXSM))
4785                 return;
4786
4787         /* TCP/UDP checksum error bit is set */
4788         if (status_err &
4789             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
4790                 /*
4791                  * work around errata with sctp packets where the TCPE aka
4792                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
4793                  * packets (i.e. let the stack check the crc32c)
4794                  */
4795                 if ((skb->len == 60) &&
4796                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
4797                         ring->rx_stats.csum_err++;
4798
4799                 /* let the stack verify checksum errors */
4800                 return;
4801         }
4802         /* It must be a TCP or UDP packet with a valid checksum */
4803         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
4804                 skb->ip_summed = CHECKSUM_UNNECESSARY;
4805
4806         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
4807 }
4808
4809 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
4810                                    struct sk_buff *skb)
4811 {
4812         struct igb_adapter *adapter = q_vector->adapter;
4813         struct e1000_hw *hw = &adapter->hw;
4814         u64 regval;
4815
4816         /*
4817          * If this bit is set, then the RX registers contain the time stamp. No
4818          * other packet will be time stamped until we read these registers, so
4819          * read the registers to make them available again. Because only one
4820          * packet can be time stamped at a time, we know that the register
4821          * values must belong to this one here and therefore we don't need to
4822          * compare any of the additional attributes stored for it.
4823          *
4824          * If nothing went wrong, then it should have a skb_shared_tx that we
4825          * can turn into a skb_shared_hwtstamps.
4826          */
4827         if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
4828                 return;
4829         if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
4830                 return;
4831
4832         regval = rd32(E1000_RXSTMPL);
4833         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4834
4835         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
4836 }
4837 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
4838                                union e1000_adv_rx_desc *rx_desc)
4839 {
4840         /* HW will not DMA in data larger than the given buffer, even if it
4841          * parses the (NFS, of course) header to be larger.  In that case, it
4842          * fills the header buffer and spills the rest into the page.
4843          */
4844         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
4845                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
4846         if (hlen > rx_ring->rx_buffer_len)
4847                 hlen = rx_ring->rx_buffer_len;
4848         return hlen;
4849 }
4850
4851 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
4852                                  int *work_done, int budget)
4853 {
4854         struct igb_ring *rx_ring = q_vector->rx_ring;
4855         struct net_device *netdev = rx_ring->netdev;
4856         struct pci_dev *pdev = rx_ring->pdev;
4857         union e1000_adv_rx_desc *rx_desc, *next_rxd;
4858         struct igb_buffer *buffer_info, *next_buffer;
4859         struct sk_buff *skb;
4860         bool cleaned = false;
4861         int cleaned_count = 0;
4862         unsigned int total_bytes = 0, total_packets = 0;
4863         unsigned int i;
4864         u32 staterr;
4865         u16 length;
4866         u16 vlan_tag;
4867
4868         i = rx_ring->next_to_clean;
4869         buffer_info = &rx_ring->buffer_info[i];
4870         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
4871         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4872
4873         while (staterr & E1000_RXD_STAT_DD) {
4874                 if (*work_done >= budget)
4875                         break;
4876                 (*work_done)++;
4877
4878                 skb = buffer_info->skb;
4879                 prefetch(skb->data - NET_IP_ALIGN);
4880                 buffer_info->skb = NULL;
4881
4882                 i++;
4883                 if (i == rx_ring->count)
4884                         i = 0;
4885                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
4886                 prefetch(next_rxd);
4887                 next_buffer = &rx_ring->buffer_info[i];
4888
4889                 length = le16_to_cpu(rx_desc->wb.upper.length);
4890                 cleaned = true;
4891                 cleaned_count++;
4892
4893                 if (buffer_info->dma) {
4894                         pci_unmap_single(pdev, buffer_info->dma,
4895                                          rx_ring->rx_buffer_len,
4896                                          PCI_DMA_FROMDEVICE);
4897                         buffer_info->dma = 0;
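                             /* large buffers receive the whole packet in the
                              * skb; smaller buffers only hold the parsed header
                              * and the payload is DMAed into the half-page
                              * handled below */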
4898                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
4899                                 skb_put(skb, length);
4900                                 goto send_up;
4901                         }
4902                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
4903                 }
4904
4905                 if (length) {
4906                         pci_unmap_page(pdev, buffer_info->page_dma,
4907                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
4908                         buffer_info->page_dma = 0;
4909
4910                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
4911                                                 buffer_info->page,
4912                                                 buffer_info->page_offset,
4913                                                 length);
4914
4915                         if (page_count(buffer_info->page) != 1)
4916                                 buffer_info->page = NULL;
4917                         else
4918                                 get_page(buffer_info->page);
4919
4920                         skb->len += length;
4921                         skb->data_len += length;
4922
4923                         skb->truesize += length;
4924                 }
4925
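                 /* packet spans multiple descriptors: carry the skb over to
                  * the next buffer_info and keep accumulating fragments */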
4926                 if (!(staterr & E1000_RXD_STAT_EOP)) {
4927                         buffer_info->skb = next_buffer->skb;
4928                         buffer_info->dma = next_buffer->dma;
4929                         next_buffer->skb = skb;
4930                         next_buffer->dma = 0;
4931                         goto next_desc;
4932                 }
4933 send_up:
4934                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
4935                         dev_kfree_skb_irq(skb);
4936                         goto next_desc;
4937                 }
4938
4939                 igb_rx_hwtstamp(q_vector, staterr, skb);
4940                 total_bytes += skb->len;
4941                 total_packets++;
4942
4943                 igb_rx_checksum_adv(rx_ring, staterr, skb);
4944
4945                 skb->protocol = eth_type_trans(skb, netdev);
4946                 skb_record_rx_queue(skb, rx_ring->queue_index);
4947
4948                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
4949                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
4950
4951                 igb_receive_skb(q_vector, skb, vlan_tag);
4952
4953 next_desc:
4954                 rx_desc->wb.upper.status_error = 0;
4955
4956                 /* return some buffers to hardware, one at a time is too slow */
4957                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
4958                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
4959                         cleaned_count = 0;
4960                 }
4961
4962                 /* use prefetched values */
4963                 rx_desc = next_rxd;
4964                 buffer_info = next_buffer;
4965                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4966         }
4967
4968         rx_ring->next_to_clean = i;
4969         cleaned_count = igb_desc_unused(rx_ring);
4970
4971         if (cleaned_count)
4972                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
4973
4974         rx_ring->total_packets += total_packets;
4975         rx_ring->total_bytes += total_bytes;
4976         rx_ring->rx_stats.packets += total_packets;
4977         rx_ring->rx_stats.bytes += total_bytes;
4978         netdev->stats.rx_bytes += total_bytes;
4979         netdev->stats.rx_packets += total_packets;
4980         return cleaned;
4981 }
4982
4983 /**
4984  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
4985  * @rx_ring: pointer to the rx descriptor ring to refill
4986  **/
4987 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
4988 {
4989         struct net_device *netdev = rx_ring->netdev;
4990         union e1000_adv_rx_desc *rx_desc;
4991         struct igb_buffer *buffer_info;
4992         struct sk_buff *skb;
4993         unsigned int i;
4994         int bufsz;
4995
4996         i = rx_ring->next_to_use;
4997         buffer_info = &rx_ring->buffer_info[i];
4998
4999         bufsz = rx_ring->rx_buffer_len;
5000
5001         while (cleaned_count--) {
5002                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5003
5004                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5005                         if (!buffer_info->page) {
5006                                 buffer_info->page = alloc_page(GFP_ATOMIC);
5007                                 if (!buffer_info->page) {
5008                                         rx_ring->rx_stats.alloc_failed++;
5009                                         goto no_buffers;
5010                                 }
5011                                 buffer_info->page_offset = 0;
5012                         } else {
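                                     /* flip to the unused half of the page
                                      * allocated previously */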
5013                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5014                         }
5015                         buffer_info->page_dma =
5016                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5017                                              buffer_info->page_offset,
5018                                              PAGE_SIZE / 2,
5019                                              PCI_DMA_FROMDEVICE);
5020                 }
5021
5022                 if (!buffer_info->skb) {
5023                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5024                         if (!skb) {
5025                                 rx_ring->rx_stats.alloc_failed++;
5026                                 goto no_buffers;
5027                         }
5028
5029                         buffer_info->skb = skb;
5030                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5031                                                           skb->data,
5032                                                           bufsz,
5033                                                           PCI_DMA_FROMDEVICE);
5034                 }
5035                 /* Refresh the desc even if buffer_addrs didn't change because
5036                  * each write-back erases this info. */
5037                 if (bufsz < IGB_RXBUFFER_1024) {
5038                         rx_desc->read.pkt_addr =
5039                              cpu_to_le64(buffer_info->page_dma);
5040                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5041                 } else {
5042                         rx_desc->read.pkt_addr =
5043                              cpu_to_le64(buffer_info->dma);
5044                         rx_desc->read.hdr_addr = 0;
5045                 }
5046
5047                 i++;
5048                 if (i == rx_ring->count)
5049                         i = 0;
5050                 buffer_info = &rx_ring->buffer_info[i];
5051         }
5052
5053 no_buffers:
5054         if (rx_ring->next_to_use != i) {
5055                 rx_ring->next_to_use = i;
5056                 if (i == 0)
5057                         i = (rx_ring->count - 1);
5058                 else
5059                         i--;
5060
5061                 /* Force memory writes to complete before letting h/w
5062                  * know there are new descriptors to fetch.  (Only
5063                  * applicable for weak-ordered memory model archs,
5064                  * such as IA-64). */
5065                 wmb();
5066                 writel(i, rx_ring->tail);
5067         }
5068 }
5069
5070 /**
5071  * igb_mii_ioctl -
5072  * @netdev:
5073  * @ifreq:
5074  * @cmd:
5075  **/
5076 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5077 {
5078         struct igb_adapter *adapter = netdev_priv(netdev);
5079         struct mii_ioctl_data *data = if_mii(ifr);
5080
5081         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5082                 return -EOPNOTSUPP;
5083
5084         switch (cmd) {
5085         case SIOCGMIIPHY:
5086                 data->phy_id = adapter->hw.phy.addr;
5087                 break;
5088         case SIOCGMIIREG:
5089                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5090                                      &data->val_out))
5091                         return -EIO;
5092                 break;
5093         case SIOCSMIIREG:
5094         default:
5095                 return -EOPNOTSUPP;
5096         }
5097         return 0;
5098 }
5099
5100 /**
5101  * igb_hwtstamp_ioctl - control hardware time stamping
5102  * @netdev:
5103  * @ifreq:
5104  * @cmd:
5105  *
5106  * Outgoing time stamping can be enabled and disabled. Play nice and
5107  * disable it when requested, although it shouldn't cause any overhead
5108  * when no packet needs it. At most one packet in the queue may be
5109  * marked for time stamping, otherwise it would be impossible to tell
5110  * for sure to which packet the hardware time stamp belongs.
5111  *
5112  * Incoming time stamping has to be configured via the hardware
5113  * filters. Not all combinations are supported, in particular event
5114  * type has to be specified. Matching the kind of event packet is
5115  * not supported, with the exception of "all V2 events regardless of
5116  * level 2 or 4".
5117  *
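      * Userspace typically drives this through the SIOCSHWTSTAMP ioctl; a
      * rough, illustrative sketch (device name and socket fd are examples):
      *
      *   struct hwtstamp_config cfg = { 0 };
      *   struct ifreq ifr = { 0 };
      *
      *   cfg.tx_type   = HWTSTAMP_TX_ON;
      *   cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
      *   strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
      *   ifr.ifr_data = (void *)&cfg;
      *   ioctl(sock_fd, SIOCSHWTSTAMP, &ifr);
      *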
5118  **/
5119 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5120                               struct ifreq *ifr, int cmd)
5121 {
5122         struct igb_adapter *adapter = netdev_priv(netdev);
5123         struct e1000_hw *hw = &adapter->hw;
5124         struct hwtstamp_config config;
5125         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5126         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5127         u32 tsync_rx_cfg = 0;
5128         bool is_l4 = false;
5129         bool is_l2 = false;
5130         u32 regval;
5131
5132         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5133                 return -EFAULT;
5134
5135         /* reserved for future extensions */
5136         if (config.flags)
5137                 return -EINVAL;
5138
5139         switch (config.tx_type) {
5140         case HWTSTAMP_TX_OFF:
5141                 tsync_tx_ctl = 0;
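                 /* fall through */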
5142         case HWTSTAMP_TX_ON:
5143                 break;
5144         default:
5145                 return -ERANGE;
5146         }
5147
5148         switch (config.rx_filter) {
5149         case HWTSTAMP_FILTER_NONE:
5150                 tsync_rx_ctl = 0;
5151                 break;
5152         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5153         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5154         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5155         case HWTSTAMP_FILTER_ALL:
5156                 /*
5157                  * register TSYNCRXCFG must be set, therefore it is not
5158                  * possible to time stamp both Sync and Delay_Req messages
5159                  * => fall back to time stamping all packets
5160                  */
5161                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5162                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5163                 break;
5164         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5165                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5166                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5167                 is_l4 = true;
5168                 break;
5169         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5170                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5171                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5172                 is_l4 = true;
5173                 break;
5174         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5175         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5176                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5177                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5178                 is_l2 = true;
5179                 is_l4 = true;
5180                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5181                 break;
5182         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5183         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5184                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5185                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5186                 is_l2 = true;
5187                 is_l4 = true;
5188                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5189                 break;
5190         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5191         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5192         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5193                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5194                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5195                 is_l2 = true;
5196                 break;
5197         default:
5198                 return -ERANGE;
5199         }
5200
5201         if (hw->mac.type == e1000_82575) {
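                 /* 82575 hardware has no time stamping support, so any
                  * request to enable it must fail */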
5202                 if (tsync_rx_ctl | tsync_tx_ctl)
5203                         return -EINVAL;
5204                 return 0;
5205         }
5206
5207         /* enable/disable TX */
5208         regval = rd32(E1000_TSYNCTXCTL);
5209         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5210         regval |= tsync_tx_ctl;
5211         wr32(E1000_TSYNCTXCTL, regval);
5212
5213         /* enable/disable RX */
5214         regval = rd32(E1000_TSYNCRXCTL);
5215         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5216         regval |= tsync_rx_ctl;
5217         wr32(E1000_TSYNCRXCTL, regval);
5218
5219         /* define which PTP packets are time stamped */
5220         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5221
5222         /* define ethertype filter for timestamped packets */
5223         if (is_l2)
5224                 wr32(E1000_ETQF(3),
5225                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5226                                  E1000_ETQF_1588 | /* enable timestamping */
5227                                  ETH_P_1588));     /* 1588 eth protocol type */
5228         else
5229                 wr32(E1000_ETQF(3), 0);
5230
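 /* PTP event messages (Sync, Delay_Req) are sent to UDP port 319 */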
5231 #define PTP_PORT 319
5232         /* L4 Queue Filter[3]: filter by destination port and protocol */
5233         if (is_l4) {
5234                 u32 ftqf = (IPPROTO_UDP /* UDP */
5235                         | E1000_FTQF_VF_BP /* VF not compared */
5236                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5237                         | E1000_FTQF_MASK); /* mask all inputs */
5238                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5239
5240                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5241                 wr32(E1000_IMIREXT(3),
5242                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5243                 if (hw->mac.type == e1000_82576) {
5244                         /* enable source port check */
5245                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5246                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5247                 }
5248                 wr32(E1000_FTQF(3), ftqf);
5249         } else {
5250                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5251         }
5252         wrfl();
5253
5254         adapter->hwtstamp_config = config;
5255
5256         /* clear TX/RX time stamp registers, just to be sure */
5257         regval = rd32(E1000_TXSTMPH);
5258         regval = rd32(E1000_RXSTMPH);
5259
5260         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5261                 -EFAULT : 0;
5262 }
5263
5264 /**
5265  * igb_ioctl -
5266  * @netdev:
5267  * @ifreq:
5268  * @cmd:
5269  **/
5270 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5271 {
5272         switch (cmd) {
5273         case SIOCGMIIPHY:
5274         case SIOCGMIIREG:
5275         case SIOCSMIIREG:
5276                 return igb_mii_ioctl(netdev, ifr, cmd);
5277         case SIOCSHWTSTAMP:
5278                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5279         default:
5280                 return -EOPNOTSUPP;
5281         }
5282 }
5283
5284 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5285 {
5286         struct igb_adapter *adapter = hw->back;
5287         u16 cap_offset;
5288
5289         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5290         if (!cap_offset)
5291                 return -E1000_ERR_CONFIG;
5292
5293         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5294
5295         return 0;
5296 }
5297
5298 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5299 {
5300         struct igb_adapter *adapter = hw->back;
5301         u16 cap_offset;
5302
5303         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5304         if (!cap_offset)
5305                 return -E1000_ERR_CONFIG;
5306
5307         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5308
5309         return 0;
5310 }
5311
5312 static void igb_vlan_rx_register(struct net_device *netdev,
5313                                  struct vlan_group *grp)
5314 {
5315         struct igb_adapter *adapter = netdev_priv(netdev);
5316         struct e1000_hw *hw = &adapter->hw;
5317         u32 ctrl, rctl;
5318
5319         igb_irq_disable(adapter);
5320         adapter->vlgrp = grp;
5321
5322         if (grp) {
5323                 /* enable VLAN tag insert/strip */
5324                 ctrl = rd32(E1000_CTRL);
5325                 ctrl |= E1000_CTRL_VME;
5326                 wr32(E1000_CTRL, ctrl);
5327
5328                 /* enable VLAN receive filtering */
5329                 rctl = rd32(E1000_RCTL);
5330                 rctl &= ~E1000_RCTL_CFIEN;
5331                 wr32(E1000_RCTL, rctl);
5332                 igb_update_mng_vlan(adapter);
5333         } else {
5334                 /* disable VLAN tag insert/strip */
5335                 ctrl = rd32(E1000_CTRL);
5336                 ctrl &= ~E1000_CTRL_VME;
5337                 wr32(E1000_CTRL, ctrl);
5338
5339                 if (adapter->mng_vlan_id != (u16)IGB_MNG_VLAN_NONE) {
5340                         igb_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
5341                         adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
5342                 }
5343         }
5344
5345         igb_rlpml_set(adapter);
5346
5347         if (!test_bit(__IGB_DOWN, &adapter->state))
5348                 igb_irq_enable(adapter);
5349 }
5350
5351 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5352 {
5353         struct igb_adapter *adapter = netdev_priv(netdev);
5354         struct e1000_hw *hw = &adapter->hw;
5355         int pf_id = adapter->vfs_allocated_count;
5356
5357         if ((hw->mng_cookie.status &
5358              E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
5359             (vid == adapter->mng_vlan_id))
5360                 return;
5361
5362         /* add vid to vlvf if sr-iov is enabled,
5363          * if that fails add directly to filter table */
5364         if (igb_vlvf_set(adapter, vid, true, pf_id))
5365                 igb_vfta_set(hw, vid, true);
5366
5367 }
5368
5369 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5370 {
5371         struct igb_adapter *adapter = netdev_priv(netdev);
5372         struct e1000_hw *hw = &adapter->hw;
5373         int pf_id = adapter->vfs_allocated_count;
5374
5375         igb_irq_disable(adapter);
5376         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5377
5378         if (!test_bit(__IGB_DOWN, &adapter->state))
5379                 igb_irq_enable(adapter);
5380
5381         if ((adapter->hw.mng_cookie.status &
5382              E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
5383             (vid == adapter->mng_vlan_id)) {
5384                 /* release control to f/w */
5385                 igb_release_hw_control(adapter);
5386                 return;
5387         }
5388
5389         /* remove vid from vlvf if sr-iov is enabled,
5390          * if not in vlvf remove from vfta */
5391         if (igb_vlvf_set(adapter, vid, false, pf_id))
5392                 igb_vfta_set(hw, vid, false);
5393 }
5394
5395 static void igb_restore_vlan(struct igb_adapter *adapter)
5396 {
5397         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5398
5399         if (adapter->vlgrp) {
5400                 u16 vid;
5401                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5402                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5403                                 continue;
5404                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5405                 }
5406         }
5407 }
5408
5409 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5410 {
5411         struct e1000_mac_info *mac = &adapter->hw.mac;
5412
5413         mac->autoneg = 0;
5414
5415         switch (spddplx) {
5416         case SPEED_10 + DUPLEX_HALF:
5417                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5418                 break;
5419         case SPEED_10 + DUPLEX_FULL:
5420                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5421                 break;
5422         case SPEED_100 + DUPLEX_HALF:
5423                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5424                 break;
5425         case SPEED_100 + DUPLEX_FULL:
5426                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5427                 break;
5428         case SPEED_1000 + DUPLEX_FULL:
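                 /* gigabit is reached via autonegotiation rather than a
                  * forced speed, so advertise only 1000/Full */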
5429                 mac->autoneg = 1;
5430                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5431                 break;
5432         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5433         default:
5434                 dev_err(&adapter->pdev->dev,
5435                         "Unsupported Speed/Duplex configuration\n");
5436                 return -EINVAL;
5437         }
5438         return 0;
5439 }
5440
5441 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5442 {
5443         struct net_device *netdev = pci_get_drvdata(pdev);
5444         struct igb_adapter *adapter = netdev_priv(netdev);
5445         struct e1000_hw *hw = &adapter->hw;
5446         u32 ctrl, rctl, status;
5447         u32 wufc = adapter->wol;
5448 #ifdef CONFIG_PM
5449         int retval = 0;
5450 #endif
5451
5452         netif_device_detach(netdev);
5453
5454         if (netif_running(netdev))
5455                 igb_close(netdev);
5456
5457         igb_clear_interrupt_scheme(adapter);
5458
5459 #ifdef CONFIG_PM
5460         retval = pci_save_state(pdev);
5461         if (retval)
5462                 return retval;
5463 #endif
5464
5465         status = rd32(E1000_STATUS);
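         /* link is already up, so waking on a link status change is pointless */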
5466         if (status & E1000_STATUS_LU)
5467                 wufc &= ~E1000_WUFC_LNKC;
5468
5469         if (wufc) {
5470                 igb_setup_rctl(adapter);
5471                 igb_set_rx_mode(netdev);
5472
5473                 /* turn on all-multi mode if wake on multicast is enabled */
5474                 if (wufc & E1000_WUFC_MC) {
5475                         rctl = rd32(E1000_RCTL);
5476                         rctl |= E1000_RCTL_MPE;
5477                         wr32(E1000_RCTL, rctl);
5478                 }
5479
5480                 ctrl = rd32(E1000_CTRL);
5481                 /* advertise wake from D3Cold */
5482                 #define E1000_CTRL_ADVD3WUC 0x00100000
5483                 /* phy power management enable */
5484                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5485                 ctrl |= E1000_CTRL_ADVD3WUC;
5486                 wr32(E1000_CTRL, ctrl);
5487
5488                 /* Allow time for pending master requests to run */
5489                 igb_disable_pcie_master(&adapter->hw);
5490
5491                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5492                 wr32(E1000_WUFC, wufc);
5493         } else {
5494                 wr32(E1000_WUC, 0);
5495                 wr32(E1000_WUFC, 0);
5496         }
5497
5498         *enable_wake = wufc || adapter->en_mng_pt;
5499         if (!*enable_wake)
5500                 igb_shutdown_serdes_link_82575(hw);
5501
5502         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5503          * would have already happened in close and is redundant. */
5504         igb_release_hw_control(adapter);
5505
5506         pci_disable_device(pdev);
5507
5508         return 0;
5509 }
5510
5511 #ifdef CONFIG_PM
5512 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5513 {
5514         int retval;
5515         bool wake;
5516
5517         retval = __igb_shutdown(pdev, &wake);
5518         if (retval)
5519                 return retval;
5520
5521         if (wake) {
5522                 pci_prepare_to_sleep(pdev);
5523         } else {
5524                 pci_wake_from_d3(pdev, false);
5525                 pci_set_power_state(pdev, PCI_D3hot);
5526         }
5527
5528         return 0;
5529 }
5530
5531 static int igb_resume(struct pci_dev *pdev)
5532 {
5533         struct net_device *netdev = pci_get_drvdata(pdev);
5534         struct igb_adapter *adapter = netdev_priv(netdev);
5535         struct e1000_hw *hw = &adapter->hw;
5536         u32 err;
5537
5538         pci_set_power_state(pdev, PCI_D0);
5539         pci_restore_state(pdev);
5540
5541         err = pci_enable_device_mem(pdev);
5542         if (err) {
5543                 dev_err(&pdev->dev,
5544                         "igb: Cannot enable PCI device from suspend\n");
5545                 return err;
5546         }
5547         pci_set_master(pdev);
5548
5549         pci_enable_wake(pdev, PCI_D3hot, 0);
5550         pci_enable_wake(pdev, PCI_D3cold, 0);
5551
5552         if (igb_init_interrupt_scheme(adapter)) {
5553                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5554                 return -ENOMEM;
5555         }
5556
5557         /* e1000_power_up_phy(adapter); */
5558
5559         igb_reset(adapter);
5560
5561         /* let the f/w know that the h/w is now under the control of the
5562          * driver. */
5563         igb_get_hw_control(adapter);
5564
5565         wr32(E1000_WUS, ~0);
5566
5567         if (netif_running(netdev)) {
5568                 err = igb_open(netdev);
5569                 if (err)
5570                         return err;
5571         }
5572
5573         netif_device_attach(netdev);
5574
5575         return 0;
5576 }
5577 #endif
5578
5579 static void igb_shutdown(struct pci_dev *pdev)
5580 {
5581         bool wake;
5582
5583         __igb_shutdown(pdev, &wake);
5584
5585         if (system_state == SYSTEM_POWER_OFF) {
5586                 pci_wake_from_d3(pdev, wake);
5587                 pci_set_power_state(pdev, PCI_D3hot);
5588         }
5589 }
5590
5591 #ifdef CONFIG_NET_POLL_CONTROLLER
5592 /*
5593  * Polling 'interrupt' - used by things like netconsole to send skbs
5594  * without having to re-enable interrupts. It's not called while
5595  * the interrupt routine is executing.
5596  */
5597 static void igb_netpoll(struct net_device *netdev)
5598 {
5599         struct igb_adapter *adapter = netdev_priv(netdev);
5600         struct e1000_hw *hw = &adapter->hw;
5601         int i;
5602
5603         if (!adapter->msix_entries) {
5604                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5605                 igb_irq_disable(adapter);
5606                 napi_schedule(&q_vector->napi);
5607                 return;
5608         }
5609
5610         for (i = 0; i < adapter->num_q_vectors; i++) {
5611                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5612                 wr32(E1000_EIMC, q_vector->eims_value);
5613                 napi_schedule(&q_vector->napi);
5614         }
5615 }
5616 #endif /* CONFIG_NET_POLL_CONTROLLER */
5617
5618 /**
5619  * igb_io_error_detected - called when PCI error is detected
5620  * @pdev: Pointer to PCI device
5621  * @state: The current pci connection state
5622  *
5623  * This function is called after a PCI bus error affecting
5624  * this device has been detected.
5625  */
5626 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5627                                               pci_channel_state_t state)
5628 {
5629         struct net_device *netdev = pci_get_drvdata(pdev);
5630         struct igb_adapter *adapter = netdev_priv(netdev);
5631
5632         netif_device_detach(netdev);
5633
5634         if (state == pci_channel_io_perm_failure)
5635                 return PCI_ERS_RESULT_DISCONNECT;
5636
5637         if (netif_running(netdev))
5638                 igb_down(adapter);
5639         pci_disable_device(pdev);
5640
5641         /* Request a slot reset. */
5642         return PCI_ERS_RESULT_NEED_RESET;
5643 }
5644
5645 /**
5646  * igb_io_slot_reset - called after the pci bus has been reset.
5647  * @pdev: Pointer to PCI device
5648  *
5649  * Restart the card from scratch, as if from a cold boot. The implementation
5650  * resembles the first half of the igb_resume routine.
5651  */
5652 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
5653 {
5654         struct net_device *netdev = pci_get_drvdata(pdev);
5655         struct igb_adapter *adapter = netdev_priv(netdev);
5656         struct e1000_hw *hw = &adapter->hw;
5657         pci_ers_result_t result;
5658         int err;
5659
5660         if (pci_enable_device_mem(pdev)) {
5661                 dev_err(&pdev->dev,
5662                         "Cannot re-enable PCI device after reset.\n");
5663                 result = PCI_ERS_RESULT_DISCONNECT;
5664         } else {
5665                 pci_set_master(pdev);
5666                 pci_restore_state(pdev);
5667
5668                 pci_enable_wake(pdev, PCI_D3hot, 0);
5669                 pci_enable_wake(pdev, PCI_D3cold, 0);
5670
5671                 igb_reset(adapter);
5672                 wr32(E1000_WUS, ~0);
5673                 result = PCI_ERS_RESULT_RECOVERED;
5674         }
5675
5676         err = pci_cleanup_aer_uncorrect_error_status(pdev);
5677         if (err) {
5678                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
5679                         "failed 0x%x\n", err);
5680                 /* non-fatal, continue */
5681         }
5682
5683         return result;
5684 }
5685
5686 /**
5687  * igb_io_resume - called when traffic can start flowing again.
5688  * @pdev: Pointer to PCI device
5689  *
5690  * This callback is called when the error recovery driver tells us that
5691  * it's OK to resume normal operation. The implementation resembles the
5692  * second half of the igb_resume routine.
5693  */
5694 static void igb_io_resume(struct pci_dev *pdev)
5695 {
5696         struct net_device *netdev = pci_get_drvdata(pdev);
5697         struct igb_adapter *adapter = netdev_priv(netdev);
5698
5699         if (netif_running(netdev)) {
5700                 if (igb_up(adapter)) {
5701                         dev_err(&pdev->dev, "igb_up failed after reset\n");
5702                         return;
5703                 }
5704         }
5705
5706         netif_device_attach(netdev);
5707
5708         /* let the f/w know that the h/w is now under the control of the
5709          * driver. */
5710         igb_get_hw_control(adapter);
5711 }
5712
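/**
 * igb_rar_set_qsel - write a receive address register with pool select
 * @adapter: board private structure
 * @addr: MAC address to program
 * @index: receive address register entry to write
 * @qsel: VMDq pool (queue) to associate with the address
 *
 * Programs RAL/RAH for the given index, marks the entry valid and
 * encodes the pool selection expected by the MAC.
 */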
5713 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
5714                              u8 qsel)
5715 {
5716         u32 rar_low, rar_high;
5717         struct e1000_hw *hw = &adapter->hw;
5718
5719         /* HW expects these in little endian so we reverse the byte order
5720          * from network order (big endian) to little endian
5721          */
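        /* For example, with the hypothetical address 00:1b:21:aa:bb:cc this
         * yields rar_low = 0xaa211b00 and rar_high = 0x0000ccbb before the
         * valid and pool bits are OR'd in below.
         */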
5722         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
5723                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
5724         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
5725
5726         /* Indicate to hardware the Address is Valid. */
5727         rar_high |= E1000_RAH_AV;
5728
5729         if (hw->mac.type == e1000_82575)
5730                 rar_high |= E1000_RAH_POOL_1 * qsel;
5731         else
5732                 rar_high |= E1000_RAH_POOL_1 << qsel;
5733
5734         wr32(E1000_RAL(index), rar_low);
5735         wrfl();
5736         wr32(E1000_RAH(index), rar_high);
5737         wrfl();
5738 }
5739
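/**
 * igb_set_vf_mac - program the MAC address for a virtual function
 * @adapter: board private structure
 * @vf: VF index
 * @mac_addr: MAC address to assign to the VF
 *
 * Stores the address in the VF data and writes it into the receive
 * address register entry reserved for this VF.
 */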
5740 static int igb_set_vf_mac(struct igb_adapter *adapter,
5741                           int vf, unsigned char *mac_addr)
5742 {
5743         struct e1000_hw *hw = &adapter->hw;
5744         /* VF MAC addresses start at the end of the receive addresses and
5745          * move towards the first, so a collision should not be possible */
5746         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5747
5748         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
5749
5750         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
5751
5752         return 0;
5753 }
5754
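/**
 * igb_vmm_control - configure VMDq/SR-IOV offloads
 * @adapter: board private structure
 *
 * Enables VLAN tag stripping for replicated packets, tells the hardware
 * that the MAC inserts VLAN tags, and turns VMDq loopback and replication
 * on or off depending on whether any VFs have been allocated.  Replication
 * is not supported on 82575.
 */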
5755 static void igb_vmm_control(struct igb_adapter *adapter)
5756 {
5757         struct e1000_hw *hw = &adapter->hw;
5758         u32 reg;
5759
5760         /* replication is not supported for 82575 */
5761         if (hw->mac.type == e1000_82575)
5762                 return;
5763
5764         /* enable replication vlan tag stripping */
5765         reg = rd32(E1000_RPLOLR);
5766         reg |= E1000_RPLOLR_STRVLAN;
5767         wr32(E1000_RPLOLR, reg);
5768
5769         /* notify HW that the MAC is adding vlan tags */
5770         reg = rd32(E1000_DTXCTL);
5771         reg |= E1000_DTXCTL_VLAN_ADDED;
5772         wr32(E1000_DTXCTL, reg);
5773
5774         if (adapter->vfs_allocated_count) {
5775                 igb_vmdq_set_loopback_pf(hw, true);
5776                 igb_vmdq_set_replication_pf(hw, true);
5777         } else {
5778                 igb_vmdq_set_loopback_pf(hw, false);
5779                 igb_vmdq_set_replication_pf(hw, false);
5780         }
5781 }
5782
5783 /* igb_main.c */