b044c985df0b5eff05b88e934c37c472b8dd19cc
[pandora-kernel.git] / drivers / net / igb / igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
52 #define DRV_VERSION "1.3.16-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56                                 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60         [board_82575] = &e1000_82575_info,
61 };
62
63 static struct pci_device_id igb_pci_tbl[] = {
64         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
74         /* required last entry */
75         {0, }
76 };
77
78 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
79
80 void igb_reset(struct igb_adapter *);
81 static int igb_setup_all_tx_resources(struct igb_adapter *);
82 static int igb_setup_all_rx_resources(struct igb_adapter *);
83 static void igb_free_all_tx_resources(struct igb_adapter *);
84 static void igb_free_all_rx_resources(struct igb_adapter *);
85 static void igb_setup_mrqc(struct igb_adapter *);
86 void igb_update_stats(struct igb_adapter *);
87 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
88 static void __devexit igb_remove(struct pci_dev *pdev);
89 static int igb_sw_init(struct igb_adapter *);
90 static int igb_open(struct net_device *);
91 static int igb_close(struct net_device *);
92 static void igb_configure_tx(struct igb_adapter *);
93 static void igb_configure_rx(struct igb_adapter *);
94 static void igb_clean_all_tx_rings(struct igb_adapter *);
95 static void igb_clean_all_rx_rings(struct igb_adapter *);
96 static void igb_clean_tx_ring(struct igb_ring *);
97 static void igb_clean_rx_ring(struct igb_ring *);
98 static void igb_set_rx_mode(struct net_device *);
99 static void igb_update_phy_info(unsigned long);
100 static void igb_watchdog(unsigned long);
101 static void igb_watchdog_task(struct work_struct *);
102 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
103 static struct net_device_stats *igb_get_stats(struct net_device *);
104 static int igb_change_mtu(struct net_device *, int);
105 static int igb_set_mac(struct net_device *, void *);
106 static void igb_set_uta(struct igb_adapter *adapter);
107 static irqreturn_t igb_intr(int irq, void *);
108 static irqreturn_t igb_intr_msi(int irq, void *);
109 static irqreturn_t igb_msix_other(int irq, void *);
110 static irqreturn_t igb_msix_ring(int irq, void *);
111 #ifdef CONFIG_IGB_DCA
112 static void igb_update_dca(struct igb_q_vector *);
113 static void igb_setup_dca(struct igb_adapter *);
114 #endif /* CONFIG_IGB_DCA */
115 static bool igb_clean_tx_irq(struct igb_q_vector *);
116 static int igb_poll(struct napi_struct *, int);
117 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
118 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
119 static void igb_tx_timeout(struct net_device *);
120 static void igb_reset_task(struct work_struct *);
121 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
122 static void igb_vlan_rx_add_vid(struct net_device *, u16);
123 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
124 static void igb_restore_vlan(struct igb_adapter *);
125 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
126 static void igb_ping_all_vfs(struct igb_adapter *);
127 static void igb_msg_task(struct igb_adapter *);
128 static void igb_vmm_control(struct igb_adapter *);
129 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
130 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
131
132 #ifdef CONFIG_PM
133 static int igb_suspend(struct pci_dev *, pm_message_t);
134 static int igb_resume(struct pci_dev *);
135 #endif
136 static void igb_shutdown(struct pci_dev *);
137 #ifdef CONFIG_IGB_DCA
138 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
139 static struct notifier_block dca_notifier = {
140         .notifier_call  = igb_notify_dca,
141         .next           = NULL,
142         .priority       = 0
143 };
144 #endif
145 #ifdef CONFIG_NET_POLL_CONTROLLER
146 /* for netdump / net console */
147 static void igb_netpoll(struct net_device *);
148 #endif
149 #ifdef CONFIG_PCI_IOV
150 static unsigned int max_vfs = 0;
151 module_param(max_vfs, uint, 0);
152 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
153                  "per physical function");
154 #endif /* CONFIG_PCI_IOV */
155
156 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
157                      pci_channel_state_t);
158 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
159 static void igb_io_resume(struct pci_dev *);
160
161 static struct pci_error_handlers igb_err_handler = {
162         .error_detected = igb_io_error_detected,
163         .slot_reset = igb_io_slot_reset,
164         .resume = igb_io_resume,
165 };
166
167
168 static struct pci_driver igb_driver = {
169         .name     = igb_driver_name,
170         .id_table = igb_pci_tbl,
171         .probe    = igb_probe,
172         .remove   = __devexit_p(igb_remove),
173 #ifdef CONFIG_PM
174         /* Power Management Hooks */
175         .suspend  = igb_suspend,
176         .resume   = igb_resume,
177 #endif
178         .shutdown = igb_shutdown,
179         .err_handler = &igb_err_handler
180 };
181
182 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
183 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
184 MODULE_LICENSE("GPL");
185 MODULE_VERSION(DRV_VERSION);
186
187 /**
188  * igb_read_clock - read raw cycle counter (to be used by time counter)
189  */
190 static cycle_t igb_read_clock(const struct cyclecounter *tc)
191 {
192         struct igb_adapter *adapter =
193                 container_of(tc, struct igb_adapter, cycles);
194         struct e1000_hw *hw = &adapter->hw;
195         u64 stamp = 0;
196         int shift = 0;
197
198         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
199         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
200         return stamp;
201 }
202
203 #ifdef DEBUG
204 /**
205  * igb_get_hw_dev_name - return device name string
206  * used by hardware layer to print debugging information
207  **/
208 char *igb_get_hw_dev_name(struct e1000_hw *hw)
209 {
210         struct igb_adapter *adapter = hw->back;
211         return adapter->netdev->name;
212 }
213
214 /**
215  * igb_get_time_str - format current NIC and system time as string
216  */
217 static char *igb_get_time_str(struct igb_adapter *adapter,
218                               char buffer[160])
219 {
220         cycle_t hw = adapter->cycles.read(&adapter->cycles);
221         struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
222         struct timespec sys;
223         struct timespec delta;
224         getnstimeofday(&sys);
225
226         delta = timespec_sub(nic, sys);
227
228         sprintf(buffer,
229                 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
230                 hw,
231                 (long)nic.tv_sec, nic.tv_nsec,
232                 (long)sys.tv_sec, sys.tv_nsec,
233                 (long)delta.tv_sec, delta.tv_nsec);
234
235         return buffer;
236 }
237 #endif
238
239 /**
240  * igb_init_module - Driver Registration Routine
241  *
242  * igb_init_module is the first routine called when the driver is
243  * loaded. All it does is register with the PCI subsystem.
244  **/
245 static int __init igb_init_module(void)
246 {
247         int ret;
248         printk(KERN_INFO "%s - version %s\n",
249                igb_driver_string, igb_driver_version);
250
251         printk(KERN_INFO "%s\n", igb_copyright);
252
253 #ifdef CONFIG_IGB_DCA
254         dca_register_notify(&dca_notifier);
255 #endif
256         ret = pci_register_driver(&igb_driver);
257         return ret;
258 }
259
260 module_init(igb_init_module);
261
262 /**
263  * igb_exit_module - Driver Exit Cleanup Routine
264  *
265  * igb_exit_module is called just before the driver is removed
266  * from memory.
267  **/
268 static void __exit igb_exit_module(void)
269 {
270 #ifdef CONFIG_IGB_DCA
271         dca_unregister_notify(&dca_notifier);
272 #endif
273         pci_unregister_driver(&igb_driver);
274 }
275
276 module_exit(igb_exit_module);
277
278 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
279 /**
280  * igb_cache_ring_register - Descriptor ring to register mapping
281  * @adapter: board private structure to initialize
282  *
283  * Once we know the feature-set enabled for the device, we'll cache
284  * the register offset the descriptor ring is assigned to.
285  **/
286 static void igb_cache_ring_register(struct igb_adapter *adapter)
287 {
288         int i = 0, j = 0;
289         u32 rbase_offset = adapter->vfs_allocated_count;
290
291         switch (adapter->hw.mac.type) {
292         case e1000_82576:
293                 /* The queues are allocated for virtualization such that VF 0
294                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
295                  * In order to avoid collision we start at the first free queue
296                  * and continue consuming queues in the same sequence
297                  */
298                 if (adapter->vfs_allocated_count) {
299                         for (; i < adapter->num_rx_queues; i++)
300                                 adapter->rx_ring[i].reg_idx = rbase_offset +
301                                                               Q_IDX_82576(i);
302                         for (; j < adapter->num_tx_queues; j++)
303                                 adapter->tx_ring[j].reg_idx = rbase_offset +
304                                                               Q_IDX_82576(j);
305                 }
306         case e1000_82575:
307         default:
308                 for (; i < adapter->num_rx_queues; i++)
309                         adapter->rx_ring[i].reg_idx = rbase_offset + i;
310                 for (; j < adapter->num_tx_queues; j++)
311                         adapter->tx_ring[j].reg_idx = rbase_offset + j;
312                 break;
313         }
314 }
315
316 static void igb_free_queues(struct igb_adapter *adapter)
317 {
318         kfree(adapter->tx_ring);
319         kfree(adapter->rx_ring);
320
321         adapter->tx_ring = NULL;
322         adapter->rx_ring = NULL;
323
324         adapter->num_rx_queues = 0;
325         adapter->num_tx_queues = 0;
326 }
327
328 /**
329  * igb_alloc_queues - Allocate memory for all rings
330  * @adapter: board private structure to initialize
331  *
332  * We allocate one ring per queue at run-time since we don't know the
333  * number of queues at compile-time.
334  **/
335 static int igb_alloc_queues(struct igb_adapter *adapter)
336 {
337         int i;
338
339         adapter->tx_ring = kcalloc(adapter->num_tx_queues,
340                                    sizeof(struct igb_ring), GFP_KERNEL);
341         if (!adapter->tx_ring)
342                 goto err;
343
344         adapter->rx_ring = kcalloc(adapter->num_rx_queues,
345                                    sizeof(struct igb_ring), GFP_KERNEL);
346         if (!adapter->rx_ring)
347                 goto err;
348
349         for (i = 0; i < adapter->num_tx_queues; i++) {
350                 struct igb_ring *ring = &(adapter->tx_ring[i]);
351                 ring->count = adapter->tx_ring_count;
352                 ring->queue_index = i;
353                 ring->pdev = adapter->pdev;
354                 ring->netdev = adapter->netdev;
355                 /* For 82575, context index must be unique per ring. */
356                 if (adapter->hw.mac.type == e1000_82575)
357                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
358         }
359
360         for (i = 0; i < adapter->num_rx_queues; i++) {
361                 struct igb_ring *ring = &(adapter->rx_ring[i]);
362                 ring->count = adapter->rx_ring_count;
363                 ring->queue_index = i;
364                 ring->pdev = adapter->pdev;
365                 ring->netdev = adapter->netdev;
366                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
367                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
368                 /* set flag indicating ring supports SCTP checksum offload */
369                 if (adapter->hw.mac.type >= e1000_82576)
370                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
371         }
372
373         igb_cache_ring_register(adapter);
374
375         return 0;
376
377 err:
378         igb_free_queues(adapter);
379
380         return -ENOMEM;
381 }
382
383 #define IGB_N0_QUEUE -1
384 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
385 {
386         u32 msixbm = 0;
387         struct igb_adapter *adapter = q_vector->adapter;
388         struct e1000_hw *hw = &adapter->hw;
389         u32 ivar, index;
390         int rx_queue = IGB_N0_QUEUE;
391         int tx_queue = IGB_N0_QUEUE;
392
393         if (q_vector->rx_ring)
394                 rx_queue = q_vector->rx_ring->reg_idx;
395         if (q_vector->tx_ring)
396                 tx_queue = q_vector->tx_ring->reg_idx;
397
398         switch (hw->mac.type) {
399         case e1000_82575:
400                 /* The 82575 assigns vectors using a bitmask, which matches the
401                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
402                    or more queues to a vector, we write the appropriate bits
403                    into the MSIXBM register for that vector. */
404                 if (rx_queue > IGB_N0_QUEUE)
405                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
406                 if (tx_queue > IGB_N0_QUEUE)
407                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
408                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
409                 q_vector->eims_value = msixbm;
410                 break;
411         case e1000_82576:
412                 /* 82576 uses a table-based method for assigning vectors.
413                    Each queue has a single entry in the table to which we write
414                    a vector number along with a "valid" bit.  Sadly, the layout
415                    of the table is somewhat counterintuitive. */
416                 if (rx_queue > IGB_N0_QUEUE) {
417                         index = (rx_queue & 0x7);
418                         ivar = array_rd32(E1000_IVAR0, index);
419                         if (rx_queue < 8) {
420                                 /* vector goes into low byte of register */
421                                 ivar = ivar & 0xFFFFFF00;
422                                 ivar |= msix_vector | E1000_IVAR_VALID;
423                         } else {
424                                 /* vector goes into third byte of register */
425                                 ivar = ivar & 0xFF00FFFF;
426                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
427                         }
428                         array_wr32(E1000_IVAR0, index, ivar);
429                 }
430                 if (tx_queue > IGB_N0_QUEUE) {
431                         index = (tx_queue & 0x7);
432                         ivar = array_rd32(E1000_IVAR0, index);
433                         if (tx_queue < 8) {
434                                 /* vector goes into second byte of register */
435                                 ivar = ivar & 0xFFFF00FF;
436                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
437                         } else {
438                                 /* vector goes into high byte of register */
439                                 ivar = ivar & 0x00FFFFFF;
440                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
441                         }
442                         array_wr32(E1000_IVAR0, index, ivar);
443                 }
444                 q_vector->eims_value = 1 << msix_vector;
445                 break;
446         default:
447                 BUG();
448                 break;
449         }
450 }
451
452 /**
453  * igb_configure_msix - Configure MSI-X hardware
454  *
455  * igb_configure_msix sets up the hardware to properly
456  * generate MSI-X interrupts.
457  **/
458 static void igb_configure_msix(struct igb_adapter *adapter)
459 {
460         u32 tmp;
461         int i, vector = 0;
462         struct e1000_hw *hw = &adapter->hw;
463
464         adapter->eims_enable_mask = 0;
465
466         /* set vector for other causes, i.e. link changes */
467         switch (hw->mac.type) {
468         case e1000_82575:
469                 tmp = rd32(E1000_CTRL_EXT);
470                 /* enable MSI-X PBA support*/
471                 tmp |= E1000_CTRL_EXT_PBA_CLR;
472
473                 /* Auto-Mask interrupts upon ICR read. */
474                 tmp |= E1000_CTRL_EXT_EIAME;
475                 tmp |= E1000_CTRL_EXT_IRCA;
476
477                 wr32(E1000_CTRL_EXT, tmp);
478
479                 /* enable msix_other interrupt */
480                 array_wr32(E1000_MSIXBM(0), vector++,
481                                       E1000_EIMS_OTHER);
482                 adapter->eims_other = E1000_EIMS_OTHER;
483
484                 break;
485
486         case e1000_82576:
487                 /* Turn on MSI-X capability first, or our settings
488                  * won't stick.  And it will take days to debug. */
489                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
490                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
491                                 E1000_GPIE_NSICR);
492
493                 /* enable msix_other interrupt */
494                 adapter->eims_other = 1 << vector;
495                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
496
497                 wr32(E1000_IVAR_MISC, tmp);
498                 break;
499         default:
500                 /* do nothing, since nothing else supports MSI-X */
501                 break;
502         } /* switch (hw->mac.type) */
503
504         adapter->eims_enable_mask |= adapter->eims_other;
505
506         for (i = 0; i < adapter->num_q_vectors; i++) {
507                 struct igb_q_vector *q_vector = adapter->q_vector[i];
508                 igb_assign_vector(q_vector, vector++);
509                 adapter->eims_enable_mask |= q_vector->eims_value;
510         }
511
512         wrfl();
513 }
514
515 /**
516  * igb_request_msix - Initialize MSI-X interrupts
517  *
518  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
519  * kernel.
520  **/
521 static int igb_request_msix(struct igb_adapter *adapter)
522 {
523         struct net_device *netdev = adapter->netdev;
524         struct e1000_hw *hw = &adapter->hw;
525         int i, err = 0, vector = 0;
526
527         err = request_irq(adapter->msix_entries[vector].vector,
528                           &igb_msix_other, 0, netdev->name, adapter);
529         if (err)
530                 goto out;
531         vector++;
532
533         for (i = 0; i < adapter->num_q_vectors; i++) {
534                 struct igb_q_vector *q_vector = adapter->q_vector[i];
535
536                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
537
538                 if (q_vector->rx_ring && q_vector->tx_ring)
539                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
540                                 q_vector->rx_ring->queue_index);
541                 else if (q_vector->tx_ring)
542                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
543                                 q_vector->tx_ring->queue_index);
544                 else if (q_vector->rx_ring)
545                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
546                                 q_vector->rx_ring->queue_index);
547                 else
548                         sprintf(q_vector->name, "%s-unused", netdev->name);
549
550                 err = request_irq(adapter->msix_entries[vector].vector,
551                                   &igb_msix_ring, 0, q_vector->name,
552                                   q_vector);
553                 if (err)
554                         goto out;
555                 vector++;
556         }
557
558         igb_configure_msix(adapter);
559         return 0;
560 out:
561         return err;
562 }
563
564 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
565 {
566         if (adapter->msix_entries) {
567                 pci_disable_msix(adapter->pdev);
568                 kfree(adapter->msix_entries);
569                 adapter->msix_entries = NULL;
570         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
571                 pci_disable_msi(adapter->pdev);
572         }
573 }
574
575 /**
576  * igb_free_q_vectors - Free memory allocated for interrupt vectors
577  * @adapter: board private structure to initialize
578  *
579  * This function frees the memory allocated to the q_vectors.  In addition if
580  * NAPI is enabled it will delete any references to the NAPI struct prior
581  * to freeing the q_vector.
582  **/
583 static void igb_free_q_vectors(struct igb_adapter *adapter)
584 {
585         int v_idx;
586
587         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
588                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
589                 adapter->q_vector[v_idx] = NULL;
590                 netif_napi_del(&q_vector->napi);
591                 kfree(q_vector);
592         }
593         adapter->num_q_vectors = 0;
594 }
595
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * Leaves the adapter with 0 rx queues, 0 tx queues and no MSI-X
 * interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	/* ring arrays first ... */
	igb_free_queues(adapter);

	/* ... then the vectors that referenced them ... */
	igb_free_q_vectors(adapter);

	/* ... and finally the MSI-X / MSI capability itself */
	igb_reset_interrupt_capability(adapter);
}
608
609 /**
610  * igb_set_interrupt_capability - set MSI or MSI-X if supported
611  *
612  * Attempt to configure interrupts using the best available
613  * capabilities of the hardware and kernel.
614  **/
615 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
616 {
617         int err;
618         int numvecs, i;
619
620         /* Number of supported queues. */
621         adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
622         adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus());
623
624         /* start with one vector for every rx queue */
625         numvecs = adapter->num_rx_queues;
626
627         /* if tx handler is seperate add 1 for every tx queue */
628         numvecs += adapter->num_tx_queues;
629
630         /* store the number of vectors reserved for queues */
631         adapter->num_q_vectors = numvecs;
632
633         /* add 1 vector for link status interrupts */
634         numvecs++;
635         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
636                                         GFP_KERNEL);
637         if (!adapter->msix_entries)
638                 goto msi_only;
639
640         for (i = 0; i < numvecs; i++)
641                 adapter->msix_entries[i].entry = i;
642
643         err = pci_enable_msix(adapter->pdev,
644                               adapter->msix_entries,
645                               numvecs);
646         if (err == 0)
647                 goto out;
648
649         igb_reset_interrupt_capability(adapter);
650
651         /* If we can't do MSI-X, try MSI */
652 msi_only:
653 #ifdef CONFIG_PCI_IOV
654         /* disable SR-IOV for non MSI-X configurations */
655         if (adapter->vf_data) {
656                 struct e1000_hw *hw = &adapter->hw;
657                 /* disable iov and allow time for transactions to clear */
658                 pci_disable_sriov(adapter->pdev);
659                 msleep(500);
660
661                 kfree(adapter->vf_data);
662                 adapter->vf_data = NULL;
663                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
664                 msleep(100);
665                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
666         }
667 #endif
668         adapter->vfs_allocated_count = 0;
669         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
670         adapter->num_rx_queues = 1;
671         adapter->num_tx_queues = 1;
672         adapter->num_q_vectors = 1;
673         if (!pci_enable_msi(adapter->pdev))
674                 adapter->flags |= IGB_FLAG_HAS_MSI;
675 out:
676         /* Notify the stack of the (possibly) reduced Tx Queue count. */
677         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
678         return;
679 }
680
681 /**
682  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
683  * @adapter: board private structure to initialize
684  *
685  * We allocate one q_vector per queue interrupt.  If allocation fails we
686  * return -ENOMEM.
687  **/
688 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
689 {
690         struct igb_q_vector *q_vector;
691         struct e1000_hw *hw = &adapter->hw;
692         int v_idx;
693
694         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
695                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
696                 if (!q_vector)
697                         goto err_out;
698                 q_vector->adapter = adapter;
699                 q_vector->itr_shift = (hw->mac.type == e1000_82575) ? 16 : 0;
700                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
701                 q_vector->itr_val = IGB_START_ITR;
702                 q_vector->set_itr = 1;
703                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
704                 adapter->q_vector[v_idx] = q_vector;
705         }
706         return 0;
707
708 err_out:
709         while (v_idx) {
710                 v_idx--;
711                 q_vector = adapter->q_vector[v_idx];
712                 netif_napi_del(&q_vector->napi);
713                 kfree(q_vector);
714                 adapter->q_vector[v_idx] = NULL;
715         }
716         return -ENOMEM;
717 }
718
719 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
720                                       int ring_idx, int v_idx)
721 {
722         struct igb_q_vector *q_vector;
723
724         q_vector = adapter->q_vector[v_idx];
725         q_vector->rx_ring = &adapter->rx_ring[ring_idx];
726         q_vector->rx_ring->q_vector = q_vector;
727         q_vector->itr_val = adapter->rx_itr_setting;
728         if (q_vector->itr_val && q_vector->itr_val <= 3)
729                 q_vector->itr_val = IGB_START_ITR;
730 }
731
732 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
733                                       int ring_idx, int v_idx)
734 {
735         struct igb_q_vector *q_vector;
736
737         q_vector = adapter->q_vector[v_idx];
738         q_vector->tx_ring = &adapter->tx_ring[ring_idx];
739         q_vector->tx_ring->q_vector = q_vector;
740         q_vector->itr_val = adapter->tx_itr_setting;
741         if (q_vector->itr_val && q_vector->itr_val <= 3)
742                 q_vector->itr_val = IGB_START_ITR;
743 }
744
745 /**
746  * igb_map_ring_to_vector - maps allocated queues to vectors
747  *
748  * This function maps the recently allocated queues to vectors.
749  **/
750 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
751 {
752         int i;
753         int v_idx = 0;
754
755         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
756             (adapter->num_q_vectors < adapter->num_tx_queues))
757                 return -ENOMEM;
758
759         if (adapter->num_q_vectors >=
760             (adapter->num_rx_queues + adapter->num_tx_queues)) {
761                 for (i = 0; i < adapter->num_rx_queues; i++)
762                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
763                 for (i = 0; i < adapter->num_tx_queues; i++)
764                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
765         } else {
766                 for (i = 0; i < adapter->num_rx_queues; i++) {
767                         if (i < adapter->num_tx_queues)
768                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
769                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
770                 }
771                 for (; i < adapter->num_tx_queues; i++)
772                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
773         }
774         return 0;
775 }
776
777 /**
778  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
779  *
780  * This function initializes the interrupts and allocates all of the queues.
781  **/
782 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
783 {
784         struct pci_dev *pdev = adapter->pdev;
785         int err;
786
787         igb_set_interrupt_capability(adapter);
788
789         err = igb_alloc_q_vectors(adapter);
790         if (err) {
791                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
792                 goto err_alloc_q_vectors;
793         }
794
795         err = igb_alloc_queues(adapter);
796         if (err) {
797                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
798                 goto err_alloc_queues;
799         }
800
801         err = igb_map_ring_to_vector(adapter);
802         if (err) {
803                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
804                 goto err_map_queues;
805         }
806
807
808         return 0;
809 err_map_queues:
810         igb_free_queues(adapter);
811 err_alloc_queues:
812         igb_free_q_vectors(adapter);
813 err_alloc_q_vectors:
814         igb_reset_interrupt_capability(adapter);
815         return err;
816 }
817
818 /**
819  * igb_request_irq - initialize interrupts
820  *
821  * Attempts to configure interrupts using the best available
822  * capabilities of the hardware and kernel.
823  **/
824 static int igb_request_irq(struct igb_adapter *adapter)
825 {
826         struct net_device *netdev = adapter->netdev;
827         struct pci_dev *pdev = adapter->pdev;
828         struct e1000_hw *hw = &adapter->hw;
829         int err = 0;
830
831         if (adapter->msix_entries) {
832                 err = igb_request_msix(adapter);
833                 if (!err)
834                         goto request_done;
835                 /* fall back to MSI */
836                 igb_clear_interrupt_scheme(adapter);
837                 if (!pci_enable_msi(adapter->pdev))
838                         adapter->flags |= IGB_FLAG_HAS_MSI;
839                 igb_free_all_tx_resources(adapter);
840                 igb_free_all_rx_resources(adapter);
841                 adapter->num_tx_queues = 1;
842                 adapter->num_rx_queues = 1;
843                 adapter->num_q_vectors = 1;
844                 err = igb_alloc_q_vectors(adapter);
845                 if (err) {
846                         dev_err(&pdev->dev,
847                                 "Unable to allocate memory for vectors\n");
848                         goto request_done;
849                 }
850                 err = igb_alloc_queues(adapter);
851                 if (err) {
852                         dev_err(&pdev->dev,
853                                 "Unable to allocate memory for queues\n");
854                         igb_free_q_vectors(adapter);
855                         goto request_done;
856                 }
857                 igb_setup_all_tx_resources(adapter);
858                 igb_setup_all_rx_resources(adapter);
859         } else {
860                 switch (hw->mac.type) {
861                 case e1000_82575:
862                         wr32(E1000_MSIXBM(0),
863                              (E1000_EICR_RX_QUEUE0 |
864                               E1000_EICR_TX_QUEUE0 |
865                               E1000_EIMS_OTHER));
866                         break;
867                 case e1000_82576:
868                         wr32(E1000_IVAR0, E1000_IVAR_VALID);
869                         break;
870                 default:
871                         break;
872                 }
873         }
874
875         if (adapter->flags & IGB_FLAG_HAS_MSI) {
876                 err = request_irq(adapter->pdev->irq, &igb_intr_msi, 0,
877                                   netdev->name, adapter);
878                 if (!err)
879                         goto request_done;
880
881                 /* fall back to legacy interrupts */
882                 igb_reset_interrupt_capability(adapter);
883                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
884         }
885
886         err = request_irq(adapter->pdev->irq, &igb_intr, IRQF_SHARED,
887                           netdev->name, adapter);
888
889         if (err)
890                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
891                         err);
892
893 request_done:
894         return err;
895 }
896
897 static void igb_free_irq(struct igb_adapter *adapter)
898 {
899         if (adapter->msix_entries) {
900                 int vector = 0, i;
901
902                 free_irq(adapter->msix_entries[vector++].vector, adapter);
903
904                 for (i = 0; i < adapter->num_q_vectors; i++) {
905                         struct igb_q_vector *q_vector = adapter->q_vector[i];
906                         free_irq(adapter->msix_entries[vector++].vector,
907                                  q_vector);
908                 }
909         } else {
910                 free_irq(adapter->pdev->irq, adapter);
911         }
912 }
913
914 /**
915  * igb_irq_disable - Mask off interrupt generation on the NIC
916  * @adapter: board private structure
917  **/
918 static void igb_irq_disable(struct igb_adapter *adapter)
919 {
920         struct e1000_hw *hw = &adapter->hw;
921
922         /*
923          * we need to be careful when disabling interrupts.  The VFs are also
924          * mapped into these registers and so clearing the bits can cause
925          * issues on the VF drivers so we only need to clear what we set
926          */
927         if (adapter->msix_entries) {
928                 u32 regval = rd32(E1000_EIAM);
929                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
930                 wr32(E1000_EIMC, adapter->eims_enable_mask);
931                 regval = rd32(E1000_EIAC);
932                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
933         }
934
935         wr32(E1000_IAM, 0);
936         wr32(E1000_IMC, ~0);
937         wrfl();
938         synchronize_irq(adapter->pdev->irq);
939 }
940
941 /**
942  * igb_irq_enable - Enable default interrupt generation settings
943  * @adapter: board private structure
944  **/
945 static void igb_irq_enable(struct igb_adapter *adapter)
946 {
947         struct e1000_hw *hw = &adapter->hw;
948
949         if (adapter->msix_entries) {
950                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
951                 u32 regval = rd32(E1000_EIAC);
952                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
953                 regval = rd32(E1000_EIAM);
954                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
955                 wr32(E1000_EIMS, adapter->eims_enable_mask);
956                 if (adapter->vfs_allocated_count) {
957                         wr32(E1000_MBVFIMR, 0xFF);
958                         ims |= E1000_IMS_VMMB;
959                 }
960                 wr32(E1000_IMS, ims);
961         } else {
962                 wr32(E1000_IMS, IMS_ENABLE_MASK);
963                 wr32(E1000_IAM, IMS_ENABLE_MASK);
964         }
965 }
966
967 static void igb_update_mng_vlan(struct igb_adapter *adapter)
968 {
969         struct e1000_hw *hw = &adapter->hw;
970         u16 vid = adapter->hw.mng_cookie.vlan_id;
971         u16 old_vid = adapter->mng_vlan_id;
972
973         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
974                 /* add VID to filter table */
975                 igb_vfta_set(hw, vid, true);
976                 adapter->mng_vlan_id = vid;
977         } else {
978                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
979         }
980
981         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
982             (vid != old_vid) &&
983             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
984                 /* remove VID from filter table */
985                 igb_vfta_set(hw, old_vid, false);
986         }
987 }
988
989 /**
990  * igb_release_hw_control - release control of the h/w to f/w
991  * @adapter: address of board private structure
992  *
993  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
994  * For ASF and Pass Through versions of f/w this means that the
995  * driver is no longer loaded.
996  *
997  **/
998 static void igb_release_hw_control(struct igb_adapter *adapter)
999 {
1000         struct e1000_hw *hw = &adapter->hw;
1001         u32 ctrl_ext;
1002
1003         /* Let firmware take over control of h/w */
1004         ctrl_ext = rd32(E1000_CTRL_EXT);
1005         wr32(E1000_CTRL_EXT,
1006                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1007 }
1008
1009 /**
1010  * igb_get_hw_control - get control of the h/w from f/w
1011  * @adapter: address of board private structure
1012  *
1013  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1014  * For ASF and Pass Through versions of f/w this means that
1015  * the driver is loaded.
1016  *
1017  **/
1018 static void igb_get_hw_control(struct igb_adapter *adapter)
1019 {
1020         struct e1000_hw *hw = &adapter->hw;
1021         u32 ctrl_ext;
1022
1023         /* Let firmware know the driver has taken over */
1024         ctrl_ext = rd32(E1000_CTRL_EXT);
1025         wr32(E1000_CTRL_EXT,
1026                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1027 }
1028
1029 /**
1030  * igb_configure - configure the hardware for RX and TX
1031  * @adapter: private board structure
1032  **/
1033 static void igb_configure(struct igb_adapter *adapter)
1034 {
1035         struct net_device *netdev = adapter->netdev;
1036         int i;
1037
1038         igb_get_hw_control(adapter);
1039         igb_set_rx_mode(netdev);
1040
1041         igb_restore_vlan(adapter);
1042
1043         igb_setup_tctl(adapter);
1044         igb_setup_mrqc(adapter);
1045         igb_setup_rctl(adapter);
1046
1047         igb_configure_tx(adapter);
1048         igb_configure_rx(adapter);
1049
1050         igb_rx_fifo_flush_82575(&adapter->hw);
1051
1052         /* call igb_desc_unused which always leaves
1053          * at least 1 descriptor unused to make sure
1054          * next_to_use != next_to_clean */
1055         for (i = 0; i < adapter->num_rx_queues; i++) {
1056                 struct igb_ring *ring = &adapter->rx_ring[i];
1057                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1058         }
1059
1060
1061         adapter->tx_queue_len = netdev->tx_queue_len;
1062 }
1063
1064
1065 /**
1066  * igb_up - Open the interface and prepare it to handle traffic
1067  * @adapter: board private structure
1068  **/
1069 int igb_up(struct igb_adapter *adapter)
1070 {
1071         struct e1000_hw *hw = &adapter->hw;
1072         int i;
1073
1074         /* hardware has been reset, we need to reload some things */
1075         igb_configure(adapter);
1076
1077         clear_bit(__IGB_DOWN, &adapter->state);
1078
1079         for (i = 0; i < adapter->num_q_vectors; i++) {
1080                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1081                 napi_enable(&q_vector->napi);
1082         }
1083         if (adapter->msix_entries)
1084                 igb_configure_msix(adapter);
1085
1086         /* Clear any pending interrupts. */
1087         rd32(E1000_ICR);
1088         igb_irq_enable(adapter);
1089
1090         /* notify VFs that reset has been completed */
1091         if (adapter->vfs_allocated_count) {
1092                 u32 reg_data = rd32(E1000_CTRL_EXT);
1093                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1094                 wr32(E1000_CTRL_EXT, reg_data);
1095         }
1096
1097         netif_tx_start_all_queues(adapter->netdev);
1098
1099         /* start the watchdog. */
1100         hw->mac.get_link_status = 1;
1101         schedule_work(&adapter->watchdog_task);
1102
1103         return 0;
1104 }
1105
1106 void igb_down(struct igb_adapter *adapter)
1107 {
1108         struct net_device *netdev = adapter->netdev;
1109         struct e1000_hw *hw = &adapter->hw;
1110         u32 tctl, rctl;
1111         int i;
1112
1113         /* signal that we're down so the interrupt handler does not
1114          * reschedule our watchdog timer */
1115         set_bit(__IGB_DOWN, &adapter->state);
1116
1117         /* disable receives in the hardware */
1118         rctl = rd32(E1000_RCTL);
1119         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1120         /* flush and sleep below */
1121
1122         netif_tx_stop_all_queues(netdev);
1123
1124         /* disable transmits in the hardware */
1125         tctl = rd32(E1000_TCTL);
1126         tctl &= ~E1000_TCTL_EN;
1127         wr32(E1000_TCTL, tctl);
1128         /* flush both disables and wait for them to finish */
1129         wrfl();
1130         msleep(10);
1131
1132         for (i = 0; i < adapter->num_q_vectors; i++) {
1133                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1134                 napi_disable(&q_vector->napi);
1135         }
1136
1137         igb_irq_disable(adapter);
1138
1139         del_timer_sync(&adapter->watchdog_timer);
1140         del_timer_sync(&adapter->phy_info_timer);
1141
1142         netdev->tx_queue_len = adapter->tx_queue_len;
1143         netif_carrier_off(netdev);
1144
1145         /* record the stats before reset*/
1146         igb_update_stats(adapter);
1147
1148         adapter->link_speed = 0;
1149         adapter->link_duplex = 0;
1150
1151         if (!pci_channel_offline(adapter->pdev))
1152                 igb_reset(adapter);
1153         igb_clean_all_tx_rings(adapter);
1154         igb_clean_all_rx_rings(adapter);
1155 #ifdef CONFIG_IGB_DCA
1156
1157         /* since we reset the hardware DCA settings were cleared */
1158         igb_setup_dca(adapter);
1159 #endif
1160 }
1161
1162 void igb_reinit_locked(struct igb_adapter *adapter)
1163 {
1164         WARN_ON(in_interrupt());
1165         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1166                 msleep(1);
1167         igb_down(adapter);
1168         igb_up(adapter);
1169         clear_bit(__IGB_RESETTING, &adapter->state);
1170 }
1171
1172 void igb_reset(struct igb_adapter *adapter)
1173 {
1174         struct pci_dev *pdev = adapter->pdev;
1175         struct e1000_hw *hw = &adapter->hw;
1176         struct e1000_mac_info *mac = &hw->mac;
1177         struct e1000_fc_info *fc = &hw->fc;
1178         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1179         u16 hwm;
1180
1181         /* Repartition Pba for greater than 9k mtu
1182          * To take effect CTRL.RST is required.
1183          */
1184         switch (mac->type) {
1185         case e1000_82576:
1186                 pba = rd32(E1000_RXPBS);
1187                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1188                 break;
1189         case e1000_82575:
1190         default:
1191                 pba = E1000_PBA_34K;
1192                 break;
1193         }
1194
1195         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1196             (mac->type < e1000_82576)) {
1197                 /* adjust PBA for jumbo frames */
1198                 wr32(E1000_PBA, pba);
1199
1200                 /* To maintain wire speed transmits, the Tx FIFO should be
1201                  * large enough to accommodate two full transmit packets,
1202                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1203                  * the Rx FIFO should be large enough to accommodate at least
1204                  * one full receive packet and is similarly rounded up and
1205                  * expressed in KB. */
1206                 pba = rd32(E1000_PBA);
1207                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1208                 tx_space = pba >> 16;
1209                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1210                 pba &= 0xffff;
1211                 /* the tx fifo also stores 16 bytes of information about the tx
1212                  * but don't include ethernet FCS because hardware appends it */
1213                 min_tx_space = (adapter->max_frame_size +
1214                                 sizeof(union e1000_adv_tx_desc) -
1215                                 ETH_FCS_LEN) * 2;
1216                 min_tx_space = ALIGN(min_tx_space, 1024);
1217                 min_tx_space >>= 10;
1218                 /* software strips receive CRC, so leave room for it */
1219                 min_rx_space = adapter->max_frame_size;
1220                 min_rx_space = ALIGN(min_rx_space, 1024);
1221                 min_rx_space >>= 10;
1222
1223                 /* If current Tx allocation is less than the min Tx FIFO size,
1224                  * and the min Tx FIFO size is less than the current Rx FIFO
1225                  * allocation, take space away from current Rx allocation */
1226                 if (tx_space < min_tx_space &&
1227                     ((min_tx_space - tx_space) < pba)) {
1228                         pba = pba - (min_tx_space - tx_space);
1229
1230                         /* if short on rx space, rx wins and must trump tx
1231                          * adjustment */
1232                         if (pba < min_rx_space)
1233                                 pba = min_rx_space;
1234                 }
1235                 wr32(E1000_PBA, pba);
1236         }
1237
1238         /* flow control settings */
1239         /* The high water mark must be low enough to fit one full frame
1240          * (or the size used for early receive) above it in the Rx FIFO.
1241          * Set it to the lower of:
1242          * - 90% of the Rx FIFO size, or
1243          * - the full Rx FIFO size minus one full frame */
1244         hwm = min(((pba << 10) * 9 / 10),
1245                         ((pba << 10) - 2 * adapter->max_frame_size));
1246
1247         if (mac->type < e1000_82576) {
1248                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
1249                 fc->low_water = fc->high_water - 8;
1250         } else {
1251                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1252                 fc->low_water = fc->high_water - 16;
1253         }
1254         fc->pause_time = 0xFFFF;
1255         fc->send_xon = 1;
1256         fc->current_mode = fc->requested_mode;
1257
1258         /* disable receive for all VFs and wait one second */
1259         if (adapter->vfs_allocated_count) {
1260                 int i;
1261                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1262                         adapter->vf_data[i].flags = 0;
1263
1264                 /* ping all the active vfs to let them know we are going down */
1265                 igb_ping_all_vfs(adapter);
1266
1267                 /* disable transmits and receives */
1268                 wr32(E1000_VFRE, 0);
1269                 wr32(E1000_VFTE, 0);
1270         }
1271
1272         /* Allow time for pending master requests to run */
1273         hw->mac.ops.reset_hw(hw);
1274         wr32(E1000_WUC, 0);
1275
1276         if (hw->mac.ops.init_hw(hw))
1277                 dev_err(&pdev->dev, "Hardware Error\n");
1278
1279         igb_update_mng_vlan(adapter);
1280
1281         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1282         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1283
1284         igb_reset_adaptive(hw);
1285         igb_get_phy_info(hw);
1286 }
1287
1288 static const struct net_device_ops igb_netdev_ops = {
1289         .ndo_open               = igb_open,
1290         .ndo_stop               = igb_close,
1291         .ndo_start_xmit         = igb_xmit_frame_adv,
1292         .ndo_get_stats          = igb_get_stats,
1293         .ndo_set_rx_mode        = igb_set_rx_mode,
1294         .ndo_set_multicast_list = igb_set_rx_mode,
1295         .ndo_set_mac_address    = igb_set_mac,
1296         .ndo_change_mtu         = igb_change_mtu,
1297         .ndo_do_ioctl           = igb_ioctl,
1298         .ndo_tx_timeout         = igb_tx_timeout,
1299         .ndo_validate_addr      = eth_validate_addr,
1300         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1301         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1302         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1303 #ifdef CONFIG_NET_POLL_CONTROLLER
1304         .ndo_poll_controller    = igb_netpoll,
1305 #endif
1306 };
1307
1308 /**
1309  * igb_probe - Device Initialization Routine
1310  * @pdev: PCI device information struct
1311  * @ent: entry in igb_pci_tbl
1312  *
1313  * Returns 0 on success, negative on failure
1314  *
1315  * igb_probe initializes an adapter identified by a pci_dev structure.
1316  * The OS initialization, configuring of the adapter private structure,
1317  * and a hardware reset occur.
1318  **/
1319 static int __devinit igb_probe(struct pci_dev *pdev,
1320                                const struct pci_device_id *ent)
1321 {
1322         struct net_device *netdev;
1323         struct igb_adapter *adapter;
1324         struct e1000_hw *hw;
1325         u16 eeprom_data = 0;
1326         static int global_quad_port_a; /* global quad port a indication */
1327         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1328         unsigned long mmio_start, mmio_len;
1329         int err, pci_using_dac;
1330         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1331         u32 part_num;
1332
1333         err = pci_enable_device_mem(pdev);
1334         if (err)
1335                 return err;
1336
1337         pci_using_dac = 0;
1338         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1339         if (!err) {
1340                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1341                 if (!err)
1342                         pci_using_dac = 1;
1343         } else {
1344                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1345                 if (err) {
1346                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1347                         if (err) {
1348                                 dev_err(&pdev->dev, "No usable DMA "
1349                                         "configuration, aborting\n");
1350                                 goto err_dma;
1351                         }
1352                 }
1353         }
1354
1355         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1356                                            IORESOURCE_MEM),
1357                                            igb_driver_name);
1358         if (err)
1359                 goto err_pci_reg;
1360
1361         pci_enable_pcie_error_reporting(pdev);
1362
1363         pci_set_master(pdev);
1364         pci_save_state(pdev);
1365
1366         err = -ENOMEM;
1367         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1368                                    IGB_ABS_MAX_TX_QUEUES);
1369         if (!netdev)
1370                 goto err_alloc_etherdev;
1371
1372         SET_NETDEV_DEV(netdev, &pdev->dev);
1373
1374         pci_set_drvdata(pdev, netdev);
1375         adapter = netdev_priv(netdev);
1376         adapter->netdev = netdev;
1377         adapter->pdev = pdev;
1378         hw = &adapter->hw;
1379         hw->back = adapter;
1380         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1381
1382         mmio_start = pci_resource_start(pdev, 0);
1383         mmio_len = pci_resource_len(pdev, 0);
1384
1385         err = -EIO;
1386         hw->hw_addr = ioremap(mmio_start, mmio_len);
1387         if (!hw->hw_addr)
1388                 goto err_ioremap;
1389
1390         netdev->netdev_ops = &igb_netdev_ops;
1391         igb_set_ethtool_ops(netdev);
1392         netdev->watchdog_timeo = 5 * HZ;
1393
1394         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1395
1396         netdev->mem_start = mmio_start;
1397         netdev->mem_end = mmio_start + mmio_len;
1398
1399         /* PCI config space info */
1400         hw->vendor_id = pdev->vendor;
1401         hw->device_id = pdev->device;
1402         hw->revision_id = pdev->revision;
1403         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1404         hw->subsystem_device_id = pdev->subsystem_device;
1405
1406         /* Copy the default MAC, PHY and NVM function pointers */
1407         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1408         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1409         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1410         /* Initialize skew-specific constants */
1411         err = ei->get_invariants(hw);
1412         if (err)
1413                 goto err_sw_init;
1414
1415         /* setup the private structure */
1416         err = igb_sw_init(adapter);
1417         if (err)
1418                 goto err_sw_init;
1419
1420         igb_get_bus_info_pcie(hw);
1421
1422         hw->phy.autoneg_wait_to_complete = false;
1423         hw->mac.adaptive_ifs = true;
1424
1425         /* Copper options */
1426         if (hw->phy.media_type == e1000_media_type_copper) {
1427                 hw->phy.mdix = AUTO_ALL_MODES;
1428                 hw->phy.disable_polarity_correction = false;
1429                 hw->phy.ms_type = e1000_ms_hw_default;
1430         }
1431
1432         if (igb_check_reset_block(hw))
1433                 dev_info(&pdev->dev,
1434                         "PHY reset is blocked due to SOL/IDER session.\n");
1435
1436         netdev->features = NETIF_F_SG |
1437                            NETIF_F_IP_CSUM |
1438                            NETIF_F_HW_VLAN_TX |
1439                            NETIF_F_HW_VLAN_RX |
1440                            NETIF_F_HW_VLAN_FILTER;
1441
1442         netdev->features |= NETIF_F_IPV6_CSUM;
1443         netdev->features |= NETIF_F_TSO;
1444         netdev->features |= NETIF_F_TSO6;
1445         netdev->features |= NETIF_F_GRO;
1446
1447         netdev->vlan_features |= NETIF_F_TSO;
1448         netdev->vlan_features |= NETIF_F_TSO6;
1449         netdev->vlan_features |= NETIF_F_IP_CSUM;
1450         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1451         netdev->vlan_features |= NETIF_F_SG;
1452
1453         if (pci_using_dac)
1454                 netdev->features |= NETIF_F_HIGHDMA;
1455
1456         if (hw->mac.type >= e1000_82576)
1457                 netdev->features |= NETIF_F_SCTP_CSUM;
1458
1459         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1460
1461         /* before reading the NVM, reset the controller to put the device in a
1462          * known good starting state */
1463         hw->mac.ops.reset_hw(hw);
1464
1465         /* make sure the NVM is good */
1466         if (igb_validate_nvm_checksum(hw) < 0) {
1467                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1468                 err = -EIO;
1469                 goto err_eeprom;
1470         }
1471
1472         /* copy the MAC address out of the NVM */
1473         if (hw->mac.ops.read_mac_addr(hw))
1474                 dev_err(&pdev->dev, "NVM Read Error\n");
1475
1476         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1477         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1478
1479         if (!is_valid_ether_addr(netdev->perm_addr)) {
1480                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1481                 err = -EIO;
1482                 goto err_eeprom;
1483         }
1484
1485         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1486                     (unsigned long) adapter);
1487         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1488                     (unsigned long) adapter);
1489
1490         INIT_WORK(&adapter->reset_task, igb_reset_task);
1491         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1492
1493         /* Initialize link properties that are user-changeable */
1494         adapter->fc_autoneg = true;
1495         hw->mac.autoneg = true;
1496         hw->phy.autoneg_advertised = 0x2f;
1497
1498         hw->fc.requested_mode = e1000_fc_default;
1499         hw->fc.current_mode = e1000_fc_default;
1500
1501         igb_validate_mdi_setting(hw);
1502
1503         /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
1504          * enable the ACPI Magic Packet filter
1505          */
1506
1507         if (hw->bus.func == 0)
1508                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1509         else if (hw->bus.func == 1)
1510                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1511
1512         if (eeprom_data & eeprom_apme_mask)
1513                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1514
1515         /* now that we have the eeprom settings, apply the special cases where
1516          * the eeprom may be wrong or the board simply won't support wake on
1517          * lan on a particular port */
1518         switch (pdev->device) {
1519         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1520                 adapter->eeprom_wol = 0;
1521                 break;
1522         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1523         case E1000_DEV_ID_82576_FIBER:
1524         case E1000_DEV_ID_82576_SERDES:
1525                 /* Wake events only supported on port A for dual fiber
1526                  * regardless of eeprom setting */
1527                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1528                         adapter->eeprom_wol = 0;
1529                 break;
1530         case E1000_DEV_ID_82576_QUAD_COPPER:
1531                 /* if quad port adapter, disable WoL on all but port A */
1532                 if (global_quad_port_a != 0)
1533                         adapter->eeprom_wol = 0;
1534                 else
1535                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1536                 /* Reset for multiple quad port adapters */
1537                 if (++global_quad_port_a == 4)
1538                         global_quad_port_a = 0;
1539                 break;
1540         }
1541
1542         /* initialize the wol settings based on the eeprom settings */
1543         adapter->wol = adapter->eeprom_wol;
1544         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1545
1546         /* reset the hardware with the new settings */
1547         igb_reset(adapter);
1548
1549         /* let the f/w know that the h/w is now under the control of the
1550          * driver. */
1551         igb_get_hw_control(adapter);
1552
1553         strcpy(netdev->name, "eth%d");
1554         err = register_netdev(netdev);
1555         if (err)
1556                 goto err_register;
1557
1558         /* carrier off reporting is important to ethtool even BEFORE open */
1559         netif_carrier_off(netdev);
1560
1561 #ifdef CONFIG_IGB_DCA
1562         if (dca_add_requester(&pdev->dev) == 0) {
1563                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1564                 dev_info(&pdev->dev, "DCA enabled\n");
1565                 igb_setup_dca(adapter);
1566         }
1567
1568 #endif
1569         switch (hw->mac.type) {
1570         case e1000_82576:
1571                 /*
1572                  * Initialize hardware timer: we keep it running just in case
1573                  * that some program needs it later on.
1574                  */
1575                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1576                 adapter->cycles.read = igb_read_clock;
1577                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1578                 adapter->cycles.mult = 1;
1579                 /**
1580                  * Scale the NIC clock cycle by a large factor so that
1581                  * relatively small clock corrections can be added or
1582                  * substracted at each clock tick. The drawbacks of a large
1583                  * factor are a) that the clock register overflows more quickly
1584                  * (not such a big deal) and b) that the increment per tick has
1585                  * to fit into 24 bits.  As a result we need to use a shift of
1586                  * 19 so we can fit a value of 16 into the TIMINCA register.
1587                  */
1588                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1589                 wr32(E1000_TIMINCA,
1590                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1591                                 (16 << IGB_82576_TSYNC_SHIFT));
1592
1593                 /* Set registers so that rollover occurs soon to test this. */
1594                 wr32(E1000_SYSTIML, 0x00000000);
1595                 wr32(E1000_SYSTIMH, 0xFF800000);
1596                 wrfl();
1597
1598                 timecounter_init(&adapter->clock,
1599                                  &adapter->cycles,
1600                                  ktime_to_ns(ktime_get_real()));
1601                 /*
1602                  * Synchronize our NIC clock against system wall clock. NIC
1603                  * time stamp reading requires ~3us per sample, each sample
1604                  * was pretty stable even under load => only require 10
1605                  * samples for each offset comparison.
1606                  */
1607                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1608                 adapter->compare.source = &adapter->clock;
1609                 adapter->compare.target = ktime_get_real;
1610                 adapter->compare.num_samples = 10;
1611                 timecompare_update(&adapter->compare, 0);
1612                 break;
1613         case e1000_82575:
1614                 /* 82575 does not support timesync */
1615         default:
1616                 break;
1617         }
1618
1619         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1620         /* print bus type/speed/width info */
1621         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1622                  netdev->name,
1623                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1624                                                             "unknown"),
1625                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1626                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1627                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1628                    "unknown"),
1629                  netdev->dev_addr);
1630
1631         igb_read_part_num(hw, &part_num);
1632         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1633                 (part_num >> 8), (part_num & 0xff));
1634
1635         dev_info(&pdev->dev,
1636                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1637                 adapter->msix_entries ? "MSI-X" :
1638                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1639                 adapter->num_rx_queues, adapter->num_tx_queues);
1640
1641         return 0;
1642
1643 err_register:
1644         igb_release_hw_control(adapter);
1645 err_eeprom:
1646         if (!igb_check_reset_block(hw))
1647                 igb_reset_phy(hw);
1648
1649         if (hw->flash_address)
1650                 iounmap(hw->flash_address);
1651 err_sw_init:
1652         igb_clear_interrupt_scheme(adapter);
1653         iounmap(hw->hw_addr);
1654 err_ioremap:
1655         free_netdev(netdev);
1656 err_alloc_etherdev:
1657         pci_release_selected_regions(pdev,
1658                                      pci_select_bars(pdev, IORESOURCE_MEM));
1659 err_pci_reg:
1660 err_dma:
1661         pci_disable_device(pdev);
1662         return err;
1663 }
1664
1665 /**
1666  * igb_remove - Device Removal Routine
1667  * @pdev: PCI device information struct
1668  *
1669  * igb_remove is called by the PCI subsystem to alert the driver
1670  * that it should release a PCI device.  The could be caused by a
1671  * Hot-Plug event, or because the driver is going to be removed from
1672  * memory.
1673  **/
1674 static void __devexit igb_remove(struct pci_dev *pdev)
1675 {
1676         struct net_device *netdev = pci_get_drvdata(pdev);
1677         struct igb_adapter *adapter = netdev_priv(netdev);
1678         struct e1000_hw *hw = &adapter->hw;
1679
1680         /* flush_scheduled work may reschedule our watchdog task, so
1681          * explicitly disable watchdog tasks from being rescheduled  */
1682         set_bit(__IGB_DOWN, &adapter->state);
1683         del_timer_sync(&adapter->watchdog_timer);
1684         del_timer_sync(&adapter->phy_info_timer);
1685
1686         flush_scheduled_work();
1687
1688 #ifdef CONFIG_IGB_DCA
1689         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1690                 dev_info(&pdev->dev, "DCA disabled\n");
1691                 dca_remove_requester(&pdev->dev);
1692                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1693                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1694         }
1695 #endif
1696
1697         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1698          * would have already happened in close and is redundant. */
1699         igb_release_hw_control(adapter);
1700
1701         unregister_netdev(netdev);
1702
1703         if (!igb_check_reset_block(hw))
1704                 igb_reset_phy(hw);
1705
1706         igb_clear_interrupt_scheme(adapter);
1707
1708 #ifdef CONFIG_PCI_IOV
1709         /* reclaim resources allocated to VFs */
1710         if (adapter->vf_data) {
1711                 /* disable iov and allow time for transactions to clear */
1712                 pci_disable_sriov(pdev);
1713                 msleep(500);
1714
1715                 kfree(adapter->vf_data);
1716                 adapter->vf_data = NULL;
1717                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1718                 msleep(100);
1719                 dev_info(&pdev->dev, "IOV Disabled\n");
1720         }
1721 #endif
1722
1723         iounmap(hw->hw_addr);
1724         if (hw->flash_address)
1725                 iounmap(hw->flash_address);
1726         pci_release_selected_regions(pdev,
1727                                      pci_select_bars(pdev, IORESOURCE_MEM));
1728
1729         free_netdev(netdev);
1730
1731         pci_disable_pcie_error_reporting(pdev);
1732
1733         pci_disable_device(pdev);
1734 }
1735
1736 /**
1737  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1738  * @adapter: board private structure to initialize
1739  *
1740  * This function initializes the vf specific data storage and then attempts to
1741  * allocate the VFs.  The reason for ordering it this way is because it is much
1742  * mor expensive time wise to disable SR-IOV than it is to allocate and free
1743  * the memory for the VFs.
1744  **/
1745 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
1746 {
1747 #ifdef CONFIG_PCI_IOV
1748         struct pci_dev *pdev = adapter->pdev;
1749
1750         if (adapter->vfs_allocated_count > 7)
1751                 adapter->vfs_allocated_count = 7;
1752
1753         if (adapter->vfs_allocated_count) {
1754                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1755                                            sizeof(struct vf_data_storage),
1756                                            GFP_KERNEL);
1757                 /* if allocation failed then we do not support SR-IOV */
1758                 if (!adapter->vf_data) {
1759                         adapter->vfs_allocated_count = 0;
1760                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
1761                                 "Data Storage\n");
1762                 }
1763         }
1764
1765         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1766                 kfree(adapter->vf_data);
1767                 adapter->vf_data = NULL;
1768 #endif /* CONFIG_PCI_IOV */
1769                 adapter->vfs_allocated_count = 0;
1770 #ifdef CONFIG_PCI_IOV
1771         } else {
1772                 unsigned char mac_addr[ETH_ALEN];
1773                 int i;
1774                 dev_info(&pdev->dev, "%d vfs allocated\n",
1775                          adapter->vfs_allocated_count);
1776                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1777                         random_ether_addr(mac_addr);
1778                         igb_set_vf_mac(adapter, i, mac_addr);
1779                 }
1780         }
1781 #endif /* CONFIG_PCI_IOV */
1782 }
1783
1784 /**
1785  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1786  * @adapter: board private structure to initialize
1787  *
1788  * igb_sw_init initializes the Adapter private data structure.
1789  * Fields are initialized based on PCI device information and
1790  * OS network device settings (MTU size).
1791  **/
1792 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1793 {
1794         struct e1000_hw *hw = &adapter->hw;
1795         struct net_device *netdev = adapter->netdev;
1796         struct pci_dev *pdev = adapter->pdev;
1797
1798         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1799
1800         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1801         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1802         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1803         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1804
1805         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1806         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1807
1808 #ifdef CONFIG_PCI_IOV
1809         if (hw->mac.type == e1000_82576)
1810                 adapter->vfs_allocated_count = max_vfs;
1811
1812 #endif /* CONFIG_PCI_IOV */
1813         /* This call may decrease the number of queues */
1814         if (igb_init_interrupt_scheme(adapter)) {
1815                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1816                 return -ENOMEM;
1817         }
1818
1819         igb_probe_vfs(adapter);
1820
1821         /* Explicitly disable IRQ since the NIC can be in any state. */
1822         igb_irq_disable(adapter);
1823
1824         set_bit(__IGB_DOWN, &adapter->state);
1825         return 0;
1826 }
1827
1828 /**
1829  * igb_open - Called when a network interface is made active
1830  * @netdev: network interface device structure
1831  *
1832  * Returns 0 on success, negative value on failure
1833  *
1834  * The open entry point is called when a network interface is made
1835  * active by the system (IFF_UP).  At this point all resources needed
1836  * for transmit and receive operations are allocated, the interrupt
1837  * handler is registered with the OS, the watchdog timer is started,
1838  * and the stack is notified that the interface is ready.
1839  **/
1840 static int igb_open(struct net_device *netdev)
1841 {
1842         struct igb_adapter *adapter = netdev_priv(netdev);
1843         struct e1000_hw *hw = &adapter->hw;
1844         int err;
1845         int i;
1846
1847         /* disallow open during test */
1848         if (test_bit(__IGB_TESTING, &adapter->state))
1849                 return -EBUSY;
1850
1851         netif_carrier_off(netdev);
1852
1853         /* allocate transmit descriptors */
1854         err = igb_setup_all_tx_resources(adapter);
1855         if (err)
1856                 goto err_setup_tx;
1857
1858         /* allocate receive descriptors */
1859         err = igb_setup_all_rx_resources(adapter);
1860         if (err)
1861                 goto err_setup_rx;
1862
1863         /* e1000_power_up_phy(adapter); */
1864
1865         /* before we allocate an interrupt, we must be ready to handle it.
1866          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
1867          * as soon as we call pci_request_irq, so we have to setup our
1868          * clean_rx handler before we do so.  */
1869         igb_configure(adapter);
1870
1871         err = igb_request_irq(adapter);
1872         if (err)
1873                 goto err_req_irq;
1874
1875         /* From here on the code is the same as igb_up() */
1876         clear_bit(__IGB_DOWN, &adapter->state);
1877
1878         for (i = 0; i < adapter->num_q_vectors; i++) {
1879                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1880                 napi_enable(&q_vector->napi);
1881         }
1882
1883         /* Clear any pending interrupts. */
1884         rd32(E1000_ICR);
1885
1886         igb_irq_enable(adapter);
1887
1888         /* notify VFs that reset has been completed */
1889         if (adapter->vfs_allocated_count) {
1890                 u32 reg_data = rd32(E1000_CTRL_EXT);
1891                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1892                 wr32(E1000_CTRL_EXT, reg_data);
1893         }
1894
1895         netif_tx_start_all_queues(netdev);
1896
1897         /* start the watchdog. */
1898         hw->mac.get_link_status = 1;
1899         schedule_work(&adapter->watchdog_task);
1900
1901         return 0;
1902
1903 err_req_irq:
1904         igb_release_hw_control(adapter);
1905         /* e1000_power_down_phy(adapter); */
1906         igb_free_all_rx_resources(adapter);
1907 err_setup_rx:
1908         igb_free_all_tx_resources(adapter);
1909 err_setup_tx:
1910         igb_reset(adapter);
1911
1912         return err;
1913 }
1914
1915 /**
1916  * igb_close - Disables a network interface
1917  * @netdev: network interface device structure
1918  *
1919  * Returns 0, this is not allowed to fail
1920  *
1921  * The close entry point is called when an interface is de-activated
1922  * by the OS.  The hardware is still under the driver's control, but
1923  * needs to be disabled.  A global MAC reset is issued to stop the
1924  * hardware, and all transmit and receive resources are freed.
1925  **/
1926 static int igb_close(struct net_device *netdev)
1927 {
1928         struct igb_adapter *adapter = netdev_priv(netdev);
1929
1930         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
1931         igb_down(adapter);
1932
1933         igb_free_irq(adapter);
1934
1935         igb_free_all_tx_resources(adapter);
1936         igb_free_all_rx_resources(adapter);
1937
1938         return 0;
1939 }
1940
1941 /**
1942  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
1943  * @tx_ring: tx descriptor ring (for a specific queue) to setup
1944  *
1945  * Return 0 on success, negative on failure
1946  **/
1947 int igb_setup_tx_resources(struct igb_ring *tx_ring)
1948 {
1949         struct pci_dev *pdev = tx_ring->pdev;
1950         int size;
1951
1952         size = sizeof(struct igb_buffer) * tx_ring->count;
1953         tx_ring->buffer_info = vmalloc(size);
1954         if (!tx_ring->buffer_info)
1955                 goto err;
1956         memset(tx_ring->buffer_info, 0, size);
1957
1958         /* round up to nearest 4K */
1959         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
1960         tx_ring->size = ALIGN(tx_ring->size, 4096);
1961
1962         tx_ring->desc = pci_alloc_consistent(pdev,
1963                                              tx_ring->size,
1964                                              &tx_ring->dma);
1965
1966         if (!tx_ring->desc)
1967                 goto err;
1968
1969         tx_ring->next_to_use = 0;
1970         tx_ring->next_to_clean = 0;
1971         return 0;
1972
1973 err:
1974         vfree(tx_ring->buffer_info);
1975         dev_err(&pdev->dev,
1976                 "Unable to allocate memory for the transmit descriptor ring\n");
1977         return -ENOMEM;
1978 }
1979
1980 /**
1981  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
1982  *                                (Descriptors) for all queues
1983  * @adapter: board private structure
1984  *
1985  * Return 0 on success, negative on failure
1986  **/
1987 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
1988 {
1989         struct pci_dev *pdev = adapter->pdev;
1990         int i, err = 0;
1991
1992         for (i = 0; i < adapter->num_tx_queues; i++) {
1993                 err = igb_setup_tx_resources(&adapter->tx_ring[i]);
1994                 if (err) {
1995                         dev_err(&pdev->dev,
1996                                 "Allocation for Tx Queue %u failed\n", i);
1997                         for (i--; i >= 0; i--)
1998                                 igb_free_tx_resources(&adapter->tx_ring[i]);
1999                         break;
2000                 }
2001         }
2002
2003         for (i = 0; i < IGB_MAX_TX_QUEUES; i++) {
2004                 int r_idx = i % adapter->num_tx_queues;
2005                 adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
2006         }
2007         return err;
2008 }
2009
2010 /**
2011  * igb_setup_tctl - configure the transmit control registers
2012  * @adapter: Board private structure
2013  **/
2014 void igb_setup_tctl(struct igb_adapter *adapter)
2015 {
2016         struct e1000_hw *hw = &adapter->hw;
2017         u32 tctl;
2018
2019         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2020         wr32(E1000_TXDCTL(0), 0);
2021
2022         /* Program the Transmit Control Register */
2023         tctl = rd32(E1000_TCTL);
2024         tctl &= ~E1000_TCTL_CT;
2025         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2026                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2027
2028         igb_config_collision_dist(hw);
2029
2030         /* Enable transmits */
2031         tctl |= E1000_TCTL_EN;
2032
2033         wr32(E1000_TCTL, tctl);
2034 }
2035
2036 /**
2037  * igb_configure_tx_ring - Configure transmit ring after Reset
2038  * @adapter: board private structure
2039  * @ring: tx ring to configure
2040  *
2041  * Configure a transmit ring after a reset.
2042  **/
2043 void igb_configure_tx_ring(struct igb_adapter *adapter,
2044                            struct igb_ring *ring)
2045 {
2046         struct e1000_hw *hw = &adapter->hw;
2047         u32 txdctl;
2048         u64 tdba = ring->dma;
2049         int reg_idx = ring->reg_idx;
2050
2051         /* disable the queue */
2052         txdctl = rd32(E1000_TXDCTL(reg_idx));
2053         wr32(E1000_TXDCTL(reg_idx),
2054                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2055         wrfl();
2056         mdelay(10);
2057
2058         wr32(E1000_TDLEN(reg_idx),
2059                         ring->count * sizeof(union e1000_adv_tx_desc));
2060         wr32(E1000_TDBAL(reg_idx),
2061                         tdba & 0x00000000ffffffffULL);
2062         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2063
2064         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2065         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2066         writel(0, ring->head);
2067         writel(0, ring->tail);
2068
2069         txdctl |= IGB_TX_PTHRESH;
2070         txdctl |= IGB_TX_HTHRESH << 8;
2071         txdctl |= IGB_TX_WTHRESH << 16;
2072
2073         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2074         wr32(E1000_TXDCTL(reg_idx), txdctl);
2075 }
2076
2077 /**
2078  * igb_configure_tx - Configure transmit Unit after Reset
2079  * @adapter: board private structure
2080  *
2081  * Configure the Tx unit of the MAC after a reset.
2082  **/
2083 static void igb_configure_tx(struct igb_adapter *adapter)
2084 {
2085         int i;
2086
2087         for (i = 0; i < adapter->num_tx_queues; i++)
2088                 igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
2089 }
2090
2091 /**
2092  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2093  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2094  *
2095  * Returns 0 on success, negative on failure
2096  **/
2097 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2098 {
2099         struct pci_dev *pdev = rx_ring->pdev;
2100         int size, desc_len;
2101
2102         size = sizeof(struct igb_buffer) * rx_ring->count;
2103         rx_ring->buffer_info = vmalloc(size);
2104         if (!rx_ring->buffer_info)
2105                 goto err;
2106         memset(rx_ring->buffer_info, 0, size);
2107
2108         desc_len = sizeof(union e1000_adv_rx_desc);
2109
2110         /* Round up to nearest 4K */
2111         rx_ring->size = rx_ring->count * desc_len;
2112         rx_ring->size = ALIGN(rx_ring->size, 4096);
2113
2114         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2115                                              &rx_ring->dma);
2116
2117         if (!rx_ring->desc)
2118                 goto err;
2119
2120         rx_ring->next_to_clean = 0;
2121         rx_ring->next_to_use = 0;
2122
2123         return 0;
2124
2125 err:
2126         vfree(rx_ring->buffer_info);
2127         rx_ring->buffer_info = NULL;
2128         dev_err(&pdev->dev, "Unable to allocate memory for "
2129                 "the receive descriptor ring\n");
2130         return -ENOMEM;
2131 }
2132
2133 /**
2134  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2135  *                                (Descriptors) for all queues
2136  * @adapter: board private structure
2137  *
2138  * Return 0 on success, negative on failure
2139  **/
2140 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2141 {
2142         struct pci_dev *pdev = adapter->pdev;
2143         int i, err = 0;
2144
2145         for (i = 0; i < adapter->num_rx_queues; i++) {
2146                 err = igb_setup_rx_resources(&adapter->rx_ring[i]);
2147                 if (err) {
2148                         dev_err(&pdev->dev,
2149                                 "Allocation for Rx Queue %u failed\n", i);
2150                         for (i--; i >= 0; i--)
2151                                 igb_free_rx_resources(&adapter->rx_ring[i]);
2152                         break;
2153                 }
2154         }
2155
2156         return err;
2157 }
2158
2159 /**
2160  * igb_setup_mrqc - configure the multiple receive queue control registers
2161  * @adapter: Board private structure
2162  **/
2163 static void igb_setup_mrqc(struct igb_adapter *adapter)
2164 {
2165         struct e1000_hw *hw = &adapter->hw;
2166         u32 mrqc, rxcsum;
2167         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2168         union e1000_reta {
2169                 u32 dword;
2170                 u8  bytes[4];
2171         } reta;
2172         static const u8 rsshash[40] = {
2173                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2174                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2175                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2176                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2177
2178         /* Fill out hash function seeds */
2179         for (j = 0; j < 10; j++) {
2180                 u32 rsskey = rsshash[(j * 4)];
2181                 rsskey |= rsshash[(j * 4) + 1] << 8;
2182                 rsskey |= rsshash[(j * 4) + 2] << 16;
2183                 rsskey |= rsshash[(j * 4) + 3] << 24;
2184                 array_wr32(E1000_RSSRK(0), j, rsskey);
2185         }
2186
2187         num_rx_queues = adapter->num_rx_queues;
2188
2189         if (adapter->vfs_allocated_count) {
2190                 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2191                 switch (hw->mac.type) {
2192                 case e1000_82576:
2193                         shift = 3;
2194                         num_rx_queues = 2;
2195                         break;
2196                 case e1000_82575:
2197                         shift = 2;
2198                         shift2 = 6;
2199                 default:
2200                         break;
2201                 }
2202         } else {
2203                 if (hw->mac.type == e1000_82575)
2204                         shift = 6;
2205         }
2206
2207         for (j = 0; j < (32 * 4); j++) {
2208                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2209                 if (shift2)
2210                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2211                 if ((j & 3) == 3)
2212                         wr32(E1000_RETA(j >> 2), reta.dword);
2213         }
2214
2215         /*
2216          * Disable raw packet checksumming so that RSS hash is placed in
2217          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2218          * offloads as they are enabled by default
2219          */
2220         rxcsum = rd32(E1000_RXCSUM);
2221         rxcsum |= E1000_RXCSUM_PCSD;
2222
2223         if (adapter->hw.mac.type >= e1000_82576)
2224                 /* Enable Receive Checksum Offload for SCTP */
2225                 rxcsum |= E1000_RXCSUM_CRCOFL;
2226
2227         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2228         wr32(E1000_RXCSUM, rxcsum);
2229
2230         /* If VMDq is enabled then we set the appropriate mode for that, else
2231          * we default to RSS so that an RSS hash is calculated per packet even
2232          * if we are only using one queue */
2233         if (adapter->vfs_allocated_count) {
2234                 if (hw->mac.type > e1000_82575) {
2235                         /* Set the default pool for the PF's first queue */
2236                         u32 vtctl = rd32(E1000_VT_CTL);
2237                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2238                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2239                         vtctl |= adapter->vfs_allocated_count <<
2240                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2241                         wr32(E1000_VT_CTL, vtctl);
2242                 }
2243                 if (adapter->num_rx_queues > 1)
2244                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2245                 else
2246                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2247         } else {
2248                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2249         }
2250         igb_vmm_control(adapter);
2251
2252         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2253                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2254         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2255                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2256         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2257                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2258         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2259                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2260
2261         wr32(E1000_MRQC, mrqc);
2262 }
2263
2264 /**
2265  * igb_setup_rctl - configure the receive control registers
2266  * @adapter: Board private structure
2267  **/
2268 void igb_setup_rctl(struct igb_adapter *adapter)
2269 {
2270         struct e1000_hw *hw = &adapter->hw;
2271         u32 rctl;
2272
2273         rctl = rd32(E1000_RCTL);
2274
2275         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2276         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2277
2278         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2279                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2280
2281         /*
2282          * enable stripping of CRC. It's unlikely this will break BMC
2283          * redirection as it did with e1000. Newer features require
2284          * that the HW strips the CRC.
2285          */
2286         rctl |= E1000_RCTL_SECRC;
2287
2288         /* disable store bad packets and clear size bits. */
2289         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2290
2291         /* enable LPE to prevent packets larger than max_frame_size */
2292         rctl |= E1000_RCTL_LPE;
2293
2294         /* disable queue 0 to prevent tail write w/o re-config */
2295         wr32(E1000_RXDCTL(0), 0);
2296
2297         /* Attention!!!  For SR-IOV PF driver operations you must enable
2298          * queue drop for all VF and PF queues to prevent head of line blocking
2299          * if an un-trusted VF does not provide descriptors to hardware.
2300          */
2301         if (adapter->vfs_allocated_count) {
2302                 /* set all queue drop enable bits */
2303                 wr32(E1000_QDE, ALL_QUEUES);
2304         }
2305
2306         wr32(E1000_RCTL, rctl);
2307 }
2308
2309 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2310                                    int vfn)
2311 {
2312         struct e1000_hw *hw = &adapter->hw;
2313         u32 vmolr;
2314
2315         /* if it isn't the PF check to see if VFs are enabled and
2316          * increase the size to support vlan tags */
2317         if (vfn < adapter->vfs_allocated_count &&
2318             adapter->vf_data[vfn].vlans_enabled)
2319                 size += VLAN_TAG_SIZE;
2320
2321         vmolr = rd32(E1000_VMOLR(vfn));
2322         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2323         vmolr |= size | E1000_VMOLR_LPE;
2324         wr32(E1000_VMOLR(vfn), vmolr);
2325
2326         return 0;
2327 }
2328
2329 /**
2330  * igb_rlpml_set - set maximum receive packet size
2331  * @adapter: board private structure
2332  *
2333  * Configure maximum receivable packet size.
2334  **/
2335 static void igb_rlpml_set(struct igb_adapter *adapter)
2336 {
2337         u32 max_frame_size = adapter->max_frame_size;
2338         struct e1000_hw *hw = &adapter->hw;
2339         u16 pf_id = adapter->vfs_allocated_count;
2340
2341         if (adapter->vlgrp)
2342                 max_frame_size += VLAN_TAG_SIZE;
2343
2344         /* if vfs are enabled we set RLPML to the largest possible request
2345          * size and set the VMOLR RLPML to the size we need */
2346         if (pf_id) {
2347                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2348                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2349         }
2350
2351         wr32(E1000_RLPML, max_frame_size);
2352 }
2353
2354 static inline void igb_set_vmolr(struct igb_adapter *adapter, int vfn)
2355 {
2356         struct e1000_hw *hw = &adapter->hw;
2357         u32 vmolr;
2358
2359         /*
2360          * This register exists only on 82576 and newer so if we are older then
2361          * we should exit and do nothing
2362          */
2363         if (hw->mac.type < e1000_82576)
2364                 return;
2365
2366         vmolr = rd32(E1000_VMOLR(vfn));
2367         vmolr |= E1000_VMOLR_AUPE |        /* Accept untagged packets */
2368                  E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2369
2370         /* clear all bits that might not be set */
2371         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2372
2373         if (adapter->num_rx_queues > 1 && vfn == adapter->vfs_allocated_count)
2374                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2375         /*
2376          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2377          * multicast packets
2378          */
2379         if (vfn <= adapter->vfs_allocated_count)
2380                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2381
2382         wr32(E1000_VMOLR(vfn), vmolr);
2383 }
2384
2385 /**
2386  * igb_configure_rx_ring - Configure a receive ring after Reset
2387  * @adapter: board private structure
2388  * @ring: receive ring to be configured
2389  *
2390  * Configure the Rx unit of the MAC after a reset.
2391  **/
2392 void igb_configure_rx_ring(struct igb_adapter *adapter,
2393                            struct igb_ring *ring)
2394 {
2395         struct e1000_hw *hw = &adapter->hw;
2396         u64 rdba = ring->dma;
2397         int reg_idx = ring->reg_idx;
2398         u32 srrctl, rxdctl;
2399
2400         /* disable the queue */
2401         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2402         wr32(E1000_RXDCTL(reg_idx),
2403                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2404
2405         /* Set DMA base address registers */
2406         wr32(E1000_RDBAL(reg_idx),
2407              rdba & 0x00000000ffffffffULL);
2408         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2409         wr32(E1000_RDLEN(reg_idx),
2410                        ring->count * sizeof(union e1000_adv_rx_desc));
2411
2412         /* initialize head and tail */
2413         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2414         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2415         writel(0, ring->head);
2416         writel(0, ring->tail);
2417
2418         /* set descriptor configuration */
2419         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2420                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2421                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2422 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2423                 srrctl |= IGB_RXBUFFER_16384 >>
2424                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2425 #else
2426                 srrctl |= (PAGE_SIZE / 2) >>
2427                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2428 #endif
2429                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2430         } else {
2431                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2432                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2433                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2434         }
2435
2436         wr32(E1000_SRRCTL(reg_idx), srrctl);
2437
2438         /* set filtering for VMDQ pools */
2439         igb_set_vmolr(adapter, reg_idx & 0x7);
2440
2441         /* enable receive descriptor fetching */
2442         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2443         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2444         rxdctl &= 0xFFF00000;
2445         rxdctl |= IGB_RX_PTHRESH;
2446         rxdctl |= IGB_RX_HTHRESH << 8;
2447         rxdctl |= IGB_RX_WTHRESH << 16;
2448         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2449 }
2450
2451 /**
2452  * igb_configure_rx - Configure receive Unit after Reset
2453  * @adapter: board private structure
2454  *
2455  * Configure the Rx unit of the MAC after a reset.
2456  **/
2457 static void igb_configure_rx(struct igb_adapter *adapter)
2458 {
2459         int i;
2460
2461         /* set UTA to appropriate mode */
2462         igb_set_uta(adapter);
2463
2464         /* set the correct pool for the PF default MAC address in entry 0 */
2465         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2466                          adapter->vfs_allocated_count);
2467
2468         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2469          * the Base and Length of the Rx Descriptor Ring */
2470         for (i = 0; i < adapter->num_rx_queues; i++)
2471                 igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
2472 }
2473
2474 /**
2475  * igb_free_tx_resources - Free Tx Resources per Queue
2476  * @tx_ring: Tx descriptor ring for a specific queue
2477  *
2478  * Free all transmit software resources
2479  **/
2480 void igb_free_tx_resources(struct igb_ring *tx_ring)
2481 {
2482         igb_clean_tx_ring(tx_ring);
2483
2484         vfree(tx_ring->buffer_info);
2485         tx_ring->buffer_info = NULL;
2486
2487         /* if not set, then don't free */
2488         if (!tx_ring->desc)
2489                 return;
2490
2491         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2492                             tx_ring->desc, tx_ring->dma);
2493
2494         tx_ring->desc = NULL;
2495 }
2496
2497 /**
2498  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2499  * @adapter: board private structure
2500  *
2501  * Free all transmit software resources
2502  **/
2503 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2504 {
2505         int i;
2506
2507         for (i = 0; i < adapter->num_tx_queues; i++)
2508                 igb_free_tx_resources(&adapter->tx_ring[i]);
2509 }
2510
2511 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2512                                     struct igb_buffer *buffer_info)
2513 {
2514         buffer_info->dma = 0;
2515         if (buffer_info->skb) {
2516                 skb_dma_unmap(&tx_ring->pdev->dev,
2517                               buffer_info->skb,
2518                               DMA_TO_DEVICE);
2519                 dev_kfree_skb_any(buffer_info->skb);
2520                 buffer_info->skb = NULL;
2521         }
2522         buffer_info->time_stamp = 0;
2523         /* buffer_info must be completely set up in the transmit path */
2524 }
2525
2526 /**
2527  * igb_clean_tx_ring - Free Tx Buffers
2528  * @tx_ring: ring to be cleaned
2529  **/
2530 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2531 {
2532         struct igb_buffer *buffer_info;
2533         unsigned long size;
2534         unsigned int i;
2535
2536         if (!tx_ring->buffer_info)
2537                 return;
2538         /* Free all the Tx ring sk_buffs */
2539
2540         for (i = 0; i < tx_ring->count; i++) {
2541                 buffer_info = &tx_ring->buffer_info[i];
2542                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2543         }
2544
2545         size = sizeof(struct igb_buffer) * tx_ring->count;
2546         memset(tx_ring->buffer_info, 0, size);
2547
2548         /* Zero out the descriptor ring */
2549         memset(tx_ring->desc, 0, tx_ring->size);
2550
2551         tx_ring->next_to_use = 0;
2552         tx_ring->next_to_clean = 0;
2553 }
2554
2555 /**
2556  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2557  * @adapter: board private structure
2558  **/
2559 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2560 {
2561         int i;
2562
2563         for (i = 0; i < adapter->num_tx_queues; i++)
2564                 igb_clean_tx_ring(&adapter->tx_ring[i]);
2565 }
2566
2567 /**
2568  * igb_free_rx_resources - Free Rx Resources
2569  * @rx_ring: ring to clean the resources from
2570  *
2571  * Free all receive software resources
2572  **/
2573 void igb_free_rx_resources(struct igb_ring *rx_ring)
2574 {
2575         igb_clean_rx_ring(rx_ring);
2576
2577         vfree(rx_ring->buffer_info);
2578         rx_ring->buffer_info = NULL;
2579
2580         /* if not set, then don't free */
2581         if (!rx_ring->desc)
2582                 return;
2583
2584         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2585                             rx_ring->desc, rx_ring->dma);
2586
2587         rx_ring->desc = NULL;
2588 }
2589
2590 /**
2591  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2592  * @adapter: board private structure
2593  *
2594  * Free all receive software resources
2595  **/
2596 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2597 {
2598         int i;
2599
2600         for (i = 0; i < adapter->num_rx_queues; i++)
2601                 igb_free_rx_resources(&adapter->rx_ring[i]);
2602 }
2603
2604 /**
2605  * igb_clean_rx_ring - Free Rx Buffers per Queue
2606  * @rx_ring: ring to free buffers from
2607  **/
2608 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2609 {
2610         struct igb_buffer *buffer_info;
2611         unsigned long size;
2612         unsigned int i;
2613
2614         if (!rx_ring->buffer_info)
2615                 return;
2616
2617         /* Free all the Rx ring sk_buffs */
2618         for (i = 0; i < rx_ring->count; i++) {
2619                 buffer_info = &rx_ring->buffer_info[i];
2620                 if (buffer_info->dma) {
2621                         pci_unmap_single(rx_ring->pdev,
2622                                          buffer_info->dma,
2623                                          rx_ring->rx_buffer_len,
2624                                          PCI_DMA_FROMDEVICE);
2625                         buffer_info->dma = 0;
2626                 }
2627
2628                 if (buffer_info->skb) {
2629                         dev_kfree_skb(buffer_info->skb);
2630                         buffer_info->skb = NULL;
2631                 }
2632                 if (buffer_info->page_dma) {
2633                         pci_unmap_page(rx_ring->pdev,
2634                                        buffer_info->page_dma,
2635                                        PAGE_SIZE / 2,
2636                                        PCI_DMA_FROMDEVICE);
2637                         buffer_info->page_dma = 0;
2638                 }
2639                 if (buffer_info->page) {
2640                         put_page(buffer_info->page);
2641                         buffer_info->page = NULL;
2642                         buffer_info->page_offset = 0;
2643                 }
2644         }
2645
2646         size = sizeof(struct igb_buffer) * rx_ring->count;
2647         memset(rx_ring->buffer_info, 0, size);
2648
2649         /* Zero out the descriptor ring */
2650         memset(rx_ring->desc, 0, rx_ring->size);
2651
2652         rx_ring->next_to_clean = 0;
2653         rx_ring->next_to_use = 0;
2654 }
2655
2656 /**
2657  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2658  * @adapter: board private structure
2659  **/
2660 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2661 {
2662         int i;
2663
2664         for (i = 0; i < adapter->num_rx_queues; i++)
2665                 igb_clean_rx_ring(&adapter->rx_ring[i]);
2666 }
2667
2668 /**
2669  * igb_set_mac - Change the Ethernet Address of the NIC
2670  * @netdev: network interface device structure
2671  * @p: pointer to an address structure
2672  *
2673  * Returns 0 on success, negative on failure
2674  **/
2675 static int igb_set_mac(struct net_device *netdev, void *p)
2676 {
2677         struct igb_adapter *adapter = netdev_priv(netdev);
2678         struct e1000_hw *hw = &adapter->hw;
2679         struct sockaddr *addr = p;
2680
2681         if (!is_valid_ether_addr(addr->sa_data))
2682                 return -EADDRNOTAVAIL;
2683
2684         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2685         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2686
2687         /* set the correct pool for the new PF MAC address in entry 0 */
2688         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2689                          adapter->vfs_allocated_count);
2690
2691         return 0;
2692 }
2693
2694 /**
2695  * igb_write_mc_addr_list - write multicast addresses to MTA
2696  * @netdev: network interface device structure
2697  *
2698  * Writes multicast address list to the MTA hash table.
2699  * Returns: -ENOMEM on failure
2700  *                0 on no addresses written
2701  *                X on writing X addresses to MTA
2702  **/
2703 static int igb_write_mc_addr_list(struct net_device *netdev)
2704 {
2705         struct igb_adapter *adapter = netdev_priv(netdev);
2706         struct e1000_hw *hw = &adapter->hw;
2707         struct dev_mc_list *mc_ptr = netdev->mc_list;
2708         u8  *mta_list;
2709         u32 vmolr = 0;
2710         int i;
2711
2712         if (!netdev->mc_count) {
2713                 /* nothing to program, so clear mc list */
2714                 igb_update_mc_addr_list(hw, NULL, 0);
2715                 igb_restore_vf_multicasts(adapter);
2716                 return 0;
2717         }
2718
2719         mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC);
2720         if (!mta_list)
2721                 return -ENOMEM;
2722
2723         /* set vmolr receive overflow multicast bit */
2724         vmolr |= E1000_VMOLR_ROMPE;
2725
2726         /* The shared function expects a packed array of only addresses. */
2727         mc_ptr = netdev->mc_list;
2728
2729         for (i = 0; i < netdev->mc_count; i++) {
2730                 if (!mc_ptr)
2731                         break;
2732                 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2733                 mc_ptr = mc_ptr->next;
2734         }
2735         igb_update_mc_addr_list(hw, mta_list, i);
2736         kfree(mta_list);
2737
2738         return netdev->mc_count;
2739 }
2740
2741 /**
2742  * igb_write_uc_addr_list - write unicast addresses to RAR table
2743  * @netdev: network interface device structure
2744  *
2745  * Writes unicast address list to the RAR table.
2746  * Returns: -ENOMEM on failure/insufficient address space
2747  *                0 on no addresses written
2748  *                X on writing X addresses to the RAR table
2749  **/
2750 static int igb_write_uc_addr_list(struct net_device *netdev)
2751 {
2752         struct igb_adapter *adapter = netdev_priv(netdev);
2753         struct e1000_hw *hw = &adapter->hw;
2754         unsigned int vfn = adapter->vfs_allocated_count;
2755         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2756         int count = 0;
2757
2758         /* return ENOMEM indicating insufficient memory for addresses */
2759         if (netdev->uc.count > rar_entries)
2760                 return -ENOMEM;
2761
2762         if (netdev->uc.count && rar_entries) {
2763                 struct netdev_hw_addr *ha;
2764                 list_for_each_entry(ha, &netdev->uc.list, list) {
2765                         if (!rar_entries)
2766                                 break;
2767                         igb_rar_set_qsel(adapter, ha->addr,
2768                                          rar_entries--,
2769                                          vfn);
2770                         count++;
2771                 }
2772         }
2773         /* write the addresses in reverse order to avoid write combining */
2774         for (; rar_entries > 0 ; rar_entries--) {
2775                 wr32(E1000_RAH(rar_entries), 0);
2776                 wr32(E1000_RAL(rar_entries), 0);
2777         }
2778         wrfl();
2779
2780         return count;
2781 }
2782
2783 /**
2784  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2785  * @netdev: network interface device structure
2786  *
2787  * The set_rx_mode entry point is called whenever the unicast or multicast
2788  * address lists or the network interface flags are updated.  This routine is
2789  * responsible for configuring the hardware for proper unicast, multicast,
2790  * promiscuous mode, and all-multi behavior.
2791  **/
2792 static void igb_set_rx_mode(struct net_device *netdev)
2793 {
2794         struct igb_adapter *adapter = netdev_priv(netdev);
2795         struct e1000_hw *hw = &adapter->hw;
2796         unsigned int vfn = adapter->vfs_allocated_count;
2797         u32 rctl, vmolr = 0;
2798         int count;
2799
2800         /* Check for Promiscuous and All Multicast modes */
2801         rctl = rd32(E1000_RCTL);
2802
2803         /* clear the effected bits */
2804         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2805
2806         if (netdev->flags & IFF_PROMISC) {
2807                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2808                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2809         } else {
2810                 if (netdev->flags & IFF_ALLMULTI) {
2811                         rctl |= E1000_RCTL_MPE;
2812                         vmolr |= E1000_VMOLR_MPME;
2813                 } else {
2814                         /*
2815                          * Write addresses to the MTA, if the attempt fails
2816                          * then we should just turn on promiscous mode so
2817                          * that we can at least receive multicast traffic
2818                          */
2819                         count = igb_write_mc_addr_list(netdev);
2820                         if (count < 0) {
2821                                 rctl |= E1000_RCTL_MPE;
2822                                 vmolr |= E1000_VMOLR_MPME;
2823                         } else if (count) {
2824                                 vmolr |= E1000_VMOLR_ROMPE;
2825                         }
2826                 }
2827                 /*
2828                  * Write addresses to available RAR registers, if there is not
2829                  * sufficient space to store all the addresses then enable
2830                  * unicast promiscous mode
2831                  */
2832                 count = igb_write_uc_addr_list(netdev);
2833                 if (count < 0) {
2834                         rctl |= E1000_RCTL_UPE;
2835                         vmolr |= E1000_VMOLR_ROPE;
2836                 }
2837                 rctl |= E1000_RCTL_VFE;
2838         }
2839         wr32(E1000_RCTL, rctl);
2840
2841         /*
2842          * In order to support SR-IOV and eventually VMDq it is necessary to set
2843          * the VMOLR to enable the appropriate modes.  Without this workaround
2844          * we will have issues with VLAN tag stripping not being done for frames
2845          * that are only arriving because we are the default pool
2846          */
2847         if (hw->mac.type < e1000_82576)
2848                 return;
2849
2850         vmolr |= rd32(E1000_VMOLR(vfn)) &
2851                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
2852         wr32(E1000_VMOLR(vfn), vmolr);
2853         igb_restore_vf_multicasts(adapter);
2854 }
2855
2856 /* Need to wait a few seconds after link up to get diagnostic information from
2857  * the phy */
2858 static void igb_update_phy_info(unsigned long data)
2859 {
2860         struct igb_adapter *adapter = (struct igb_adapter *) data;
2861         igb_get_phy_info(&adapter->hw);
2862 }
2863
2864 /**
2865  * igb_has_link - check shared code for link and determine up/down
2866  * @adapter: pointer to driver private info
2867  **/
2868 static bool igb_has_link(struct igb_adapter *adapter)
2869 {
2870         struct e1000_hw *hw = &adapter->hw;
2871         bool link_active = false;
2872         s32 ret_val = 0;
2873
2874         /* get_link_status is set on LSC (link status) interrupt or
2875          * rx sequence error interrupt.  get_link_status will stay
2876          * false until the e1000_check_for_link establishes link
2877          * for copper adapters ONLY
2878          */
2879         switch (hw->phy.media_type) {
2880         case e1000_media_type_copper:
2881                 if (hw->mac.get_link_status) {
2882                         ret_val = hw->mac.ops.check_for_link(hw);
2883                         link_active = !hw->mac.get_link_status;
2884                 } else {
2885                         link_active = true;
2886                 }
2887                 break;
2888         case e1000_media_type_internal_serdes:
2889                 ret_val = hw->mac.ops.check_for_link(hw);
2890                 link_active = hw->mac.serdes_has_link;
2891                 break;
2892         default:
2893         case e1000_media_type_unknown:
2894                 break;
2895         }
2896
2897         return link_active;
2898 }
2899
2900 /**
2901  * igb_watchdog - Timer Call-back
2902  * @data: pointer to adapter cast into an unsigned long
2903  **/
2904 static void igb_watchdog(unsigned long data)
2905 {
2906         struct igb_adapter *adapter = (struct igb_adapter *)data;
2907         /* Do the rest outside of interrupt context */
2908         schedule_work(&adapter->watchdog_task);
2909 }
2910
2911 static void igb_watchdog_task(struct work_struct *work)
2912 {
2913         struct igb_adapter *adapter = container_of(work,
2914                                                    struct igb_adapter,
2915                                                    watchdog_task);
2916         struct e1000_hw *hw = &adapter->hw;
2917         struct net_device *netdev = adapter->netdev;
2918         struct igb_ring *tx_ring = adapter->tx_ring;
2919         u32 link;
2920         int i;
2921
2922         link = igb_has_link(adapter);
2923         if (link) {
2924                 if (!netif_carrier_ok(netdev)) {
2925                         u32 ctrl;
2926                         hw->mac.ops.get_speed_and_duplex(hw,
2927                                                          &adapter->link_speed,
2928                                                          &adapter->link_duplex);
2929
2930                         ctrl = rd32(E1000_CTRL);
2931                         /* Links status message must follow this format */
2932                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
2933                                  "Flow Control: %s\n",
2934                                netdev->name,
2935                                adapter->link_speed,
2936                                adapter->link_duplex == FULL_DUPLEX ?
2937                                  "Full Duplex" : "Half Duplex",
2938                                ((ctrl & E1000_CTRL_TFCE) &&
2939                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
2940                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
2941                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
2942
2943                         /* tweak tx_queue_len according to speed/duplex and
2944                          * adjust the timeout factor */
2945                         netdev->tx_queue_len = adapter->tx_queue_len;
2946                         adapter->tx_timeout_factor = 1;
2947                         switch (adapter->link_speed) {
2948                         case SPEED_10:
2949                                 netdev->tx_queue_len = 10;
2950                                 adapter->tx_timeout_factor = 14;
2951                                 break;
2952                         case SPEED_100:
2953                                 netdev->tx_queue_len = 100;
2954                                 /* maybe add some timeout factor ? */
2955                                 break;
2956                         }
2957
2958                         netif_carrier_on(netdev);
2959
2960                         igb_ping_all_vfs(adapter);
2961
2962                         /* link state has changed, schedule phy info update */
2963                         if (!test_bit(__IGB_DOWN, &adapter->state))
2964                                 mod_timer(&adapter->phy_info_timer,
2965                                           round_jiffies(jiffies + 2 * HZ));
2966                 }
2967         } else {
2968                 if (netif_carrier_ok(netdev)) {
2969                         adapter->link_speed = 0;
2970                         adapter->link_duplex = 0;
2971                         /* Links status message must follow this format */
2972                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
2973                                netdev->name);
2974                         netif_carrier_off(netdev);
2975
2976                         igb_ping_all_vfs(adapter);
2977
2978                         /* link state has changed, schedule phy info update */
2979                         if (!test_bit(__IGB_DOWN, &adapter->state))
2980                                 mod_timer(&adapter->phy_info_timer,
2981                                           round_jiffies(jiffies + 2 * HZ));
2982                 }
2983         }
2984
2985         igb_update_stats(adapter);
2986         igb_update_adaptive(hw);
2987
2988         if (!netif_carrier_ok(netdev)) {
2989                 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
2990                         /* We've lost link, so the controller stops DMA,
2991                          * but we've got queued Tx work that's never going
2992                          * to get done, so reset controller to flush Tx.
2993                          * (Do the reset outside of interrupt context). */
2994                         adapter->tx_timeout_count++;
2995                         schedule_work(&adapter->reset_task);
2996                         /* return immediately since reset is imminent */
2997                         return;
2998                 }
2999         }
3000
3001         /* Force detection of hung controller every watchdog period */
3002         for (i = 0; i < adapter->num_tx_queues; i++)
3003                 adapter->tx_ring[i].detect_tx_hung = true;
3004
3005         /* Cause software interrupt to ensure rx ring is cleaned */
3006         if (adapter->msix_entries) {
3007                 u32 eics = 0;
3008                 for (i = 0; i < adapter->num_q_vectors; i++) {
3009                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3010                         eics |= q_vector->eims_value;
3011                 }
3012                 wr32(E1000_EICS, eics);
3013         } else {
3014                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3015         }
3016
3017         /* Reset the timer */
3018         if (!test_bit(__IGB_DOWN, &adapter->state))
3019                 mod_timer(&adapter->watchdog_timer,
3020                           round_jiffies(jiffies + 2 * HZ));
3021 }
3022
/*
 * Latency classes used by the dynamic ITR code.  igb_set_itr() maps the
 * first three to ITR values 56, 196 and 980 respectively.
 */
enum latency_range {
        lowest_latency,                 /* 0 */
        low_latency,                    /* 1 */
        bulk_latency,                   /* 2 */
        latency_invalid = 255
};
3029
3030 /**
3031  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3032  *
3033  *      Stores a new ITR value based on strictly on packet size.  This
3034  *      algorithm is less sophisticated than that used in igb_update_itr,
3035  *      due to the difficulty of synchronizing statistics across multiple
3036  *      receive rings.  The divisors and thresholds used by this fuction
3037  *      were determined based on theoretical maximum wire speed and testing
3038  *      data, in order to minimize response time while increasing bulk
3039  *      throughput.
3040  *      This functionality is controlled by the InterruptThrottleRate module
3041  *      parameter (see igb_param.c)
3042  *      NOTE:  This function is called only when operating in a multiqueue
3043  *             receive environment.
3044  * @q_vector: pointer to q_vector
3045  **/
3046 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3047 {
3048         int new_val = q_vector->itr_val;
3049         int avg_wire_size = 0;
3050         struct igb_adapter *adapter = q_vector->adapter;
3051
3052         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3053          * ints/sec - ITR timer value of 120 ticks.
3054          */
3055         if (adapter->link_speed != SPEED_1000) {
3056                 new_val = 976;
3057                 goto set_itr_val;
3058         }
3059
3060         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3061                 struct igb_ring *ring = q_vector->rx_ring;
3062                 avg_wire_size = ring->total_bytes / ring->total_packets;
3063         }
3064
3065         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3066                 struct igb_ring *ring = q_vector->tx_ring;
3067                 avg_wire_size = max_t(u32, avg_wire_size,
3068                                       (ring->total_bytes /
3069                                        ring->total_packets));
3070         }
3071
3072         /* if avg_wire_size isn't set no work was done */
3073         if (!avg_wire_size)
3074                 goto clear_counts;
3075
3076         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3077         avg_wire_size += 24;
3078
3079         /* Don't starve jumbo frames */
3080         avg_wire_size = min(avg_wire_size, 3000);
3081
3082         /* Give a little boost to mid-size frames */
3083         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3084                 new_val = avg_wire_size / 3;
3085         else
3086                 new_val = avg_wire_size / 2;
3087
3088 set_itr_val:
3089         if (new_val != q_vector->itr_val) {
3090                 q_vector->itr_val = new_val;
3091                 q_vector->set_itr = 1;
3092         }
3093 clear_counts:
3094         if (q_vector->rx_ring) {
3095                 q_vector->rx_ring->total_bytes = 0;
3096                 q_vector->rx_ring->total_packets = 0;
3097         }
3098         if (q_vector->tx_ring) {
3099                 q_vector->tx_ring->total_bytes = 0;
3100                 q_vector->tx_ring->total_packets = 0;
3101         }
3102 }
3103
3104 /**
3105  * igb_update_itr - update the dynamic ITR value based on statistics
3106  *      Stores a new ITR value based on packets and byte
3107  *      counts during the last interrupt.  The advantage of per interrupt
3108  *      computation is faster updates and more accurate ITR for the current
3109  *      traffic pattern.  Constants in this function were computed
3110  *      based on theoretical maximum wire speed and thresholds were set based
3111  *      on testing data as well as attempting to minimize response time
3112  *      while increasing bulk throughput.
3113  *      this functionality is controlled by the InterruptThrottleRate module
3114  *      parameter (see igb_param.c)
3115  *      NOTE:  These calculations are only valid when operating in a single-
3116  *             queue environment.
3117  * @adapter: pointer to adapter
3118  * @itr_setting: current q_vector->itr_val
3119  * @packets: the number of packets during this measurement interval
3120  * @bytes: the number of bytes during this measurement interval
3121  **/
3122 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3123                                    int packets, int bytes)
3124 {
3125         unsigned int retval = itr_setting;
3126
3127         if (packets == 0)
3128                 goto update_itr_done;
3129
3130         switch (itr_setting) {
3131         case lowest_latency:
3132                 /* handle TSO and jumbo frames */
3133                 if (bytes/packets > 8000)
3134                         retval = bulk_latency;
3135                 else if ((packets < 5) && (bytes > 512))
3136                         retval = low_latency;
3137                 break;
3138         case low_latency:  /* 50 usec aka 20000 ints/s */
3139                 if (bytes > 10000) {
3140                         /* this if handles the TSO accounting */
3141                         if (bytes/packets > 8000) {
3142                                 retval = bulk_latency;
3143                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3144                                 retval = bulk_latency;
3145                         } else if ((packets > 35)) {
3146                                 retval = lowest_latency;
3147                         }
3148                 } else if (bytes/packets > 2000) {
3149                         retval = bulk_latency;
3150                 } else if (packets <= 2 && bytes < 512) {
3151                         retval = lowest_latency;
3152                 }
3153                 break;
3154         case bulk_latency: /* 250 usec aka 4000 ints/s */
3155                 if (bytes > 25000) {
3156                         if (packets > 35)
3157                                 retval = low_latency;
3158                 } else if (bytes < 1500) {
3159                         retval = low_latency;
3160                 }
3161                 break;
3162         }
3163
3164 update_itr_done:
3165         return retval;
3166 }
3167
3168 static void igb_set_itr(struct igb_adapter *adapter)
3169 {
3170         struct igb_q_vector *q_vector = adapter->q_vector[0];
3171         u16 current_itr;
3172         u32 new_itr = q_vector->itr_val;
3173
3174         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3175         if (adapter->link_speed != SPEED_1000) {
3176                 current_itr = 0;
3177                 new_itr = 4000;
3178                 goto set_itr_now;
3179         }
3180
3181         adapter->rx_itr = igb_update_itr(adapter,
3182                                     adapter->rx_itr,
3183                                     adapter->rx_ring->total_packets,
3184                                     adapter->rx_ring->total_bytes);
3185
3186         adapter->tx_itr = igb_update_itr(adapter,
3187                                     adapter->tx_itr,
3188                                     adapter->tx_ring->total_packets,
3189                                     adapter->tx_ring->total_bytes);
3190         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3191
3192         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3193         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3194                 current_itr = low_latency;
3195
3196         switch (current_itr) {
3197         /* counts and packets in update_itr are dependent on these numbers */
3198         case lowest_latency:
3199                 new_itr = 56;  /* aka 70,000 ints/sec */
3200                 break;
3201         case low_latency:
3202                 new_itr = 196; /* aka 20,000 ints/sec */
3203                 break;
3204         case bulk_latency:
3205                 new_itr = 980; /* aka 4,000 ints/sec */
3206                 break;
3207         default:
3208                 break;
3209         }
3210
3211 set_itr_now:
3212         adapter->rx_ring->total_bytes = 0;
3213         adapter->rx_ring->total_packets = 0;
3214         adapter->tx_ring->total_bytes = 0;
3215         adapter->tx_ring->total_packets = 0;
3216
3217         if (new_itr != q_vector->itr_val) {
3218                 /* this attempts to bias the interrupt rate towards Bulk
3219                  * by adding intermediate steps when interrupt rate is
3220                  * increasing */
3221                 new_itr = new_itr > q_vector->itr_val ?
3222                              max((new_itr * q_vector->itr_val) /
3223                                  (new_itr + (q_vector->itr_val >> 2)),
3224                                  new_itr) :
3225                              new_itr;
3226                 /* Don't write the value here; it resets the adapter's
3227                  * internal timer, and causes us to delay far longer than
3228                  * we should between interrupts.  Instead, we write the ITR
3229                  * value at the beginning of the next interrupt so the timing
3230                  * ends up being correct.
3231                  */
3232                 q_vector->itr_val = new_itr;
3233                 q_vector->set_itr = 1;
3234         }
3235
3236         return;
3237 }
3238
/*
 * Per-packet transmit flags built up in the xmit path.  The low bits are
 * individual feature flags; the upper 16 bits carry the VLAN tag.
 */
#define IGB_TX_FLAGS_CSUM		0x00000001	/* checksum offload requested */
#define IGB_TX_FLAGS_VLAN		0x00000002	/* VLAN tag present in upper bits */
#define IGB_TX_FLAGS_TSO		0x00000004	/* TCP segmentation offload */
#define IGB_TX_FLAGS_IPV4		0x00000008	/* skb->protocol is ETH_P_IP */
#define IGB_TX_FLAGS_TSTAMP		0x00000010	/* hardware time stamp requested */
#define IGB_TX_FLAGS_VLAN_MASK		0xffff0000	/* bits holding the VLAN tag */
#define IGB_TX_FLAGS_VLAN_SHIFT		16		/* shift of the tag within the flags */
3246
3247 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3248                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3249 {
3250         struct e1000_adv_tx_context_desc *context_desc;
3251         unsigned int i;
3252         int err;
3253         struct igb_buffer *buffer_info;
3254         u32 info = 0, tu_cmd = 0;
3255         u32 mss_l4len_idx, l4len;
3256         *hdr_len = 0;
3257
3258         if (skb_header_cloned(skb)) {
3259                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3260                 if (err)
3261                         return err;
3262         }
3263
3264         l4len = tcp_hdrlen(skb);
3265         *hdr_len += l4len;
3266
3267         if (skb->protocol == htons(ETH_P_IP)) {
3268                 struct iphdr *iph = ip_hdr(skb);
3269                 iph->tot_len = 0;
3270                 iph->check = 0;
3271                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3272                                                          iph->daddr, 0,
3273                                                          IPPROTO_TCP,
3274                                                          0);
3275         } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
3276                 ipv6_hdr(skb)->payload_len = 0;
3277                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3278                                                        &ipv6_hdr(skb)->daddr,
3279                                                        0, IPPROTO_TCP, 0);
3280         }
3281
3282         i = tx_ring->next_to_use;
3283
3284         buffer_info = &tx_ring->buffer_info[i];
3285         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3286         /* VLAN MACLEN IPLEN */
3287         if (tx_flags & IGB_TX_FLAGS_VLAN)
3288                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3289         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3290         *hdr_len += skb_network_offset(skb);
3291         info |= skb_network_header_len(skb);
3292         *hdr_len += skb_network_header_len(skb);
3293         context_desc->vlan_macip_lens = cpu_to_le32(info);
3294
3295         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3296         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3297
3298         if (skb->protocol == htons(ETH_P_IP))
3299                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3300         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3301
3302         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3303
3304         /* MSS L4LEN IDX */
3305         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3306         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3307
3308         /* For 82575, context index must be unique per ring. */
3309         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3310                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3311
3312         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3313         context_desc->seqnum_seed = 0;
3314
3315         buffer_info->time_stamp = jiffies;
3316         buffer_info->next_to_watch = i;
3317         buffer_info->dma = 0;
3318         i++;
3319         if (i == tx_ring->count)
3320                 i = 0;
3321
3322         tx_ring->next_to_use = i;
3323
3324         return true;
3325 }
3326
3327 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3328                                    struct sk_buff *skb, u32 tx_flags)
3329 {
3330         struct e1000_adv_tx_context_desc *context_desc;
3331         struct pci_dev *pdev = tx_ring->pdev;
3332         struct igb_buffer *buffer_info;
3333         u32 info = 0, tu_cmd = 0;
3334         unsigned int i;
3335
3336         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3337             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3338                 i = tx_ring->next_to_use;
3339                 buffer_info = &tx_ring->buffer_info[i];
3340                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3341
3342                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3343                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3344
3345                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3346                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3347                         info |= skb_network_header_len(skb);
3348
3349                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3350
3351                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3352
3353                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3354                         __be16 protocol;
3355
3356                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3357                                 const struct vlan_ethhdr *vhdr =
3358                                           (const struct vlan_ethhdr*)skb->data;
3359
3360                                 protocol = vhdr->h_vlan_encapsulated_proto;
3361                         } else {
3362                                 protocol = skb->protocol;
3363                         }
3364
3365                         switch (protocol) {
3366                         case cpu_to_be16(ETH_P_IP):
3367                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3368                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3369                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3370                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3371                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3372                                 break;
3373                         case cpu_to_be16(ETH_P_IPV6):
3374                                 /* XXX what about other V6 headers?? */
3375                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3376                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3377                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3378                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3379                                 break;
3380                         default:
3381                                 if (unlikely(net_ratelimit()))
3382                                         dev_warn(&pdev->dev,
3383                                             "partial checksum but proto=%x!\n",
3384                                             skb->protocol);
3385                                 break;
3386                         }
3387                 }
3388
3389                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3390                 context_desc->seqnum_seed = 0;
3391                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3392                         context_desc->mss_l4len_idx =
3393                                 cpu_to_le32(tx_ring->reg_idx << 4);
3394
3395                 buffer_info->time_stamp = jiffies;
3396                 buffer_info->next_to_watch = i;
3397                 buffer_info->dma = 0;
3398
3399                 i++;
3400                 if (i == tx_ring->count)
3401                         i = 0;
3402                 tx_ring->next_to_use = i;
3403
3404                 return true;
3405         }
3406         return false;
3407 }
3408
/* A buffer handed to one tx descriptor must be smaller than 2^16 bytes;
 * the mapping code enforces this with BUG_ON(). */
#define IGB_MAX_TXD_PWR		16
#define IGB_MAX_DATA_PER_TXD	(1 << IGB_MAX_TXD_PWR)
3411
3412 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3413                                  unsigned int first)
3414 {
3415         struct igb_buffer *buffer_info;
3416         struct pci_dev *pdev = tx_ring->pdev;
3417         unsigned int len = skb_headlen(skb);
3418         unsigned int count = 0, i;
3419         unsigned int f;
3420         dma_addr_t *map;
3421
3422         i = tx_ring->next_to_use;
3423
3424         if (skb_dma_map(&pdev->dev, skb, DMA_TO_DEVICE)) {
3425                 dev_err(&pdev->dev, "TX DMA map failed\n");
3426                 return 0;
3427         }
3428
3429         map = skb_shinfo(skb)->dma_maps;
3430
3431         buffer_info = &tx_ring->buffer_info[i];
3432         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3433         buffer_info->length = len;
3434         /* set time_stamp *before* dma to help avoid a possible race */
3435         buffer_info->time_stamp = jiffies;
3436         buffer_info->next_to_watch = i;
3437         buffer_info->dma = skb_shinfo(skb)->dma_head;
3438
3439         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3440                 struct skb_frag_struct *frag;
3441
3442                 i++;
3443                 if (i == tx_ring->count)
3444                         i = 0;
3445
3446                 frag = &skb_shinfo(skb)->frags[f];
3447                 len = frag->size;
3448
3449                 buffer_info = &tx_ring->buffer_info[i];
3450                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3451                 buffer_info->length = len;
3452                 buffer_info->time_stamp = jiffies;
3453                 buffer_info->next_to_watch = i;
3454                 buffer_info->dma = map[count];
3455                 count++;
3456         }
3457
3458         tx_ring->buffer_info[i].skb = skb;
3459         tx_ring->buffer_info[first].next_to_watch = i;
3460
3461         return ++count;
3462 }
3463
3464 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3465                                     int tx_flags, int count, u32 paylen,
3466                                     u8 hdr_len)
3467 {
3468         union e1000_adv_tx_desc *tx_desc;
3469         struct igb_buffer *buffer_info;
3470         u32 olinfo_status = 0, cmd_type_len;
3471         unsigned int i = tx_ring->next_to_use;
3472
3473         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3474                         E1000_ADVTXD_DCMD_DEXT);
3475
3476         if (tx_flags & IGB_TX_FLAGS_VLAN)
3477                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3478
3479         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3480                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3481
3482         if (tx_flags & IGB_TX_FLAGS_TSO) {
3483                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3484
3485                 /* insert tcp checksum */
3486                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3487
3488                 /* insert ip checksum */
3489                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3490                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3491
3492         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3493                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3494         }
3495
3496         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3497             (tx_flags & (IGB_TX_FLAGS_CSUM |
3498                          IGB_TX_FLAGS_TSO |
3499                          IGB_TX_FLAGS_VLAN)))
3500                 olinfo_status |= tx_ring->reg_idx << 4;
3501
3502         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3503
3504         do {
3505                 buffer_info = &tx_ring->buffer_info[i];
3506                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3507                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3508                 tx_desc->read.cmd_type_len =
3509                         cpu_to_le32(cmd_type_len | buffer_info->length);
3510                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3511                 count--;
3512                 i++;
3513                 if (i == tx_ring->count)
3514                         i = 0;
3515         } while (count > 0);
3516
3517         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3518         /* Force memory writes to complete before letting h/w
3519          * know there are new descriptors to fetch.  (Only
3520          * applicable for weak-ordered memory model archs,
3521          * such as IA-64). */
3522         wmb();
3523
3524         tx_ring->next_to_use = i;
3525         writel(i, tx_ring->tail);
3526         /* we need this if more than one processor can write to our tail
3527          * at a time, it syncronizes IO on IA64/Altix systems */
3528         mmiowb();
3529 }
3530
3531 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3532 {
3533         struct net_device *netdev = tx_ring->netdev;
3534
3535         netif_stop_subqueue(netdev, tx_ring->queue_index);
3536
3537         /* Herbert's original patch had:
3538          *  smp_mb__after_netif_stop_queue();
3539          * but since that doesn't exist yet, just open code it. */
3540         smp_mb();
3541
3542         /* We need to check again in a case another CPU has just
3543          * made room available. */
3544         if (igb_desc_unused(tx_ring) < size)
3545                 return -EBUSY;
3546
3547         /* A reprieve! */
3548         netif_wake_subqueue(netdev, tx_ring->queue_index);
3549         tx_ring->tx_stats.restart_queue++;
3550         return 0;
3551 }
3552
/*
 * Make sure @size descriptors are free on @tx_ring.  Returns 0 when they
 * are; otherwise defers to __igb_maybe_stop_tx(), which stops the queue
 * and re-checks.
 */
static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
{
	if (igb_desc_unused(tx_ring) < size)
		return __igb_maybe_stop_tx(tx_ring, size);

	return 0;
}
3559
3560 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3561                                     struct igb_ring *tx_ring)
3562 {
3563         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3564         unsigned int first;
3565         unsigned int tx_flags = 0;
3566         u8 hdr_len = 0;
3567         int tso = 0, count;
3568         union skb_shared_tx *shtx = skb_tx(skb);
3569
3570         /* need: 1 descriptor per page,
3571          *       + 2 desc gap to keep tail from touching head,
3572          *       + 1 desc for skb->data,
3573          *       + 1 desc for context descriptor,
3574          * otherwise try next time */
3575         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3576                 /* this is a hard error */
3577                 return NETDEV_TX_BUSY;
3578         }
3579
3580         if (unlikely(shtx->hardware)) {
3581                 shtx->in_progress = 1;
3582                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3583         }
3584
3585         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3586                 tx_flags |= IGB_TX_FLAGS_VLAN;
3587                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3588         }
3589
3590         if (skb->protocol == htons(ETH_P_IP))
3591                 tx_flags |= IGB_TX_FLAGS_IPV4;
3592
3593         first = tx_ring->next_to_use;
3594         if (skb_is_gso(skb)) {
3595                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3596
3597                 if (tso < 0) {
3598                         dev_kfree_skb_any(skb);
3599                         return NETDEV_TX_OK;
3600                 }
3601         }
3602
3603         if (tso)
3604                 tx_flags |= IGB_TX_FLAGS_TSO;
3605         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3606                  (skb->ip_summed == CHECKSUM_PARTIAL))
3607                 tx_flags |= IGB_TX_FLAGS_CSUM;
3608
3609         /*
3610          * count reflects descriptors mapped, if 0 or less then mapping error
3611          * has occured and we need to rewind the descriptor queue
3612          */
3613         count = igb_tx_map_adv(tx_ring, skb, first);
3614         if (count <= 0) {
3615                 dev_kfree_skb_any(skb);
3616                 tx_ring->buffer_info[first].time_stamp = 0;
3617                 tx_ring->next_to_use = first;
3618                 return NETDEV_TX_OK;
3619         }
3620
3621         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3622
3623         /* Make sure there is space in the ring for the next send. */
3624         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3625
3626         return NETDEV_TX_OK;
3627 }
3628
3629 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3630                                       struct net_device *netdev)
3631 {
3632         struct igb_adapter *adapter = netdev_priv(netdev);
3633         struct igb_ring *tx_ring;
3634         int r_idx = 0;
3635
3636         if (test_bit(__IGB_DOWN, &adapter->state)) {
3637                 dev_kfree_skb_any(skb);
3638                 return NETDEV_TX_OK;
3639         }
3640
3641         if (skb->len <= 0) {
3642                 dev_kfree_skb_any(skb);
3643                 return NETDEV_TX_OK;
3644         }
3645
3646         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3647         tx_ring = adapter->multi_tx_table[r_idx];
3648
3649         /* This goes back to the question of how to logically map a tx queue
3650          * to a flow.  Right now, performance is impacted slightly negatively
3651          * if using multiple tx queues.  If the stack breaks away from a
3652          * single qdisc implementation, we can look at this again. */
3653         return igb_xmit_frame_ring_adv(skb, tx_ring);
3654 }
3655
3656 /**
3657  * igb_tx_timeout - Respond to a Tx Hang
3658  * @netdev: network interface device structure
3659  **/
3660 static void igb_tx_timeout(struct net_device *netdev)
3661 {
3662         struct igb_adapter *adapter = netdev_priv(netdev);
3663         struct e1000_hw *hw = &adapter->hw;
3664
3665         /* Do the reset outside of interrupt context */
3666         adapter->tx_timeout_count++;
3667
3668         schedule_work(&adapter->reset_task);
3669         wr32(E1000_EICS,
3670              (adapter->eims_enable_mask & ~adapter->eims_other));
3671 }
3672
3673 static void igb_reset_task(struct work_struct *work)
3674 {
3675         struct igb_adapter *adapter;
3676         adapter = container_of(work, struct igb_adapter, reset_task);
3677
3678         igb_reinit_locked(adapter);
3679 }
3680
3681 /**
3682  * igb_get_stats - Get System Network Statistics
3683  * @netdev: network interface device structure
3684  *
3685  * Returns the address of the device statistics structure.
3686  * The statistics are actually updated from the timer callback.
3687  **/
3688 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3689 {
3690         /* only return the current stats */
3691         return &netdev->stats;
3692 }
3693
3694 /**
3695  * igb_change_mtu - Change the Maximum Transfer Unit
3696  * @netdev: network interface device structure
3697  * @new_mtu: new value for maximum frame size
3698  *
3699  * Returns 0 on success, negative on failure
3700  **/
3701 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3702 {
3703         struct igb_adapter *adapter = netdev_priv(netdev);
3704         struct pci_dev *pdev = adapter->pdev;
3705         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3706         u32 rx_buffer_len, i;
3707
3708         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3709                 dev_err(&pdev->dev, "Invalid MTU setting\n");
3710                 return -EINVAL;
3711         }
3712
3713         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3714                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3715                 return -EINVAL;
3716         }
3717
3718         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3719                 msleep(1);
3720
3721         /* igb_down has a dependency on max_frame_size */
3722         adapter->max_frame_size = max_frame;
3723
3724         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3725          * means we reserve 2 more, this pushes us to allocate from the next
3726          * larger slab size.
3727          * i.e. RXBUFFER_2048 --> size-4096 slab
3728          */
3729
3730         if (max_frame <= IGB_RXBUFFER_1024)
3731                 rx_buffer_len = IGB_RXBUFFER_1024;
3732         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3733                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3734         else
3735                 rx_buffer_len = IGB_RXBUFFER_128;
3736
3737         if (netif_running(netdev))
3738                 igb_down(adapter);
3739
3740         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3741                  netdev->mtu, new_mtu);
3742         netdev->mtu = new_mtu;
3743
3744         for (i = 0; i < adapter->num_rx_queues; i++)
3745                 adapter->rx_ring[i].rx_buffer_len = rx_buffer_len;
3746
3747         if (netif_running(netdev))
3748                 igb_up(adapter);
3749         else
3750                 igb_reset(adapter);
3751
3752         clear_bit(__IGB_RESETTING, &adapter->state);
3753
3754         return 0;
3755 }
3756
3757 /**
3758  * igb_update_stats - Update the board statistics counters
3759  * @adapter: board private structure
3760  **/
3761
3762 void igb_update_stats(struct igb_adapter *adapter)
3763 {
3764         struct net_device *netdev = adapter->netdev;
3765         struct e1000_hw *hw = &adapter->hw;
3766         struct pci_dev *pdev = adapter->pdev;
3767         u32 rnbc;
3768         u16 phy_tmp;
3769         int i;
3770         u64 bytes, packets;
3771
3772 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3773
3774         /*
3775          * Prevent stats update while adapter is being reset, or if the pci
3776          * connection is down.
3777          */
3778         if (adapter->link_speed == 0)
3779                 return;
3780         if (pci_channel_offline(pdev))
3781                 return;
3782
3783         bytes = 0;
3784         packets = 0;
3785         for (i = 0; i < adapter->num_rx_queues; i++) {
3786                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3787                 adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
3788                 netdev->stats.rx_fifo_errors += rqdpc_tmp;
3789                 bytes += adapter->rx_ring[i].rx_stats.bytes;
3790                 packets += adapter->rx_ring[i].rx_stats.packets;
3791         }
3792
3793         netdev->stats.rx_bytes = bytes;
3794         netdev->stats.rx_packets = packets;
3795
3796         bytes = 0;
3797         packets = 0;
3798         for (i = 0; i < adapter->num_tx_queues; i++) {
3799                 bytes += adapter->tx_ring[i].tx_stats.bytes;
3800                 packets += adapter->tx_ring[i].tx_stats.packets;
3801         }
3802         netdev->stats.tx_bytes = bytes;
3803         netdev->stats.tx_packets = packets;
3804
3805         /* read stats registers */
3806         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
3807         adapter->stats.gprc += rd32(E1000_GPRC);
3808         adapter->stats.gorc += rd32(E1000_GORCL);
3809         rd32(E1000_GORCH); /* clear GORCL */
3810         adapter->stats.bprc += rd32(E1000_BPRC);
3811         adapter->stats.mprc += rd32(E1000_MPRC);
3812         adapter->stats.roc += rd32(E1000_ROC);
3813
3814         adapter->stats.prc64 += rd32(E1000_PRC64);
3815         adapter->stats.prc127 += rd32(E1000_PRC127);
3816         adapter->stats.prc255 += rd32(E1000_PRC255);
3817         adapter->stats.prc511 += rd32(E1000_PRC511);
3818         adapter->stats.prc1023 += rd32(E1000_PRC1023);
3819         adapter->stats.prc1522 += rd32(E1000_PRC1522);
3820         adapter->stats.symerrs += rd32(E1000_SYMERRS);
3821         adapter->stats.sec += rd32(E1000_SEC);
3822
3823         adapter->stats.mpc += rd32(E1000_MPC);
3824         adapter->stats.scc += rd32(E1000_SCC);
3825         adapter->stats.ecol += rd32(E1000_ECOL);
3826         adapter->stats.mcc += rd32(E1000_MCC);
3827         adapter->stats.latecol += rd32(E1000_LATECOL);
3828         adapter->stats.dc += rd32(E1000_DC);
3829         adapter->stats.rlec += rd32(E1000_RLEC);
3830         adapter->stats.xonrxc += rd32(E1000_XONRXC);
3831         adapter->stats.xontxc += rd32(E1000_XONTXC);
3832         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
3833         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
3834         adapter->stats.fcruc += rd32(E1000_FCRUC);
3835         adapter->stats.gptc += rd32(E1000_GPTC);
3836         adapter->stats.gotc += rd32(E1000_GOTCL);
3837         rd32(E1000_GOTCH); /* clear GOTCL */
3838         rnbc = rd32(E1000_RNBC);
3839         adapter->stats.rnbc += rnbc;
3840         netdev->stats.rx_fifo_errors += rnbc;
3841         adapter->stats.ruc += rd32(E1000_RUC);
3842         adapter->stats.rfc += rd32(E1000_RFC);
3843         adapter->stats.rjc += rd32(E1000_RJC);
3844         adapter->stats.tor += rd32(E1000_TORH);
3845         adapter->stats.tot += rd32(E1000_TOTH);
3846         adapter->stats.tpr += rd32(E1000_TPR);
3847
3848         adapter->stats.ptc64 += rd32(E1000_PTC64);
3849         adapter->stats.ptc127 += rd32(E1000_PTC127);
3850         adapter->stats.ptc255 += rd32(E1000_PTC255);
3851         adapter->stats.ptc511 += rd32(E1000_PTC511);
3852         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
3853         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
3854
3855         adapter->stats.mptc += rd32(E1000_MPTC);
3856         adapter->stats.bptc += rd32(E1000_BPTC);
3857
3858         /* used for adaptive IFS */
3859         hw->mac.tx_packet_delta = rd32(E1000_TPT);
3860         adapter->stats.tpt += hw->mac.tx_packet_delta;
3861         hw->mac.collision_delta = rd32(E1000_COLC);
3862         adapter->stats.colc += hw->mac.collision_delta;
3863
3864         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
3865         adapter->stats.rxerrc += rd32(E1000_RXERRC);
3866         adapter->stats.tncrs += rd32(E1000_TNCRS);
3867         adapter->stats.tsctc += rd32(E1000_TSCTC);
3868         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
3869
3870         adapter->stats.iac += rd32(E1000_IAC);
3871         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
3872         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
3873         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
3874         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
3875         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
3876         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
3877         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
3878         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
3879
3880         /* Fill out the OS statistics structure */
3881         netdev->stats.multicast = adapter->stats.mprc;
3882         netdev->stats.collisions = adapter->stats.colc;
3883
3884         /* Rx Errors */
3885
3886         /* RLEC on some newer hardware can be incorrect so build
3887          * our own version based on RUC and ROC */
3888         netdev->stats.rx_errors = adapter->stats.rxerrc +
3889                 adapter->stats.crcerrs + adapter->stats.algnerrc +
3890                 adapter->stats.ruc + adapter->stats.roc +
3891                 adapter->stats.cexterr;
3892         netdev->stats.rx_length_errors = adapter->stats.ruc +
3893                                               adapter->stats.roc;
3894         netdev->stats.rx_crc_errors = adapter->stats.crcerrs;
3895         netdev->stats.rx_frame_errors = adapter->stats.algnerrc;
3896         netdev->stats.rx_missed_errors = adapter->stats.mpc;
3897
3898         /* Tx Errors */
3899         netdev->stats.tx_errors = adapter->stats.ecol +
3900                                        adapter->stats.latecol;
3901         netdev->stats.tx_aborted_errors = adapter->stats.ecol;
3902         netdev->stats.tx_window_errors = adapter->stats.latecol;
3903         netdev->stats.tx_carrier_errors = adapter->stats.tncrs;
3904
3905         /* Tx Dropped needs to be maintained elsewhere */
3906
3907         /* Phy Stats */
3908         if (hw->phy.media_type == e1000_media_type_copper) {
3909                 if ((adapter->link_speed == SPEED_1000) &&
3910                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
3911                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
3912                         adapter->phy_stats.idle_errors += phy_tmp;
3913                 }
3914         }
3915
3916         /* Management Stats */
3917         adapter->stats.mgptc += rd32(E1000_MGTPTC);
3918         adapter->stats.mgprc += rd32(E1000_MGTPRC);
3919         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
3920 }
3921
3922 static irqreturn_t igb_msix_other(int irq, void *data)
3923 {
3924         struct igb_adapter *adapter = data;
3925         struct e1000_hw *hw = &adapter->hw;
3926         u32 icr = rd32(E1000_ICR);
3927         /* reading ICR causes bit 31 of EICR to be cleared */
3928
3929         if (icr & E1000_ICR_DOUTSYNC) {
3930                 /* HW is reporting DMA is out of sync */
3931                 adapter->stats.doosync++;
3932         }
3933
3934         /* Check for a mailbox event */
3935         if (icr & E1000_ICR_VMMB)
3936                 igb_msg_task(adapter);
3937
3938         if (icr & E1000_ICR_LSC) {
3939                 hw->mac.get_link_status = 1;
3940                 /* guard against interrupt when we're going down */
3941                 if (!test_bit(__IGB_DOWN, &adapter->state))
3942                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
3943         }
3944
3945         if (adapter->vfs_allocated_count)
3946                 wr32(E1000_IMS, E1000_IMS_LSC |
3947                                 E1000_IMS_VMMB |
3948                                 E1000_IMS_DOUTSYNC);
3949         else
3950                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
3951         wr32(E1000_EIMS, adapter->eims_other);
3952
3953         return IRQ_HANDLED;
3954 }
3955
3956 static void igb_write_itr(struct igb_q_vector *q_vector)
3957 {
3958         u32 itr_val = q_vector->itr_val & 0x7FFC;
3959
3960         if (!q_vector->set_itr)
3961                 return;
3962
3963         if (!itr_val)
3964                 itr_val = 0x4;
3965
3966         if (q_vector->itr_shift)
3967                 itr_val |= itr_val << q_vector->itr_shift;
3968         else
3969                 itr_val |= 0x8000000;
3970
3971         writel(itr_val, q_vector->itr_register);
3972         q_vector->set_itr = 0;
3973 }
3974
3975 static irqreturn_t igb_msix_ring(int irq, void *data)
3976 {
3977         struct igb_q_vector *q_vector = data;
3978
3979         /* Write the ITR value calculated from the previous interrupt. */
3980         igb_write_itr(q_vector);
3981
3982         napi_schedule(&q_vector->napi);
3983
3984         return IRQ_HANDLED;
3985 }
3986
3987 #ifdef CONFIG_IGB_DCA
3988 static void igb_update_dca(struct igb_q_vector *q_vector)
3989 {
3990         struct igb_adapter *adapter = q_vector->adapter;
3991         struct e1000_hw *hw = &adapter->hw;
3992         int cpu = get_cpu();
3993
3994         if (q_vector->cpu == cpu)
3995                 goto out_no_update;
3996
3997         if (q_vector->tx_ring) {
3998                 int q = q_vector->tx_ring->reg_idx;
3999                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4000                 if (hw->mac.type == e1000_82575) {
4001                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4002                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4003                 } else {
4004                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4005                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4006                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4007                 }
4008                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4009                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4010         }
4011         if (q_vector->rx_ring) {
4012                 int q = q_vector->rx_ring->reg_idx;
4013                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4014                 if (hw->mac.type == e1000_82575) {
4015                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4016                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4017                 } else {
4018                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4019                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4020                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4021                 }
4022                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4023                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4024                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4025                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4026         }
4027         q_vector->cpu = cpu;
4028 out_no_update:
4029         put_cpu();
4030 }
4031
4032 static void igb_setup_dca(struct igb_adapter *adapter)
4033 {
4034         struct e1000_hw *hw = &adapter->hw;
4035         int i;
4036
4037         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4038                 return;
4039
4040         /* Always use CB2 mode, difference is masked in the CB driver. */
4041         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4042
4043         for (i = 0; i < adapter->num_q_vectors; i++) {
4044                 struct igb_q_vector *q_vector = adapter->q_vector[i];
4045                 q_vector->cpu = -1;
4046                 igb_update_dca(q_vector);
4047         }
4048 }
4049
4050 static int __igb_notify_dca(struct device *dev, void *data)
4051 {
4052         struct net_device *netdev = dev_get_drvdata(dev);
4053         struct igb_adapter *adapter = netdev_priv(netdev);
4054         struct pci_dev *pdev = adapter->pdev;
4055         struct e1000_hw *hw = &adapter->hw;
4056         unsigned long event = *(unsigned long *)data;
4057
4058         switch (event) {
4059         case DCA_PROVIDER_ADD:
4060                 /* if already enabled, don't do it again */
4061                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4062                         break;
4063                 if (dca_add_requester(dev) == 0) {
4064                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4065                         dev_info(&pdev->dev, "DCA enabled\n");
4066                         igb_setup_dca(adapter);
4067                         break;
4068                 }
4069                 /* Fall Through since DCA is disabled. */
4070         case DCA_PROVIDER_REMOVE:
4071                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4072                         /* without this a class_device is left
4073                          * hanging around in the sysfs model */
4074                         dca_remove_requester(dev);
4075                         dev_info(&pdev->dev, "DCA disabled\n");
4076                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4077                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4078                 }
4079                 break;
4080         }
4081
4082         return 0;
4083 }
4084
4085 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4086                           void *p)
4087 {
4088         int ret_val;
4089
4090         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4091                                          __igb_notify_dca);
4092
4093         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4094 }
4095 #endif /* CONFIG_IGB_DCA */
4096
4097 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4098 {
4099         struct e1000_hw *hw = &adapter->hw;
4100         u32 ping;
4101         int i;
4102
4103         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4104                 ping = E1000_PF_CONTROL_MSG;
4105                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4106                         ping |= E1000_VT_MSGTYPE_CTS;
4107                 igb_write_mbx(hw, &ping, 1, i);
4108         }
4109 }
4110
4111 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4112 {
4113         struct e1000_hw *hw = &adapter->hw;
4114         u32 vmolr = rd32(E1000_VMOLR(vf));
4115         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4116
4117         vf_data->flags |= ~(IGB_VF_FLAG_UNI_PROMISC |
4118                             IGB_VF_FLAG_MULTI_PROMISC);
4119         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4120
4121         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4122                 vmolr |= E1000_VMOLR_MPME;
4123                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4124         } else {
4125                 /*
4126                  * if we have hashes and we are clearing a multicast promisc
4127                  * flag we need to write the hashes to the MTA as this step
4128                  * was previously skipped
4129                  */
4130                 if (vf_data->num_vf_mc_hashes > 30) {
4131                         vmolr |= E1000_VMOLR_MPME;
4132                 } else if (vf_data->num_vf_mc_hashes) {
4133                         int j;
4134                         vmolr |= E1000_VMOLR_ROMPE;
4135                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4136                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4137                 }
4138         }
4139
4140         wr32(E1000_VMOLR(vf), vmolr);
4141
4142         /* there are flags left unprocessed, likely not supported */
4143         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4144                 return -EINVAL;
4145
4146         return 0;
4147
4148 }
4149
4150 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4151                                   u32 *msgbuf, u32 vf)
4152 {
4153         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4154         u16 *hash_list = (u16 *)&msgbuf[1];
4155         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4156         int i;
4157
4158         /* salt away the number of multicast addresses assigned
4159          * to this VF for later use to restore when the PF multi cast
4160          * list changes
4161          */
4162         vf_data->num_vf_mc_hashes = n;
4163
4164         /* only up to 30 hash values supported */
4165         if (n > 30)
4166                 n = 30;
4167
4168         /* store the hashes for later use */
4169         for (i = 0; i < n; i++)
4170                 vf_data->vf_mc_hashes[i] = hash_list[i];
4171
4172         /* Flush and reset the mta with the new values */
4173         igb_set_rx_mode(adapter->netdev);
4174
4175         return 0;
4176 }
4177
4178 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4179 {
4180         struct e1000_hw *hw = &adapter->hw;
4181         struct vf_data_storage *vf_data;
4182         int i, j;
4183
4184         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4185                 u32 vmolr = rd32(E1000_VMOLR(i));
4186                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4187
4188                 vf_data = &adapter->vf_data[i];
4189
4190                 if ((vf_data->num_vf_mc_hashes > 30) ||
4191                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4192                         vmolr |= E1000_VMOLR_MPME;
4193                 } else if (vf_data->num_vf_mc_hashes) {
4194                         vmolr |= E1000_VMOLR_ROMPE;
4195                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4196                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4197                 }
4198                 wr32(E1000_VMOLR(i), vmolr);
4199         }
4200 }
4201
4202 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4203 {
4204         struct e1000_hw *hw = &adapter->hw;
4205         u32 pool_mask, reg, vid;
4206         int i;
4207
4208         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4209
4210         /* Find the vlan filter for this id */
4211         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4212                 reg = rd32(E1000_VLVF(i));
4213
4214                 /* remove the vf from the pool */
4215                 reg &= ~pool_mask;
4216
4217                 /* if pool is empty then remove entry from vfta */
4218                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4219                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4220                         reg = 0;
4221                         vid = reg & E1000_VLVF_VLANID_MASK;
4222                         igb_vfta_set(hw, vid, false);
4223                 }
4224
4225                 wr32(E1000_VLVF(i), reg);
4226         }
4227
4228         adapter->vf_data[vf].vlans_enabled = 0;
4229 }
4230
4231 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4232 {
4233         struct e1000_hw *hw = &adapter->hw;
4234         u32 reg, i;
4235
4236         /* The vlvf table only exists on 82576 hardware and newer */
4237         if (hw->mac.type < e1000_82576)
4238                 return -1;
4239
4240         /* we only need to do this if VMDq is enabled */
4241         if (!adapter->vfs_allocated_count)
4242                 return -1;
4243
4244         /* Find the vlan filter for this id */
4245         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4246                 reg = rd32(E1000_VLVF(i));
4247                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4248                     vid == (reg & E1000_VLVF_VLANID_MASK))
4249                         break;
4250         }
4251
4252         if (add) {
4253                 if (i == E1000_VLVF_ARRAY_SIZE) {
4254                         /* Did not find a matching VLAN ID entry that was
4255                          * enabled.  Search for a free filter entry, i.e.
4256                          * one without the enable bit set
4257                          */
4258                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4259                                 reg = rd32(E1000_VLVF(i));
4260                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4261                                         break;
4262                         }
4263                 }
4264                 if (i < E1000_VLVF_ARRAY_SIZE) {
4265                         /* Found an enabled/available entry */
4266                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4267
4268                         /* if !enabled we need to set this up in vfta */
4269                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4270                                 /* add VID to filter table */
4271                                 igb_vfta_set(hw, vid, true);
4272                                 reg |= E1000_VLVF_VLANID_ENABLE;
4273                         }
4274                         reg &= ~E1000_VLVF_VLANID_MASK;
4275                         reg |= vid;
4276                         wr32(E1000_VLVF(i), reg);
4277
4278                         /* do not modify RLPML for PF devices */
4279                         if (vf >= adapter->vfs_allocated_count)
4280                                 return 0;
4281
4282                         if (!adapter->vf_data[vf].vlans_enabled) {
4283                                 u32 size;
4284                                 reg = rd32(E1000_VMOLR(vf));
4285                                 size = reg & E1000_VMOLR_RLPML_MASK;
4286                                 size += 4;
4287                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4288                                 reg |= size;
4289                                 wr32(E1000_VMOLR(vf), reg);
4290                         }
4291
4292                         adapter->vf_data[vf].vlans_enabled++;
4293                         return 0;
4294                 }
4295         } else {
4296                 if (i < E1000_VLVF_ARRAY_SIZE) {
4297                         /* remove vf from the pool */
4298                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4299                         /* if pool is empty then remove entry from vfta */
4300                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4301                                 reg = 0;
4302                                 igb_vfta_set(hw, vid, false);
4303                         }
4304                         wr32(E1000_VLVF(i), reg);
4305
4306                         /* do not modify RLPML for PF devices */
4307                         if (vf >= adapter->vfs_allocated_count)
4308                                 return 0;
4309
4310                         adapter->vf_data[vf].vlans_enabled--;
4311                         if (!adapter->vf_data[vf].vlans_enabled) {
4312                                 u32 size;
4313                                 reg = rd32(E1000_VMOLR(vf));
4314                                 size = reg & E1000_VMOLR_RLPML_MASK;
4315                                 size -= 4;
4316                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4317                                 reg |= size;
4318                                 wr32(E1000_VMOLR(vf), reg);
4319                         }
4320                         return 0;
4321                 }
4322         }
4323         return -1;
4324 }
4325
4326 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4327 {
4328         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4329         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4330
4331         return igb_vlvf_set(adapter, vid, add, vf);
4332 }
4333
4334 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4335 {
4336         /* clear all flags */
4337         adapter->vf_data[vf].flags = 0;
4338         adapter->vf_data[vf].last_nack = jiffies;
4339
4340         /* reset offloads to defaults */
4341         igb_set_vmolr(adapter, vf);
4342
4343         /* reset vlans for device */
4344         igb_clear_vf_vfta(adapter, vf);
4345
4346         /* reset multicast table array for vf */
4347         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4348
4349         /* Flush and reset the mta with the new values */
4350         igb_set_rx_mode(adapter->netdev);
4351 }
4352
4353 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4354 {
4355         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4356
4357         /* generate a new mac address as we were hotplug removed/added */
4358         random_ether_addr(vf_mac);
4359
4360         /* process remaining reset events */
4361         igb_vf_reset(adapter, vf);
4362 }
4363
4364 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4365 {
4366         struct e1000_hw *hw = &adapter->hw;
4367         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4368         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4369         u32 reg, msgbuf[3];
4370         u8 *addr = (u8 *)(&msgbuf[1]);
4371
4372         /* process all the same items cleared in a function level reset */
4373         igb_vf_reset(adapter, vf);
4374
4375         /* set vf mac address */
4376         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4377
4378         /* enable transmit and receive for vf */
4379         reg = rd32(E1000_VFTE);
4380         wr32(E1000_VFTE, reg | (1 << vf));
4381         reg = rd32(E1000_VFRE);
4382         wr32(E1000_VFRE, reg | (1 << vf));
4383
4384         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4385
4386         /* reply to reset with ack and vf mac address */
4387         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4388         memcpy(addr, vf_mac, 6);
4389         igb_write_mbx(hw, msgbuf, 3, vf);
4390 }
4391
4392 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4393 {
4394         unsigned char *addr = (char *)&msg[1];
4395         int err = -1;
4396
4397         if (is_valid_ether_addr(addr))
4398                 err = igb_set_vf_mac(adapter, vf, addr);
4399
4400         return err;
4401 }
4402
4403 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4404 {
4405         struct e1000_hw *hw = &adapter->hw;
4406         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4407         u32 msg = E1000_VT_MSGTYPE_NACK;
4408
4409         /* if device isn't clear to send it shouldn't be reading either */
4410         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4411             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4412                 igb_write_mbx(hw, &msg, 1, vf);
4413                 vf_data->last_nack = jiffies;
4414         }
4415 }
4416
4417 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4418 {
4419         struct pci_dev *pdev = adapter->pdev;
4420         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4421         struct e1000_hw *hw = &adapter->hw;
4422         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4423         s32 retval;
4424
4425         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4426
4427         if (retval)
4428                 dev_err(&pdev->dev, "Error receiving message from VF\n");
4429
4430         /* this is a message we already processed, do nothing */
4431         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4432                 return;
4433
4434         /*
4435          * until the vf completes a reset it should not be
4436          * allowed to start any configuration.
4437          */
4438
4439         if (msgbuf[0] == E1000_VF_RESET) {
4440                 igb_vf_reset_msg(adapter, vf);
4441                 return;
4442         }
4443
4444         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4445                 msgbuf[0] = E1000_VT_MSGTYPE_NACK;
4446                 if (time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4447                         igb_write_mbx(hw, msgbuf, 1, vf);
4448                         vf_data->last_nack = jiffies;
4449                 }
4450                 return;
4451         }
4452
4453         switch ((msgbuf[0] & 0xFFFF)) {
4454         case E1000_VF_SET_MAC_ADDR:
4455                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4456                 break;
4457         case E1000_VF_SET_PROMISC:
4458                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4459                 break;
4460         case E1000_VF_SET_MULTICAST:
4461                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4462                 break;
4463         case E1000_VF_SET_LPE:
4464                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4465                 break;
4466         case E1000_VF_SET_VLAN:
4467                 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4468                 break;
4469         default:
4470                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4471                 retval = -1;
4472                 break;
4473         }
4474
4475         /* notify the VF of the results of what it sent us */
4476         if (retval)
4477                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4478         else
4479                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4480
4481         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4482
4483         igb_write_mbx(hw, msgbuf, 1, vf);
4484 }
4485
4486 static void igb_msg_task(struct igb_adapter *adapter)
4487 {
4488         struct e1000_hw *hw = &adapter->hw;
4489         u32 vf;
4490
4491         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4492                 /* process any reset requests */
4493                 if (!igb_check_for_rst(hw, vf))
4494                         igb_vf_reset_event(adapter, vf);
4495
4496                 /* process any messages pending */
4497                 if (!igb_check_for_msg(hw, vf))
4498                         igb_rcv_msg_from_vf(adapter, vf);
4499
4500                 /* process any acks */
4501                 if (!igb_check_for_ack(hw, vf))
4502                         igb_rcv_ack_from_vf(adapter, vf);
4503         }
4504 }
4505
4506 /**
4507  *  igb_set_uta - Set unicast filter table address
4508  *  @adapter: board private structure
4509  *
4510  *  The unicast table address is a register array of 32-bit registers.
4511  *  The table is meant to be used in a way similar to how the MTA is used
4512  *  however due to certain limitations in the hardware it is necessary to
4513  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscous
4514  *  enable bit to allow vlan tag stripping when promiscous mode is enabled
4515  **/
4516 static void igb_set_uta(struct igb_adapter *adapter)
4517 {
4518         struct e1000_hw *hw = &adapter->hw;
4519         int i;
4520
4521         /* The UTA table only exists on 82576 hardware and newer */
4522         if (hw->mac.type < e1000_82576)
4523                 return;
4524
4525         /* we only need to do this if VMDq is enabled */
4526         if (!adapter->vfs_allocated_count)
4527                 return;
4528
4529         for (i = 0; i < hw->mac.uta_reg_count; i++)
4530                 array_wr32(E1000_UTA, i, ~0);
4531 }
4532
4533 /**
4534  * igb_intr_msi - Interrupt Handler
4535  * @irq: interrupt number
4536  * @data: pointer to a network interface device structure
4537  **/
4538 static irqreturn_t igb_intr_msi(int irq, void *data)
4539 {
4540         struct igb_adapter *adapter = data;
4541         struct igb_q_vector *q_vector = adapter->q_vector[0];
4542         struct e1000_hw *hw = &adapter->hw;
4543         /* read ICR disables interrupts using IAM */
4544         u32 icr = rd32(E1000_ICR);
4545
4546         igb_write_itr(q_vector);
4547
4548         if (icr & E1000_ICR_DOUTSYNC) {
4549                 /* HW is reporting DMA is out of sync */
4550                 adapter->stats.doosync++;
4551         }
4552
4553         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4554                 hw->mac.get_link_status = 1;
4555                 if (!test_bit(__IGB_DOWN, &adapter->state))
4556                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4557         }
4558
4559         napi_schedule(&q_vector->napi);
4560
4561         return IRQ_HANDLED;
4562 }
4563
4564 /**
4565  * igb_intr - Legacy Interrupt Handler
4566  * @irq: interrupt number
4567  * @data: pointer to a network interface device structure
4568  **/
4569 static irqreturn_t igb_intr(int irq, void *data)
4570 {
4571         struct igb_adapter *adapter = data;
4572         struct igb_q_vector *q_vector = adapter->q_vector[0];
4573         struct e1000_hw *hw = &adapter->hw;
4574         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4575          * need for the IMC write */
4576         u32 icr = rd32(E1000_ICR);
4577         if (!icr)
4578                 return IRQ_NONE;  /* Not our interrupt */
4579
4580         igb_write_itr(q_vector);
4581
4582         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4583          * not set, then the adapter didn't send an interrupt */
4584         if (!(icr & E1000_ICR_INT_ASSERTED))
4585                 return IRQ_NONE;
4586
4587         if (icr & E1000_ICR_DOUTSYNC) {
4588                 /* HW is reporting DMA is out of sync */
4589                 adapter->stats.doosync++;
4590         }
4591
4592         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4593                 hw->mac.get_link_status = 1;
4594                 /* guard against interrupt when we're going down */
4595                 if (!test_bit(__IGB_DOWN, &adapter->state))
4596                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4597         }
4598
4599         napi_schedule(&q_vector->napi);
4600
4601         return IRQ_HANDLED;
4602 }
4603
4604 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4605 {
4606         struct igb_adapter *adapter = q_vector->adapter;
4607         struct e1000_hw *hw = &adapter->hw;
4608
4609         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4610             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4611                 if (!adapter->msix_entries)
4612                         igb_set_itr(adapter);
4613                 else
4614                         igb_update_ring_itr(q_vector);
4615         }
4616
4617         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4618                 if (adapter->msix_entries)
4619                         wr32(E1000_EIMS, q_vector->eims_value);
4620                 else
4621                         igb_irq_enable(adapter);
4622         }
4623 }
4624
4625 /**
4626  * igb_poll - NAPI Rx polling callback
4627  * @napi: napi polling structure
4628  * @budget: count of how many packets we should handle
4629  **/
4630 static int igb_poll(struct napi_struct *napi, int budget)
4631 {
4632         struct igb_q_vector *q_vector = container_of(napi,
4633                                                      struct igb_q_vector,
4634                                                      napi);
4635         int tx_clean_complete = 1, work_done = 0;
4636
4637 #ifdef CONFIG_IGB_DCA
4638         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4639                 igb_update_dca(q_vector);
4640 #endif
4641         if (q_vector->tx_ring)
4642                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4643
4644         if (q_vector->rx_ring)
4645                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4646
4647         if (!tx_clean_complete)
4648                 work_done = budget;
4649
4650         /* If not enough Rx work done, exit the polling mode */
4651         if (work_done < budget) {
4652                 napi_complete(napi);
4653                 igb_ring_irq_enable(q_vector);
4654         }
4655
4656         return work_done;
4657 }
4658
4659 /**
4660  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4661  * @adapter: board private structure
4662  * @shhwtstamps: timestamp structure to update
4663  * @regval: unsigned 64bit system time value.
4664  *
4665  * We need to convert the system time value stored in the RX/TXSTMP registers
4666  * into a hwtstamp which can be used by the upper level timestamping functions
4667  */
4668 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4669                                    struct skb_shared_hwtstamps *shhwtstamps,
4670                                    u64 regval)
4671 {
4672         u64 ns;
4673
4674         ns = timecounter_cyc2time(&adapter->clock, regval);
4675         timecompare_update(&adapter->compare, ns);
4676         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4677         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4678         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4679 }
4680
4681 /**
4682  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4683  * @q_vector: pointer to q_vector containing needed info
4684  * @skb: packet that was just sent
4685  *
4686  * If we were asked to do hardware stamping and such a time stamp is
4687  * available, then it must have been for this skb here because we only
4688  * allow only one such packet into the queue.
4689  */
4690 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4691 {
4692         struct igb_adapter *adapter = q_vector->adapter;
4693         union skb_shared_tx *shtx = skb_tx(skb);
4694         struct e1000_hw *hw = &adapter->hw;
4695         struct skb_shared_hwtstamps shhwtstamps;
4696         u64 regval;
4697
4698         /* if skb does not support hw timestamp or TX stamp not valid exit */
4699         if (likely(!shtx->hardware) ||
4700             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4701                 return;
4702
4703         regval = rd32(E1000_TXSTMPL);
4704         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4705
4706         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4707         skb_tstamp_tx(skb, &shhwtstamps);
4708 }
4709
4710 /**
4711  * igb_clean_tx_irq - Reclaim resources after transmit completes
4712  * @q_vector: pointer to q_vector containing needed info
4713  * returns true if ring is completely cleaned
4714  **/
4715 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4716 {
4717         struct igb_adapter *adapter = q_vector->adapter;
4718         struct igb_ring *tx_ring = q_vector->tx_ring;
4719         struct net_device *netdev = tx_ring->netdev;
4720         struct e1000_hw *hw = &adapter->hw;
4721         struct igb_buffer *buffer_info;
4722         struct sk_buff *skb;
4723         union e1000_adv_tx_desc *tx_desc, *eop_desc;
4724         unsigned int total_bytes = 0, total_packets = 0;
4725         unsigned int i, eop, count = 0;
4726         bool cleaned = false;
4727
4728         i = tx_ring->next_to_clean;
4729         eop = tx_ring->buffer_info[i].next_to_watch;
4730         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4731
4732         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
4733                (count < tx_ring->count)) {
4734                 for (cleaned = false; !cleaned; count++) {
4735                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4736                         buffer_info = &tx_ring->buffer_info[i];
4737                         cleaned = (i == eop);
4738                         skb = buffer_info->skb;
4739
4740                         if (skb) {
4741                                 unsigned int segs, bytecount;
4742                                 /* gso_segs is currently only valid for tcp */
4743                                 segs = skb_shinfo(skb)->gso_segs ?: 1;
4744                                 /* multiply data chunks by size of headers */
4745                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
4746                                             skb->len;
4747                                 total_packets += segs;
4748                                 total_bytes += bytecount;
4749
4750                                 igb_tx_hwtstamp(q_vector, skb);
4751                         }
4752
4753                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4754                         tx_desc->wb.status = 0;
4755
4756                         i++;
4757                         if (i == tx_ring->count)
4758                                 i = 0;
4759                 }
4760                 eop = tx_ring->buffer_info[i].next_to_watch;
4761                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4762         }
4763
4764         tx_ring->next_to_clean = i;
4765
4766         if (unlikely(count &&
4767                      netif_carrier_ok(netdev) &&
4768                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
4769                 /* Make sure that anybody stopping the queue after this
4770                  * sees the new next_to_clean.
4771                  */
4772                 smp_mb();
4773                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
4774                     !(test_bit(__IGB_DOWN, &adapter->state))) {
4775                         netif_wake_subqueue(netdev, tx_ring->queue_index);
4776                         tx_ring->tx_stats.restart_queue++;
4777                 }
4778         }
4779
4780         if (tx_ring->detect_tx_hung) {
4781                 /* Detect a transmit hang in hardware, this serializes the
4782                  * check with the clearing of time_stamp and movement of i */
4783                 tx_ring->detect_tx_hung = false;
4784                 if (tx_ring->buffer_info[i].time_stamp &&
4785                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
4786                                (adapter->tx_timeout_factor * HZ))
4787                     && !(rd32(E1000_STATUS) &
4788                          E1000_STATUS_TXOFF)) {
4789
4790                         /* detected Tx unit hang */
4791                         dev_err(&tx_ring->pdev->dev,
4792                                 "Detected Tx Unit Hang\n"
4793                                 "  Tx Queue             <%d>\n"
4794                                 "  TDH                  <%x>\n"
4795                                 "  TDT                  <%x>\n"
4796                                 "  next_to_use          <%x>\n"
4797                                 "  next_to_clean        <%x>\n"
4798                                 "buffer_info[next_to_clean]\n"
4799                                 "  time_stamp           <%lx>\n"
4800                                 "  next_to_watch        <%x>\n"
4801                                 "  jiffies              <%lx>\n"
4802                                 "  desc.status          <%x>\n",
4803                                 tx_ring->queue_index,
4804                                 readl(tx_ring->head),
4805                                 readl(tx_ring->tail),
4806                                 tx_ring->next_to_use,
4807                                 tx_ring->next_to_clean,
4808                                 tx_ring->buffer_info[eop].time_stamp,
4809                                 eop,
4810                                 jiffies,
4811                                 eop_desc->wb.status);
4812                         netif_stop_subqueue(netdev, tx_ring->queue_index);
4813                 }
4814         }
4815         tx_ring->total_bytes += total_bytes;
4816         tx_ring->total_packets += total_packets;
4817         tx_ring->tx_stats.bytes += total_bytes;
4818         tx_ring->tx_stats.packets += total_packets;
4819         return (count < tx_ring->count);
4820 }
4821
4822 /**
4823  * igb_receive_skb - helper function to handle rx indications
4824  * @q_vector: structure containing interrupt and ring information
4825  * @skb: packet to send up
4826  * @vlan_tag: vlan tag for packet
4827  **/
4828 static void igb_receive_skb(struct igb_q_vector *q_vector,
4829                             struct sk_buff *skb,
4830                             u16 vlan_tag)
4831 {
4832         struct igb_adapter *adapter = q_vector->adapter;
4833
4834         if (vlan_tag)
4835                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
4836                                  vlan_tag, skb);
4837         else
4838                 napi_gro_receive(&q_vector->napi, skb);
4839 }
4840
4841 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
4842                                        u32 status_err, struct sk_buff *skb)
4843 {
4844         skb->ip_summed = CHECKSUM_NONE;
4845
4846         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
4847         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
4848              (status_err & E1000_RXD_STAT_IXSM))
4849                 return;
4850
4851         /* TCP/UDP checksum error bit is set */
4852         if (status_err &
4853             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
4854                 /*
4855                  * work around errata with sctp packets where the TCPE aka
4856                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
4857                  * packets, (aka let the stack check the crc32c)
4858                  */
4859                 if ((skb->len == 60) &&
4860                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
4861                         ring->rx_stats.csum_err++;
4862
4863                 /* let the stack verify checksum errors */
4864                 return;
4865         }
4866         /* It must be a TCP or UDP packet with a valid checksum */
4867         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
4868                 skb->ip_summed = CHECKSUM_UNNECESSARY;
4869
4870         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
4871 }
4872
4873 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
4874                                    struct sk_buff *skb)
4875 {
4876         struct igb_adapter *adapter = q_vector->adapter;
4877         struct e1000_hw *hw = &adapter->hw;
4878         u64 regval;
4879
4880         /*
4881          * If this bit is set, then the RX registers contain the time stamp. No
4882          * other packet will be time stamped until we read these registers, so
4883          * read the registers to make them available again. Because only one
4884          * packet can be time stamped at a time, we know that the register
4885          * values must belong to this one here and therefore we don't need to
4886          * compare any of the additional attributes stored for it.
4887          *
4888          * If nothing went wrong, then it should have a skb_shared_tx that we
4889          * can turn into a skb_shared_hwtstamps.
4890          */
4891         if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
4892                 return;
4893         if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
4894                 return;
4895
4896         regval = rd32(E1000_RXSTMPL);
4897         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4898
4899         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
4900 }
4901 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
4902                                union e1000_adv_rx_desc *rx_desc)
4903 {
4904         /* HW will not DMA in data larger than the given buffer, even if it
4905          * parses the (NFS, of course) header to be larger.  In that case, it
4906          * fills the header buffer and spills the rest into the page.
4907          */
4908         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
4909                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
4910         if (hlen > rx_ring->rx_buffer_len)
4911                 hlen = rx_ring->rx_buffer_len;
4912         return hlen;
4913 }
4914
4915 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
4916                                  int *work_done, int budget)
4917 {
4918         struct igb_ring *rx_ring = q_vector->rx_ring;
4919         struct net_device *netdev = rx_ring->netdev;
4920         struct pci_dev *pdev = rx_ring->pdev;
4921         union e1000_adv_rx_desc *rx_desc , *next_rxd;
4922         struct igb_buffer *buffer_info , *next_buffer;
4923         struct sk_buff *skb;
4924         bool cleaned = false;
4925         int cleaned_count = 0;
4926         unsigned int total_bytes = 0, total_packets = 0;
4927         unsigned int i;
4928         u32 staterr;
4929         u16 length;
4930         u16 vlan_tag;
4931
4932         i = rx_ring->next_to_clean;
4933         buffer_info = &rx_ring->buffer_info[i];
4934         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
4935         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4936
4937         while (staterr & E1000_RXD_STAT_DD) {
4938                 if (*work_done >= budget)
4939                         break;
4940                 (*work_done)++;
4941
4942                 skb = buffer_info->skb;
4943                 prefetch(skb->data - NET_IP_ALIGN);
4944                 buffer_info->skb = NULL;
4945
4946                 i++;
4947                 if (i == rx_ring->count)
4948                         i = 0;
4949
4950                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
4951                 prefetch(next_rxd);
4952                 next_buffer = &rx_ring->buffer_info[i];
4953
4954                 length = le16_to_cpu(rx_desc->wb.upper.length);
4955                 cleaned = true;
4956                 cleaned_count++;
4957
4958                 if (buffer_info->dma) {
4959                         pci_unmap_single(pdev, buffer_info->dma,
4960                                          rx_ring->rx_buffer_len,
4961                                          PCI_DMA_FROMDEVICE);
4962                         buffer_info->dma = 0;
4963                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
4964                                 skb_put(skb, length);
4965                                 goto send_up;
4966                         }
4967                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
4968                 }
4969
4970                 if (length) {
4971                         pci_unmap_page(pdev, buffer_info->page_dma,
4972                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
4973                         buffer_info->page_dma = 0;
4974
4975                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
4976                                                 buffer_info->page,
4977                                                 buffer_info->page_offset,
4978                                                 length);
4979
4980                         if (page_count(buffer_info->page) != 1)
4981                                 buffer_info->page = NULL;
4982                         else
4983                                 get_page(buffer_info->page);
4984
4985                         skb->len += length;
4986                         skb->data_len += length;
4987                         skb->truesize += length;
4988                 }
4989
4990                 if (!(staterr & E1000_RXD_STAT_EOP)) {
4991                         buffer_info->skb = next_buffer->skb;
4992                         buffer_info->dma = next_buffer->dma;
4993                         next_buffer->skb = skb;
4994                         next_buffer->dma = 0;
4995                         goto next_desc;
4996                 }
4997 send_up:
4998                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
4999                         dev_kfree_skb_irq(skb);
5000                         goto next_desc;
5001                 }
5002
5003                 igb_rx_hwtstamp(q_vector, staterr, skb);
5004                 total_bytes += skb->len;
5005                 total_packets++;
5006
5007                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5008
5009                 skb->protocol = eth_type_trans(skb, netdev);
5010                 skb_record_rx_queue(skb, rx_ring->queue_index);
5011
5012                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5013                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5014
5015                 igb_receive_skb(q_vector, skb, vlan_tag);
5016
5017 next_desc:
5018                 rx_desc->wb.upper.status_error = 0;
5019
5020                 /* return some buffers to hardware, one at a time is too slow */
5021                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5022                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5023                         cleaned_count = 0;
5024                 }
5025
5026                 /* use prefetched values */
5027                 rx_desc = next_rxd;
5028                 buffer_info = next_buffer;
5029                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5030         }
5031
5032         rx_ring->next_to_clean = i;
5033         cleaned_count = igb_desc_unused(rx_ring);
5034
5035         if (cleaned_count)
5036                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5037
5038         rx_ring->total_packets += total_packets;
5039         rx_ring->total_bytes += total_bytes;
5040         rx_ring->rx_stats.packets += total_packets;
5041         rx_ring->rx_stats.bytes += total_bytes;
5042         return cleaned;
5043 }
5044
5045 /**
5046  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5047  * @adapter: address of board private structure
5048  **/
5049 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5050 {
5051         struct net_device *netdev = rx_ring->netdev;
5052         union e1000_adv_rx_desc *rx_desc;
5053         struct igb_buffer *buffer_info;
5054         struct sk_buff *skb;
5055         unsigned int i;
5056         int bufsz;
5057
5058         i = rx_ring->next_to_use;
5059         buffer_info = &rx_ring->buffer_info[i];
5060
5061         bufsz = rx_ring->rx_buffer_len;
5062
5063         while (cleaned_count--) {
5064                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5065
5066                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5067                         if (!buffer_info->page) {
5068                                 buffer_info->page = netdev_alloc_page(netdev);
5069                                 if (!buffer_info->page) {
5070                                         rx_ring->rx_stats.alloc_failed++;
5071                                         goto no_buffers;
5072                                 }
5073                                 buffer_info->page_offset = 0;
5074                         } else {
5075                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5076                         }
5077                         buffer_info->page_dma =
5078                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5079                                              buffer_info->page_offset,
5080                                              PAGE_SIZE / 2,
5081                                              PCI_DMA_FROMDEVICE);
5082                         if (pci_dma_mapping_error(rx_ring->pdev,
5083                                                   buffer_info->page_dma)) {
5084                                 buffer_info->page_dma = 0;
5085                                 rx_ring->rx_stats.alloc_failed++;
5086                                 goto no_buffers;
5087                         }
5088                 }
5089
5090                 skb = buffer_info->skb;
5091                 if (!skb) {
5092                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5093                         if (!skb) {
5094                                 rx_ring->rx_stats.alloc_failed++;
5095                                 goto no_buffers;
5096                         }
5097
5098                         buffer_info->skb = skb;
5099                 }
5100                 if (!buffer_info->dma) {
5101                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5102                                                           skb->data,
5103                                                           bufsz,
5104                                                           PCI_DMA_FROMDEVICE);
5105                         if (pci_dma_mapping_error(rx_ring->pdev,
5106                                                   buffer_info->dma)) {
5107                                 buffer_info->dma = 0;
5108                                 rx_ring->rx_stats.alloc_failed++;
5109                                 goto no_buffers;
5110                         }
5111                 }
5112                 /* Refresh the desc even if buffer_addrs didn't change because
5113                  * each write-back erases this info. */
5114                 if (bufsz < IGB_RXBUFFER_1024) {
5115                         rx_desc->read.pkt_addr =
5116                              cpu_to_le64(buffer_info->page_dma);
5117                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5118                 } else {
5119                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5120                         rx_desc->read.hdr_addr = 0;
5121                 }
5122
5123                 i++;
5124                 if (i == rx_ring->count)
5125                         i = 0;
5126                 buffer_info = &rx_ring->buffer_info[i];
5127         }
5128
5129 no_buffers:
5130         if (rx_ring->next_to_use != i) {
5131                 rx_ring->next_to_use = i;
5132                 if (i == 0)
5133                         i = (rx_ring->count - 1);
5134                 else
5135                         i--;
5136
5137                 /* Force memory writes to complete before letting h/w
5138                  * know there are new descriptors to fetch.  (Only
5139                  * applicable for weak-ordered memory model archs,
5140                  * such as IA-64). */
5141                 wmb();
5142                 writel(i, rx_ring->tail);
5143         }
5144 }
5145
5146 /**
5147  * igb_mii_ioctl -
5148  * @netdev:
5149  * @ifreq:
5150  * @cmd:
5151  **/
5152 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5153 {
5154         struct igb_adapter *adapter = netdev_priv(netdev);
5155         struct mii_ioctl_data *data = if_mii(ifr);
5156
5157         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5158                 return -EOPNOTSUPP;
5159
5160         switch (cmd) {
5161         case SIOCGMIIPHY:
5162                 data->phy_id = adapter->hw.phy.addr;
5163                 break;
5164         case SIOCGMIIREG:
5165                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5166                                      &data->val_out))
5167                         return -EIO;
5168                 break;
5169         case SIOCSMIIREG:
5170         default:
5171                 return -EOPNOTSUPP;
5172         }
5173         return 0;
5174 }
5175
5176 /**
5177  * igb_hwtstamp_ioctl - control hardware time stamping
5178  * @netdev:
5179  * @ifreq:
5180  * @cmd:
5181  *
5182  * Outgoing time stamping can be enabled and disabled. Play nice and
5183  * disable it when requested, although it shouldn't case any overhead
5184  * when no packet needs it. At most one packet in the queue may be
5185  * marked for time stamping, otherwise it would be impossible to tell
5186  * for sure to which packet the hardware time stamp belongs.
5187  *
5188  * Incoming time stamping has to be configured via the hardware
5189  * filters. Not all combinations are supported, in particular event
5190  * type has to be specified. Matching the kind of event packet is
5191  * not supported, with the exception of "all V2 events regardless of
5192  * level 2 or 4".
5193  *
5194  **/
5195 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5196                               struct ifreq *ifr, int cmd)
5197 {
5198         struct igb_adapter *adapter = netdev_priv(netdev);
5199         struct e1000_hw *hw = &adapter->hw;
5200         struct hwtstamp_config config;
5201         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5202         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5203         u32 tsync_rx_cfg = 0;
5204         bool is_l4 = false;
5205         bool is_l2 = false;
5206         u32 regval;
5207
5208         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5209                 return -EFAULT;
5210
5211         /* reserved for future extensions */
5212         if (config.flags)
5213                 return -EINVAL;
5214
5215         switch (config.tx_type) {
5216         case HWTSTAMP_TX_OFF:
5217                 tsync_tx_ctl = 0;
5218         case HWTSTAMP_TX_ON:
5219                 break;
5220         default:
5221                 return -ERANGE;
5222         }
5223
5224         switch (config.rx_filter) {
5225         case HWTSTAMP_FILTER_NONE:
5226                 tsync_rx_ctl = 0;
5227                 break;
5228         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5229         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5230         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5231         case HWTSTAMP_FILTER_ALL:
5232                 /*
5233                  * register TSYNCRXCFG must be set, therefore it is not
5234                  * possible to time stamp both Sync and Delay_Req messages
5235                  * => fall back to time stamping all packets
5236                  */
5237                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5238                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5239                 break;
5240         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5241                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5242                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5243                 is_l4 = true;
5244                 break;
5245         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5246                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5247                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5248                 is_l4 = true;
5249                 break;
5250         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5251         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5252                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5253                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5254                 is_l2 = true;
5255                 is_l4 = true;
5256                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5257                 break;
5258         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5259         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5260                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5261                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5262                 is_l2 = true;
5263                 is_l4 = true;
5264                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5265                 break;
5266         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5267         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5268         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5269                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5270                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5271                 is_l2 = true;
5272                 break;
5273         default:
5274                 return -ERANGE;
5275         }
5276
5277         if (hw->mac.type == e1000_82575) {
5278                 if (tsync_rx_ctl | tsync_tx_ctl)
5279                         return -EINVAL;
5280                 return 0;
5281         }
5282
5283         /* enable/disable TX */
5284         regval = rd32(E1000_TSYNCTXCTL);
5285         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5286         regval |= tsync_tx_ctl;
5287         wr32(E1000_TSYNCTXCTL, regval);
5288
5289         /* enable/disable RX */
5290         regval = rd32(E1000_TSYNCRXCTL);
5291         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5292         regval |= tsync_rx_ctl;
5293         wr32(E1000_TSYNCRXCTL, regval);
5294
5295         /* define which PTP packets are time stamped */
5296         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5297
5298         /* define ethertype filter for timestamped packets */
5299         if (is_l2)
5300                 wr32(E1000_ETQF(3),
5301                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5302                                  E1000_ETQF_1588 | /* enable timestamping */
5303                                  ETH_P_1588));     /* 1588 eth protocol type */
5304         else
5305                 wr32(E1000_ETQF(3), 0);
5306
5307 #define PTP_PORT 319
5308         /* L4 Queue Filter[3]: filter by destination port and protocol */
5309         if (is_l4) {
5310                 u32 ftqf = (IPPROTO_UDP /* UDP */
5311                         | E1000_FTQF_VF_BP /* VF not compared */
5312                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5313                         | E1000_FTQF_MASK); /* mask all inputs */
5314                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5315
5316                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5317                 wr32(E1000_IMIREXT(3),
5318                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5319                 if (hw->mac.type == e1000_82576) {
5320                         /* enable source port check */
5321                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5322                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5323                 }
5324                 wr32(E1000_FTQF(3), ftqf);
5325         } else {
5326                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5327         }
5328         wrfl();
5329
5330         adapter->hwtstamp_config = config;
5331
5332         /* clear TX/RX time stamp registers, just to be sure */
5333         regval = rd32(E1000_TXSTMPH);
5334         regval = rd32(E1000_RXSTMPH);
5335
5336         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5337                 -EFAULT : 0;
5338 }
5339
5340 /**
5341  * igb_ioctl -
5342  * @netdev:
5343  * @ifreq:
5344  * @cmd:
5345  **/
5346 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5347 {
5348         switch (cmd) {
5349         case SIOCGMIIPHY:
5350         case SIOCGMIIREG:
5351         case SIOCSMIIREG:
5352                 return igb_mii_ioctl(netdev, ifr, cmd);
5353         case SIOCSHWTSTAMP:
5354                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5355         default:
5356                 return -EOPNOTSUPP;
5357         }
5358 }
5359
5360 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5361 {
5362         struct igb_adapter *adapter = hw->back;
5363         u16 cap_offset;
5364
5365         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5366         if (!cap_offset)
5367                 return -E1000_ERR_CONFIG;
5368
5369         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5370
5371         return 0;
5372 }
5373
5374 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5375 {
5376         struct igb_adapter *adapter = hw->back;
5377         u16 cap_offset;
5378
5379         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5380         if (!cap_offset)
5381                 return -E1000_ERR_CONFIG;
5382
5383         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5384
5385         return 0;
5386 }
5387
5388 static void igb_vlan_rx_register(struct net_device *netdev,
5389                                  struct vlan_group *grp)
5390 {
5391         struct igb_adapter *adapter = netdev_priv(netdev);
5392         struct e1000_hw *hw = &adapter->hw;
5393         u32 ctrl, rctl;
5394
5395         igb_irq_disable(adapter);
5396         adapter->vlgrp = grp;
5397
5398         if (grp) {
5399                 /* enable VLAN tag insert/strip */
5400                 ctrl = rd32(E1000_CTRL);
5401                 ctrl |= E1000_CTRL_VME;
5402                 wr32(E1000_CTRL, ctrl);
5403
5404                 /* Disable CFI check */
5405                 rctl = rd32(E1000_RCTL);
5406                 rctl &= ~E1000_RCTL_CFIEN;
5407                 wr32(E1000_RCTL, rctl);
5408         } else {
5409                 /* disable VLAN tag insert/strip */
5410                 ctrl = rd32(E1000_CTRL);
5411                 ctrl &= ~E1000_CTRL_VME;
5412                 wr32(E1000_CTRL, ctrl);
5413         }
5414
5415         igb_rlpml_set(adapter);
5416
5417         if (!test_bit(__IGB_DOWN, &adapter->state))
5418                 igb_irq_enable(adapter);
5419 }
5420
5421 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5422 {
5423         struct igb_adapter *adapter = netdev_priv(netdev);
5424         struct e1000_hw *hw = &adapter->hw;
5425         int pf_id = adapter->vfs_allocated_count;
5426
5427         /* attempt to add filter to vlvf array */
5428         igb_vlvf_set(adapter, vid, true, pf_id);
5429
5430         /* add the filter since PF can receive vlans w/o entry in vlvf */
5431         igb_vfta_set(hw, vid, true);
5432 }
5433
5434 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5435 {
5436         struct igb_adapter *adapter = netdev_priv(netdev);
5437         struct e1000_hw *hw = &adapter->hw;
5438         int pf_id = adapter->vfs_allocated_count;
5439         s32 err;
5440
5441         igb_irq_disable(adapter);
5442         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5443
5444         if (!test_bit(__IGB_DOWN, &adapter->state))
5445                 igb_irq_enable(adapter);
5446
5447         /* remove vlan from VLVF table array */
5448         err = igb_vlvf_set(adapter, vid, false, pf_id);
5449
5450         /* if vid was not present in VLVF just remove it from table */
5451         if (err)
5452                 igb_vfta_set(hw, vid, false);
5453 }
5454
5455 static void igb_restore_vlan(struct igb_adapter *adapter)
5456 {
5457         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5458
5459         if (adapter->vlgrp) {
5460                 u16 vid;
5461                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5462                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5463                                 continue;
5464                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5465                 }
5466         }
5467 }
5468
5469 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5470 {
5471         struct pci_dev *pdev = adapter->pdev;
5472         struct e1000_mac_info *mac = &adapter->hw.mac;
5473
5474         mac->autoneg = 0;
5475
5476         switch (spddplx) {
5477         case SPEED_10 + DUPLEX_HALF:
5478                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5479                 break;
5480         case SPEED_10 + DUPLEX_FULL:
5481                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5482                 break;
5483         case SPEED_100 + DUPLEX_HALF:
5484                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5485                 break;
5486         case SPEED_100 + DUPLEX_FULL:
5487                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5488                 break;
5489         case SPEED_1000 + DUPLEX_FULL:
5490                 mac->autoneg = 1;
5491                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5492                 break;
5493         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5494         default:
5495                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5496                 return -EINVAL;
5497         }
5498         return 0;
5499 }
5500
5501 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5502 {
5503         struct net_device *netdev = pci_get_drvdata(pdev);
5504         struct igb_adapter *adapter = netdev_priv(netdev);
5505         struct e1000_hw *hw = &adapter->hw;
5506         u32 ctrl, rctl, status;
5507         u32 wufc = adapter->wol;
5508 #ifdef CONFIG_PM
5509         int retval = 0;
5510 #endif
5511
5512         netif_device_detach(netdev);
5513
5514         if (netif_running(netdev))
5515                 igb_close(netdev);
5516
5517         igb_clear_interrupt_scheme(adapter);
5518
5519 #ifdef CONFIG_PM
5520         retval = pci_save_state(pdev);
5521         if (retval)
5522                 return retval;
5523 #endif
5524
5525         status = rd32(E1000_STATUS);
5526         if (status & E1000_STATUS_LU)
5527                 wufc &= ~E1000_WUFC_LNKC;
5528
5529         if (wufc) {
5530                 igb_setup_rctl(adapter);
5531                 igb_set_rx_mode(netdev);
5532
5533                 /* turn on all-multi mode if wake on multicast is enabled */
5534                 if (wufc & E1000_WUFC_MC) {
5535                         rctl = rd32(E1000_RCTL);
5536                         rctl |= E1000_RCTL_MPE;
5537                         wr32(E1000_RCTL, rctl);
5538                 }
5539
5540                 ctrl = rd32(E1000_CTRL);
5541                 /* advertise wake from D3Cold */
5542                 #define E1000_CTRL_ADVD3WUC 0x00100000
5543                 /* phy power management enable */
5544                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5545                 ctrl |= E1000_CTRL_ADVD3WUC;
5546                 wr32(E1000_CTRL, ctrl);
5547
5548                 /* Allow time for pending master requests to run */
5549                 igb_disable_pcie_master(hw);
5550
5551                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5552                 wr32(E1000_WUFC, wufc);
5553         } else {
5554                 wr32(E1000_WUC, 0);
5555                 wr32(E1000_WUFC, 0);
5556         }
5557
5558         *enable_wake = wufc || adapter->en_mng_pt;
5559         if (!*enable_wake)
5560                 igb_shutdown_serdes_link_82575(hw);
5561
5562         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5563          * would have already happened in close and is redundant. */
5564         igb_release_hw_control(adapter);
5565
5566         pci_disable_device(pdev);
5567
5568         return 0;
5569 }
5570
5571 #ifdef CONFIG_PM
5572 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5573 {
5574         int retval;
5575         bool wake;
5576
5577         retval = __igb_shutdown(pdev, &wake);
5578         if (retval)
5579                 return retval;
5580
5581         if (wake) {
5582                 pci_prepare_to_sleep(pdev);
5583         } else {
5584                 pci_wake_from_d3(pdev, false);
5585                 pci_set_power_state(pdev, PCI_D3hot);
5586         }
5587
5588         return 0;
5589 }
5590
5591 static int igb_resume(struct pci_dev *pdev)
5592 {
5593         struct net_device *netdev = pci_get_drvdata(pdev);
5594         struct igb_adapter *adapter = netdev_priv(netdev);
5595         struct e1000_hw *hw = &adapter->hw;
5596         u32 err;
5597
5598         pci_set_power_state(pdev, PCI_D0);
5599         pci_restore_state(pdev);
5600
5601         err = pci_enable_device_mem(pdev);
5602         if (err) {
5603                 dev_err(&pdev->dev,
5604                         "igb: Cannot enable PCI device from suspend\n");
5605                 return err;
5606         }
5607         pci_set_master(pdev);
5608
5609         pci_enable_wake(pdev, PCI_D3hot, 0);
5610         pci_enable_wake(pdev, PCI_D3cold, 0);
5611
5612         if (igb_init_interrupt_scheme(adapter)) {
5613                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5614                 return -ENOMEM;
5615         }
5616
5617         /* e1000_power_up_phy(adapter); */
5618
5619         igb_reset(adapter);
5620
5621         /* let the f/w know that the h/w is now under the control of the
5622          * driver. */
5623         igb_get_hw_control(adapter);
5624
5625         wr32(E1000_WUS, ~0);
5626
5627         if (netif_running(netdev)) {
5628                 err = igb_open(netdev);
5629                 if (err)
5630                         return err;
5631         }
5632
5633         netif_device_attach(netdev);
5634
5635         return 0;
5636 }
5637 #endif
5638
5639 static void igb_shutdown(struct pci_dev *pdev)
5640 {
5641         bool wake;
5642
5643         __igb_shutdown(pdev, &wake);
5644
5645         if (system_state == SYSTEM_POWER_OFF) {
5646                 pci_wake_from_d3(pdev, wake);
5647                 pci_set_power_state(pdev, PCI_D3hot);
5648         }
5649 }
5650
5651 #ifdef CONFIG_NET_POLL_CONTROLLER
5652 /*
5653  * Polling 'interrupt' - used by things like netconsole to send skbs
5654  * without having to re-enable interrupts. It's not called while
5655  * the interrupt routine is executing.
5656  */
5657 static void igb_netpoll(struct net_device *netdev)
5658 {
5659         struct igb_adapter *adapter = netdev_priv(netdev);
5660         struct e1000_hw *hw = &adapter->hw;
5661         int i;
5662
5663         if (!adapter->msix_entries) {
5664                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5665                 igb_irq_disable(adapter);
5666                 napi_schedule(&q_vector->napi);
5667                 return;
5668         }
5669
5670         for (i = 0; i < adapter->num_q_vectors; i++) {
5671                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5672                 wr32(E1000_EIMC, q_vector->eims_value);
5673                 napi_schedule(&q_vector->napi);
5674         }
5675 }
5676 #endif /* CONFIG_NET_POLL_CONTROLLER */
5677
5678 /**
5679  * igb_io_error_detected - called when PCI error is detected
5680  * @pdev: Pointer to PCI device
5681  * @state: The current pci connection state
5682  *
5683  * This function is called after a PCI bus error affecting
5684  * this device has been detected.
5685  */
5686 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5687                                               pci_channel_state_t state)
5688 {
5689         struct net_device *netdev = pci_get_drvdata(pdev);
5690         struct igb_adapter *adapter = netdev_priv(netdev);
5691
5692         netif_device_detach(netdev);
5693
5694         if (state == pci_channel_io_perm_failure)
5695                 return PCI_ERS_RESULT_DISCONNECT;
5696
5697         if (netif_running(netdev))
5698                 igb_down(adapter);
5699         pci_disable_device(pdev);
5700
5701         /* Request a slot slot reset. */
5702         return PCI_ERS_RESULT_NEED_RESET;
5703 }
5704
5705 /**
5706  * igb_io_slot_reset - called after the pci bus has been reset.
5707  * @pdev: Pointer to PCI device
5708  *
5709  * Restart the card from scratch, as if from a cold-boot. Implementation
5710  * resembles the first-half of the igb_resume routine.
5711  */
5712 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
5713 {
5714         struct net_device *netdev = pci_get_drvdata(pdev);
5715         struct igb_adapter *adapter = netdev_priv(netdev);
5716         struct e1000_hw *hw = &adapter->hw;
5717         pci_ers_result_t result;
5718         int err;
5719
5720         if (pci_enable_device_mem(pdev)) {
5721                 dev_err(&pdev->dev,
5722                         "Cannot re-enable PCI device after reset.\n");
5723                 result = PCI_ERS_RESULT_DISCONNECT;
5724         } else {
5725                 pci_set_master(pdev);
5726                 pci_restore_state(pdev);
5727
5728                 pci_enable_wake(pdev, PCI_D3hot, 0);
5729                 pci_enable_wake(pdev, PCI_D3cold, 0);
5730
5731                 igb_reset(adapter);
5732                 wr32(E1000_WUS, ~0);
5733                 result = PCI_ERS_RESULT_RECOVERED;
5734         }
5735
5736         err = pci_cleanup_aer_uncorrect_error_status(pdev);
5737         if (err) {
5738                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
5739                         "failed 0x%0x\n", err);
5740                 /* non-fatal, continue */
5741         }
5742
5743         return result;
5744 }
5745
5746 /**
5747  * igb_io_resume - called when traffic can start flowing again.
5748  * @pdev: Pointer to PCI device
5749  *
5750  * This callback is called when the error recovery driver tells us that
5751  * its OK to resume normal operation. Implementation resembles the
5752  * second-half of the igb_resume routine.
5753  */
5754 static void igb_io_resume(struct pci_dev *pdev)
5755 {
5756         struct net_device *netdev = pci_get_drvdata(pdev);
5757         struct igb_adapter *adapter = netdev_priv(netdev);
5758
5759         if (netif_running(netdev)) {
5760                 if (igb_up(adapter)) {
5761                         dev_err(&pdev->dev, "igb_up failed after reset\n");
5762                         return;
5763                 }
5764         }
5765
5766         netif_device_attach(netdev);
5767
5768         /* let the f/w know that the h/w is now under the control of the
5769          * driver. */
5770         igb_get_hw_control(adapter);
5771 }
5772
5773 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
5774                              u8 qsel)
5775 {
5776         u32 rar_low, rar_high;
5777         struct e1000_hw *hw = &adapter->hw;
5778
5779         /* HW expects these in little endian so we reverse the byte order
5780          * from network order (big endian) to little endian
5781          */
5782         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
5783                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
5784         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
5785
5786         /* Indicate to hardware the Address is Valid. */
5787         rar_high |= E1000_RAH_AV;
5788
5789         if (hw->mac.type == e1000_82575)
5790                 rar_high |= E1000_RAH_POOL_1 * qsel;
5791         else
5792                 rar_high |= E1000_RAH_POOL_1 << qsel;
5793
5794         wr32(E1000_RAL(index), rar_low);
5795         wrfl();
5796         wr32(E1000_RAH(index), rar_high);
5797         wrfl();
5798 }
5799
5800 static int igb_set_vf_mac(struct igb_adapter *adapter,
5801                           int vf, unsigned char *mac_addr)
5802 {
5803         struct e1000_hw *hw = &adapter->hw;
5804         /* VF MAC addresses start at end of receive addresses and moves
5805          * torwards the first, as a result a collision should not be possible */
5806         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5807
5808         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
5809
5810         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
5811
5812         return 0;
5813 }
5814
5815 static void igb_vmm_control(struct igb_adapter *adapter)
5816 {
5817         struct e1000_hw *hw = &adapter->hw;
5818         u32 reg;
5819
5820         /* replication is not supported for 82575 */
5821         if (hw->mac.type == e1000_82575)
5822                 return;
5823
5824         /* enable replication vlan tag stripping */
5825         reg = rd32(E1000_RPLOLR);
5826         reg |= E1000_RPLOLR_STRVLAN;
5827         wr32(E1000_RPLOLR, reg);
5828
5829         /* notify HW that the MAC is adding vlan tags */
5830         reg = rd32(E1000_DTXCTL);
5831         reg |= E1000_DTXCTL_VLAN_ADDED;
5832         wr32(E1000_DTXCTL, reg);
5833
5834         if (adapter->vfs_allocated_count) {
5835                 igb_vmdq_set_loopback_pf(hw, true);
5836                 igb_vmdq_set_replication_pf(hw, true);
5837         } else {
5838                 igb_vmdq_set_loopback_pf(hw, false);
5839                 igb_vmdq_set_replication_pf(hw, false);
5840         }
5841 }
5842
5843 /* igb_main.c */