Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[pandora-kernel.git] drivers/net/igb/igb_main.c
index 9dd13ad..ffd7315 100644
@@ -1,7 +1,7 @@
 /*******************************************************************************
 
   Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007 Intel Corporation.
+  Copyright(c) 2007-2009 Intel Corporation.
 
   This program is free software; you can redistribute it and/or modify it
   under the terms and conditions of the GNU General Public License,
@@ -34,6 +34,7 @@
 #include <linux/ipv6.h>
 #include <net/checksum.h>
 #include <net/ip6_checksum.h>
+#include <linux/net_tstamp.h>
 #include <linux/mii.h>
 #include <linux/ethtool.h>
 #include <linux/if_vlan.h>
 #endif
 #include "igb.h"
 
-#define DRV_VERSION "1.2.45-k2"
+#define DRV_VERSION "1.3.16-k2"
 char igb_driver_name[] = "igb";
 char igb_driver_version[] = DRV_VERSION;
 static const char igb_driver_string[] =
                                "Intel(R) Gigabit Ethernet Network Driver";
-static const char igb_copyright[] = "Copyright (c) 2008 Intel Corporation.";
+static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
 
 static const struct e1000_info *igb_info_tbl[] = {
        [board_82575] = &e1000_82575_info,
@@ -61,8 +62,10 @@ static const struct e1000_info *igb_info_tbl[] = {
 
 static struct pci_device_id igb_pci_tbl[] = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
@@ -105,7 +108,6 @@ static irqreturn_t igb_intr_msi(int irq, void *);
 static irqreturn_t igb_msix_other(int irq, void *);
 static irqreturn_t igb_msix_rx(int irq, void *);
 static irqreturn_t igb_msix_tx(int irq, void *);
-static int igb_clean_rx_ring_msix(struct napi_struct *, int);
 #ifdef CONFIG_IGB_DCA
 static void igb_update_rx_dca(struct igb_ring *);
 static void igb_update_tx_dca(struct igb_ring *);
@@ -115,9 +117,6 @@ static bool igb_clean_tx_irq(struct igb_ring *);
 static int igb_poll(struct napi_struct *, int);
 static bool igb_clean_rx_irq_adv(struct igb_ring *, int *, int);
 static void igb_alloc_rx_buffers_adv(struct igb_ring *, int);
-#ifdef CONFIG_IGB_LRO
-static int igb_get_skb_hdr(struct sk_buff *skb, void **, void **, u64 *, void *);
-#endif
 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
 static void igb_tx_timeout(struct net_device *);
 static void igb_reset_task(struct work_struct *);
@@ -125,9 +124,19 @@ static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
 static void igb_vlan_rx_add_vid(struct net_device *, u16);
 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
 static void igb_restore_vlan(struct igb_adapter *);
+static void igb_ping_all_vfs(struct igb_adapter *);
+static void igb_msg_task(struct igb_adapter *);
+static int igb_rcv_msg_from_vf(struct igb_adapter *, u32);
+static inline void igb_set_rah_pool(struct e1000_hw *, int, int);
+static void igb_set_mc_list_pools(struct igb_adapter *, int, u16);
+static void igb_vmm_control(struct igb_adapter *);
+static inline void igb_set_vmolr(struct e1000_hw *, int);
+static inline int igb_set_vf_rlpml(struct igb_adapter *, int, int);
+static int igb_set_vf_mac(struct igb_adapter *adapter, int, unsigned char *);
+static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
 
-static int igb_suspend(struct pci_dev *, pm_message_t);
 #ifdef CONFIG_PM
+static int igb_suspend(struct pci_dev *, pm_message_t);
 static int igb_resume(struct pci_dev *);
 #endif
 static void igb_shutdown(struct pci_dev *);
@@ -139,11 +148,16 @@ static struct notifier_block dca_notifier = {
        .priority       = 0
 };
 #endif
-
 #ifdef CONFIG_NET_POLL_CONTROLLER
 /* for netdump / net console */
 static void igb_netpoll(struct net_device *);
 #endif
+#ifdef CONFIG_PCI_IOV
+static unsigned int max_vfs = 0;
+module_param(max_vfs, uint, 0);
+MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
+                 "per physical function");
+#endif /* CONFIG_PCI_IOV */
 
 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
                     pci_channel_state_t);
@@ -178,6 +192,54 @@ MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
 
+/**
+ * Scale the NIC clock cycle by a large factor so that
+ * relatively small clock corrections can be added or
+ * substracted at each clock tick. The drawbacks of a
+ * large factor are a) that the clock register overflows
+ * more quickly (not such a big deal) and b) that the
+ * increment per tick has to fit into 24 bits.
+ *
+ * Note that
+ *   TIMINCA = IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS *
+ *             IGB_TSYNC_SCALE
+ *   TIMINCA += TIMINCA * adjustment [ppb] / 1e9
+ *
+ * The base scale factor is intentionally a power of two
+ * so that the division in %struct timecounter can be done with
+ * a shift.
+ */
+#define IGB_TSYNC_SHIFT (19)
+#define IGB_TSYNC_SCALE (1<<IGB_TSYNC_SHIFT)
+
+/**
+ * The duration of one clock cycle of the NIC.
+ *
+ * @todo This hard-coded value is part of the specification and might change
+ * in future hardware revisions. Add revision check.
+ */
+#define IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS 16
+
+#if (IGB_TSYNC_SCALE * IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS) >= (1<<24)
+# error IGB_TSYNC_SCALE and/or IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS are too large to fit into TIMINCA
+#endif
+
+/**
+ * igb_read_clock - read raw cycle counter (to be used by time counter)
+ */
+static cycle_t igb_read_clock(const struct cyclecounter *tc)
+{
+       struct igb_adapter *adapter =
+               container_of(tc, struct igb_adapter, cycles);
+       struct e1000_hw *hw = &adapter->hw;
+       u64 stamp;
+
+       stamp =  rd32(E1000_SYSTIML);
+       stamp |= (u64)rd32(E1000_SYSTIMH) << 32ULL;
+
+       return stamp;
+}
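
A quick illustration of the scaling described above (a standalone sketch; the
macro names mirror the driver's, but the program itself is hypothetical): with
a 16 ns cycle time and a 2^19 scale factor, the TIMINCA increment is
16 << 19 = 0x800000, which just fits the 24-bit field, and SYSTIM counts
nanoseconds scaled by 2^19, so the timecounter recovers wall time with a
plain shift.

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative restatement of the driver's constants, not driver code. */
#define TSYNC_SHIFT     19
#define TSYNC_SCALE     (1u << TSYNC_SHIFT)
#define TSYNC_CYCLE_NS  16

int main(void)
{
	uint32_t timinca = TSYNC_CYCLE_NS * TSYNC_SCALE; /* 16 << 19 = 0x800000 */

	/* the increment must fit TIMINCA's 24-bit field */
	printf("TIMINCA = 0x%06x (fits 24 bits: %s)\n",
	       timinca, timinca < (1u << 24) ? "yes" : "no");

	/* a +100 ppm rate correction, per the formula above (1e9 => ppb) */
	int64_t adj_ppb = 100000;
	printf("corrected = 0x%06x\n",
	       (uint32_t)(timinca + timinca * adj_ppb / 1000000000));

	/* after 1000 ticks (16 us of real time) a shift recovers the time */
	uint64_t systim = (uint64_t)timinca * 1000;
	printf("%llu ns\n", (unsigned long long)(systim >> TSYNC_SHIFT));
	return 0;
}
```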
+
 #ifdef DEBUG
 /**
  * igb_get_hw_dev_name - return device name string
@@ -188,8 +250,43 @@ char *igb_get_hw_dev_name(struct e1000_hw *hw)
        struct igb_adapter *adapter = hw->back;
        return adapter->netdev->name;
 }
+
+/**
+ * igb_get_time_str - format current NIC and system time as string
+ */
+static char *igb_get_time_str(struct igb_adapter *adapter,
+                             char buffer[160])
+{
+       cycle_t hw = adapter->cycles.read(&adapter->cycles);
+       struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
+       struct timespec sys;
+       struct timespec delta;
+       getnstimeofday(&sys);
+
+       delta = timespec_sub(nic, sys);
+
+       sprintf(buffer,
+               "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
+               hw,
+               (long)nic.tv_sec, nic.tv_nsec,
+               (long)sys.tv_sec, sys.tv_nsec,
+               (long)delta.tv_sec, delta.tv_nsec);
+
+       return buffer;
+}
 #endif
 
+/**
+ * igb_desc_unused - calculate the number of unused descriptors
+ **/
+static int igb_desc_unused(struct igb_ring *ring)
+{
+       if (ring->next_to_clean > ring->next_to_use)
+               return ring->next_to_clean - ring->next_to_use - 1;
+
+       return ring->count + ring->next_to_clean - ring->next_to_use - 1;
+}
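
A stand-alone sanity check of the ring arithmetic above (a hypothetical test
harness, not driver code): the helper always holds one slot back, so
next_to_use can never catch up to next_to_clean.

```c
#include <stdio.h>

/* minimal model of the fields igb_desc_unused() reads */
struct ring { int count, next_to_use, next_to_clean; };

static int desc_unused(const struct ring *r)
{
	if (r->next_to_clean > r->next_to_use)
		return r->next_to_clean - r->next_to_use - 1;
	return r->count + r->next_to_clean - r->next_to_use - 1;
}

int main(void)
{
	struct ring wrapped = { .count = 8, .next_to_use = 5, .next_to_clean = 2 };
	struct ring linear  = { .count = 8, .next_to_use = 1, .next_to_clean = 6 };

	printf("%d\n", desc_unused(&wrapped)); /* 8 + 2 - 5 - 1 = 4 */
	printf("%d\n", desc_unused(&linear));  /* 6 - 1 - 1 = 4 */
	return 0;
}
```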
+
 /**
  * igb_init_module - Driver Registration Routine
  *
@@ -243,6 +340,7 @@ module_exit(igb_exit_module);
 static void igb_cache_ring_register(struct igb_adapter *adapter)
 {
        int i;
+       unsigned int rbase_offset = adapter->vfs_allocated_count;
 
        switch (adapter->hw.mac.type) {
        case e1000_82576:
@@ -252,9 +350,11 @@ static void igb_cache_ring_register(struct igb_adapter *adapter)
                 * and continue consuming queues in the same sequence
                 */
                for (i = 0; i < adapter->num_rx_queues; i++)
-                       adapter->rx_ring[i].reg_idx = Q_IDX_82576(i);
+                       adapter->rx_ring[i].reg_idx = rbase_offset +
+                                                     Q_IDX_82576(i);
                for (i = 0; i < adapter->num_tx_queues; i++)
-                       adapter->tx_ring[i].reg_idx = Q_IDX_82576(i);
+                       adapter->tx_ring[i].reg_idx = rbase_offset +
+                                                     Q_IDX_82576(i);
                break;
        case e1000_82575:
        default:
@@ -319,6 +419,9 @@ static void igb_free_queues(struct igb_adapter *adapter)
        for (i = 0; i < adapter->num_rx_queues; i++)
                netif_napi_del(&adapter->rx_ring[i].napi);
 
+       adapter->num_rx_queues = 0;
+       adapter->num_tx_queues = 0;
+
        kfree(adapter->tx_ring);
        kfree(adapter->rx_ring);
 }
@@ -354,7 +457,7 @@ static void igb_assign_vector(struct igb_adapter *adapter, int rx_queue,
                   a vector number along with a "valid" bit.  Sadly, the layout
                   of the table is somewhat counterintuitive. */
                if (rx_queue > IGB_N0_QUEUE) {
-                       index = (rx_queue >> 1);
+                       index = (rx_queue >> 1) + adapter->vfs_allocated_count;
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (rx_queue & 0x1) {
                                /* vector goes into third byte of register */
@@ -369,7 +472,7 @@ static void igb_assign_vector(struct igb_adapter *adapter, int rx_queue,
                        array_wr32(E1000_IVAR0, index, ivar);
                }
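
To make the "counterintuitive" table layout concrete, here is the IVAR packing
as inferred from the byte positions this function uses (the rx case above and
the tx case just below); an illustrative sketch, not taken from the datasheet:

```c
/* Each 32-bit IVAR0[index] entry, index = (queue >> 1) + vfs_allocated_count,
 * appears to hold four entries, each a vector number OR'd with a valid bit:
 *
 *   bits  7:0   rx queue (even)
 *   bits 15:8   tx queue (even)
 *   bits 23:16  rx queue (odd)   <- the "third byte" above
 *   bits 31:24  tx queue (odd)   <- the "high byte" below
 */
static unsigned int ivar_shift(unsigned int queue, int is_tx)
{
	return ((queue & 1) ? 16 : 0) + (is_tx ? 8 : 0);
}
```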
                if (tx_queue > IGB_N0_QUEUE) {
-                       index = (tx_queue >> 1);
+                       index = (tx_queue >> 1) + adapter->vfs_allocated_count;
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (tx_queue & 0x1) {
                                /* vector goes into high byte of register */
@@ -407,7 +510,7 @@ static void igb_configure_msix(struct igb_adapter *adapter)
                /* Turn on MSI-X capability first, or our settings
                 * won't stick.  And it will take days to debug. */
                wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
-                                  E1000_GPIE_PBA | E1000_GPIE_EIAME | 
+                                  E1000_GPIE_PBA | E1000_GPIE_EIAME |
                                   E1000_GPIE_NSICR);
 
        for (i = 0; i < adapter->num_tx_queues; i++) {
@@ -506,9 +609,6 @@ static int igb_request_msix(struct igb_adapter *adapter)
                        goto out;
                ring->itr_register = E1000_EITR(0) + (vector << 2);
                ring->itr_val = adapter->itr;
-               /* overwrite the poll routine for MSIX, we've already done
-                * netif_napi_add */
-               ring->napi.poll = &igb_clean_rx_ring_msix;
                vector++;
        }
 
@@ -546,6 +646,11 @@ static void igb_set_interrupt_capability(struct igb_adapter *adapter)
        int err;
        int numvecs, i;
 
+       /* Number of supported queues. */
+       /* Having more queues than CPUs doesn't make sense. */
+       adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
+       adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus());
+
        numvecs = adapter->num_tx_queues + adapter->num_rx_queues + 1;
        adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
                                        GFP_KERNEL);
@@ -565,6 +670,21 @@ static void igb_set_interrupt_capability(struct igb_adapter *adapter)
 
        /* If we can't do MSI-X, try MSI */
 msi_only:
+#ifdef CONFIG_PCI_IOV
+       /* disable SR-IOV for non MSI-X configurations */
+       if (adapter->vf_data) {
+               struct e1000_hw *hw = &adapter->hw;
+               /* disable iov and allow time for transactions to clear */
+               pci_disable_sriov(adapter->pdev);
+               msleep(500);
+
+               kfree(adapter->vf_data);
+               adapter->vf_data = NULL;
+               wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
+               msleep(100);
+               dev_info(&adapter->pdev->dev, "IOV Disabled\n");
+       }
+#endif
        adapter->num_rx_queues = 1;
        adapter->num_tx_queues = 1;
        if (!pci_enable_msi(adapter->pdev))
@@ -687,7 +807,10 @@ static void igb_irq_enable(struct igb_adapter *adapter)
                wr32(E1000_EIAC, adapter->eims_enable_mask);
                wr32(E1000_EIAM, adapter->eims_enable_mask);
                wr32(E1000_EIMS, adapter->eims_enable_mask);
-               wr32(E1000_IMS, E1000_IMS_LSC);
+               if (adapter->vfs_allocated_count)
+                       wr32(E1000_MBVFIMR, 0xFF);
+               wr32(E1000_IMS, (E1000_IMS_LSC | E1000_IMS_VMMB |
+                                E1000_IMS_DOUTSYNC));
        } else {
                wr32(E1000_IMS, IMS_ENABLE_MASK);
                wr32(E1000_IAM, IMS_ENABLE_MASK);
@@ -778,12 +901,12 @@ static void igb_configure(struct igb_adapter *adapter)
 
        igb_rx_fifo_flush_82575(&adapter->hw);
 
-       /* call IGB_DESC_UNUSED which always leaves
+       /* call igb_desc_unused which always leaves
         * at least 1 descriptor unused to make sure
         * next_to_use != next_to_clean */
        for (i = 0; i < adapter->num_rx_queues; i++) {
                struct igb_ring *ring = &adapter->rx_ring[i];
-               igb_alloc_rx_buffers_adv(ring, IGB_DESC_UNUSED(ring));
+               igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
        }
 
 
@@ -811,10 +934,16 @@ int igb_up(struct igb_adapter *adapter)
        if (adapter->msix_entries)
                igb_configure_msix(adapter);
 
+       igb_vmm_control(adapter);
+       igb_set_rah_pool(hw, adapter->vfs_allocated_count, 0);
+       igb_set_vmolr(hw, adapter->vfs_allocated_count);
+
        /* Clear any pending interrupts. */
        rd32(E1000_ICR);
        igb_irq_enable(adapter);
 
+       netif_tx_start_all_queues(adapter->netdev);
+
        /* Fire a link change interrupt to start the watchdog. */
        wr32(E1000_ICS, E1000_ICS_LSC);
        return 0;
@@ -856,6 +985,10 @@ void igb_down(struct igb_adapter *adapter)
 
        netdev->tx_queue_len = adapter->tx_queue_len;
        netif_carrier_off(netdev);
+
+       /* record the stats before reset */
+       igb_update_stats(adapter);
+
        adapter->link_speed = 0;
        adapter->link_duplex = 0;
 
@@ -886,11 +1019,14 @@ void igb_reset(struct igb_adapter *adapter)
        /* Repartition Pba for greater than 9k mtu
         * To take effect CTRL.RST is required.
         */
-       if (mac->type != e1000_82576) {
-       pba = E1000_PBA_34K;
-       }
-       else {
+       switch (mac->type) {
+       case e1000_82576:
                pba = E1000_PBA_64K;
+               break;
+       case e1000_82575:
+       default:
+               pba = E1000_PBA_34K;
+               break;
        }
 
        if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
@@ -912,7 +1048,7 @@ void igb_reset(struct igb_adapter *adapter)
                /* the tx fifo also stores 16 bytes of information about the tx
                 * but don't include ethernet FCS because hardware appends it */
                min_tx_space = (adapter->max_frame_size +
-                               sizeof(struct e1000_tx_desc) -
+                               sizeof(union e1000_adv_tx_desc) -
                                ETH_FCS_LEN) * 2;
                min_tx_space = ALIGN(min_tx_space, 1024);
                min_tx_space >>= 10;
@@ -956,6 +1092,20 @@ void igb_reset(struct igb_adapter *adapter)
        fc->send_xon = 1;
        fc->type = fc->original_type;
 
+       /* disable receive for all VFs and wait one second */
+       if (adapter->vfs_allocated_count) {
+               int i;
+               for (i = 0; i < adapter->vfs_allocated_count; i++)
+                       adapter->vf_data[i].clear_to_send = false;
+
+               /* ping all the active vfs to let them know we are going down */
+               igb_ping_all_vfs(adapter);
+
+               /* disable transmits and receives */
+               wr32(E1000_VFRE, 0);
+               wr32(E1000_VFTE, 0);
+       }
+
        /* Allow time for pending master requests to run */
        adapter->hw.mac.ops.reset_hw(&adapter->hw);
        wr32(E1000_WUC, 0);
@@ -972,21 +1122,6 @@ void igb_reset(struct igb_adapter *adapter)
        igb_get_phy_info(&adapter->hw);
 }
 
-/**
- * igb_is_need_ioport - determine if an adapter needs ioport resources or not
- * @pdev: PCI device information struct
- *
- * Returns true if an adapter needs ioport resources
- **/
-static int igb_is_need_ioport(struct pci_dev *pdev)
-{
-       switch (pdev->device) {
-       /* Currently there are no adapters that need ioport resources */
-       default:
-               return false;
-       }
-}
-
 static const struct net_device_ops igb_netdev_ops = {
        .ndo_open               = igb_open,
        .ndo_stop               = igb_close,
@@ -1025,34 +1160,25 @@ static int __devinit igb_probe(struct pci_dev *pdev,
        struct e1000_hw *hw;
        const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
        unsigned long mmio_start, mmio_len;
-       int i, err, pci_using_dac;
+       int err, pci_using_dac;
        u16 eeprom_data = 0;
        u16 eeprom_apme_mask = IGB_EEPROM_APME;
        u32 part_num;
-       int bars, need_ioport;
 
-       /* do not allocate ioport bars when not needed */
-       need_ioport = igb_is_need_ioport(pdev);
-       if (need_ioport) {
-               bars = pci_select_bars(pdev, IORESOURCE_MEM | IORESOURCE_IO);
-               err = pci_enable_device(pdev);
-       } else {
-               bars = pci_select_bars(pdev, IORESOURCE_MEM);
-               err = pci_enable_device_mem(pdev);
-       }
+       err = pci_enable_device_mem(pdev);
        if (err)
                return err;
 
        pci_using_dac = 0;
-       err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
        if (!err) {
-               err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
+               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
                if (!err)
                        pci_using_dac = 1;
        } else {
-               err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
                if (err) {
-                       err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+                       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
                        if (err) {
                                dev_err(&pdev->dev, "No usable DMA "
                                        "configuration, aborting\n");
@@ -1061,7 +1187,9 @@ static int __devinit igb_probe(struct pci_dev *pdev,
                }
        }
 
-       err = pci_request_selected_regions(pdev, bars, igb_driver_name);
+       err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
+                                          IORESOURCE_MEM),
+                                          igb_driver_name);
        if (err)
                goto err_pci_reg;
 
@@ -1076,7 +1204,8 @@ static int __devinit igb_probe(struct pci_dev *pdev,
        pci_save_state(pdev);
 
        err = -ENOMEM;
-       netdev = alloc_etherdev_mq(sizeof(struct igb_adapter), IGB_MAX_TX_QUEUES);
+       netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
+                                  IGB_ABS_MAX_TX_QUEUES);
        if (!netdev)
                goto err_alloc_etherdev;
 
@@ -1089,15 +1218,13 @@ static int __devinit igb_probe(struct pci_dev *pdev,
        hw = &adapter->hw;
        hw->back = adapter;
        adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
-       adapter->bars = bars;
-       adapter->need_ioport = need_ioport;
 
        mmio_start = pci_resource_start(pdev, 0);
        mmio_len = pci_resource_len(pdev, 0);
 
        err = -EIO;
-       adapter->hw.hw_addr = ioremap(mmio_start, mmio_len);
-       if (!adapter->hw.hw_addr)
+       hw->hw_addr = ioremap(mmio_start, mmio_len);
+       if (!hw->hw_addr)
                goto err_ioremap;
 
        netdev->netdev_ops = &igb_netdev_ops;
@@ -1125,8 +1252,49 @@ static int __devinit igb_probe(struct pci_dev *pdev,
        /* Initialize skew-specific constants */
        err = ei->get_invariants(hw);
        if (err)
-               goto err_hw_init;
+               goto err_sw_init;
+
+#ifdef CONFIG_PCI_IOV
+       /* since iov functionality isn't critical to base device function we
+        * can accept failure.  If it fails we don't allow iov to be enabled */
+       if (hw->mac.type == e1000_82576) {
+               /* 82576 supports a maximum of 7 VFs in addition to the PF */
+               unsigned int num_vfs = (max_vfs > 7) ? 7 : max_vfs;
+               int i;
+               unsigned char mac_addr[ETH_ALEN];
+
+               if (num_vfs) {
+                       adapter->vf_data = kcalloc(num_vfs,
+                                               sizeof(struct vf_data_storage),
+                                               GFP_KERNEL);
+                       if (!adapter->vf_data) {
+                               dev_err(&pdev->dev,
+                                       "Could not allocate VF private data - "
+                                       "IOV enable failed\n");
+                       } else {
+                               err = pci_enable_sriov(pdev, num_vfs);
+                               if (!err) {
+                                       adapter->vfs_allocated_count = num_vfs;
+                                       dev_info(&pdev->dev,
+                                                "%d vfs allocated\n",
+                                                num_vfs);
+                                       for (i = 0;
+                                            i < adapter->vfs_allocated_count;
+                                            i++) {
+                                               random_ether_addr(mac_addr);
+                                               igb_set_vf_mac(adapter, i,
+                                                              mac_addr);
+                                       }
+                               } else {
+                                       kfree(adapter->vf_data);
+                                       adapter->vf_data = NULL;
+                               }
+                       }
+               }
+       }
 
+#endif
+       /* setup the private structure */
        err = igb_sw_init(adapter);
        if (err)
                goto err_sw_init;
@@ -1158,27 +1326,28 @@ static int __devinit igb_probe(struct pci_dev *pdev,
                        "PHY reset is blocked due to SOL/IDER session.\n");
 
        netdev->features = NETIF_F_SG |
-                          NETIF_F_HW_CSUM |
+                          NETIF_F_IP_CSUM |
                           NETIF_F_HW_VLAN_TX |
                           NETIF_F_HW_VLAN_RX |
                           NETIF_F_HW_VLAN_FILTER;
 
+       netdev->features |= NETIF_F_IPV6_CSUM;
        netdev->features |= NETIF_F_TSO;
        netdev->features |= NETIF_F_TSO6;
 
-#ifdef CONFIG_IGB_LRO
-       netdev->features |= NETIF_F_LRO;
-#endif
+       netdev->features |= NETIF_F_GRO;
 
        netdev->vlan_features |= NETIF_F_TSO;
        netdev->vlan_features |= NETIF_F_TSO6;
-       netdev->vlan_features |= NETIF_F_HW_CSUM;
+       netdev->vlan_features |= NETIF_F_IP_CSUM;
        netdev->vlan_features |= NETIF_F_SG;
 
        if (pci_using_dac)
                netdev->features |= NETIF_F_HIGHDMA;
 
-       netdev->features |= NETIF_F_LLTX;
+       if (adapter->hw.mac.type == e1000_82576)
+               netdev->features |= NETIF_F_SCTP_CSUM;
+
        adapter->en_mng_pt = igb_enable_mng_pass_thru(&adapter->hw);
 
        /* before reading the NVM, reset the controller to put the device in a
@@ -1205,25 +1374,15 @@ static int __devinit igb_probe(struct pci_dev *pdev,
                goto err_eeprom;
        }
 
-       init_timer(&adapter->watchdog_timer);
-       adapter->watchdog_timer.function = &igb_watchdog;
-       adapter->watchdog_timer.data = (unsigned long) adapter;
-
-       init_timer(&adapter->phy_info_timer);
-       adapter->phy_info_timer.function = &igb_update_phy_info;
-       adapter->phy_info_timer.data = (unsigned long) adapter;
+       setup_timer(&adapter->watchdog_timer, &igb_watchdog,
+                   (unsigned long) adapter);
+       setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
+                   (unsigned long) adapter);
 
        INIT_WORK(&adapter->reset_task, igb_reset_task);
        INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
 
-       /* Initialize link & ring properties that are user-changeable */
-       adapter->tx_ring->count = 256;
-       for (i = 0; i < adapter->num_tx_queues; i++)
-               adapter->tx_ring[i].count = adapter->tx_ring->count;
-       adapter->rx_ring->count = 256;
-       for (i = 0; i < adapter->num_rx_queues; i++)
-               adapter->rx_ring[i].count = adapter->rx_ring->count;
-
+       /* Initialize link properties that are user-changeable */
        adapter->fc_autoneg = true;
        hw->mac.autoneg = true;
        hw->phy.autoneg_advertised = 0x2f;
@@ -1231,21 +1390,19 @@ static int __devinit igb_probe(struct pci_dev *pdev,
        hw->fc.original_type = e1000_fc_default;
        hw->fc.type = e1000_fc_default;
 
-       adapter->itr_setting = 3;
+       adapter->itr_setting = IGB_DEFAULT_ITR;
        adapter->itr = IGB_START_ITR;
 
        igb_validate_mdi_setting(hw);
 
-       adapter->rx_csum = 1;
-
        /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
         * enable the ACPI Magic Packet filter
         */
 
-       if (hw->bus.func == 0 ||
-           hw->device_id == E1000_DEV_ID_82575EB_COPPER)
-               hw->nvm.ops.read_nvm(hw, NVM_INIT_CONTROL3_PORT_A, 1,
-                                    &eeprom_data);
+       if (hw->bus.func == 0)
+               hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
+       else if (hw->bus.func == 1)
+               hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
 
        if (eeprom_data & eeprom_apme_mask)
                adapter->eeprom_wol |= E1000_WUFC_MAG;
@@ -1265,6 +1422,16 @@ static int __devinit igb_probe(struct pci_dev *pdev,
                if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
                        adapter->eeprom_wol = 0;
                break;
+       case E1000_DEV_ID_82576_QUAD_COPPER:
+               /* if quad port adapter, disable WoL on all but port A */
+               if (global_quad_port_a != 0)
+                       adapter->eeprom_wol = 0;
+               else
+                       adapter->flags |= IGB_FLAG_QUAD_PORT_A;
+               /* Reset for multiple quad port adapters */
+               if (++global_quad_port_a == 4)
+                       global_quad_port_a = 0;
+               break;
        }
 
        /* initialize the wol settings based on the eeprom settings */
@@ -1278,35 +1445,87 @@ static int __devinit igb_probe(struct pci_dev *pdev,
         * driver. */
        igb_get_hw_control(adapter);
 
-       /* tell the stack to leave us alone until igb_open() is called */
-       netif_carrier_off(netdev);
-       netif_tx_stop_all_queues(netdev);
-
        strcpy(netdev->name, "eth%d");
        err = register_netdev(netdev);
        if (err)
                goto err_register;
 
+       /* carrier off reporting is important to ethtool even BEFORE open */
+       netif_carrier_off(netdev);
+
 #ifdef CONFIG_IGB_DCA
        if (dca_add_requester(&pdev->dev) == 0) {
                adapter->flags |= IGB_FLAG_DCA_ENABLED;
                dev_info(&pdev->dev, "DCA enabled\n");
                /* Always use CB2 mode, difference is masked
                 * in the CB driver. */
-               wr32(E1000_DCA_CTRL, 2);
+               wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
                igb_setup_dca(adapter);
        }
 #endif
 
+       /*
+        * Initialize hardware timer: we keep it running just in case
+        * that some program needs it later on.
+        */
+       memset(&adapter->cycles, 0, sizeof(adapter->cycles));
+       adapter->cycles.read = igb_read_clock;
+       adapter->cycles.mask = CLOCKSOURCE_MASK(64);
+       adapter->cycles.mult = 1;
+       adapter->cycles.shift = IGB_TSYNC_SHIFT;
+       wr32(E1000_TIMINCA,
+            (1<<24) |
+            IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS * IGB_TSYNC_SCALE);
+#if 0
+       /*
+        * Avoid rollover while we initialize by resetting the time counter.
+        */
+       wr32(E1000_SYSTIML, 0x00000000);
+       wr32(E1000_SYSTIMH, 0x00000000);
+#else
+       /*
+        * Set registers so that rollover occurs soon to test this.
+        */
+       wr32(E1000_SYSTIML, 0x00000000);
+       wr32(E1000_SYSTIMH, 0xFF800000);
+#endif
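
A back-of-the-envelope check of the test preset above (my arithmetic, not from
the source): SYSTIMH = 0xFF800000 leaves 2^55 scaled units before the 64-bit
counter wraps, which at 2^19 units per nanosecond is 2^36 ns, i.e. the
rollover under test happens roughly 68.7 seconds after probe.

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* scaled units left before the 64-bit SYSTIM counter wraps */
	uint64_t start = (uint64_t)0xFF800000u << 32;
	uint64_t remaining = ~start + 1;      /* 2^64 - start = 2^55 */
	uint64_t ns = remaining >> 19;        /* unscale: 2^36 ns */

	printf("%.1f s until rollover\n", ns / 1e9); /* ~68.7 */
	return 0;
}
```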
+       wrfl();
+       timecounter_init(&adapter->clock,
+                        &adapter->cycles,
+                        ktime_to_ns(ktime_get_real()));
+
+       /*
+        * Synchronize our NIC clock against system wall clock. NIC
+        * time stamp reading requires ~3us per sample, and samples
+        * proved stable even under load, so 10 samples suffice for
+        * each offset comparison.
+        */
+       memset(&adapter->compare, 0, sizeof(adapter->compare));
+       adapter->compare.source = &adapter->clock;
+       adapter->compare.target = ktime_get_real;
+       adapter->compare.num_samples = 10;
+       timecompare_update(&adapter->compare, 0);
+
+#ifdef DEBUG
+       {
+               char buffer[160];
+               printk(KERN_DEBUG
+                       "igb: %s: hw %p initialized timer\n",
+                       igb_get_time_str(adapter, buffer),
+                       &adapter->hw);
+       }
+#endif
+
        dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
        /* print bus type/speed/width info */
        dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
                 netdev->name,
                 ((hw->bus.speed == e1000_bus_speed_2500)
                  ? "2.5Gb/s" : "unknown"),
-                ((hw->bus.width == e1000_bus_width_pcie_x4)
-                 ? "Width x4" : (hw->bus.width == e1000_bus_width_pcie_x1)
-                 ? "Width x1" : "unknown"),
+                ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
+                 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
+                 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
+                  "unknown"),
                 netdev->dev_addr);
 
        igb_read_part_num(hw, &part_num);
@@ -1330,15 +1549,14 @@ err_eeprom:
        if (hw->flash_address)
                iounmap(hw->flash_address);
 
-       igb_remove_device(hw);
        igb_free_queues(adapter);
 err_sw_init:
-err_hw_init:
        iounmap(hw->hw_addr);
 err_ioremap:
        free_netdev(netdev);
 err_alloc_etherdev:
-       pci_release_selected_regions(pdev, bars);
+       pci_release_selected_regions(pdev, pci_select_bars(pdev,
+                                    IORESOURCE_MEM));
 err_pci_reg:
 err_dma:
        pci_disable_device(pdev);
@@ -1358,9 +1576,7 @@ static void __devexit igb_remove(struct pci_dev *pdev)
 {
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct igb_adapter *adapter = netdev_priv(netdev);
-#ifdef CONFIG_IGB_DCA
        struct e1000_hw *hw = &adapter->hw;
-#endif
        int err;
 
        /* flush_scheduled work may reschedule our watchdog task, so
@@ -1376,7 +1592,7 @@ static void __devexit igb_remove(struct pci_dev *pdev)
                dev_info(&pdev->dev, "DCA disabled\n");
                dca_remove_requester(&pdev->dev);
                adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
-               wr32(E1000_DCA_CTRL, 1);
+               wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
        }
 #endif
 
@@ -1389,15 +1605,29 @@ static void __devexit igb_remove(struct pci_dev *pdev)
        if (!igb_check_reset_block(&adapter->hw))
                igb_reset_phy(&adapter->hw);
 
-       igb_remove_device(&adapter->hw);
        igb_reset_interrupt_capability(adapter);
 
        igb_free_queues(adapter);
 
-       iounmap(adapter->hw.hw_addr);
-       if (adapter->hw.flash_address)
-               iounmap(adapter->hw.flash_address);
-       pci_release_selected_regions(pdev, adapter->bars);
+#ifdef CONFIG_PCI_IOV
+       /* reclaim resources allocated to VFs */
+       if (adapter->vf_data) {
+               /* disable iov and allow time for transactions to clear */
+               pci_disable_sriov(pdev);
+               msleep(500);
+
+               kfree(adapter->vf_data);
+               adapter->vf_data = NULL;
+               wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
+               msleep(100);
+               dev_info(&pdev->dev, "IOV Disabled\n");
+       }
+#endif
+       iounmap(hw->hw_addr);
+       if (hw->flash_address)
+               iounmap(hw->flash_address);
+       pci_release_selected_regions(pdev, pci_select_bars(pdev,
+                                    IORESOURCE_MEM));
 
        free_netdev(netdev);
 
@@ -1432,11 +1662,6 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter)
        adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
        adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
 
-       /* Number of supported queues. */
-       /* Having more queues than CPUs doesn't make sense. */
-       adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
-       adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus());
-
        /* This call may decrease the number of queues depending on
         * interrupt mode. */
        igb_set_interrupt_capability(adapter);
@@ -1476,6 +1701,8 @@ static int igb_open(struct net_device *netdev)
        if (test_bit(__IGB_TESTING, &adapter->state))
                return -EBUSY;
 
+       netif_carrier_off(netdev);
+
        /* allocate transmit descriptors */
        err = igb_setup_all_tx_resources(adapter);
        if (err)
@@ -1499,6 +1726,10 @@ static int igb_open(struct net_device *netdev)
         * clean_rx handler before we do so.  */
        igb_configure(adapter);
 
+       igb_vmm_control(adapter);
+       igb_set_rah_pool(hw, adapter->vfs_allocated_count, 0);
+       igb_set_vmolr(hw, adapter->vfs_allocated_count);
+
        err = igb_request_irq(adapter);
        if (err)
                goto err_req_irq;
@@ -1574,7 +1805,6 @@ static int igb_close(struct net_device *netdev)
  *
  * Return 0 on success, negative on failure
  **/
-
 int igb_setup_tx_resources(struct igb_adapter *adapter,
                           struct igb_ring *tx_ring)
 {
@@ -1588,7 +1818,7 @@ int igb_setup_tx_resources(struct igb_adapter *adapter,
        memset(tx_ring->buffer_info, 0, size);
 
        /* round up to nearest 4K */
-       tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc);
+       tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
        tx_ring->size = ALIGN(tx_ring->size, 4096);
 
        tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size,
@@ -1635,7 +1865,7 @@ static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
        for (i = 0; i < IGB_MAX_TX_QUEUES; i++) {
                r_idx = i % adapter->num_tx_queues;
                adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
-       }       
+       }
        return err;
 }
 
@@ -1654,13 +1884,13 @@ static void igb_configure_tx(struct igb_adapter *adapter)
        int i, j;
 
        for (i = 0; i < adapter->num_tx_queues; i++) {
-               struct igb_ring *ring = &(adapter->tx_ring[i]);
+               struct igb_ring *ring = &adapter->tx_ring[i];
                j = ring->reg_idx;
                wr32(E1000_TDLEN(j),
-                               ring->count * sizeof(struct e1000_tx_desc));
+                    ring->count * sizeof(union e1000_adv_tx_desc));
                tdba = ring->dma;
                wr32(E1000_TDBAL(j),
-                               tdba & 0x00000000ffffffffULL);
+                    tdba & 0x00000000ffffffffULL);
                wr32(E1000_TDBAH(j), tdba >> 32);
 
                ring->head = E1000_TDH(j);
@@ -1680,12 +1910,11 @@ static void igb_configure_tx(struct igb_adapter *adapter)
                wr32(E1000_DCA_TXCTRL(j), txctrl);
        }
 
-
-
-       /* Use the default values for the Tx Inter Packet Gap (IPG) timer */
+       /* disable queue 0 to prevent tail bump w/o re-configuration */
+       if (adapter->vfs_allocated_count)
+               wr32(E1000_TXDCTL(0), 0);
 
        /* Program the Transmit Control Register */
-
        tctl = rd32(E1000_TCTL);
        tctl &= ~E1000_TCTL_CT;
        tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
@@ -1709,21 +1938,12 @@ static void igb_configure_tx(struct igb_adapter *adapter)
  *
  * Returns 0 on success, negative on failure
  **/
-
 int igb_setup_rx_resources(struct igb_adapter *adapter,
                           struct igb_ring *rx_ring)
 {
        struct pci_dev *pdev = adapter->pdev;
        int size, desc_len;
 
-#ifdef CONFIG_IGB_LRO
-       size = sizeof(struct net_lro_desc) * MAX_LRO_DESCRIPTORS;
-       rx_ring->lro_mgr.lro_arr = vmalloc(size);
-       if (!rx_ring->lro_mgr.lro_arr)
-               goto err;
-       memset(rx_ring->lro_mgr.lro_arr, 0, size);
-#endif
-
        size = sizeof(struct igb_buffer) * rx_ring->count;
        rx_ring->buffer_info = vmalloc(size);
        if (!rx_ring->buffer_info)
@@ -1750,10 +1970,6 @@ int igb_setup_rx_resources(struct igb_adapter *adapter,
        return 0;
 
 err:
-#ifdef CONFIG_IGB_LRO
-       vfree(rx_ring->lro_mgr.lro_arr);
-       rx_ring->lro_mgr.lro_arr = NULL;
-#endif
        vfree(rx_ring->buffer_info);
        dev_err(&adapter->pdev->dev, "Unable to allocate memory for "
                "the receive descriptor ring\n");
@@ -1794,7 +2010,7 @@ static void igb_setup_rctl(struct igb_adapter *adapter)
        struct e1000_hw *hw = &adapter->hw;
        u32 rctl;
        u32 srrctl = 0;
-       int i, j;
+       int i;
 
        rctl = rd32(E1000_RCTL);
 
@@ -1802,13 +2018,13 @@ static void igb_setup_rctl(struct igb_adapter *adapter)
        rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
 
        rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
-               (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
+               (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
 
        /*
         * enable stripping of CRC. It's unlikely this will break BMC
         * redirection as it did with e1000. Newer features require
         * that the HW strips the CRC.
-       */
+        */
        rctl |= E1000_RCTL_SECRC;
 
        /*
@@ -1852,14 +2068,84 @@ static void igb_setup_rctl(struct igb_adapter *adapter)
                srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
        }
 
+       /* Attention!!!  For SR-IOV PF driver operations you must enable
+        * queue drop for all VF and PF queues to prevent head of line blocking
+        * if an un-trusted VF does not provide descriptors to hardware.
+        */
+       if (adapter->vfs_allocated_count) {
+               u32 vmolr;
+
+               /* set all queue drop enable bits */
+               wr32(E1000_QDE, ALL_QUEUES);
+               srrctl |= E1000_SRRCTL_DROP_EN;
+
+               /* disable queue 0 to prevent tail write w/o re-config */
+               wr32(E1000_RXDCTL(0), 0);
+
+               vmolr = rd32(E1000_VMOLR(adapter->vfs_allocated_count));
+               if (rctl & E1000_RCTL_LPE)
+                       vmolr |= E1000_VMOLR_LPE;
+               if (adapter->num_rx_queues > 1)
+                       vmolr |= E1000_VMOLR_RSSE;
+               wr32(E1000_VMOLR(adapter->vfs_allocated_count), vmolr);
+       }
+
        for (i = 0; i < adapter->num_rx_queues; i++) {
-               j = adapter->rx_ring[i].reg_idx;
+               int j = adapter->rx_ring[i].reg_idx;
                wr32(E1000_SRRCTL(j), srrctl);
        }
 
        wr32(E1000_RCTL, rctl);
 }
 
+/**
+ * igb_rlpml_set - set maximum receive packet size
+ * @adapter: board private structure
+ *
+ * Configure maximum receivable packet size.
+ **/
+static void igb_rlpml_set(struct igb_adapter *adapter)
+{
+       u32 max_frame_size = adapter->max_frame_size;
+       struct e1000_hw *hw = &adapter->hw;
+       u16 pf_id = adapter->vfs_allocated_count;
+
+       if (adapter->vlgrp)
+               max_frame_size += VLAN_TAG_SIZE;
+
+       /* if vfs are enabled we set RLPML to the largest possible request
+        * size and set the VMOLR RLPML to the size we need */
+       if (pf_id) {
+               igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
+               max_frame_size = MAX_STD_JUMBO_FRAME_SIZE + VLAN_TAG_SIZE;
+       }
+
+       wr32(E1000_RLPML, max_frame_size);
+}
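
For a concrete sense of the numbers (illustrative, using the frame sizing set
in igb_sw_init above): with a standard 1500-byte MTU, max_frame_size is
1500 + 14 (Ethernet header) + 4 (FCS) = 1518, and with a VLAN group registered
RLPML becomes 1518 + 4 = 1522.

```c
#include <stdio.h>

#define ETH_HLEN      14
#define ETH_FCS_LEN    4
#define VLAN_TAG_SIZE  4

int main(void)
{
	int mtu = 1500;
	int max_frame_size = mtu + ETH_HLEN + ETH_FCS_LEN; /* 1518 */

	printf("RLPML = %d\n", max_frame_size + VLAN_TAG_SIZE); /* 1522 */
	return 0;
}
```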
+
+/**
+ * igb_configure_vt_default_pool - Configure VT default pool
+ * @adapter: board private structure
+ *
+ * Configure the default pool
+ **/
+static void igb_configure_vt_default_pool(struct igb_adapter *adapter)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       u16 pf_id = adapter->vfs_allocated_count;
+       u32 vtctl;
+
+       /* not in sr-iov mode - do nothing */
+       if (!pf_id)
+               return;
+
+       vtctl = rd32(E1000_VT_CTL);
+       vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
+                  E1000_VT_CTL_DISABLE_DEF_POOL);
+       vtctl |= pf_id << E1000_VT_CTL_DEFAULT_POOL_SHIFT;
+       wr32(E1000_VT_CTL, vtctl);
+}
+
 /**
  * igb_configure_rx - Configure receive Unit after Reset
  * @adapter: board private structure
@@ -1872,7 +2158,7 @@ static void igb_configure_rx(struct igb_adapter *adapter)
        struct e1000_hw *hw = &adapter->hw;
        u32 rctl, rxcsum;
        u32 rxdctl;
-       int i, j;
+       int i;
 
        /* disable receives while setting up the descriptors */
        rctl = rd32(E1000_RCTL);
@@ -1886,14 +2172,14 @@ static void igb_configure_rx(struct igb_adapter *adapter)
        /* Setup the HW Rx Head and Tail Descriptor Pointers and
         * the Base and Length of the Rx Descriptor Ring */
        for (i = 0; i < adapter->num_rx_queues; i++) {
-               struct igb_ring *ring = &(adapter->rx_ring[i]);
-               j = ring->reg_idx;
+               struct igb_ring *ring = &adapter->rx_ring[i];
+               int j = ring->reg_idx;
                rdba = ring->dma;
                wr32(E1000_RDBAL(j),
-                               rdba & 0x00000000ffffffffULL);
+                    rdba & 0x00000000ffffffffULL);
                wr32(E1000_RDBAH(j), rdba >> 32);
                wr32(E1000_RDLEN(j),
-                              ring->count * sizeof(union e1000_adv_rx_desc));
+                    ring->count * sizeof(union e1000_adv_rx_desc));
 
                ring->head = E1000_RDH(j);
                ring->tail = E1000_RDT(j);
@@ -1907,16 +2193,6 @@ static void igb_configure_rx(struct igb_adapter *adapter)
                rxdctl |= IGB_RX_HTHRESH << 8;
                rxdctl |= IGB_RX_WTHRESH << 16;
                wr32(E1000_RXDCTL(j), rxdctl);
-#ifdef CONFIG_IGB_LRO
-               /* Intitial LRO Settings */
-               ring->lro_mgr.max_aggr = MAX_LRO_AGGR;
-               ring->lro_mgr.max_desc = MAX_LRO_DESCRIPTORS;
-               ring->lro_mgr.get_skb_header = igb_get_skb_hdr;
-               ring->lro_mgr.features = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID;
-               ring->lro_mgr.dev = adapter->netdev;
-               ring->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY;
-               ring->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
-#endif
        }
 
        if (adapter->num_rx_queues > 1) {
@@ -1941,7 +2217,10 @@ static void igb_configure_rx(struct igb_adapter *adapter)
                                writel(reta.dword,
                                       hw->hw_addr + E1000_RETA(0) + (j & ~3));
                }
-               mrqc = E1000_MRQC_ENABLE_RSS_4Q;
+               if (adapter->vfs_allocated_count)
+                       mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
+               else
+                       mrqc = E1000_MRQC_ENABLE_RSS_4Q;
 
                /* Fill out hash function seeds */
                for (j = 0; j < 10; j++)
@@ -1956,37 +2235,28 @@ static void igb_configure_rx(struct igb_adapter *adapter)
                mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
                         E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
 
-
                wr32(E1000_MRQC, mrqc);
+       } else if (adapter->vfs_allocated_count) {
+               /* Enable multi-queue for sr-iov */
+               wr32(E1000_MRQC, E1000_MRQC_ENABLE_VMDQ);
+       }
 
-               /* Multiqueue and raw packet checksumming are mutually
-                * exclusive.  Note that this not the same as TCP/IP
-                * checksumming, which works fine. */
-               rxcsum = rd32(E1000_RXCSUM);
-               rxcsum |= E1000_RXCSUM_PCSD;
-               wr32(E1000_RXCSUM, rxcsum);
-       } else {
-               /* Enable Receive Checksum Offload for TCP and UDP */
-               rxcsum = rd32(E1000_RXCSUM);
-               if (adapter->rx_csum) {
-                       rxcsum |= E1000_RXCSUM_TUOFL;
+       /* Enable Receive Checksum Offload for TCP and UDP */
+       rxcsum = rd32(E1000_RXCSUM);
+       /* Disable raw packet checksumming */
+       rxcsum |= E1000_RXCSUM_PCSD;
 
-                       /* Enable IPv4 payload checksum for UDP fragments
-                        * Must be used in conjunction with packet-split. */
-                       if (adapter->rx_ps_hdr_size)
-                               rxcsum |= E1000_RXCSUM_IPPCSE;
-               } else {
-                       rxcsum &= ~E1000_RXCSUM_TUOFL;
-                       /* don't need to clear IPPCSE as it defaults to 0 */
-               }
-               wr32(E1000_RXCSUM, rxcsum);
-       }
+       if (adapter->hw.mac.type == e1000_82576)
+               /* Enable Receive Checksum Offload for SCTP */
+               rxcsum |= E1000_RXCSUM_CRCOFL;
 
-       if (adapter->vlgrp)
-               wr32(E1000_RLPML,
-                               adapter->max_frame_size + VLAN_TAG_SIZE);
-       else
-               wr32(E1000_RLPML, adapter->max_frame_size);
+       /* Don't need to set TUOFL or IPOFL, they default to 1 */
+       wr32(E1000_RXCSUM, rxcsum);
+
+       /* Set the default pool for the PF's first queue */
+       igb_configure_vt_default_pool(adapter);
+
+       igb_rlpml_set(adapter);
 
        /* Enable Receives */
        wr32(E1000_RCTL, rctl);
@@ -2029,14 +2299,10 @@ static void igb_free_all_tx_resources(struct igb_adapter *adapter)
 static void igb_unmap_and_free_tx_resource(struct igb_adapter *adapter,
                                           struct igb_buffer *buffer_info)
 {
-       if (buffer_info->dma) {
-               pci_unmap_page(adapter->pdev,
-                               buffer_info->dma,
-                               buffer_info->length,
-                               PCI_DMA_TODEVICE);
-               buffer_info->dma = 0;
-       }
+       buffer_info->dma = 0;
        if (buffer_info->skb) {
+               skb_dma_unmap(&adapter->pdev->dev, buffer_info->skb,
+                             DMA_TO_DEVICE);
                dev_kfree_skb_any(buffer_info->skb);
                buffer_info->skb = NULL;
        }
@@ -2105,11 +2371,6 @@ void igb_free_rx_resources(struct igb_ring *rx_ring)
        vfree(rx_ring->buffer_info);
        rx_ring->buffer_info = NULL;
 
-#ifdef CONFIG_IGB_LRO
-       vfree(rx_ring->lro_mgr.lro_arr);
-       rx_ring->lro_mgr.lro_arr = NULL;
-#endif 
-
        pci_free_consistent(pdev, rx_ring->size, rx_ring->desc, rx_ring->dma);
 
        rx_ring->desc = NULL;
@@ -2209,15 +2470,18 @@ static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
 static int igb_set_mac(struct net_device *netdev, void *p)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
+       struct e1000_hw *hw = &adapter->hw;
        struct sockaddr *addr = p;
 
        if (!is_valid_ether_addr(addr->sa_data))
                return -EADDRNOTAVAIL;
 
        memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
-       memcpy(adapter->hw.mac.addr, addr->sa_data, netdev->addr_len);
+       memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
+
+       hw->mac.ops.rar_set(hw, hw->mac.addr, 0);
 
-       adapter->hw.mac.ops.rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
+       igb_set_rah_pool(hw, adapter->vfs_allocated_count, 0);
 
        return 0;
 }
@@ -2237,7 +2501,7 @@ static void igb_set_multi(struct net_device *netdev)
        struct e1000_hw *hw = &adapter->hw;
        struct e1000_mac_info *mac = &hw->mac;
        struct dev_mc_list *mc_ptr;
-       u8  *mta_list;
+       u8  *mta_list = NULL;
        u32 rctl;
        int i;
 
@@ -2258,17 +2522,15 @@ static void igb_set_multi(struct net_device *netdev)
        }
        wr32(E1000_RCTL, rctl);
 
-       if (!netdev->mc_count) {
-               /* nothing to program, so clear mc list */
-               igb_update_mc_addr_list_82575(hw, NULL, 0, 1,
-                                         mac->rar_entry_count);
-               return;
+       if (netdev->mc_count) {
+               mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC);
+               if (!mta_list) {
+                       dev_err(&adapter->pdev->dev,
+                               "failed to allocate multicast filter list\n");
+                       return;
+               }
        }
 
-       mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC);
-       if (!mta_list)
-               return;
-
        /* The shared function expects a packed array of only addresses. */
        mc_ptr = netdev->mc_list;
 
@@ -2278,8 +2540,13 @@ static void igb_set_multi(struct net_device *netdev)
                memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
                mc_ptr = mc_ptr->next;
        }
-       igb_update_mc_addr_list_82575(hw, mta_list, i, 1,
-                                     mac->rar_entry_count);
+       igb_update_mc_addr_list(hw, mta_list, i,
+                               adapter->vfs_allocated_count + 1,
+                               mac->rar_entry_count);
+
+       igb_set_mc_list_pools(adapter, i, mac->rar_entry_count);
+       igb_restore_vf_multicasts(adapter);
+
        kfree(mta_list);
 }
 
@@ -2291,6 +2558,46 @@ static void igb_update_phy_info(unsigned long data)
        igb_get_phy_info(&adapter->hw);
 }
 
+/**
+ * igb_has_link - check shared code for link and determine up/down
+ * @adapter: pointer to driver private info
+ **/
+static bool igb_has_link(struct igb_adapter *adapter)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       bool link_active = false;
+       s32 ret_val = 0;
+
+       /* get_link_status is set on LSC (link status) interrupt or
+        * rx sequence error interrupt.  get_link_status will stay
+        * true until e1000_check_for_link establishes link
+        * for copper adapters ONLY
+        */
+       switch (hw->phy.media_type) {
+       case e1000_media_type_copper:
+               if (hw->mac.get_link_status) {
+                       ret_val = hw->mac.ops.check_for_link(hw);
+                       link_active = !hw->mac.get_link_status;
+               } else {
+                       link_active = true;
+               }
+               break;
+       case e1000_media_type_fiber:
+               ret_val = hw->mac.ops.check_for_link(hw);
+               link_active = !!(rd32(E1000_STATUS) & E1000_STATUS_LU);
+               break;
+       case e1000_media_type_internal_serdes:
+               ret_val = hw->mac.ops.check_for_link(hw);
+               link_active = hw->mac.serdes_has_link;
+               break;
+       default:
+       case e1000_media_type_unknown:
+               break;
+       }
+
+       return link_active;
+}
+
 /**
  * igb_watchdog - Timer Call-back
  * @data: pointer to adapter cast into an unsigned long
@@ -2307,34 +2614,16 @@ static void igb_watchdog_task(struct work_struct *work)
        struct igb_adapter *adapter = container_of(work,
                                        struct igb_adapter, watchdog_task);
        struct e1000_hw *hw = &adapter->hw;
-
        struct net_device *netdev = adapter->netdev;
        struct igb_ring *tx_ring = adapter->tx_ring;
-       struct e1000_mac_info *mac = &adapter->hw.mac;
        u32 link;
        u32 eics = 0;
-       s32 ret_val;
        int i;
 
-       if ((netif_carrier_ok(netdev)) &&
-           (rd32(E1000_STATUS) & E1000_STATUS_LU))
+       link = igb_has_link(adapter);
+       if ((netif_carrier_ok(netdev)) && link)
                goto link_up;
 
-       ret_val = hw->mac.ops.check_for_link(&adapter->hw);
-       if ((ret_val == E1000_ERR_PHY) &&
-           (hw->phy.type == e1000_phy_igp_3) &&
-           (rd32(E1000_CTRL) &
-            E1000_PHY_CTRL_GBE_DISABLE))
-               dev_info(&adapter->pdev->dev,
-                        "Gigabit has been disabled, downgrading speed\n");
-
-       if ((hw->phy.media_type == e1000_media_type_internal_serdes) &&
-           !(rd32(E1000_TXCW) & E1000_TXCW_ANE))
-               link = mac->serdes_has_link;
-       else
-               link = rd32(E1000_STATUS) &
-                                     E1000_STATUS_LU;
-
        if (link) {
                if (!netif_carrier_ok(netdev)) {
                        u32 ctrl;
@@ -2371,8 +2660,10 @@ static void igb_watchdog_task(struct work_struct *work)
                        }
 
                        netif_carrier_on(netdev);
-                       netif_tx_wake_all_queues(netdev);
 
+                       igb_ping_all_vfs(adapter);
+
+                       /* link state has changed, schedule phy info update */
                        if (!test_bit(__IGB_DOWN, &adapter->state))
                                mod_timer(&adapter->phy_info_timer,
                                          round_jiffies(jiffies + 2 * HZ));
@@ -2385,7 +2676,10 @@ static void igb_watchdog_task(struct work_struct *work)
                        printk(KERN_INFO "igb: %s NIC Link is Down\n",
                               netdev->name);
                        netif_carrier_off(netdev);
-                       netif_tx_stop_all_queues(netdev);
+
+                       igb_ping_all_vfs(adapter);
+
+                       /* link state has changed, schedule phy info update */
                        if (!test_bit(__IGB_DOWN, &adapter->state))
                                mod_timer(&adapter->phy_info_timer,
                                          round_jiffies(jiffies + 2 * HZ));
@@ -2395,9 +2689,9 @@ static void igb_watchdog_task(struct work_struct *work)
 link_up:
        igb_update_stats(adapter);
 
-       mac->tx_packet_delta = adapter->stats.tpt - adapter->tpt_old;
+       hw->mac.tx_packet_delta = adapter->stats.tpt - adapter->tpt_old;
        adapter->tpt_old = adapter->stats.tpt;
-       mac->collision_delta = adapter->stats.colc - adapter->colc_old;
+       hw->mac.collision_delta = adapter->stats.colc - adapter->colc_old;
        adapter->colc_old = adapter->stats.colc;
 
        adapter->gorc = adapter->stats.gorc - adapter->gorc_old;
@@ -2408,13 +2702,15 @@ link_up:
        igb_update_adaptive(&adapter->hw);
 
        if (!netif_carrier_ok(netdev)) {
-               if (IGB_DESC_UNUSED(tx_ring) + 1 < tx_ring->count) {
+               if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
                        /* We've lost link, so the controller stops DMA,
                         * but we've got queued Tx work that's never going
                         * to get done, so reset controller to flush Tx.
                         * (Do the reset outside of interrupt context). */
                        adapter->tx_timeout_count++;
                        schedule_work(&adapter->reset_task);
+                       /* return immediately since reset is imminent */
+                       return;
                }
        }
 
@@ -2554,7 +2850,7 @@ static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
                if (bytes > 25000) {
                        if (packets > 35)
                                retval = low_latency;
-               } else if (bytes < 6000) {
+               } else if (bytes < 1500) {
                        retval = low_latency;
                }
                break;
@@ -2586,27 +2882,25 @@ static void igb_set_itr(struct igb_adapter *adapter)
                                            adapter->tx_itr,
                                            adapter->tx_ring->total_packets,
                                            adapter->tx_ring->total_bytes);
-
                current_itr = max(adapter->rx_itr, adapter->tx_itr);
        } else {
                current_itr = adapter->rx_itr;
        }
 
        /* conservative mode (itr 3) eliminates the lowest_latency setting */
-       if (adapter->itr_setting == 3 &&
-           current_itr == lowest_latency)
+       if (adapter->itr_setting == 3 && current_itr == lowest_latency)
                current_itr = low_latency;
 
        switch (current_itr) {
        /* counts and packets in update_itr are dependent on these numbers */
        case lowest_latency:
-               new_itr = 70000;
+               new_itr = 56;  /* aka 70,000 ints/sec */
                break;
        case low_latency:
-               new_itr = 20000; /* aka hwitr = ~200 */
+               new_itr = 196; /* aka 20,000 ints/sec */
                break;
        case bulk_latency:
-               new_itr = 4000;
+               new_itr = 980; /* aka 4,000 ints/sec */
                break;
        default:
                break;
@@ -2625,7 +2919,8 @@ set_itr_now:
                 * by adding intermediate steps when interrupt rate is
                 * increasing */
                new_itr = new_itr > adapter->itr ?
-                            min(adapter->itr + (new_itr >> 2), new_itr) :
+                            max((new_itr * adapter->itr) /
+                                (new_itr + (adapter->itr >> 2)), new_itr) :
                             new_itr;
                /* Don't write the value here; it resets the adapter's
                 * internal timer, and causes us to delay far longer than
@@ -2634,7 +2929,7 @@ set_itr_now:
                 * ends up being correct.
                 */
                adapter->itr = new_itr;
-               adapter->rx_ring->itr_val = 1000000000 / (new_itr * 256);
+               adapter->rx_ring->itr_val = new_itr;
                adapter->rx_ring->set_itr = 1;
        }
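
A note on the constants above: itr_val now stores the EITR interval directly
instead of an interrupts-per-second target, so the conversion removed in this
hunk happens up front. Assuming the 256 ns register unit that old conversion
implies, the new values check out:

	/* ints/sec ~= 10^9 / (itr_val * 256) */
	 56 * 256 ns ~=  14.3 us  ->  ~70,000 ints/sec  (lowest_latency)
	196 * 256 ns ~=  50.2 us  ->  ~20,000 ints/sec  (low_latency)
	980 * 256 ns ~= 250.9 us  ->   ~4,000 ints/sec  (bulk_latency)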
 
@@ -2646,6 +2941,7 @@ set_itr_now:
 #define IGB_TX_FLAGS_VLAN              0x00000002
 #define IGB_TX_FLAGS_TSO               0x00000004
 #define IGB_TX_FLAGS_IPV4              0x00000008
+#define IGB_TX_FLAGS_TSTAMP             0x00000010
 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
 #define IGB_TX_FLAGS_VLAN_SHIFT        16
 
@@ -2711,7 +3007,7 @@ static inline int igb_tso_adv(struct igb_adapter *adapter,
        mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
        mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
 
-       /* Context index must be unique per ring. */
+       /* For 82575, context index must be unique per ring. */
        if (adapter->flags & IGB_FLAG_NEED_CTX_IDX)
                mss_l4len_idx |= tx_ring->queue_index << 4;
 
@@ -2756,16 +3052,31 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter,
                tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
 
                if (skb->ip_summed == CHECKSUM_PARTIAL) {
-                       switch (skb->protocol) {
-                       case __constant_htons(ETH_P_IP):
+                       __be16 protocol;
+
+                       if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
+                               const struct vlan_ethhdr *vhdr =
+                                         (const struct vlan_ethhdr *)skb->data;
+
+                               protocol = vhdr->h_vlan_encapsulated_proto;
+                       } else {
+                               protocol = skb->protocol;
+                       }
+
+                       switch (protocol) {
+                       case cpu_to_be16(ETH_P_IP):
                                tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
                                if (ip_hdr(skb)->protocol == IPPROTO_TCP)
                                        tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
+                               else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
+                                       tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
                                break;
-                       case __constant_htons(ETH_P_IPV6):
+                       case cpu_to_be16(ETH_P_IPV6):
                                /* XXX what about other V6 headers?? */
                                if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
                                        tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
+                               else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
+                                       tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
                                break;
                        default:
                                if (unlikely(net_ratelimit()))
@@ -2781,6 +3092,8 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter,
                if (adapter->flags & IGB_FLAG_NEED_CTX_IDX)
                        context_desc->mss_l4len_idx =
                                cpu_to_le32(tx_ring->queue_index << 4);
+               else
+                       context_desc->mss_l4len_idx = 0;
 
                buffer_info->time_stamp = jiffies;
                buffer_info->next_to_watch = i;
@@ -2793,8 +3106,6 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter,
 
                return true;
        }
-
-
        return false;
 }
 
@@ -2809,25 +3120,33 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
        unsigned int len = skb_headlen(skb);
        unsigned int count = 0, i;
        unsigned int f;
+       dma_addr_t *map;
 
        i = tx_ring->next_to_use;
 
+       if (skb_dma_map(&adapter->pdev->dev, skb, DMA_TO_DEVICE)) {
+               dev_err(&adapter->pdev->dev, "TX DMA map failed\n");
+               return 0;
+       }
+
+       map = skb_shinfo(skb)->dma_maps;
+
        buffer_info = &tx_ring->buffer_info[i];
        BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
        buffer_info->length = len;
        /* set time_stamp *before* dma to help avoid a possible race */
        buffer_info->time_stamp = jiffies;
        buffer_info->next_to_watch = i;
-       buffer_info->dma = pci_map_single(adapter->pdev, skb->data, len,
-                                         PCI_DMA_TODEVICE);
+       buffer_info->dma = map[count];
        count++;
-       i++;
-       if (i == tx_ring->count)
-               i = 0;
 
        for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
                struct skb_frag_struct *frag;
 
+               i++;
+               if (i == tx_ring->count)
+                       i = 0;
+
                frag = &skb_shinfo(skb)->frags[f];
                len = frag->size;
 
@@ -2836,19 +3155,10 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
                buffer_info->length = len;
                buffer_info->time_stamp = jiffies;
                buffer_info->next_to_watch = i;
-               buffer_info->dma = pci_map_page(adapter->pdev,
-                                               frag->page,
-                                               frag->page_offset,
-                                               len,
-                                               PCI_DMA_TODEVICE);
-
+               buffer_info->dma = map[count];
                count++;
-               i++;
-               if (i == tx_ring->count)
-                       i = 0;
        }
 
-       i = ((i == 0) ? tx_ring->count - 1 : i - 1);
        tx_ring->buffer_info[i].skb = skb;
        tx_ring->buffer_info[first].next_to_watch = i;
 
@@ -2871,6 +3181,9 @@ static inline void igb_tx_queue_adv(struct igb_adapter *adapter,
        if (tx_flags & IGB_TX_FLAGS_VLAN)
                cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
 
+       if (tx_flags & IGB_TX_FLAGS_TSTAMP)
+               cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
+
        if (tx_flags & IGB_TX_FLAGS_TSO) {
                cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
 
@@ -2933,7 +3246,7 @@ static int __igb_maybe_stop_tx(struct net_device *netdev,
 
        /* We need to check again in a case another CPU has just
         * made room available. */
-       if (IGB_DESC_UNUSED(tx_ring) < size)
+       if (igb_desc_unused(tx_ring) < size)
                return -EBUSY;
 
        /* A reprieve! */
@@ -2945,13 +3258,11 @@ static int __igb_maybe_stop_tx(struct net_device *netdev,
 static int igb_maybe_stop_tx(struct net_device *netdev,
                             struct igb_ring *tx_ring, int size)
 {
-       if (IGB_DESC_UNUSED(tx_ring) >= size)
+       if (igb_desc_unused(tx_ring) >= size)
                return 0;
        return __igb_maybe_stop_tx(netdev, tx_ring, size);
 }
 
-#define TXD_USE_COUNT(S) (((S) >> (IGB_MAX_TXD_PWR)) + 1)
-
 static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
                                   struct net_device *netdev,
                                   struct igb_ring *tx_ring)
@@ -2959,11 +3270,10 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
        struct igb_adapter *adapter = netdev_priv(netdev);
        unsigned int first;
        unsigned int tx_flags = 0;
-       unsigned int len;
        u8 hdr_len = 0;
+       int count = 0;
        int tso = 0;
-
-       len = skb_headlen(skb);
+       union skb_shared_tx *shtx;
 
        if (test_bit(__IGB_DOWN, &adapter->state)) {
                dev_kfree_skb_any(skb);
@@ -2984,7 +3294,21 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
                /* this is a hard error */
                return NETDEV_TX_BUSY;
        }
-       skb_orphan(skb);
+
+       /*
+        * TODO: check that there currently is no other packet with
+        * time stamping in the queue
+        *
+        * When doing time stamping, keep the connection to the socket
+        * a while longer: it is still needed by skb_hwtstamp_tx(),
+        * called either in igb_tx_hwtstamp() or by our caller when
+        * doing software time stamping.
+        */
+       shtx = skb_tx(skb);
+       if (unlikely(shtx->hardware)) {
+               shtx->in_progress = 1;
+               tx_flags |= IGB_TX_FLAGS_TSTAMP;
+       }
 
        if (adapter->vlgrp && vlan_tx_tag_present(skb)) {
                tx_flags |= IGB_TX_FLAGS_VLAN;
@@ -2995,7 +3319,6 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
                tx_flags |= IGB_TX_FLAGS_IPV4;
 
        first = tx_ring->next_to_use;
-
        tso = skb_is_gso(skb) ? igb_tso_adv(adapter, tx_ring, skb, tx_flags,
                                              &hdr_len) : 0;
 
@@ -3006,18 +3329,27 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
 
        if (tso)
                tx_flags |= IGB_TX_FLAGS_TSO;
-       else if (igb_tx_csum_adv(adapter, tx_ring, skb, tx_flags))
-                       if (skb->ip_summed == CHECKSUM_PARTIAL)
-                               tx_flags |= IGB_TX_FLAGS_CSUM;
+       else if (igb_tx_csum_adv(adapter, tx_ring, skb, tx_flags) &&
+                (skb->ip_summed == CHECKSUM_PARTIAL))
+               tx_flags |= IGB_TX_FLAGS_CSUM;
 
-       igb_tx_queue_adv(adapter, tx_ring, tx_flags,
-                        igb_tx_map_adv(adapter, tx_ring, skb, first),
-                        skb->len, hdr_len);
-
-       netdev->trans_start = jiffies;
-
-       /* Make sure there is space in the ring for the next send. */
-       igb_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 4);
+       /*
+        * count reflects descriptors mapped, if 0 then mapping error
+        * has occurred and we need to rewind the descriptor queue
+        */
+       count = igb_tx_map_adv(adapter, tx_ring, skb, first);
+
+       if (count) {
+               igb_tx_queue_adv(adapter, tx_ring, tx_flags, count,
+                                skb->len, hdr_len);
+               netdev->trans_start = jiffies;
+               /* Make sure there is space in the ring for the next send. */
+               igb_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 4);
+       } else {
+               dev_kfree_skb_any(skb);
+               tx_ring->buffer_info[first].time_stamp = 0;
+               tx_ring->next_to_use = first;
+       }
 
        return NETDEV_TX_OK;
 }
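
The skb_dma_map() conversion in igb_tx_map_adv() above maps the linear head
and all page fragments in one call and leaves the handles in
skb_shinfo(skb)->dma_maps[] (index 0 is the head, frags follow), which is why
the mapping loop can simply consume map[count]. A minimal sketch of the
expected pairing, with the unmap half running at descriptor-clean time:

	/* transmit path */
	if (skb_dma_map(&pdev->dev, skb, DMA_TO_DEVICE))
		return 0;	/* caller rewinds next_to_use and drops the skb */
	map = skb_shinfo(skb)->dma_maps;
	/* map[0] covers skb_headlen(skb), map[1..nr_frags] cover the frags */

	/* clean path, after the EOP descriptor writes back */
	skb_dma_unmap(&pdev->dev, skb, DMA_TO_DEVICE);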
@@ -3028,7 +3360,7 @@ static int igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *netdev)
        struct igb_ring *tx_ring;
 
        int r_idx = 0;
-       r_idx = skb->queue_mapping & (IGB_MAX_TX_QUEUES - 1);
+       r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
        tx_ring = adapter->multi_tx_table[r_idx];
 
        /* This goes back to the question of how to logically map a tx queue
@@ -3050,8 +3382,8 @@ static void igb_tx_timeout(struct net_device *netdev)
        /* Do the reset outside of interrupt context */
        adapter->tx_timeout_count++;
        schedule_work(&adapter->reset_task);
-       wr32(E1000_EICS, adapter->eims_enable_mask &
-               ~(E1000_EIMS_TCP_TIMER | E1000_EIMS_OTHER));
+       wr32(E1000_EICS,
+            (adapter->eims_enable_mask & ~adapter->eims_other));
 }
 
 static void igb_reset_task(struct work_struct *work)
@@ -3069,8 +3401,7 @@ static void igb_reset_task(struct work_struct *work)
  * Returns the address of the device statistics structure.
  * The statistics are actually updated from the timer callback.
  **/
-static struct net_device_stats *
-igb_get_stats(struct net_device *netdev)
+static struct net_device_stats *igb_get_stats(struct net_device *netdev)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
 
@@ -3096,7 +3427,6 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu)
                return -EINVAL;
        }
 
-#define MAX_STD_JUMBO_FRAME_SIZE 9234
        if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
                dev_err(&adapter->pdev->dev, "MTU > 9216 not supported.\n");
                return -EINVAL;
@@ -3104,6 +3434,7 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu)
 
        while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
                msleep(1);
+
        /* igb_down has a dependency on max_frame_size */
        adapter->max_frame_size = max_frame;
        if (netif_running(netdev))
@@ -3129,6 +3460,12 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu)
 #else
                adapter->rx_buffer_len = PAGE_SIZE / 2;
 #endif
+
+       /* if SR-IOV is enabled we need to force the buffer size to 1K or larger */
+       if (adapter->vfs_allocated_count &&
+           (adapter->rx_buffer_len < IGB_RXBUFFER_1024))
+               adapter->rx_buffer_len = IGB_RXBUFFER_1024;
+
        /* adjust allocation if LPE protects us, and we aren't using SBP */
        if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN) ||
             (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE))
@@ -3273,8 +3610,7 @@ void igb_update_stats(struct igb_adapter *adapter)
        /* Phy Stats */
        if (hw->phy.media_type == e1000_media_type_copper) {
                if ((adapter->link_speed == SPEED_1000) &&
-                  (!igb_read_phy_reg(hw, PHY_1000T_STATUS,
-                                             &phy_tmp))) {
+                  (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
                        phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
                        adapter->phy_stats.idle_errors += phy_tmp;
                }
@@ -3286,7 +3622,6 @@ void igb_update_stats(struct igb_adapter *adapter)
        adapter->stats.mgpdc += rd32(E1000_MGTPDC);
 }
 
-
 static irqreturn_t igb_msix_other(int irq, void *data)
 {
        struct net_device *netdev = data;
@@ -3295,15 +3630,24 @@ static irqreturn_t igb_msix_other(int irq, void *data)
        u32 icr = rd32(E1000_ICR);
 
        /* reading ICR causes bit 31 of EICR to be cleared */
-       if (!(icr & E1000_ICR_LSC))
-               goto no_link_interrupt;
-       hw->mac.get_link_status = 1;
-       /* guard against interrupt when we're going down */
-       if (!test_bit(__IGB_DOWN, &adapter->state))
-               mod_timer(&adapter->watchdog_timer, jiffies + 1);
-       
-no_link_interrupt:
-       wr32(E1000_IMS, E1000_IMS_LSC);
+
+       if (icr & E1000_ICR_DOUTSYNC) {
+               /* HW is reporting DMA is out of sync */
+               adapter->stats.doosync++;
+       }
+
+       /* Check for a mailbox event */
+       if (icr & E1000_ICR_VMMB)
+               igb_msg_task(adapter);
+
+       if (icr & E1000_ICR_LSC) {
+               hw->mac.get_link_status = 1;
+               /* guard against interrupt when we're going down */
+               if (!test_bit(__IGB_DOWN, &adapter->state))
+                       mod_timer(&adapter->watchdog_timer, jiffies + 1);
+       }
+
+       wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_VMMB);
        wr32(E1000_EIMS, adapter->eims_other);
 
        return IRQ_HANDLED;
@@ -3319,6 +3663,7 @@ static irqreturn_t igb_msix_tx(int irq, void *data)
        if (adapter->flags & IGB_FLAG_DCA_ENABLED)
                igb_update_tx_dca(tx_ring);
 #endif
+
        tx_ring->total_bytes = 0;
        tx_ring->total_packets = 0;
 
@@ -3339,13 +3684,11 @@ static void igb_write_itr(struct igb_ring *ring)
        if ((ring->adapter->itr_setting & 3) && ring->set_itr) {
                switch (hw->mac.type) {
                case e1000_82576:
-                       wr32(ring->itr_register,
-                            ring->itr_val |
+                       wr32(ring->itr_register, ring->itr_val |
                             0x80000000);
                        break;
                default:
-                       wr32(ring->itr_register,
-                            ring->itr_val |
+                       wr32(ring->itr_register, ring->itr_val |
                             (ring->itr_val << 16));
                        break;
                }
@@ -3363,8 +3706,8 @@ static irqreturn_t igb_msix_rx(int irq, void *data)
 
        igb_write_itr(rx_ring);
 
-       if (netif_rx_schedule_prep(&rx_ring->napi))
-               __netif_rx_schedule(&rx_ring->napi);
+       if (napi_schedule_prep(&rx_ring->napi))
+               __napi_schedule(&rx_ring->napi);
 
 #ifdef CONFIG_IGB_DCA
        if (rx_ring->adapter->flags & IGB_FLAG_DCA_ENABLED)
@@ -3386,11 +3729,11 @@ static void igb_update_rx_dca(struct igb_ring *rx_ring)
                dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
                if (hw->mac.type == e1000_82576) {
                        dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
-                       dca_rxctrl |= dca_get_tag(cpu) <<
+                       dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
                                      E1000_DCA_RXCTRL_CPUID_SHIFT;
                } else {
                        dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
-                       dca_rxctrl |= dca_get_tag(cpu);
+                       dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
                }
                dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
                dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
@@ -3413,11 +3756,11 @@ static void igb_update_tx_dca(struct igb_ring *tx_ring)
                dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
                if (hw->mac.type == e1000_82576) {
                        dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
-                       dca_txctrl |= dca_get_tag(cpu) <<
+                       dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
                                      E1000_DCA_TXCTRL_CPUID_SHIFT;
                } else {
                        dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
-                       dca_txctrl |= dca_get_tag(cpu);
+                       dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
                }
                dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
                wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
@@ -3457,7 +3800,7 @@ static int __igb_notify_dca(struct device *dev, void *data)
                        break;
                /* Always use CB2 mode, difference is masked
                 * in the CB driver. */
-               wr32(E1000_DCA_CTRL, 2);
+               wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
                if (dca_add_requester(dev) == 0) {
                        adapter->flags |= IGB_FLAG_DCA_ENABLED;
                        dev_info(&adapter->pdev->dev, "DCA enabled\n");
@@ -3472,7 +3815,7 @@ static int __igb_notify_dca(struct device *dev, void *data)
                        dca_remove_requester(dev);
                        dev_info(&adapter->pdev->dev, "DCA disabled\n");
                        adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
-                       wr32(E1000_DCA_CTRL, 1);
+                       wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
                }
                break;
        }
@@ -3492,6 +3835,322 @@ static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
 }
 #endif /* CONFIG_IGB_DCA */
 
+static void igb_ping_all_vfs(struct igb_adapter *adapter)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       u32 ping;
+       int i;
+
+       for (i = 0; i < adapter->vfs_allocated_count; i++) {
+               ping = E1000_PF_CONTROL_MSG;
+               if (adapter->vf_data[i].clear_to_send)
+                       ping |= E1000_VT_MSGTYPE_CTS;
+               igb_write_mbx(hw, &ping, 1, i);
+       }
+}
+
+static int igb_set_vf_multicasts(struct igb_adapter *adapter,
+                                 u32 *msgbuf, u32 vf)
+{
+       int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
+       u16 *hash_list = (u16 *)&msgbuf[1];
+       struct vf_data_storage *vf_data = &adapter->vf_data[vf];
+       int i;
+
+       /* only up to 30 hash values supported */
+       if (n > 30)
+               n = 30;
+
+       /* salt away the number of multicast addresses assigned
+        * to this VF for later use to restore when the PF multicast
+        * list changes
+        */
+       vf_data->num_vf_mc_hashes = n;
+
+       /* VFs are limited to using the MTA hash table for their multicast
+        * addresses */
+       for (i = 0; i < n; i++)
+               vf_data->vf_mc_hashes[i] = hash_list[i];
+
+       /* Flush and reset the mta with the new values */
+       igb_set_multi(adapter->netdev);
+
+       return 0;
+}
+
+static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       struct vf_data_storage *vf_data;
+       int i, j;
+
+       for (i = 0; i < adapter->vfs_allocated_count; i++) {
+               vf_data = &adapter->vf_data[i];
+               for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
+                       igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
+       }
+}
+
+static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       u32 pool_mask, reg, vid;
+       int i;
+
+       pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
+
+       /* Find the vlan filter for this id */
+       for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
+               reg = rd32(E1000_VLVF(i));
+
+               /* remove the vf from the pool */
+               reg &= ~pool_mask;
+
+               /* if pool is empty then remove entry from vfta */
+               if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
+                   (reg & E1000_VLVF_VLANID_ENABLE)) {
+                       vid = reg & E1000_VLVF_VLANID_MASK;
+                       reg = 0;
+                       igb_vfta_set(hw, vid, false);
+               }
+
+               wr32(E1000_VLVF(i), reg);
+       }
+}
+
+static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       u32 reg, i;
+
+       /* It is an error to call this function when VFs are not enabled */
+       if (!adapter->vfs_allocated_count)
+               return -1;
+
+       /* Find the vlan filter for this id */
+       for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
+               reg = rd32(E1000_VLVF(i));
+               if ((reg & E1000_VLVF_VLANID_ENABLE) &&
+                   vid == (reg & E1000_VLVF_VLANID_MASK))
+                       break;
+       }
+
+       if (add) {
+               if (i == E1000_VLVF_ARRAY_SIZE) {
+                       /* Did not find a matching VLAN ID entry that was
+                        * enabled.  Search for a free filter entry, i.e.
+                        * one without the enable bit set
+                        */
+                       for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
+                               reg = rd32(E1000_VLVF(i));
+                               if (!(reg & E1000_VLVF_VLANID_ENABLE))
+                                       break;
+                       }
+               }
+               if (i < E1000_VLVF_ARRAY_SIZE) {
+                       /* Found an enabled/available entry */
+                       reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
+
+                       /* if !enabled we need to set this up in vfta */
+                       if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
+                               /* add VID to filter table, if bit already set
+                                * PF must have added it outside of table */
+                               if (igb_vfta_set(hw, vid, true))
+                                       reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT +
+                                               adapter->vfs_allocated_count);
+                               reg |= E1000_VLVF_VLANID_ENABLE;
+                       }
+                       reg &= ~E1000_VLVF_VLANID_MASK;
+                       reg |= vid;
+
+                       wr32(E1000_VLVF(i), reg);
+                       return 0;
+               }
+       } else {
+               if (i < E1000_VLVF_ARRAY_SIZE) {
+                       /* remove vf from the pool */
+                       reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
+                       /* if pool is empty then remove entry from vfta */
+                       if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
+                               reg = 0;
+                               igb_vfta_set(hw, vid, false);
+                       }
+                       wr32(E1000_VLVF(i), reg);
+                       return 0;
+               }
+       }
+       return -1;
+}
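
A hypothetical PF-side use of the helper above, adding VLAN 100 to the pool
of VF 2 (it returns 0 on success and -1 when VFs are disabled or no VLVF
slot is free):

	if (igb_vlvf_set(adapter, 100, true, 2))
		dev_err(&adapter->pdev->dev,
			"no free VLVF filter entry for VLAN 100\n");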
+
+static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
+{
+       int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
+       int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
+
+       return igb_vlvf_set(adapter, vid, add, vf);
+}
+
+static inline void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
+{
+       struct e1000_hw *hw = &adapter->hw;
+
+       /* disable mailbox functionality for vf */
+       adapter->vf_data[vf].clear_to_send = false;
+
+       /* reset offloads to defaults */
+       igb_set_vmolr(hw, vf);
+
+       /* reset vlans for device */
+       igb_clear_vf_vfta(adapter, vf);
+
+       /* reset multicast table array for vf */
+       adapter->vf_data[vf].num_vf_mc_hashes = 0;
+
+       /* Flush and reset the mta with the new values */
+       igb_set_multi(adapter->netdev);
+}
+
+static inline void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
+       u32 reg, msgbuf[3];
+       u8 *addr = (u8 *)(&msgbuf[1]);
+
+       /* process all the same items cleared in a function level reset */
+       igb_vf_reset_event(adapter, vf);
+
+       /* set vf mac address */
+       igb_rar_set(hw, vf_mac, vf + 1);
+       igb_set_rah_pool(hw, vf, vf + 1);
+
+       /* enable transmit and receive for vf */
+       reg = rd32(E1000_VFTE);
+       wr32(E1000_VFTE, reg | (1 << vf));
+       reg = rd32(E1000_VFRE);
+       wr32(E1000_VFRE, reg | (1 << vf));
+
+       /* enable mailbox functionality for vf */
+       adapter->vf_data[vf].clear_to_send = true;
+
+       /* reply to reset with ack and vf mac address */
+       msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
+       memcpy(addr, vf_mac, 6);
+       igb_write_mbx(hw, msgbuf, 3, vf);
+}
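
Seen from the VF end, the reset handshake this function answers would run
roughly as follows (the VF-side mailbox helpers are hypothetical; the real
VF driver is a separate module):

	u32 msgbuf[3] = { E1000_VF_RESET };

	vf_mbx_write(hw, msgbuf, 1);	/* request a function-level reset */
	vf_mbx_read(hw, msgbuf, 3);	/* PF reply: ack plus MAC address */
	if (msgbuf[0] == (E1000_VF_RESET | E1000_VT_MSGTYPE_ACK))
		memcpy(vf_mac, &msgbuf[1], 6);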
+
+static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
+{
+       unsigned char *addr = (unsigned char *)&msg[1];
+       int err = -1;
+
+       if (is_valid_ether_addr(addr))
+               err = igb_set_vf_mac(adapter, vf, addr);
+
+       return err;
+}
+
+static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       u32 msg = E1000_VT_MSGTYPE_NACK;
+
+       /* if device isn't clear to send it shouldn't be reading either */
+       if (!adapter->vf_data[vf].clear_to_send)
+               igb_write_mbx(hw, &msg, 1, vf);
+}
+
+static void igb_msg_task(struct igb_adapter *adapter)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       u32 vf;
+
+       for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
+               /* process any reset requests */
+               if (!igb_check_for_rst(hw, vf)) {
+                       adapter->vf_data[vf].clear_to_send = false;
+                       igb_vf_reset_event(adapter, vf);
+               }
+
+               /* process any messages pending */
+               if (!igb_check_for_msg(hw, vf))
+                       igb_rcv_msg_from_vf(adapter, vf);
+
+               /* process any acks */
+               if (!igb_check_for_ack(hw, vf))
+                       igb_rcv_ack_from_vf(adapter, vf);
+       }
+}
+
+static int igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
+{
+       u32 mbx_size = E1000_VFMAILBOX_SIZE;
+       u32 msgbuf[E1000_VFMAILBOX_SIZE];
+       struct e1000_hw *hw = &adapter->hw;
+       s32 retval;
+
+       retval = igb_read_mbx(hw, msgbuf, mbx_size, vf);
+
+       if (retval)
+               dev_err(&adapter->pdev->dev,
+                       "Error receiving message from VF\n");
+
+       /* this is a message we already processed, do nothing */
+       if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
+               return retval;
+
+       /*
+        * until the vf completes a reset it should not be
+        * allowed to start any configuration.
+        */
+       if (msgbuf[0] == E1000_VF_RESET) {
+               igb_vf_reset_msg(adapter, vf);
+               return retval;
+       }
+
+       if (!adapter->vf_data[vf].clear_to_send) {
+               msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
+               igb_write_mbx(hw, msgbuf, 1, vf);
+               return retval;
+       }
+
+       switch (msgbuf[0] & 0xFFFF) {
+       case E1000_VF_SET_MAC_ADDR:
+               retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
+               break;
+       case E1000_VF_SET_MULTICAST:
+               retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
+               break;
+       case E1000_VF_SET_LPE:
+               retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
+               break;
+       case E1000_VF_SET_VLAN:
+               retval = igb_set_vf_vlan(adapter, msgbuf, vf);
+               break;
+       default:
+               dev_err(&adapter->pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
+               retval = -1;
+               break;
+       }
+
+       /* notify the VF of the results of what it sent us */
+       if (retval)
+               msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
+       else
+               msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
+
+       msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
+
+       igb_write_mbx(hw, msgbuf, 1, vf);
+
+       return retval;
+}
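
The mailbox wire format this dispatcher implies: msgbuf[0] carries the
command in its low 16 bits and the E1000_VT_MSGINFO field above them, with
the payload starting at msgbuf[1]; the PF always replies with the original
word plus E1000_VT_MSGTYPE_ACK or _NACK and _CTS. A sketch of a VF multicast
request matching igb_set_vf_multicasts() above (the send helper is
hypothetical):

	u32 msgbuf[16];	/* E1000_VFMAILBOX_SIZE words */

	msgbuf[0] = E1000_VF_SET_MULTICAST |
		    (n_hashes << E1000_VT_MSGINFO_SHIFT);
	memcpy(&msgbuf[1], mc_hash_list, n_hashes * sizeof(u16));
	vf_mbx_write(hw, msgbuf, 1 + DIV_ROUND_UP(n_hashes, 2));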
+
 /**
  * igb_intr_msi - Interrupt Handler
  * @irq: interrupt number
@@ -3507,19 +4166,24 @@ static irqreturn_t igb_intr_msi(int irq, void *data)
 
        igb_write_itr(adapter->rx_ring);
 
+       if (icr & E1000_ICR_DOUTSYNC) {
+               /* HW is reporting DMA is out of sync */
+               adapter->stats.doosync++;
+       }
+
        if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
                hw->mac.get_link_status = 1;
                if (!test_bit(__IGB_DOWN, &adapter->state))
                        mod_timer(&adapter->watchdog_timer, jiffies + 1);
        }
 
-       netif_rx_schedule(&adapter->rx_ring[0].napi);
+       napi_schedule(&adapter->rx_ring[0].napi);
 
        return IRQ_HANDLED;
 }
 
 /**
- * igb_intr - Interrupt Handler
+ * igb_intr - Legacy Interrupt Handler
  * @irq: interrupt number
  * @data: pointer to a network interface device structure
  **/
@@ -3531,7 +4195,6 @@ static irqreturn_t igb_intr(int irq, void *data)
        /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
         * need for the IMC write */
        u32 icr = rd32(E1000_ICR);
-       u32 eicr = 0;
        if (!icr)
                return IRQ_NONE;  /* Not our interrupt */
 
@@ -3542,7 +4205,10 @@ static irqreturn_t igb_intr(int irq, void *data)
        if (!(icr & E1000_ICR_INT_ASSERTED))
                return IRQ_NONE;
 
-       eicr = rd32(E1000_EICR);
+       if (icr & E1000_ICR_DOUTSYNC) {
+               /* HW is reporting DMA is out of sync */
+               adapter->stats.doosync++;
+       }
 
        if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
                hw->mac.get_link_status = 1;
@@ -3551,11 +4217,31 @@ static irqreturn_t igb_intr(int irq, void *data)
                        mod_timer(&adapter->watchdog_timer, jiffies + 1);
        }
 
-       netif_rx_schedule(&adapter->rx_ring[0].napi);
+       napi_schedule(&adapter->rx_ring[0].napi);
 
        return IRQ_HANDLED;
 }
 
+static inline void igb_rx_irq_enable(struct igb_ring *rx_ring)
+{
+       struct igb_adapter *adapter = rx_ring->adapter;
+       struct e1000_hw *hw = &adapter->hw;
+
+       if (adapter->itr_setting & 3) {
+               if (adapter->num_rx_queues == 1)
+                       igb_set_itr(adapter);
+               else
+                       igb_update_ring_itr(rx_ring);
+       }
+
+       if (!test_bit(__IGB_DOWN, &adapter->state)) {
+               if (adapter->msix_entries)
+                       wr32(E1000_EIMS, rx_ring->eims_value);
+               else
+                       igb_irq_enable(adapter);
+       }
+}
+
 /**
  * igb_poll - NAPI Rx polling callback
  * @napi: napi polling structure
@@ -3564,70 +4250,64 @@ static irqreturn_t igb_intr(int irq, void *data)
 static int igb_poll(struct napi_struct *napi, int budget)
 {
        struct igb_ring *rx_ring = container_of(napi, struct igb_ring, napi);
-       struct igb_adapter *adapter = rx_ring->adapter;
-       struct net_device *netdev = adapter->netdev;
-       int tx_clean_complete, work_done = 0;
+       int work_done = 0;
 
-       /* this poll routine only supports one tx and one rx queue */
 #ifdef CONFIG_IGB_DCA
-       if (adapter->flags & IGB_FLAG_DCA_ENABLED)
-               igb_update_tx_dca(&adapter->tx_ring[0]);
+       if (rx_ring->adapter->flags & IGB_FLAG_DCA_ENABLED)
+               igb_update_rx_dca(rx_ring);
 #endif
-       tx_clean_complete = igb_clean_tx_irq(&adapter->tx_ring[0]);
+       igb_clean_rx_irq_adv(rx_ring, &work_done, budget);
 
+       if (rx_ring->buddy) {
 #ifdef CONFIG_IGB_DCA
-       if (adapter->flags & IGB_FLAG_DCA_ENABLED)
-               igb_update_rx_dca(&adapter->rx_ring[0]);
+               if (rx_ring->adapter->flags & IGB_FLAG_DCA_ENABLED)
+                       igb_update_tx_dca(rx_ring->buddy);
 #endif
-       igb_clean_rx_irq_adv(&adapter->rx_ring[0], &work_done, budget);
+               if (!igb_clean_tx_irq(rx_ring->buddy))
+                       work_done = budget;
+       }
 
-       /* If no Tx and not enough Rx work done, exit the polling mode */
-       if ((tx_clean_complete && (work_done < budget)) ||
-           !netif_running(netdev)) {
-               if (adapter->itr_setting & 3)
-                       igb_set_itr(adapter);
-               netif_rx_complete(napi);
-               if (!test_bit(__IGB_DOWN, &adapter->state))
-                       igb_irq_enable(adapter);
-               return 0;
+       /* If not enough Rx work done, exit the polling mode */
+       if (work_done < budget) {
+               napi_complete(napi);
+               igb_rx_irq_enable(rx_ring);
        }
 
-       return 1;
+       return work_done;
 }
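
The igb_poll() rewrite above adopts the consolidated NAPI contract: the poll
routine returns how many packets it processed and calls napi_complete() only
when it did less work than its budget; returning the full budget keeps it
scheduled. A minimal skeleton of the pattern (helper names hypothetical):

	static int my_poll(struct napi_struct *napi, int budget)
	{
		int work_done = clean_rx(napi, budget);	/* hypothetical rx cleanup */

		if (work_done < budget) {
			napi_complete(napi);
			reenable_rx_irq(napi);		/* hypothetical irq unmask */
		}
		return work_done;
	}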
 
-static int igb_clean_rx_ring_msix(struct napi_struct *napi, int budget)
+/**
+ * igb_tx_hwtstamp - utility function which checks for TX time stamp
+ * @adapter: board private structure
+ * @skb: packet that was just sent
+ *
+ * If we were asked to do hardware stamping and such a time stamp is
+ * available, then it must have been for this skb here because we only
+ * allow one such packet into the queue.
+ */
+static void igb_tx_hwtstamp(struct igb_adapter *adapter, struct sk_buff *skb)
 {
-       struct igb_ring *rx_ring = container_of(napi, struct igb_ring, napi);
-       struct igb_adapter *adapter = rx_ring->adapter;
+       union skb_shared_tx *shtx = skb_tx(skb);
        struct e1000_hw *hw = &adapter->hw;
-       struct net_device *netdev = adapter->netdev;
-       int work_done = 0;
-
-#ifdef CONFIG_IGB_DCA
-       if (adapter->flags & IGB_FLAG_DCA_ENABLED)
-               igb_update_rx_dca(rx_ring);
-#endif
-       igb_clean_rx_irq_adv(rx_ring, &work_done, budget);
 
-
-       /* If not enough Rx work done, exit the polling mode */
-       if ((work_done == 0) || !netif_running(netdev)) {
-               netif_rx_complete(napi);
-
-               if (adapter->itr_setting & 3) {
-                       if (adapter->num_rx_queues == 1)
-                               igb_set_itr(adapter);
-                       else
-                               igb_update_ring_itr(rx_ring);
+       if (unlikely(shtx->hardware)) {
+               u32 valid = rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID;
+               if (valid) {
+                       u64 regval = rd32(E1000_TXSTMPL);
+                       u64 ns;
+                       struct skb_shared_hwtstamps shhwtstamps;
+
+                       memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+                       regval |= (u64)rd32(E1000_TXSTMPH) << 32;
+                       ns = timecounter_cyc2time(&adapter->clock,
+                                                 regval);
+                       timecompare_update(&adapter->compare, ns);
+                       shhwtstamps.hwtstamp = ns_to_ktime(ns);
+                       shhwtstamps.syststamp =
+                               timecompare_transform(&adapter->compare, ns);
+                       skb_tstamp_tx(skb, &shhwtstamps);
                }
-
-               if (!test_bit(__IGB_DOWN, &adapter->state))
-                       wr32(E1000_EIMS, rx_ring->eims_value);
-
-               return 0;
        }
-
-       return 1;
 }
 
 /**
@@ -3668,6 +4348,8 @@ static bool igb_clean_tx_irq(struct igb_ring *tx_ring)
                                            skb->len;
                                total_packets += segs;
                                total_bytes += bytecount;
+
+                               igb_tx_hwtstamp(adapter, skb);
                        }
 
                        igb_unmap_and_free_tx_resource(adapter, buffer_info);
@@ -3677,7 +4359,6 @@ static bool igb_clean_tx_irq(struct igb_ring *tx_ring)
                        if (i == tx_ring->count)
                                i = 0;
                }
-
                eop = tx_ring->buffer_info[i].next_to_watch;
                eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
        }
@@ -3686,7 +4367,7 @@ static bool igb_clean_tx_irq(struct igb_ring *tx_ring)
 
        if (unlikely(count &&
                     netif_carrier_ok(netdev) &&
-                    IGB_DESC_UNUSED(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
+                    igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
                /* Make sure that anybody stopping the queue after this
                 * sees the new next_to_clean.
                 */
@@ -3742,44 +4423,11 @@ static bool igb_clean_tx_irq(struct igb_ring *tx_ring)
        return (count < tx_ring->count);
 }
 
-#ifdef CONFIG_IGB_LRO
- /**
- * igb_get_skb_hdr - helper function for LRO header processing
- * @skb: pointer to sk_buff to be added to LRO packet
- * @iphdr: pointer to ip header structure
- * @tcph: pointer to tcp header structure
- * @hdr_flags: pointer to header flags
- * @priv: pointer to the receive descriptor for the current sk_buff
- **/
-static int igb_get_skb_hdr(struct sk_buff *skb, void **iphdr, void **tcph,
-                           u64 *hdr_flags, void *priv)
-{
-       union e1000_adv_rx_desc *rx_desc = priv;
-       u16 pkt_type = rx_desc->wb.lower.lo_dword.pkt_info &
-                      (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP);
-
-       /* Verify that this is a valid IPv4 TCP packet */
-       if (pkt_type != (E1000_RXDADV_PKTTYPE_IPV4 |
-                         E1000_RXDADV_PKTTYPE_TCP))
-               return -1;
-
-       /* Set network headers */
-       skb_reset_network_header(skb);
-       skb_set_transport_header(skb, ip_hdrlen(skb));
-       *iphdr = ip_hdr(skb);
-       *tcph = tcp_hdr(skb);
-       *hdr_flags = LRO_IPV4 | LRO_TCP;
-
-       return 0;
-
-}
-#endif /* CONFIG_IGB_LRO */
-
 /**
  * igb_receive_skb - helper function to handle rx indications
- * @ring: pointer to receive ring receving this packet 
+ * @ring: pointer to receive ring receiving this packet
  * @status: descriptor status field as written by hardware
- * @vlan: descriptor vlan field as written by hardware (no le/be conversion)
+ * @rx_desc: receive descriptor containing vlan and type information.
  * @skb: pointer to sk_buff to be indicated to stack
  **/
 static void igb_receive_skb(struct igb_ring *ring, u8 status,
@@ -3789,50 +4437,43 @@ static void igb_receive_skb(struct igb_ring *ring, u8 status,
        struct igb_adapter * adapter = ring->adapter;
        bool vlan_extracted = (adapter->vlgrp && (status & E1000_RXD_STAT_VP));
 
-#ifdef CONFIG_IGB_LRO
-       if (adapter->netdev->features & NETIF_F_LRO &&
-           skb->ip_summed == CHECKSUM_UNNECESSARY) {
-               if (vlan_extracted)
-                       lro_vlan_hwaccel_receive_skb(&ring->lro_mgr, skb,
-                                          adapter->vlgrp,
-                                          le16_to_cpu(rx_desc->wb.upper.vlan),
-                                          rx_desc);
-               else
-                       lro_receive_skb(&ring->lro_mgr,skb, rx_desc);
-               ring->lro_used = 1;
-       } else {
-#endif
-               if (vlan_extracted)
-                       vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
-                                         le16_to_cpu(rx_desc->wb.upper.vlan));
-               else
-
-                       netif_receive_skb(skb);
-#ifdef CONFIG_IGB_LRO
-       }
-#endif
+       skb_record_rx_queue(skb, ring->queue_index);
+       if (vlan_extracted)
+               vlan_gro_receive(&ring->napi, adapter->vlgrp,
+                                le16_to_cpu(rx_desc->wb.upper.vlan),
+                                skb);
+       else
+               napi_gro_receive(&ring->napi, skb);
 }
 
-
 static inline void igb_rx_checksum_adv(struct igb_adapter *adapter,
                                       u32 status_err, struct sk_buff *skb)
 {
        skb->ip_summed = CHECKSUM_NONE;
 
        /* Ignore Checksum bit is set or checksum is disabled through ethtool */
-       if ((status_err & E1000_RXD_STAT_IXSM) || !adapter->rx_csum)
+       if ((status_err & E1000_RXD_STAT_IXSM) ||
+           (adapter->flags & IGB_FLAG_RX_CSUM_DISABLED))
                return;
        /* TCP/UDP checksum error bit is set */
        if (status_err &
            (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
+               /*
+                * work around errata with SCTP packets where the TCPE aka
+                * L4E bit is set incorrectly on 64 byte (60 byte w/o CRC)
+                * packets; let the stack verify the CRC32c instead
+                */
+               if (!((adapter->hw.mac.type == e1000_82576) &&
+                     (skb->len == 60)))
+                       adapter->hw_csum_err++;
                /* let the stack verify checksum errors */
-               adapter->hw_csum_err++;
                return;
        }
        /* It must be a TCP or UDP packet with a valid checksum */
        if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
                skb->ip_summed = CHECKSUM_UNNECESSARY;
 
+       dev_dbg(&adapter->pdev->dev, "cksum success: bits %08X\n", status_err);
        adapter->hw_csum_good++;
 }
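
For reference, the magic 60 in the errata check above (an inference from
Ethernet framing, not a quote from the errata sheet):

	/* min Ethernet frame = 64 bytes including the 4-byte FCS, so the
	 * shortest skb the driver ever sees is 64 - 4 = 60 bytes; short
	 * SCTP packets padded up to that minimum are the skb->len == 60
	 * case being excused above */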
 
@@ -3841,17 +4482,19 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring,
 {
        struct igb_adapter *adapter = rx_ring->adapter;
        struct net_device *netdev = adapter->netdev;
+       struct e1000_hw *hw = &adapter->hw;
        struct pci_dev *pdev = adapter->pdev;
        union e1000_adv_rx_desc *rx_desc , *next_rxd;
        struct igb_buffer *buffer_info , *next_buffer;
        struct sk_buff *skb;
-       unsigned int i;
-       u32 length, hlen, staterr;
        bool cleaned = false;
        int cleaned_count = 0;
        unsigned int total_bytes = 0, total_packets = 0;
+       unsigned int i;
+       u32 length, hlen, staterr;
 
        i = rx_ring->next_to_clean;
+       buffer_info = &rx_ring->buffer_info[i];
        rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
        staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
 
@@ -3859,25 +4502,22 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring,
                if (*work_done >= budget)
                        break;
                (*work_done)++;
-               buffer_info = &rx_ring->buffer_info[i];
 
-               /* HW will not DMA in data larger than the given buffer, even
-                * if it parses the (NFS, of course) header to be larger.  In
-                * that case, it fills the header buffer and spills the rest
-                * into the page.
-                */
-               hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
-                 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
-               if (hlen > adapter->rx_ps_hdr_size)
-                       hlen = adapter->rx_ps_hdr_size;
+               skb = buffer_info->skb;
+               prefetch(skb->data - NET_IP_ALIGN);
+               buffer_info->skb = NULL;
+
+               i++;
+               if (i == rx_ring->count)
+                       i = 0;
+               next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
+               prefetch(next_rxd);
+               next_buffer = &rx_ring->buffer_info[i];
 
                length = le16_to_cpu(rx_desc->wb.upper.length);
                cleaned = true;
                cleaned_count++;
 
-               skb = buffer_info->skb;
-               prefetch(skb->data - NET_IP_ALIGN);
-               buffer_info->skb = NULL;
                if (!adapter->rx_ps_hdr_size) {
                        pci_unmap_single(pdev, buffer_info->dma,
                                         adapter->rx_buffer_len +
@@ -3887,10 +4527,19 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring,
                        goto send_up;
                }
 
+               /* HW will not DMA in data larger than the given buffer, even
+                * if it parses the (NFS, of course) header to be larger.  In
+                * that case, it fills the header buffer and spills the rest
+                * into the page.
+                */
+               hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
+                 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
+               if (hlen > adapter->rx_ps_hdr_size)
+                       hlen = adapter->rx_ps_hdr_size;
+
                if (!skb_shinfo(skb)->nr_frags) {
                        pci_unmap_single(pdev, buffer_info->dma,
-                                        adapter->rx_ps_hdr_size +
-                                          NET_IP_ALIGN,
+                                        adapter->rx_ps_hdr_size + NET_IP_ALIGN,
                                         PCI_DMA_FROMDEVICE);
                        skb_put(skb, hlen);
                }
@@ -3916,13 +4565,6 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring,
 
                        skb->truesize += length;
                }
-send_up:
-               i++;
-               if (i == rx_ring->count)
-                       i = 0;
-               next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
-               prefetch(next_rxd);
-               next_buffer = &rx_ring->buffer_info[i];
 
                if (!(staterr & E1000_RXD_STAT_EOP)) {
                        buffer_info->skb = next_buffer->skb;
@@ -3931,6 +4573,47 @@ send_up:
                        next_buffer->dma = 0;
                        goto next_desc;
                }
+send_up:
+               /*
+                * If this bit is set, then the RX registers contain
+                * the time stamp. No other packet will be time
+                * stamped until we read these registers, so read the
+                * registers to make them available again. Because
+                * only one packet can be time stamped at a time, we
+                * know that the register values must belong to this
+                * one here and therefore we don't need to compare
+                * any of the additional attributes stored for it.
+                *
+                * If nothing went wrong, then it should have a
+                * skb_shared_tx that we can turn into a
+                * skb_shared_hwtstamps.
+                *
+                * TODO: can time stamping be triggered (thus locking
+                * the registers) without the packet reaching this point
+                * here? In that case RX time stamping would get stuck.
+                *
+                * TODO: in "time stamp all packets" mode this bit is
+                * not set. Need a global flag for this mode and then
+                * always read the registers. Cannot be done without
+                * a race condition.
+                */
+               if (unlikely(staterr & E1000_RXD_STAT_TS)) {
+                       u64 regval;
+                       u64 ns;
+                       struct skb_shared_hwtstamps *shhwtstamps =
+                               skb_hwtstamps(skb);
+
+                       WARN(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID),
+                            "igb: no RX time stamp available for time stamped packet");
+                       regval = rd32(E1000_RXSTMPL);
+                       regval |= (u64)rd32(E1000_RXSTMPH) << 32;
+                       ns = timecounter_cyc2time(&adapter->clock, regval);
+                       timecompare_update(&adapter->compare, ns);
+                       memset(shhwtstamps, 0, sizeof(*shhwtstamps));
+                       shhwtstamps->hwtstamp = ns_to_ktime(ns);
+                       shhwtstamps->syststamp =
+                               timecompare_transform(&adapter->compare, ns);
+               }
 
                if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
                        dev_kfree_skb_irq(skb);
@@ -3958,19 +4641,11 @@ next_desc:
                /* use prefetched values */
                rx_desc = next_rxd;
                buffer_info = next_buffer;
-
                staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
        }
 
        rx_ring->next_to_clean = i;
-       cleaned_count = IGB_DESC_UNUSED(rx_ring);
-
-#ifdef CONFIG_IGB_LRO
-       if (rx_ring->lro_used) {
-               lro_flush_all(&rx_ring->lro_mgr);
-               rx_ring->lro_used = 0;
-       }
-#endif
+       cleaned_count = igb_desc_unused(rx_ring);
 
        if (cleaned_count)
                igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
@@ -3984,7 +4659,6 @@ next_desc:
        return cleaned;
 }
 
-
 /**
  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
  * @adapter: address of board private structure
@@ -3999,10 +4673,17 @@ static void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring,
        struct igb_buffer *buffer_info;
        struct sk_buff *skb;
        unsigned int i;
+       int bufsz;
 
        i = rx_ring->next_to_use;
        buffer_info = &rx_ring->buffer_info[i];
 
+       if (adapter->rx_ps_hdr_size)
+               bufsz = adapter->rx_ps_hdr_size;
+       else
+               bufsz = adapter->rx_buffer_len;
+       bufsz += NET_IP_ALIGN;
+
        while (cleaned_count--) {
                rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
 
@@ -4018,23 +4699,14 @@ static void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring,
                                buffer_info->page_offset ^= PAGE_SIZE / 2;
                        }
                        buffer_info->page_dma =
-                               pci_map_page(pdev,
-                                            buffer_info->page,
+                               pci_map_page(pdev, buffer_info->page,
                                             buffer_info->page_offset,
                                             PAGE_SIZE / 2,
                                             PCI_DMA_FROMDEVICE);
                }
 
                if (!buffer_info->skb) {
-                       int bufsz;
-
-                       if (adapter->rx_ps_hdr_size)
-                               bufsz = adapter->rx_ps_hdr_size;
-                       else
-                               bufsz = adapter->rx_buffer_len;
-                       bufsz += NET_IP_ALIGN;
                        skb = netdev_alloc_skb(netdev, bufsz);
-
                        if (!skb) {
                                adapter->alloc_rx_buff_failed++;
                                goto no_buffers;
@@ -4050,7 +4722,6 @@ static void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring,
                        buffer_info->dma = pci_map_single(pdev, skb->data,
                                                          bufsz,
                                                          PCI_DMA_FROMDEVICE);
-
                }
                /* Refresh the desc even if buffer_addrs didn't change because
                 * each write-back erases this info. */
@@ -4119,6 +4790,163 @@ static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
        return 0;
 }
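
For context, the handler added below is reached through the SIOCSHWTSTAMP
ioctl with a struct hwtstamp_config from <linux/net_tstamp.h> (the header
this patch adds to the includes). A rough userspace sketch, assuming an
interface named "eth0" and an already-open UDP socket fd:

	struct hwtstamp_config config = {
		.tx_type   = HWTSTAMP_TX_ON,
		.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_SYNC,
	};
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&config;
	if (ioctl(fd, SIOCSHWTSTAMP, &ifr) < 0)
		perror("SIOCSHWTSTAMP");
	/* the driver may rewrite config.rx_filter to the filter it
	 * actually programmed (see the fallbacks in the handler below) */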
 
+/**
+ * igb_hwtstamp_ioctl - control hardware time stamping
+ * @netdev: network interface device structure
+ * @ifr: interface request carrying the hwtstamp_config
+ * @cmd: ioctl command (SIOCSHWTSTAMP)
+ *
+ * Outgoing time stamping can be enabled and disabled. Play nice and
+ * disable it when requested, although it shouldn't cause any overhead
+ * when no packet needs it. At most one packet in the queue may be
+ * marked for time stamping, otherwise it would be impossible to tell
+ * for sure to which packet the hardware time stamp belongs.
+ *
+ * Incoming time stamping has to be configured via the hardware
+ * filters. Not all combinations are supported, in particular event
+ * type has to be specified. Matching the kind of event packet is
+ * not supported, with the exception of "all V2 events regardless of
+ * level 2 or 4".
+ *
+ **/
+static int igb_hwtstamp_ioctl(struct net_device *netdev,
+                             struct ifreq *ifr, int cmd)
+{
+       struct igb_adapter *adapter = netdev_priv(netdev);
+       struct e1000_hw *hw = &adapter->hw;
+       struct hwtstamp_config config;
+       u32 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
+       u32 tsync_rx_ctl_bit = E1000_TSYNCRXCTL_ENABLED;
+       u32 tsync_rx_ctl_type = 0;
+       u32 tsync_rx_cfg = 0;
+       int is_l4 = 0;
+       int is_l2 = 0;
+       short port = 319; /* PTP */
+       u32 regval;
+
+       if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+               return -EFAULT;
+
+       /* reserved for future extensions */
+       if (config.flags)
+               return -EINVAL;
+
+       switch (config.tx_type) {
+       case HWTSTAMP_TX_OFF:
+               tsync_tx_ctl_bit = 0;
+               break;
+       case HWTSTAMP_TX_ON:
+               tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
+               break;
+       default:
+               return -ERANGE;
+       }
+
+       switch (config.rx_filter) {
+       case HWTSTAMP_FILTER_NONE:
+               tsync_rx_ctl_bit = 0;
+               break;
+       case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+       case HWTSTAMP_FILTER_ALL:
+               /*
+                * the TSYNCRXCFG register can select only one message
+                * type, so it is not possible to time stamp both Sync
+                * and Delay_Req messages => fall back to time stamping
+                * all packets
+                */
+               tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_ALL;
+               config.rx_filter = HWTSTAMP_FILTER_ALL;
+               break;
+       case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+               tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
+               tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
+               is_l4 = 1;
+               break;
+       case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+               tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
+               tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
+               is_l4 = 1;
+               break;
+       case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+               tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
+               tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
+               is_l2 = 1;
+               is_l4 = 1;
+               config.rx_filter = HWTSTAMP_FILTER_SOME;
+               break;
+       case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+       case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+               tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
+               tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
+               is_l2 = 1;
+               is_l4 = 1;
+               config.rx_filter = HWTSTAMP_FILTER_SOME;
+               break;
+       case HWTSTAMP_FILTER_PTP_V2_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+               tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_EVENT_V2;
+               config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+               is_l2 = 1;
+               break;
+       default:
+               return -ERANGE;
+       }
+
+       /* enable/disable TX */
+       regval = rd32(E1000_TSYNCTXCTL);
+       regval = (regval & ~E1000_TSYNCTXCTL_ENABLED) | tsync_tx_ctl_bit;
+       wr32(E1000_TSYNCTXCTL, regval);
+
+       /* enable/disable RX, define which PTP packets are time stamped */
+       regval = rd32(E1000_TSYNCRXCTL);
+       regval = (regval & ~E1000_TSYNCRXCTL_ENABLED) | tsync_rx_ctl_bit;
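+       /* the 0xE mask clears the packet type field (bits 3:1) first */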
+       regval = (regval & ~0xE) | tsync_rx_ctl_type;
+       wr32(E1000_TSYNCRXCTL, regval);
+       wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
+
+       /*
+        * Ethertype Filter Queue Filter[0][15:0] = 0x88F7
+        *                                          (Ethertype to filter on)
+        * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
+        * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping)
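+        *
+        * Combined: (1 << 30) | (1 << 26) | 0x88F7 = 0x440088f7, the
+        * constant written below when L2 time stamping is requested.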
+        */
+       wr32(E1000_ETQF0, is_l2 ? 0x440088f7 : 0);
+
+       /* L4 Queue Filter[0]: only filter by source and destination port */
+       wr32(E1000_SPQF0, htons(port));
+       wr32(E1000_IMIREXT(0), is_l4 ?
+            ((1<<12) | (1<<19) /* bypass size and control flags */) : 0);
+       wr32(E1000_IMIR(0), is_l4 ?
+            (htons(port)
+             | (0<<16) /* immediate interrupt disabled */
+             | 0 /* (1<<17) bit cleared: do not bypass
+                    destination port check */)
+               : 0);
+       wr32(E1000_FTQF0, is_l4 ?
+            (0x11 /* UDP */
+             | (1<<15) /* VF not compared */
+             | (1<<27) /* Enable Timestamping */
+             | (7<<28) /* only source port filter enabled,
+                          source/target address and protocol
+                          masked */)
+            : ((1<<15) | (15<<28) /* all mask bits set = filter not
+                                     enabled */));
+
+       wrfl();
+
+       adapter->hwtstamp_config = config;
+
+       /* clear TX/RX time stamp registers, just to be sure */
+       regval = rd32(E1000_TXSTMPH);
+       regval = rd32(E1000_RXSTMPH);
+
+       return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+               -EFAULT : 0;
+}
+
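+/*
+ * A minimal userspace sketch of driving the ioctl above (hypothetical
+ * snippet; `sock` is assumed to be any open socket and "eth0" an
+ * existing igb interface):
+ *
+ *	struct hwtstamp_config cfg = {
+ *		.tx_type   = HWTSTAMP_TX_ON,
+ *		.rx_filter = HWTSTAMP_FILTER_ALL,
+ *	};
+ *	struct ifreq ifr = {};
+ *
+ *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
+ *	ifr.ifr_data = (void *)&cfg;
+ *	ioctl(sock, SIOCSHWTSTAMP, &ifr);
+ *
+ * On success, cfg.rx_filter reports what the driver actually enabled,
+ * which may be wider than the requested filter (see the switch above).
+ */
+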
 /**
  * igb_ioctl - handle ioctl requests
  * @netdev: network interface device structure
@@ -4132,6 +4960,8 @@ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
        case SIOCGMIIREG:
        case SIOCSMIIREG:
                return igb_mii_ioctl(netdev, ifr, cmd);
+       case SIOCSHWTSTAMP:
+               return igb_hwtstamp_ioctl(netdev, ifr, cmd);
        default:
                return -EOPNOTSUPP;
        }
@@ -4158,8 +4988,6 @@ static void igb_vlan_rx_register(struct net_device *netdev,
                rctl &= ~E1000_RCTL_CFIEN;
                wr32(E1000_RCTL, rctl);
                igb_update_mng_vlan(adapter);
-               wr32(E1000_RLPML,
-                               adapter->max_frame_size + VLAN_TAG_SIZE);
        } else {
                /* disable VLAN tag insert/strip */
                ctrl = rd32(E1000_CTRL);
@@ -4170,10 +4998,10 @@ static void igb_vlan_rx_register(struct net_device *netdev,
                        igb_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
                        adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
                }
-               wr32(E1000_RLPML,
-                               adapter->max_frame_size);
        }
 
+       igb_rlpml_set(adapter);
+
        if (!test_bit(__IGB_DOWN, &adapter->state))
                igb_irq_enable(adapter);
 }
@@ -4182,24 +5010,25 @@ static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
-       u32 vfta, index;
+       int pf_id = adapter->vfs_allocated_count;
 
-       if ((adapter->hw.mng_cookie.status &
+       if ((hw->mng_cookie.status &
             E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
            (vid == adapter->mng_vlan_id))
                return;
-       /* add VID to filter table */
-       index = (vid >> 5) & 0x7F;
-       vfta = array_rd32(E1000_VFTA, index);
-       vfta |= (1 << (vid & 0x1F));
-       igb_write_vfta(&adapter->hw, index, vfta);
+
+       /* add the VID to the VLVF if SR-IOV is enabled;
+        * if that fails, add it directly to the filter table */
+       if (igb_vlvf_set(adapter, vid, true, pf_id))
+               igb_vfta_set(hw, vid, true);
 }
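+
+/*
+ * For reference, the per-bit update that igb_vfta_set() performs (a
+ * sketch based on the code removed above); each of the 128 VFTA
+ * registers covers 32 consecutive VLAN IDs:
+ *
+ *	index = (vid >> 5) & 0x7F;	   e.g. vid 100 -> register 3
+ *	vfta |= 1 << (vid & 0x1F);	   e.g. vid 100 -> bit 4
+ */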
 
 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
-       u32 vfta, index;
+       int pf_id = adapter->vfs_allocated_count;
 
        igb_irq_disable(adapter);
        vlan_group_set_device(adapter->vlgrp, vid, NULL);
@@ -4215,11 +5044,10 @@ static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
                return;
        }
 
-       /* remove VID from filter table */
-       index = (vid >> 5) & 0x7F;
-       vfta = array_rd32(E1000_VFTA, index);
-       vfta &= ~(1 << (vid & 0x1F));
-       igb_write_vfta(&adapter->hw, index, vfta);
+       /* remove the VID from the VLVF if SR-IOV is enabled;
+        * if it is not in the VLVF, remove it from the VFTA */
+       if (igb_vlvf_set(adapter, vid, false, pf_id))
+               igb_vfta_set(hw, vid, false);
 }
 
 static void igb_restore_vlan(struct igb_adapter *adapter)
@@ -4276,8 +5104,7 @@ int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
        return 0;
 }
 
-
-static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
+static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
 {
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct igb_adapter *adapter = netdev_priv(netdev);
@@ -4336,15 +5163,9 @@ static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
                wr32(E1000_WUFC, 0);
        }
 
-       /* make sure adapter isn't asleep if manageability/wol is enabled */
-       if (wufc || adapter->en_mng_pt) {
-               pci_enable_wake(pdev, PCI_D3hot, 1);
-               pci_enable_wake(pdev, PCI_D3cold, 1);
-       } else {
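+       /* tell the caller if manageability/wol needs the adapter awake */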
+       *enable_wake = wufc || adapter->en_mng_pt;
+       if (!*enable_wake)
                igb_shutdown_fiber_serdes_link_82575(hw);
-               pci_enable_wake(pdev, PCI_D3hot, 0);
-               pci_enable_wake(pdev, PCI_D3cold, 0);
-       }
 
        /* Release control of h/w to f/w.  If f/w is AMT enabled, this
         * would have already happened in close and is redundant. */
@@ -4352,12 +5173,29 @@ static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
 
        pci_disable_device(pdev);
 
-       pci_set_power_state(pdev, pci_choose_state(pdev, state));
-
        return 0;
 }
 
 #ifdef CONFIG_PM
+static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+       int retval;
+       bool wake;
+
+       retval = __igb_shutdown(pdev, &wake);
+       if (retval)
+               return retval;
+
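+       /* arm wake-up only when WoL or manageability requested it */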
+       if (wake) {
+               pci_prepare_to_sleep(pdev);
+       } else {
+               pci_wake_from_d3(pdev, false);
+               pci_set_power_state(pdev, PCI_D3hot);
+       }
+
+       return 0;
+}
+
 static int igb_resume(struct pci_dev *pdev)
 {
        struct net_device *netdev = pci_get_drvdata(pdev);
@@ -4368,10 +5206,7 @@ static int igb_resume(struct pci_dev *pdev)
        pci_set_power_state(pdev, PCI_D0);
        pci_restore_state(pdev);
 
-       if (adapter->need_ioport)
-               err = pci_enable_device(pdev);
-       else
-               err = pci_enable_device_mem(pdev);
+       err = pci_enable_device_mem(pdev);
        if (err) {
                dev_err(&pdev->dev,
                        "igb: Cannot enable PCI device from suspend\n");
@@ -4392,6 +5227,11 @@ static int igb_resume(struct pci_dev *pdev)
        /* e1000_power_up_phy(adapter); */
 
        igb_reset(adapter);
+
+       /* let the f/w know that the h/w is now under the control of the
+        * driver. */
+       igb_get_hw_control(adapter);
+
        wr32(E1000_WUS, ~0);
 
        if (netif_running(netdev)) {
@@ -4402,17 +5242,20 @@ static int igb_resume(struct pci_dev *pdev)
 
        netif_device_attach(netdev);
 
-       /* let the f/w know that the h/w is now under the control of the
-        * driver. */
-       igb_get_hw_control(adapter);
-
        return 0;
 }
 #endif
 
 static void igb_shutdown(struct pci_dev *pdev)
 {
-       igb_suspend(pdev, PMSG_SUSPEND);
+       bool wake;
+
+       __igb_shutdown(pdev, &wake);
+
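+       /* when powering off, honor WoL and put the device into D3hot */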
+       if (system_state == SYSTEM_POWER_OFF) {
+               pci_wake_from_d3(pdev, wake);
+               pci_set_power_state(pdev, PCI_D3hot);
+       }
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -4424,22 +5267,27 @@ static void igb_shutdown(struct pci_dev *pdev)
 static void igb_netpoll(struct net_device *netdev)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
+       struct e1000_hw *hw = &adapter->hw;
        int i;
-       int work_done = 0;
-
-       igb_irq_disable(adapter);
-       adapter->flags |= IGB_FLAG_IN_NETPOLL;
 
-       for (i = 0; i < adapter->num_tx_queues; i++)
-               igb_clean_tx_irq(&adapter->tx_ring[i]);
+       if (!adapter->msix_entries) {
+               igb_irq_disable(adapter);
+               napi_schedule(&adapter->rx_ring[0].napi);
+               return;
+       }
 
-       for (i = 0; i < adapter->num_rx_queues; i++)
-               igb_clean_rx_irq_adv(&adapter->rx_ring[i],
-                                    &work_done,
-                                    adapter->rx_ring[i].napi.weight);
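+       /* mask each Tx ring's vector while cleaning it, then re-arm it */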
+       for (i = 0; i < adapter->num_tx_queues; i++) {
+               struct igb_ring *tx_ring = &adapter->tx_ring[i];
+               wr32(E1000_EIMC, tx_ring->eims_value);
+               igb_clean_tx_irq(tx_ring);
+               wr32(E1000_EIMS, tx_ring->eims_value);
+       }
 
-       adapter->flags &= ~IGB_FLAG_IN_NETPOLL;
-       igb_irq_enable(adapter);
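+       /* mask each Rx ring's vector; NAPI poll will re-enable it */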
+       for (i = 0; i < adapter->num_rx_queues; i++) {
+               struct igb_ring *rx_ring = &adapter->rx_ring[i];
+               wr32(E1000_EIMC, rx_ring->eims_value);
+               napi_schedule(&rx_ring->napi);
+       }
 }
 #endif /* CONFIG_NET_POLL_CONTROLLER */
 
@@ -4482,12 +5330,7 @@ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
        pci_ers_result_t result;
        int err;
 
-       if (adapter->need_ioport)
-               err = pci_enable_device(pdev);
-       else
-               err = pci_enable_device_mem(pdev);
-
-       if (err) {
+       if (pci_enable_device_mem(pdev)) {
                dev_err(&pdev->dev,
                        "Cannot re-enable PCI device after reset.\n");
                result = PCI_ERS_RESULT_DISCONNECT;
@@ -4540,4 +5383,87 @@ static void igb_io_resume(struct pci_dev *pdev)
        igb_get_hw_control(adapter);
 }
 
+static inline void igb_set_vmolr(struct e1000_hw *hw, int vfn)
+{
+       u32 reg_data;
+
+       reg_data = rd32(E1000_VMOLR(vfn));
+       reg_data |= E1000_VMOLR_BAM |    /* Accept broadcast */
+                   E1000_VMOLR_ROPE |   /* Accept packets matched in UTA */
+                   E1000_VMOLR_ROMPE |  /* Accept packets matched in MTA */
+                   E1000_VMOLR_AUPE |   /* Accept untagged packets */
+                   E1000_VMOLR_STRVLAN; /* Strip VLAN tags */
+       wr32(E1000_VMOLR(vfn), reg_data);
+}
+
+static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
+                                 int vfn)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       u32 vmolr;
+
+       vmolr = rd32(E1000_VMOLR(vfn));
+       vmolr &= ~E1000_VMOLR_RLPML_MASK;
+       vmolr |= size | E1000_VMOLR_LPE;
+       wr32(E1000_VMOLR(vfn), vmolr);
+
+       return 0;
+}
+
+static inline void igb_set_rah_pool(struct e1000_hw *hw, int pool, int entry)
+{
+       u32 reg_data;
+
+       reg_data = rd32(E1000_RAH(entry));
+       reg_data &= ~E1000_RAH_POOL_MASK;
+       reg_data |= E1000_RAH_POOL_1 << pool;
+       wr32(E1000_RAH(entry), reg_data);
+}
+
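+/* point the RAR entries used for multicast filtering at the PF's pool */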
+static void igb_set_mc_list_pools(struct igb_adapter *adapter,
+                                 int entry_count, u16 total_rar_filters)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       int i = adapter->vfs_allocated_count + 1;
+
+       if ((i + entry_count) < total_rar_filters)
+               total_rar_filters = i + entry_count;
+
+       for (; i < total_rar_filters; i++)
+               igb_set_rah_pool(hw, adapter->vfs_allocated_count, i);
+}
+
+static int igb_set_vf_mac(struct igb_adapter *adapter,
+                          int vf, unsigned char *mac_addr)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       int rar_entry = vf + 1; /* VF MAC addresses start at entry 1 */
+
+       igb_rar_set(hw, mac_addr, rar_entry);
+
+       memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
+
+       igb_set_rah_pool(hw, vf, rar_entry);
+
+       return 0;
+}
+
+static void igb_vmm_control(struct igb_adapter *adapter)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       u32 reg_data;
+
+       if (!adapter->vfs_allocated_count)
+               return;
+
+       /* VFs need a PF reset indication before they
+        * can send/receive mailbox messages */
+       reg_data = rd32(E1000_CTRL_EXT);
+       reg_data |= E1000_CTRL_EXT_PFRSTD;
+       wr32(E1000_CTRL_EXT, reg_data);
+
+       igb_vmdq_set_loopback_pf(hw, true);
+       igb_vmdq_set_replication_pf(hw, true);
+}
+
 /* igb_main.c */