Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[pandora-kernel.git] / drivers / net / igb / igb_main.c
index 03aa959..ffd7315 100644 (file)
@@ -152,14 +152,13 @@ static struct notifier_block dca_notifier = {
 /* for netdump / net console */
 static void igb_netpoll(struct net_device *);
 #endif
-
 #ifdef CONFIG_PCI_IOV
-static ssize_t igb_set_num_vfs(struct device *, struct device_attribute *,
-                               const char *, size_t);
-static ssize_t igb_show_num_vfs(struct device *, struct device_attribute *,
-                               char *);
-DEVICE_ATTR(num_vfs, S_IRUGO | S_IWUSR, igb_show_num_vfs, igb_set_num_vfs);
-#endif
+static unsigned int max_vfs = 0;
+module_param(max_vfs, uint, 0);
+MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
+                 "per physical function");
+#endif /* CONFIG_PCI_IOV */
+
 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
                     pci_channel_state_t);
 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
@@ -671,6 +670,21 @@ static void igb_set_interrupt_capability(struct igb_adapter *adapter)
 
        /* If we can't do MSI-X, try MSI */
 msi_only:
+#ifdef CONFIG_PCI_IOV
+       /* disable SR-IOV for non MSI-X configurations */
+       if (adapter->vf_data) {
+               struct e1000_hw *hw = &adapter->hw;
+               /* disable iov and allow time for transactions to clear */
+               pci_disable_sriov(adapter->pdev);
+               msleep(500);
+
+               kfree(adapter->vf_data);
+               adapter->vf_data = NULL;
+               wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
+               msleep(100);
+               dev_info(&adapter->pdev->dev, "IOV Disabled\n");
+       }
+#endif
        adapter->num_rx_queues = 1;
        adapter->num_tx_queues = 1;
        if (!pci_enable_msi(adapter->pdev))
@@ -928,6 +942,8 @@ int igb_up(struct igb_adapter *adapter)
        rd32(E1000_ICR);
        igb_irq_enable(adapter);
 
+       netif_tx_start_all_queues(adapter->netdev);
+
        /* Fire a link change interrupt to start the watchdog. */
        wr32(E1000_ICS, E1000_ICS_LSC);
        return 0;
@@ -1154,15 +1170,15 @@ static int __devinit igb_probe(struct pci_dev *pdev,
                return err;
 
        pci_using_dac = 0;
-       err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
        if (!err) {
-               err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
+               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
                if (!err)
                        pci_using_dac = 1;
        } else {
-               err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
                if (err) {
-                       err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+                       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
                        if (err) {
                                dev_err(&pdev->dev, "No usable DMA "
                                        "configuration, aborting\n");
@@ -1238,6 +1254,46 @@ static int __devinit igb_probe(struct pci_dev *pdev,
        if (err)
                goto err_sw_init;
 
+#ifdef CONFIG_PCI_IOV
+       /* since iov functionality isn't critical to base device function we
+        * can accept failure.  If it fails we don't allow iov to be enabled */
+       if (hw->mac.type == e1000_82576) {
+               /* 82576 supports a maximum of 7 VFs in addition to the PF */
+               unsigned int num_vfs = (max_vfs > 7) ? 7 : max_vfs;
+               int i;
+               unsigned char mac_addr[ETH_ALEN];
+
+               if (num_vfs) {
+                       adapter->vf_data = kcalloc(num_vfs,
+                                               sizeof(struct vf_data_storage),
+                                               GFP_KERNEL);
+                       if (!adapter->vf_data) {
+                               dev_err(&pdev->dev,
+                                       "Could not allocate VF private data - "
+                                       "IOV enable failed\n");
+                       } else {
+                               err = pci_enable_sriov(pdev, num_vfs);
+                               if (!err) {
+                                       adapter->vfs_allocated_count = num_vfs;
+                                       dev_info(&pdev->dev,
+                                                "%d vfs allocated\n",
+                                                num_vfs);
+                                       for (i = 0;
+                                            i < adapter->vfs_allocated_count;
+                                            i++) {
+                                               random_ether_addr(mac_addr);
+                                               igb_set_vf_mac(adapter, i,
+                                                              mac_addr);
+                                       }
+                               } else {
+                                       kfree(adapter->vf_data);
+                                       adapter->vf_data = NULL;
+                               }
+                       }
+               }
+       }
+
+#endif
        /* setup the private structure */
        err = igb_sw_init(adapter);
        if (err)
@@ -1289,6 +1345,9 @@ static int __devinit igb_probe(struct pci_dev *pdev,
        if (pci_using_dac)
                netdev->features |= NETIF_F_HIGHDMA;
 
+       if (adapter->hw.mac.type == e1000_82576)
+               netdev->features |= NETIF_F_SCTP_CSUM;
+
        adapter->en_mng_pt = igb_enable_mng_pass_thru(&adapter->hw);
 
        /* before reading the NVM, reset the controller to put the device in a
@@ -1336,8 +1395,6 @@ static int __devinit igb_probe(struct pci_dev *pdev,
 
        igb_validate_mdi_setting(hw);
 
-       adapter->rx_csum = 1;
-
        /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
         * enable the ACPI Magic Packet filter
         */
@@ -1388,28 +1445,14 @@ static int __devinit igb_probe(struct pci_dev *pdev,
         * driver. */
        igb_get_hw_control(adapter);
 
-       /* tell the stack to leave us alone until igb_open() is called */
-       netif_carrier_off(netdev);
-       netif_tx_stop_all_queues(netdev);
-
        strcpy(netdev->name, "eth%d");
        err = register_netdev(netdev);
        if (err)
                goto err_register;
 
-#ifdef CONFIG_PCI_IOV
-       /* since iov functionality isn't critical to base device function we
-        * can accept failure.  If it fails we don't allow iov to be enabled */
-       if (hw->mac.type == e1000_82576) {
-               err = pci_enable_sriov(pdev, 0);
-               if (!err)
-                       err = device_create_file(&netdev->dev,
-                                                &dev_attr_num_vfs);
-               if (err)
-                       dev_err(&pdev->dev, "Failed to initialize IOV\n");
-       }
+       /* carrier off reporting is important to ethtool even BEFORE open */
+       netif_carrier_off(netdev);
 
-#endif
 #ifdef CONFIG_IGB_DCA
        if (dca_add_requester(&pdev->dev) == 0) {
                adapter->flags |= IGB_FLAG_DCA_ENABLED;
@@ -1658,6 +1701,8 @@ static int igb_open(struct net_device *netdev)
        if (test_bit(__IGB_TESTING, &adapter->state))
                return -EBUSY;
 
+       netif_carrier_off(netdev);
+
        /* allocate transmit descriptors */
        err = igb_setup_all_tx_resources(adapter);
        if (err)
@@ -1965,7 +2010,7 @@ static void igb_setup_rctl(struct igb_adapter *adapter)
        struct e1000_hw *hw = &adapter->hw;
        u32 rctl;
        u32 srrctl = 0;
-       int i, j;
+       int i;
 
        rctl = rd32(E1000_RCTL);
 
@@ -2030,8 +2075,6 @@ static void igb_setup_rctl(struct igb_adapter *adapter)
        if (adapter->vfs_allocated_count) {
                u32 vmolr;
 
-               j = adapter->rx_ring[0].reg_idx;
-
                /* set all queue drop enable bits */
                wr32(E1000_QDE, ALL_QUEUES);
                srrctl |= E1000_SRRCTL_DROP_EN;
@@ -2039,16 +2082,16 @@ static void igb_setup_rctl(struct igb_adapter *adapter)
                /* disable queue 0 to prevent tail write w/o re-config */
                wr32(E1000_RXDCTL(0), 0);
 
-               vmolr = rd32(E1000_VMOLR(j));
+               vmolr = rd32(E1000_VMOLR(adapter->vfs_allocated_count));
                if (rctl & E1000_RCTL_LPE)
                        vmolr |= E1000_VMOLR_LPE;
-               if (adapter->num_rx_queues > 0)
+               if (adapter->num_rx_queues > 1)
                        vmolr |= E1000_VMOLR_RSSE;
-               wr32(E1000_VMOLR(j), vmolr);
+               wr32(E1000_VMOLR(adapter->vfs_allocated_count), vmolr);
        }
 
        for (i = 0; i < adapter->num_rx_queues; i++) {
-               j = adapter->rx_ring[i].reg_idx;
+               int j = adapter->rx_ring[i].reg_idx;
                wr32(E1000_SRRCTL(j), srrctl);
        }
 
@@ -2192,29 +2235,24 @@ static void igb_configure_rx(struct igb_adapter *adapter)
                mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
                         E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
 
-
                wr32(E1000_MRQC, mrqc);
-
-               /* Multiqueue and raw packet checksumming are mutually
-                * exclusive.  Note that this not the same as TCP/IP
-                * checksumming, which works fine. */
-               rxcsum = rd32(E1000_RXCSUM);
-               rxcsum |= E1000_RXCSUM_PCSD;
-               wr32(E1000_RXCSUM, rxcsum);
-       } else {
+       } else if (adapter->vfs_allocated_count) {
                /* Enable multi-queue for sr-iov */
-               if (adapter->vfs_allocated_count)
-                       wr32(E1000_MRQC, E1000_MRQC_ENABLE_VMDQ);
-               /* Enable Receive Checksum Offload for TCP and UDP */
-               rxcsum = rd32(E1000_RXCSUM);
-               if (adapter->rx_csum)
-                       rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPPCSE;
-               else
-                       rxcsum &= ~(E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPPCSE);
-
-               wr32(E1000_RXCSUM, rxcsum);
+               wr32(E1000_MRQC, E1000_MRQC_ENABLE_VMDQ);
        }
 
+       /* Enable Receive Checksum Offload for TCP and UDP */
+       rxcsum = rd32(E1000_RXCSUM);
+       /* Disable raw packet checksumming */
+       rxcsum |= E1000_RXCSUM_PCSD;
+
+       if (adapter->hw.mac.type == e1000_82576)
+               /* Enable Receive Checksum Offload for SCTP */
+               rxcsum |= E1000_RXCSUM_CRCOFL;
+
+       /* Don't need to set TUOFL or IPOFL, they default to 1 */
+       wr32(E1000_RXCSUM, rxcsum);
+
        /* Set the default pool for the PF's first queue */
        igb_configure_vt_default_pool(adapter);
 
@@ -2622,7 +2660,6 @@ static void igb_watchdog_task(struct work_struct *work)
                        }
 
                        netif_carrier_on(netdev);
-                       netif_tx_wake_all_queues(netdev);
 
                        igb_ping_all_vfs(adapter);
 
@@ -2639,7 +2676,6 @@ static void igb_watchdog_task(struct work_struct *work)
                        printk(KERN_INFO "igb: %s NIC Link is Down\n",
                               netdev->name);
                        netif_carrier_off(netdev);
-                       netif_tx_stop_all_queues(netdev);
 
                        igb_ping_all_vfs(adapter);
 
@@ -2673,6 +2709,8 @@ link_up:
                         * (Do the reset outside of interrupt context). */
                        adapter->tx_timeout_count++;
                        schedule_work(&adapter->reset_task);
+                       /* return immediately since reset is imminent */
+                       return;
                }
        }
 
@@ -2856,13 +2894,13 @@ static void igb_set_itr(struct igb_adapter *adapter)
        switch (current_itr) {
        /* counts and packets in update_itr are dependent on these numbers */
        case lowest_latency:
-               new_itr = 70000;
+               new_itr = 56;  /* aka 70,000 ints/sec */
                break;
        case low_latency:
-               new_itr = 20000; /* aka hwitr = ~200 */
+               new_itr = 196; /* aka 20,000 ints/sec */
                break;
        case bulk_latency:
-               new_itr = 4000;
+               new_itr = 980; /* aka 4,000 ints/sec */
                break;
        default:
                break;
@@ -2881,7 +2919,8 @@ set_itr_now:
                 * by adding intermediate steps when interrupt rate is
                 * increasing */
                new_itr = new_itr > adapter->itr ?
-                            min(adapter->itr + (new_itr >> 2), new_itr) :
+                            max((new_itr * adapter->itr) /
+                                (new_itr + (adapter->itr >> 2)), new_itr) :
                             new_itr;
                /* Don't write the value here; it resets the adapter's
                 * internal timer, and causes us to delay far longer than
@@ -2890,7 +2929,7 @@ set_itr_now:
                 * ends up being correct.
                 */
                adapter->itr = new_itr;
-               adapter->rx_ring->itr_val = 1000000000 / (new_itr * 256);
+               adapter->rx_ring->itr_val = new_itr;
                adapter->rx_ring->set_itr = 1;
        }
 
@@ -3029,11 +3068,15 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter,
                                tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
                                if (ip_hdr(skb)->protocol == IPPROTO_TCP)
                                        tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
+                               else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
+                                       tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
                                break;
                        case cpu_to_be16(ETH_P_IPV6):
                                /* XXX what about other V6 headers?? */
                                if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
                                        tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
+                               else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
+                                       tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
                                break;
                        default:
                                if (unlikely(net_ratelimit()))
@@ -4395,20 +4438,12 @@ static void igb_receive_skb(struct igb_ring *ring, u8 status,
        bool vlan_extracted = (adapter->vlgrp && (status & E1000_RXD_STAT_VP));
 
        skb_record_rx_queue(skb, ring->queue_index);
-       if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
-               if (vlan_extracted)
-                       vlan_gro_receive(&ring->napi, adapter->vlgrp,
-                                        le16_to_cpu(rx_desc->wb.upper.vlan),
-                                        skb);
-               else
-                       napi_gro_receive(&ring->napi, skb);
-       } else {
-               if (vlan_extracted)
-                       vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
-                                         le16_to_cpu(rx_desc->wb.upper.vlan));
-               else
-                       netif_receive_skb(skb);
-       }
+       if (vlan_extracted)
+               vlan_gro_receive(&ring->napi, adapter->vlgrp,
+                                le16_to_cpu(rx_desc->wb.upper.vlan),
+                                skb);
+       else
+               napi_gro_receive(&ring->napi, skb);
 }
 
 static inline void igb_rx_checksum_adv(struct igb_adapter *adapter,
@@ -4417,19 +4452,28 @@ static inline void igb_rx_checksum_adv(struct igb_adapter *adapter,
        skb->ip_summed = CHECKSUM_NONE;
 
        /* Ignore Checksum bit is set or checksum is disabled through ethtool */
-       if ((status_err & E1000_RXD_STAT_IXSM) || !adapter->rx_csum)
+       if ((status_err & E1000_RXD_STAT_IXSM) ||
+           (adapter->flags & IGB_FLAG_RX_CSUM_DISABLED))
                return;
        /* TCP/UDP checksum error bit is set */
        if (status_err &
            (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
+               /*
+                * Work around an erratum with SCTP packets where the TCPE
+                * (aka L4E) bit is set incorrectly on 64-byte (60 bytes w/o
+                * CRC) packets; i.e. let the stack check the crc32c.
+                */
+               if (!((adapter->hw.mac.type == e1000_82576) &&
+                     (skb->len == 60)))
+                       adapter->hw_csum_err++;
                /* let the stack verify checksum errors */
-               adapter->hw_csum_err++;
                return;
        }
        /* It must be a TCP or UDP packet with a valid checksum */
        if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
                skb->ip_summed = CHECKSUM_UNNECESSARY;
 
+       dev_dbg(&adapter->pdev->dev, "cksum success: bits %08X\n", status_err);
        adapter->hw_csum_good++;
 }
 
@@ -5422,89 +5466,4 @@ static void igb_vmm_control(struct igb_adapter *adapter)
        igb_vmdq_set_replication_pf(hw, true);
 }
 
-#ifdef CONFIG_PCI_IOV
-static ssize_t igb_show_num_vfs(struct device *dev,
-                                struct device_attribute *attr, char *buf)
-{
-       struct igb_adapter *adapter = netdev_priv(to_net_dev(dev));
-
-       return sprintf(buf, "%d\n", adapter->vfs_allocated_count);
-}
-
-static ssize_t igb_set_num_vfs(struct device *dev,
-                               struct device_attribute *attr,
-                               const char *buf, size_t count)
-{
-       struct net_device *netdev = to_net_dev(dev);
-       struct igb_adapter *adapter = netdev_priv(netdev);
-       struct e1000_hw *hw = &adapter->hw;
-       struct pci_dev *pdev = adapter->pdev;
-       unsigned int num_vfs, i;
-       unsigned char mac_addr[ETH_ALEN];
-       int err;
-
-       sscanf(buf, "%u", &num_vfs);
-
-       if (num_vfs > 7)
-               num_vfs = 7;
-
-       /* value unchanged do nothing */
-       if (num_vfs == adapter->vfs_allocated_count)
-               return count;
-
-       if (netdev->flags & IFF_UP)
-               igb_close(netdev);
-
-       igb_reset_interrupt_capability(adapter);
-       igb_free_queues(adapter);
-       adapter->tx_ring = NULL;
-       adapter->rx_ring = NULL;
-       adapter->vfs_allocated_count = 0;
-
-       /* reclaim resources allocated to VFs since we are changing count */
-       if (adapter->vf_data) {
-               /* disable iov and allow time for transactions to clear */
-               pci_disable_sriov(pdev);
-               msleep(500);
-
-               kfree(adapter->vf_data);
-               adapter->vf_data = NULL;
-               wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
-               msleep(100);
-               dev_info(&pdev->dev, "IOV Disabled\n");
-       }
-
-       if (num_vfs) {
-               adapter->vf_data = kcalloc(num_vfs,
-                                          sizeof(struct vf_data_storage),
-                                          GFP_KERNEL);
-               if (!adapter->vf_data) {
-                       dev_err(&pdev->dev, "Could not allocate VF private "
-                               "data - IOV enable failed\n");
-               } else {
-                       err = pci_enable_sriov(pdev, num_vfs);
-                       if (!err) {
-                               adapter->vfs_allocated_count = num_vfs;
-                               dev_info(&pdev->dev, "%d vfs allocated\n", num_vfs);
-                               for (i = 0; i < adapter->vfs_allocated_count; i++) {
-                                       random_ether_addr(mac_addr);
-                                       igb_set_vf_mac(adapter, i, mac_addr);
-                               }
-                       } else {
-                               kfree(adapter->vf_data);
-                               adapter->vf_data = NULL;
-                       }
-               }
-       }
-
-       igb_set_interrupt_capability(adapter);
-       igb_alloc_queues(adapter);
-       igb_reset(adapter);
-
-       if (netdev->flags & IFF_UP)
-               igb_open(netdev);
-
-       return count;
-}
-#endif /* CONFIG_PCI_IOV */
 /* igb_main.c */