net: arp_ioctl() must hold RTNL
[pandora-kernel.git] / net/core/dev.c
index d28b3a0..1e5077d 100644
@@ -749,7 +749,8 @@ EXPORT_SYMBOL(dev_get_by_index);
  *     @ha: hardware address
  *
  *     Search for an interface by MAC address. Returns NULL if the device
- *     is not found or a pointer to the device. The caller must hold RCU
+ *     is not found or a pointer to the device.
+ *     The caller must hold RCU or RTNL.
  *     The returned device has not had its ref count increased
  *     and the caller must therefore be careful about locking
  *
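
The relaxed locking note means the lookup may now run under either the RCU
read lock or RTNL.  A minimal caller sketch, assuming kernel context and the
dev_getbyhwaddr_rcu() variant this comment documents (no reference is taken,
so the returned pointer is only valid inside the read-side section):

	rcu_read_lock();
	dev = dev_getbyhwaddr_rcu(net, ARPHRD_ETHER, mac);
	if (dev)
		pr_debug("found %s\n", dev->name);
	rcu_read_unlock();
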
@@ -1222,52 +1223,90 @@ int dev_open(struct net_device *dev)
 }
 EXPORT_SYMBOL(dev_open);
 
-static int __dev_close(struct net_device *dev)
+static int __dev_close_many(struct list_head *head)
 {
-       const struct net_device_ops *ops = dev->netdev_ops;
+       struct net_device *dev;
 
        ASSERT_RTNL();
        might_sleep();
 
-       /*
-        *      Tell people we are going down, so that they can
-        *      prepare to death, when device is still operating.
-        */
-       call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
+       list_for_each_entry(dev, head, unreg_list) {
+               /*
+                *      Tell people we are going down, so that they can
+                *      prepare for it while the device is still operating.
+                */
+               call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
 
-       clear_bit(__LINK_STATE_START, &dev->state);
+               clear_bit(__LINK_STATE_START, &dev->state);
 
-       /* Synchronize to scheduled poll. We cannot touch poll list,
-        * it can be even on different cpu. So just clear netif_running().
-        *
-        * dev->stop() will invoke napi_disable() on all of it's
-        * napi_struct instances on this device.
-        */
-       smp_mb__after_clear_bit(); /* Commit netif_running(). */
+               /* Synchronize to scheduled poll. We cannot touch the poll list;
+                * it may even be on a different CPU. So just clear netif_running().
+                *
+                * dev->stop() will invoke napi_disable() on all of its
+                * napi_struct instances on this device.
+                */
+               smp_mb__after_clear_bit(); /* Commit netif_running(). */
+       }
 
-       dev_deactivate(dev);
+       dev_deactivate_many(head);
 
-       /*
-        *      Call the device specific close. This cannot fail.
-        *      Only if device is UP
-        *
-        *      We allow it to be called even after a DETACH hot-plug
-        *      event.
-        */
-       if (ops->ndo_stop)
-               ops->ndo_stop(dev);
+       list_for_each_entry(dev, head, unreg_list) {
+               const struct net_device_ops *ops = dev->netdev_ops;
 
-       /*
-        *      Device is now down.
-        */
+               /*
+                *      Call the device specific close. This cannot fail.
+                *      It is only called if the device is UP.
+                *
+                *      We allow it to be called even after a DETACH hot-plug
+                *      event.
+                */
+               if (ops->ndo_stop)
+                       ops->ndo_stop(dev);
+
+               /*
+                *      Device is now down.
+                */
+
+               dev->flags &= ~IFF_UP;
+
+               /*
+                *      Shutdown NET_DMA
+                */
+               net_dmaengine_put();
+       }
+
+       return 0;
+}
+
+static int __dev_close(struct net_device *dev)
+{
+       LIST_HEAD(single);
+
+       list_add(&dev->unreg_list, &single);
+       return __dev_close_many(&single);
+}
+
+int dev_close_many(struct list_head *head)
+{
+       struct net_device *dev, *tmp;
+       LIST_HEAD(tmp_list);
+
+       list_for_each_entry_safe(dev, tmp, head, unreg_list)
+               if (!(dev->flags & IFF_UP))
+                       list_move(&dev->unreg_list, &tmp_list);
 
-       dev->flags &= ~IFF_UP;
+       __dev_close_many(head);
 
        /*
-        *      Shutdown NET_DMA
+        * Tell people we are down
         */
-       net_dmaengine_put();
+       list_for_each_entry(dev, head, unreg_list) {
+               rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+               call_netdevice_notifiers(NETDEV_DOWN, dev);
+       }
 
+       /* rollback_registered_many needs the complete original list */
+       list_splice(&tmp_list, head);
        return 0;
 }
 
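dev_close_many() batches the shutdown sequence: NETDEV_GOING_DOWN notifiers
fire for every device first, then all qdiscs are deactivated in one pass, and
only then is each ndo_stop() called.  A hypothetical caller sketch (dev_a and
dev_b are illustrative); devices are linked through dev->unreg_list and RTNL
must be held, as the function asserts:

	LIST_HEAD(close_list);

	rtnl_lock();
	list_add(&dev_a->unreg_list, &close_list);
	list_add(&dev_b->unreg_list, &close_list);
	dev_close_many(&close_list);
	rtnl_unlock();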
@@ -1282,16 +1321,10 @@ static int __dev_close(struct net_device *dev)
  */
 int dev_close(struct net_device *dev)
 {
-       if (!(dev->flags & IFF_UP))
-               return 0;
-
-       __dev_close(dev);
+       LIST_HEAD(single);
 
-       /*
-        * Tell people we are down
-        */
-       rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
-       call_netdevice_notifiers(NETDEV_DOWN, dev);
+       list_add(&dev->unreg_list, &single);
+       dev_close_many(&single);
 
        return 0;
 }
@@ -1496,6 +1529,14 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(dev_forward_skb);
 
+static inline int deliver_skb(struct sk_buff *skb,
+                             struct packet_type *pt_prev,
+                             struct net_device *orig_dev)
+{
+       atomic_inc(&skb->users);
+       return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+}
+
 /*
  *     Support routine. Sends outgoing frames to any network
  *     taps currently in use.
@@ -1504,13 +1545,8 @@ EXPORT_SYMBOL_GPL(dev_forward_skb);
 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 {
        struct packet_type *ptype;
-
-#ifdef CONFIG_NET_CLS_ACT
-       if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
-               net_timestamp_set(skb);
-#else
-       net_timestamp_set(skb);
-#endif
+       struct sk_buff *skb2 = NULL;
+       struct packet_type *pt_prev = NULL;
 
        rcu_read_lock();
        list_for_each_entry_rcu(ptype, &ptype_all, list) {
@@ -1520,10 +1556,18 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
                if ((ptype->dev == dev || !ptype->dev) &&
                    (ptype->af_packet_priv == NULL ||
                     (struct sock *)ptype->af_packet_priv != skb->sk)) {
-                       struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+                       if (pt_prev) {
+                               deliver_skb(skb2, pt_prev, skb->dev);
+                               pt_prev = ptype;
+                               continue;
+                       }
+
+                       skb2 = skb_clone(skb, GFP_ATOMIC);
                        if (!skb2)
                                break;
 
+                       net_timestamp_set(skb2);
+
                        /* skb->nh should be correctly
                           set by sender, so that the second statement is
                           just protection against buggy protocols.
@@ -1542,9 +1586,11 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 
                        skb2->transport_header = skb2->network_header;
                        skb2->pkt_type = PACKET_OUTGOING;
-                       ptype->func(skb2, skb->dev, ptype, skb->dev);
+                       pt_prev = ptype;
                }
        }
+       if (pt_prev)
+               pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
        rcu_read_unlock();
 }
 
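The rewrite defers delivery by one iteration: every matching tap except the
last receives the clone via deliver_skb(), which bumps skb->users, while the
final tap consumes the clone's original reference directly, saving one atomic
increment.  It also timestamps the clone only once a tap has actually matched.
A standalone C sketch of the deferral pattern (names illustrative, userspace):

	#include <stdio.h>

	struct tap { const char *name; };

	static void deliver(struct tap *t, int *refs)
	{
		(*refs)++;		/* extra reference for this tap */
		printf("deliver to %s, refs=%d\n", t->name, *refs);
	}

	int main(void)
	{
		struct tap taps[] = { { "tap0" }, { "tap1" }, { "tap2" } };
		struct tap *prev = NULL;
		int refs = 1;		/* the clone's own reference */
		unsigned int i;

		for (i = 0; i < 3; i++) {
			if (prev)
				deliver(prev, &refs);
			prev = &taps[i];
		}
		if (prev)		/* last tap: no extra refcount bump */
			printf("final deliver to %s, refs=%d\n",
			       prev->name, refs);
		return 0;
	}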
@@ -1687,33 +1733,6 @@ void netif_device_attach(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_device_attach);
 
-static bool can_checksum_protocol(unsigned long features, __be16 protocol)
-{
-       return ((features & NETIF_F_NO_CSUM) ||
-               ((features & NETIF_F_V4_CSUM) &&
-                protocol == htons(ETH_P_IP)) ||
-               ((features & NETIF_F_V6_CSUM) &&
-                protocol == htons(ETH_P_IPV6)) ||
-               ((features & NETIF_F_FCOE_CRC) &&
-                protocol == htons(ETH_P_FCOE)));
-}
-
-static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
-{
-       __be16 protocol = skb->protocol;
-       int features = dev->features;
-
-       if (vlan_tx_tag_present(skb)) {
-               features &= dev->vlan_features;
-       } else if (protocol == htons(ETH_P_8021Q)) {
-               struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
-               protocol = veh->h_vlan_encapsulated_proto;
-               features &= dev->vlan_features;
-       }
-
-       return can_checksum_protocol(features, protocol);
-}
-
 /**
  * skb_dev_set -- assign a new device to a buffer
  * @skb: buffer for the new device
@@ -1761,7 +1780,7 @@ int skb_checksum_help(struct sk_buff *skb)
                goto out_set_summed;
        }
 
-       offset = skb->csum_start - skb_headroom(skb);
+       offset = skb_checksum_start_offset(skb);
        BUG_ON(offset >= skb_headlen(skb));
        csum = skb_checksum(skb, offset, skb->len - offset, 0);
 
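skb_checksum_start_offset() names the open-coded subtraction it replaces; its
definition in include/linux/skbuff.h is expected to be exactly that expression:

	static inline int skb_checksum_start_offset(const struct sk_buff *skb)
	{
		return skb->csum_start - skb_headroom(skb);
	}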
@@ -1926,16 +1945,14 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
 /**
  *     dev_gso_segment - Perform emulated hardware segmentation on skb.
  *     @skb: buffer to segment
+ *     @features: device features as applicable to this skb
  *
  *     This function segments the given skb and stores the list of segments
  *     in skb->next.
  */
-static int dev_gso_segment(struct sk_buff *skb)
+static int dev_gso_segment(struct sk_buff *skb, int features)
 {
-       struct net_device *dev = skb->dev;
        struct sk_buff *segs;
-       int features = dev->features & ~(illegal_highdma(dev, skb) ?
-                                        NETIF_F_SG : 0);
 
        segs = skb_gso_segment(skb, features);
 
@@ -1972,22 +1989,52 @@ static inline void skb_orphan_try(struct sk_buff *skb)
        }
 }
 
-int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev)
+static bool can_checksum_protocol(unsigned long features, __be16 protocol)
+{
+       return ((features & NETIF_F_GEN_CSUM) ||
+               ((features & NETIF_F_V4_CSUM) &&
+                protocol == htons(ETH_P_IP)) ||
+               ((features & NETIF_F_V6_CSUM) &&
+                protocol == htons(ETH_P_IPV6)) ||
+               ((features & NETIF_F_FCOE_CRC) &&
+                protocol == htons(ETH_P_FCOE)));
+}
+
+static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features)
+{
+       if (!can_checksum_protocol(features, protocol)) {
+               features &= ~NETIF_F_ALL_CSUM;
+               features &= ~NETIF_F_SG;
+       } else if (illegal_highdma(skb->dev, skb)) {
+               features &= ~NETIF_F_SG;
+       }
+
+       return features;
+}
+
+int netif_skb_features(struct sk_buff *skb)
 {
        __be16 protocol = skb->protocol;
+       int features = skb->dev->features;
 
        if (protocol == htons(ETH_P_8021Q)) {
                struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
                protocol = veh->h_vlan_encapsulated_proto;
-       } else if (!skb->vlan_tci)
-               return dev->features;
+       } else if (!vlan_tx_tag_present(skb)) {
+               return harmonize_features(skb, protocol, features);
+       }
 
-       if (protocol != htons(ETH_P_8021Q))
-               return dev->features & dev->vlan_features;
-       else
-               return 0;
+       features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX);
+
+       if (protocol != htons(ETH_P_8021Q)) {
+               return harmonize_features(skb, protocol, features);
+       } else {
+               features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
+                               NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX;
+               return harmonize_features(skb, protocol, features);
+       }
 }
-EXPORT_SYMBOL(netif_get_vlan_features);
+EXPORT_SYMBOL(netif_skb_features);
 
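A minimal sketch of how a transmit path consumes the new helper, mirroring the
dev_hard_start_xmit() changes further down (kernel context assumed, the error
label is hypothetical):

	int features = netif_skb_features(skb);

	if (netif_needs_gso(skb, features)) {
		/* segment in software before handing to the driver */
	} else if (skb->ip_summed == CHECKSUM_PARTIAL &&
		   !(features & NETIF_F_ALL_CSUM)) {
		if (skb_checksum_help(skb))
			goto drop;	/* hypothetical error path */
	}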
 /*
  * Returns true if either:
@@ -1997,22 +2044,13 @@ EXPORT_SYMBOL(netif_get_vlan_features);
  *        support DMA from it.
  */
 static inline int skb_needs_linearize(struct sk_buff *skb,
-                                     struct net_device *dev)
+                                     int features)
 {
-       if (skb_is_nonlinear(skb)) {
-               int features = dev->features;
-
-               if (vlan_tx_tag_present(skb))
-                       features &= dev->vlan_features;
-
-               return (skb_has_frag_list(skb) &&
-                       !(features & NETIF_F_FRAGLIST)) ||
+       return skb_is_nonlinear(skb) &&
+                       ((skb_has_frag_list(skb) &&
+                               !(features & NETIF_F_FRAGLIST)) ||
                        (skb_shinfo(skb)->nr_frags &&
-                       (!(features & NETIF_F_SG) ||
-                       illegal_highdma(dev, skb)));
-       }
-
-       return 0;
+                               !(features & NETIF_F_SG)));
 }
 
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -2022,6 +2060,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
        int rc = NETDEV_TX_OK;
 
        if (likely(!skb->next)) {
+               int features;
+
                /*
                 * If device doesn't need skb->dst, release it right now while
                 * it's hot in this cpu cache
@@ -2034,8 +2074,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 
                skb_orphan_try(skb);
 
+               features = netif_skb_features(skb);
+
                if (vlan_tx_tag_present(skb) &&
-                   !(dev->features & NETIF_F_HW_VLAN_TX)) {
+                   !(features & NETIF_F_HW_VLAN_TX)) {
                        skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
                        if (unlikely(!skb))
                                goto out;
@@ -2043,13 +2085,13 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                        skb->vlan_tci = 0;
                }
 
-               if (netif_needs_gso(dev, skb)) {
-                       if (unlikely(dev_gso_segment(skb)))
+               if (netif_needs_gso(skb, features)) {
+                       if (unlikely(dev_gso_segment(skb, features)))
                                goto out_kfree_skb;
                        if (skb->next)
                                goto gso;
                } else {
-                       if (skb_needs_linearize(skb, dev) &&
+                       if (skb_needs_linearize(skb, features) &&
                            __skb_linearize(skb))
                                goto out_kfree_skb;
 
@@ -2058,9 +2100,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                         * checksumming here.
                         */
                        if (skb->ip_summed == CHECKSUM_PARTIAL) {
-                               skb_set_transport_header(skb, skb->csum_start -
-                                             skb_headroom(skb));
-                               if (!dev_can_checksum(dev, skb) &&
+                               skb_set_transport_header(skb,
+                                       skb_checksum_start_offset(skb));
+                               if (!(features & NETIF_F_ALL_CSUM) &&
                                     skb_checksum_help(skb))
                                        goto out_kfree_skb;
                        }
@@ -2112,14 +2154,19 @@ out:
 
 static u32 hashrnd __read_mostly;
 
-u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
+/*
+ * Returns a Tx hash based on the given packet descriptor and the number
+ * of Tx queues to be used as a distribution range.
+ */
+u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
+                 unsigned int num_tx_queues)
 {
        u32 hash;
 
        if (skb_rx_queue_recorded(skb)) {
                hash = skb_get_rx_queue(skb);
-               while (unlikely(hash >= dev->real_num_tx_queues))
-                       hash -= dev->real_num_tx_queues;
+               while (unlikely(hash >= num_tx_queues))
+                       hash -= num_tx_queues;
                return hash;
        }
 
@@ -2129,9 +2176,9 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
                hash = (__force u16) skb->protocol ^ skb->rxhash;
        hash = jhash_1word(hash, hashrnd);
 
-       return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
+       return (u16) (((u64) hash * num_tx_queues) >> 32);
 }
-EXPORT_SYMBOL(skb_tx_hash);
+EXPORT_SYMBOL(__skb_tx_hash);
 
 static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 {
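
The final expression maps a 32-bit hash into [0, num_tx_queues) with one
multiply and shift instead of a modulo; skb_tx_hash() is expected to become a
wrapper passing dev->real_num_tx_queues.  A standalone demo of the math:

	#include <stdint.h>
	#include <stdio.h>

	/* Scale a 32-bit hash into [0, num) without a division. */
	static uint16_t scale_hash(uint32_t hash, unsigned int num)
	{
		return (uint16_t)(((uint64_t)hash * num) >> 32);
	}

	int main(void)
	{
		printf("%u\n", scale_hash(0x9e3779b9u, 8));	/* prints 4 */
		return 0;
	}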
@@ -2251,7 +2298,10 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
                 */
                if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
                        skb_dst_force(skb);
-               __qdisc_update_bstats(q, skb->len);
+
+               qdisc_skb_cb(skb)->pkt_len = skb->len;
+               qdisc_bstats_update(q, skb);
+
                if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
                        if (unlikely(contended)) {
                                spin_unlock(&q->busylock);
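
qdisc_bstats_update() replaces the open-coded __qdisc_update_bstats(); a
plausible shape for the helper (the real one lives in
include/net/sch_generic.h and may additionally account GSO segments) keys off
the pkt_len stashed in the skb control block just above:

	static inline void qdisc_bstats_update(struct Qdisc *sch,
					       const struct sk_buff *skb)
	{
		sch->bstats.bytes += qdisc_skb_cb(skb)->pkt_len;
		sch->bstats.packets++;
	}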
@@ -2783,14 +2833,6 @@ static void net_tx_action(struct softirq_action *h)
        }
 }
 
-static inline int deliver_skb(struct sk_buff *skb,
-                             struct packet_type *pt_prev,
-                             struct net_device *orig_dev)
-{
-       atomic_inc(&skb->users);
-       return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
-}
-
 #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
     (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
 /* This hook is defined here for ATM LANE */
@@ -4958,10 +5000,12 @@ static void rollback_registered_many(struct list_head *head)
                }
 
                BUG_ON(dev->reg_state != NETREG_REGISTERED);
+       }
 
-               /* If device is running, close it first. */
-               dev_close(dev);
+       /* If device is running, close it first. */
+       dev_close_many(head);
 
+       list_for_each_entry(dev, head, unreg_list) {
                /* And unlink it from device chain. */
                unlist_netdevice(dev);
 
@@ -5116,7 +5160,7 @@ static void netdev_init_one_queue(struct net_device *dev,
        spin_lock_init(&queue->_xmit_lock);
        netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
        queue->xmit_lock_owner = -1;
-       netdev_queue_numa_node_write(queue, -1);
+       netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
        queue->dev = dev;
 }
 
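NUMA_NO_NODE is the named constant for "no node affinity"; the substitution is
purely cosmetic, since include/linux/numa.h defines:

	#define NUMA_NO_NODE	(-1)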
@@ -5480,34 +5524,6 @@ void netdev_run_todo(void)
        }
 }
 
-/**
- *     dev_txq_stats_fold - fold tx_queues stats
- *     @dev: device to get statistics from
- *     @stats: struct rtnl_link_stats64 to hold results
- */
-void dev_txq_stats_fold(const struct net_device *dev,
-                       struct rtnl_link_stats64 *stats)
-{
-       u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
-       unsigned int i;
-       struct netdev_queue *txq;
-
-       for (i = 0; i < dev->num_tx_queues; i++) {
-               txq = netdev_get_tx_queue(dev, i);
-               spin_lock_bh(&txq->_xmit_lock);
-               tx_bytes   += txq->tx_bytes;
-               tx_packets += txq->tx_packets;
-               tx_dropped += txq->tx_dropped;
-               spin_unlock_bh(&txq->_xmit_lock);
-       }
-       if (tx_bytes || tx_packets || tx_dropped) {
-               stats->tx_bytes   = tx_bytes;
-               stats->tx_packets = tx_packets;
-               stats->tx_dropped = tx_dropped;
-       }
-}
-EXPORT_SYMBOL(dev_txq_stats_fold);
-
 /* Convert net_device_stats to rtnl_link_stats64.  They have the same
  * fields in the same order, with only the type differing.
  */
@@ -5551,7 +5567,6 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
                netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
        } else {
                netdev_stats_to_stats64(storage, &dev->stats);
-               dev_txq_stats_fold(dev, storage);
        }
        storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
        return storage;
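
With dev_txq_stats_fold() removed, the fallback path reports only dev->stats;
a multiqueue driver is expected to sum its own per-queue counters in
ndo_get_stats64().  A hypothetical driver sketch (all foo_* names are
illustrative):

	static struct rtnl_link_stats64 *
	foo_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
	{
		struct foo_priv *priv = netdev_priv(dev);
		unsigned int i;

		for (i = 0; i < dev->num_tx_queues; i++) {
			stats->tx_packets += priv->txq[i].tx_packets;
			stats->tx_bytes   += priv->txq[i].tx_bytes;
		}
		return stats;
	}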
@@ -5577,18 +5592,20 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
 }
 
 /**
- *     alloc_netdev_mq - allocate network device
+ *     alloc_netdev_mqs - allocate network device
  *     @sizeof_priv:   size of private data to allocate space for
  *     @name:          device name format string
  *     @setup:         callback to initialize device
- *     @queue_count:   the number of subqueues to allocate
+ *     @txqs:          the number of TX subqueues to allocate
+ *     @rxqs:          the number of RX subqueues to allocate
  *
  *     Allocates a struct net_device with private data area for driver use
 *     and performs basic initialization.  Also allocates subqueue structs
- *     for each queue on the device at the end of the netdevice.
+ *     for each queue on the device.
  */
-struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
-               void (*setup)(struct net_device *), unsigned int queue_count)
+struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
+               void (*setup)(struct net_device *),
+               unsigned int txqs, unsigned int rxqs)
 {
        struct net_device *dev;
        size_t alloc_size;
@@ -5596,12 +5613,20 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
        BUG_ON(strlen(name) >= sizeof(dev->name));
 
-       if (queue_count < 1) {
+       if (txqs < 1) {
                pr_err("alloc_netdev: Unable to allocate device "
                       "with zero queues.\n");
                return NULL;
        }
 
+#ifdef CONFIG_RPS
+       if (rxqs < 1) {
+               pr_err("alloc_netdev: Unable to allocate device "
+                      "with zero RX queues.\n");
+               return NULL;
+       }
+#endif
+
        alloc_size = sizeof(struct net_device);
        if (sizeof_priv) {
                /* ensure 32-byte alignment of private area */
@@ -5632,14 +5657,14 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
        dev_net_set(dev, &init_net);
 
-       dev->num_tx_queues = queue_count;
-       dev->real_num_tx_queues = queue_count;
+       dev->num_tx_queues = txqs;
+       dev->real_num_tx_queues = txqs;
        if (netif_alloc_netdev_queues(dev))
                goto free_pcpu;
 
 #ifdef CONFIG_RPS
-       dev->num_rx_queues = queue_count;
-       dev->real_num_rx_queues = queue_count;
+       dev->num_rx_queues = rxqs;
+       dev->real_num_rx_queues = rxqs;
        if (netif_alloc_rx_queues(dev))
                goto free_pcpu;
 #endif
@@ -5667,7 +5692,7 @@ free_p:
        kfree(p);
        return NULL;
 }
-EXPORT_SYMBOL(alloc_netdev_mq);
+EXPORT_SYMBOL(alloc_netdev_mqs);
 
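A hypothetical call with asymmetric queue counts (foo_* names illustrative);
the old alloc_netdev_mq(priv, name, setup, count) interface is expected to
survive as a wrapper passing count for both txqs and rxqs:

	dev = alloc_netdev_mqs(sizeof(struct foo_priv), "foo%d",
			       foo_setup, 4, 2);
	if (!dev)
		return -ENOMEM;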
 /**
  *     free_netdev - free network device