Merge branch 'linus' into cpus4096-for-linus

author Ingo Molnar <mingo@elte.hu>

Mon, 21 Jul 2008 15:19:50 +0000 (17:19 +0200)

committer Ingo Molnar <mingo@elte.hu>

Mon, 21 Jul 2008 15:19:50 +0000 (17:19 +0200)
author Ingo Molnar <mingo@elte.hu>
Mon, 21 Jul 2008 15:19:50 +0000 (17:19 +0200)
committer Ingo Molnar <mingo@elte.hu>
Mon, 21 Jul 2008 15:19:50 +0000 (17:19 +0200)
diff --combined net/core/dev.c

index df5520a,2eed17b..106d5e6
--- 1/net/core/dev.c
--- 2/net/core/dev.c
+++ b/net/core/dev.c
@@@ -90,6 -90,7 +90,7 @@@
   #include <linux/if_ether.h>
   #include <linux/netdevice.h>
   #include <linux/etherdevice.h>
+ #include <linux/ethtool.h>
   #include <linux/notifier.h>
   #include <linux/skbuff.h>
   #include <net/net_namespace.h>
@@@ -120,6 -121,9 +121,9 @@@
   #include <linux/ctype.h>
   #include <linux/if_arp.h>
   #include <linux/if_vlan.h>
+ #include <linux/ip.h>
+ #include <linux/ipv6.h>
+ #include <linux/in.h>
   
   #include "net-sysfs.h"
   
@@@ -257,7 -261,7 +261,7 @@@ DEFINE_PER_CPU(struct softnet_data, sof
   
   #ifdef CONFIG_DEBUG_LOCK_ALLOC
   /*
-  * register_netdevice() inits dev->_xmit_lock and sets lockdep class
+  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
    * according to dev->type
    */
   static const unsigned short netdev_lock_type[] =
@@@ -961,6 -965,12 +965,12 @@@ void netdev_state_change(struct net_dev
         }
   }
   
+ void netdev_bonding_change(struct net_device *dev)
+ {
+       call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
+ }
+ EXPORT_SYMBOL(netdev_bonding_change);
+ 
   /**
    *    dev_load        - load a network module
    *    @net: the applicable net namespace
@@@ -1117,6 -1127,29 +1127,29 @@@ int dev_close(struct net_device *dev
   }
   
   
+ /**
+  *    dev_disable_lro - disable Large Receive Offload on a device
+  *    @dev: device
+  *
+  *    Disable Large Receive Offload (LRO) on a net device.  Must be
+  *    called under RTNL.  This is needed if received packets may be
+  *    forwarded to another interface.
+  */
+ void dev_disable_lro(struct net_device *dev)
+ {
+       if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
+           dev->ethtool_ops->set_flags) {
+               u32 flags = dev->ethtool_ops->get_flags(dev);
+               if (flags & ETH_FLAG_LRO) {
+                       flags &= ~ETH_FLAG_LRO;
+                       dev->ethtool_ops->set_flags(dev, flags);
+               }
+       }
+       WARN_ON(dev->features & NETIF_F_LRO);
+ }
+ EXPORT_SYMBOL(dev_disable_lro);
+ 
+ 
   static int dev_boot_phase = 1;
   
   /*
@@@ -1290,16 -1323,18 +1323,18 @@@ static void dev_queue_xmit_nit(struct s
   }
   
   
- void __netif_schedule(struct net_device *dev)
+ void __netif_schedule(struct Qdisc *q)
   {
-       if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
-               unsigned long flags;
+       BUG_ON(q == &noop_qdisc);
+ 
+       if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
                 struct softnet_data *sd;
+               unsigned long flags;
   
                 local_irq_save(flags);
                 sd = &__get_cpu_var(softnet_data);
-               dev->next_sched = sd->output_queue;
-               sd->output_queue = dev;
+               q->next_sched = sd->output_queue;
+               sd->output_queue = q;
                 raise_softirq_irqoff(NET_TX_SOFTIRQ);
                 local_irq_restore(flags);
         }
@@@ -1566,7 -1601,8 +1601,8 @@@ static int dev_gso_segment(struct sk_bu
         return 0;
   }
   
- int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
+                       struct netdev_queue *txq)
   {
         if (likely(!skb->next)) {
                 if (!list_empty(&ptype_all))
@@@ -1595,9 -1631,7 +1631,7 @@@ gso
                         skb->next = nskb;
                         return rc;
                 }
-               if (unlikely((netif_queue_stopped(dev) ||
-                            netif_subqueue_stopped(dev, skb)) &&
-                            skb->next))
+               if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
                         return NETDEV_TX_BUSY;
         } while (skb->next);
   
@@@ -1634,9 -1668,71 +1668,71 @@@ out_kfree_skb
    *          --BLG
    */
   
+ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
+ {
+       u32 *addr, *ports, hash, ihl;
+       u8 ip_proto;
+       int alen;
+ 
+       switch (skb->protocol) {
+       case __constant_htons(ETH_P_IP):
+               ip_proto = ip_hdr(skb)->protocol;
+               addr = &ip_hdr(skb)->saddr;
+               ihl = ip_hdr(skb)->ihl;
+               alen = 2;
+               break;
+       case __constant_htons(ETH_P_IPV6):
+               ip_proto = ipv6_hdr(skb)->nexthdr;
+               addr = &ipv6_hdr(skb)->saddr.s6_addr32[0];
+               ihl = (40 >> 2);
+               alen = 8;
+               break;
+       default:
+               return 0;
+       }
+ 
+       ports = (u32 *) (skb_network_header(skb) + (ihl * 4));
+ 
+       hash = 0;
+       while (alen--)
+               hash ^= *addr++;
+ 
+       switch (ip_proto) {
+       case IPPROTO_TCP:
+       case IPPROTO_UDP:
+       case IPPROTO_DCCP:
+       case IPPROTO_ESP:
+       case IPPROTO_AH:
+       case IPPROTO_SCTP:
+       case IPPROTO_UDPLITE:
+               hash ^= *ports;
+               break;
+ 
+       default:
+               break;
+       }
+ 
+       return hash % dev->real_num_tx_queues;
+ }
+ 
+ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
+                                       struct sk_buff *skb)
+ {
+       u16 queue_index = 0;
+ 
+       if (dev->select_queue)
+               queue_index = dev->select_queue(dev, skb);
+       else if (dev->real_num_tx_queues > 1)
+               queue_index = simple_tx_hash(dev, skb);
+ 
+       skb_set_queue_mapping(skb, queue_index);
+       return netdev_get_tx_queue(dev, queue_index);
+ }
+ 
   int dev_queue_xmit(struct sk_buff *skb)
   {
         struct net_device *dev = skb->dev;
+       struct netdev_queue *txq;
         struct Qdisc *q;
         int rc = -ENOMEM;
   
@@@ -1669,44 -1765,29 +1765,29 @@@
         }
   
   gso:
-       spin_lock_prefetch(&dev->queue_lock);
- 
         /* Disable soft irqs for various locks below. Also
          * stops preemption for RCU.
          */
         rcu_read_lock_bh();
   
-       /* Updates of qdisc are serialized by queue_lock.
-        * The struct Qdisc which is pointed to by qdisc is now a
-        * rcu structure - it may be accessed without acquiring
-        * a lock (but the structure may be stale.) The freeing of the
-        * qdisc will be deferred until it's known that there are no
-        * more references to it.
-        *
-        * If the qdisc has an enqueue function, we still need to
-        * hold the queue_lock before calling it, since queue_lock
-        * also serializes access to the device queue.
-        */
+       txq = dev_pick_tx(dev, skb);
+       q = rcu_dereference(txq->qdisc);
   
-       q = rcu_dereference(dev->qdisc);
   #ifdef CONFIG_NET_CLS_ACT
         skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
   #endif
         if (q->enqueue) {
-               /* Grab device queue */
-               spin_lock(&dev->queue_lock);
-               q = dev->qdisc;
-               if (q->enqueue) {
-                       /* reset queue_mapping to zero */
-                       skb_set_queue_mapping(skb, 0);
-                       rc = q->enqueue(skb, q);
-                       qdisc_run(dev);
-                       spin_unlock(&dev->queue_lock);
- 
-                       rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
-                       goto out;
-               }
-               spin_unlock(&dev->queue_lock);
+               spinlock_t *root_lock = qdisc_root_lock(q);
+ 
+               spin_lock(root_lock);
+ 
+               rc = qdisc_enqueue_root(skb, q);
+               qdisc_run(q);
+ 
+               spin_unlock(root_lock);
+ 
+               rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
+               goto out;
         }
   
         /* The device has no queue. Common case for software devices:
@@@ -1724,19 -1805,18 +1805,18 @@@
         if (dev->flags & IFF_UP) {
                 int cpu = smp_processor_id(); /* ok because BHs are off */
   
-               if (dev->xmit_lock_owner != cpu) {
+               if (txq->xmit_lock_owner != cpu) {
   
-                       HARD_TX_LOCK(dev, cpu);
+                       HARD_TX_LOCK(dev, txq, cpu);
   
-                       if (!netif_queue_stopped(dev) &&
-                           !netif_subqueue_stopped(dev, skb)) {
+                       if (!netif_tx_queue_stopped(txq)) {
                                 rc = 0;
-                               if (!dev_hard_start_xmit(skb, dev)) {
-                                       HARD_TX_UNLOCK(dev);
+                               if (!dev_hard_start_xmit(skb, dev, txq)) {
+                                       HARD_TX_UNLOCK(dev, txq);
                                         goto out;
                                 }
                         }
-                       HARD_TX_UNLOCK(dev);
+                       HARD_TX_UNLOCK(dev, txq);
                         if (net_ratelimit())
                                 printk(KERN_CRIT "Virtual device %s asks to "
                                        "queue packet!\n", dev->name);
@@@ -1880,7 -1960,7 +1960,7 @@@ static void net_tx_action(struct softir
         }
   
         if (sd->output_queue) {
-               struct net_device *head;
+               struct Qdisc *head;
   
                 local_irq_disable();
                 head = sd->output_queue;
@@@ -1888,17 -1968,20 +1968,20 @@@
                 local_irq_enable();
   
                 while (head) {
-                       struct net_device *dev = head;
+                       struct Qdisc *q = head;
+                       spinlock_t *root_lock;
+ 
                         head = head->next_sched;
   
                         smp_mb__before_clear_bit();
-                       clear_bit(__LINK_STATE_SCHED, &dev->state);
+                       clear_bit(__QDISC_STATE_SCHED, &q->state);
   
-                       if (spin_trylock(&dev->queue_lock)) {
-                               qdisc_run(dev);
-                               spin_unlock(&dev->queue_lock);
+                       root_lock = qdisc_root_lock(q);
+                       if (spin_trylock(root_lock)) {
+                               qdisc_run(q);
+                               spin_unlock(root_lock);
                         } else {
-                               netif_schedule(dev);
+                               __netif_schedule(q);
                         }
                 }
         }
@@@ -1979,10 -2062,11 +2062,11 @@@ static inline struct sk_buff *handle_ma
    */
   static int ing_filter(struct sk_buff *skb)
   {
-       struct Qdisc *q;
         struct net_device *dev = skb->dev;
-       int result = TC_ACT_OK;
         u32 ttl = G_TC_RTTL(skb->tc_verd);
+       struct netdev_queue *rxq;
+       int result = TC_ACT_OK;
+       struct Qdisc *q;
   
         if (MAX_RED_LOOP < ttl++) {
                 printk(KERN_WARNING
@@@ -1994,10 -2078,14 +2078,14 @@@
         skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
         skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
   
-       spin_lock(&dev->ingress_lock);
-       if ((q = dev->qdisc_ingress) != NULL)
-               result = q->enqueue(skb, q);
-       spin_unlock(&dev->ingress_lock);
+       rxq = &dev->rx_queue;
+ 
+       q = rxq->qdisc;
+       if (q) {
+               spin_lock(qdisc_lock(q));
+               result = qdisc_enqueue_root(skb, q);
+               spin_unlock(qdisc_lock(q));
+       }
   
         return result;
   }
@@@ -2006,7 -2094,7 +2094,7 @@@ static inline struct sk_buff *handle_in
                                          struct packet_type **pt_prev,
                                          int *ret, struct net_device *orig_dev)
   {
-       if (!skb->dev->qdisc_ingress)
+       if (!skb->dev->rx_queue.qdisc)
                 goto out;
   
         if (*pt_prev) {
@@@ -2030,6 -2118,33 +2118,33 @@@ out
   }
   #endif
   
+ /*
+  *    netif_nit_deliver - deliver received packets to network taps
+  *    @skb: buffer
+  *
+  *    This function is used to deliver incoming packets to network
+  *    taps. It should be used when the normal netif_receive_skb path
+  *    is bypassed, for example because of VLAN acceleration.
+  */
+ void netif_nit_deliver(struct sk_buff *skb)
+ {
+       struct packet_type *ptype;
+ 
+       if (list_empty(&ptype_all))
+               return;
+ 
+       skb_reset_network_header(skb);
+       skb_reset_transport_header(skb);
+       skb->mac_len = skb->network_header - skb->mac_header;
+ 
+       rcu_read_lock();
+       list_for_each_entry_rcu(ptype, &ptype_all, list) {
+               if (!ptype->dev || ptype->dev == skb->dev)
+                       deliver_skb(skb, ptype, skb->dev);
+       }
+       rcu_read_unlock();
+ }
+ 
   /**
    *    netif_receive_skb - process receive buffer from network
    *    @skb: buffer to process
@@@ -2261,7 -2376,7 +2376,7 @@@ out
          */
         if (!cpus_empty(net_dma.channel_mask)) {
                 int chan_idx;
- -              for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
+ +              for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
                         struct dma_chan *chan = net_dma.channels[chan_idx];
                         if (chan)
                                 dma_async_memcpy_issue_pending(chan);
@@@ -2769,16 -2884,29 +2884,29 @@@ int netdev_set_master(struct net_devic
         return 0;
   }
   
- static void __dev_set_promiscuity(struct net_device *dev, int inc)
+ static int __dev_set_promiscuity(struct net_device *dev, int inc)
   {
         unsigned short old_flags = dev->flags;
   
         ASSERT_RTNL();
   
-       if ((dev->promiscuity += inc) == 0)
-               dev->flags &= ~IFF_PROMISC;
-       else
-               dev->flags |= IFF_PROMISC;
+       dev->flags |= IFF_PROMISC;
+       dev->promiscuity += inc;
+       if (dev->promiscuity == 0) {
+               /*
+                * Avoid overflow.
+                * If inc causes overflow, untouch promisc and return error.
+                */
+               if (inc < 0)
+                       dev->flags &= ~IFF_PROMISC;
+               else {
+                       dev->promiscuity -= inc;
+                       printk(KERN_WARNING "%s: promiscuity touches roof, "
+                               "set promiscuity failed, promiscuity feature "
+                               "of device might be broken.\n", dev->name);
+                       return -EOVERFLOW;
+               }
+       }
         if (dev->flags != old_flags) {
                 printk(KERN_INFO "device %s %s promiscuous mode\n",
                        dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
@@@ -2796,6 -2924,7 +2924,7 @@@
                 if (dev->change_rx_flags)
                         dev->change_rx_flags(dev, IFF_PROMISC);
         }
+       return 0;
   }
   
   /**
@@@ -2807,14 -2936,19 +2936,19 @@@
    *    remains above zero the interface remains promiscuous. Once it hits zero
    *    the device reverts back to normal filtering operation. A negative inc
    *    value is used to drop promiscuity on the device.
+  *    Return 0 if successful or a negative errno code on error.
    */
- void dev_set_promiscuity(struct net_device *dev, int inc)
+ int dev_set_promiscuity(struct net_device *dev, int inc)
   {
         unsigned short old_flags = dev->flags;
+       int err;
   
-       __dev_set_promiscuity(dev, inc);
+       err = __dev_set_promiscuity(dev, inc);
+       if (err < 0)
+               return err;
         if (dev->flags != old_flags)
                 dev_set_rx_mode(dev);
+       return err;
   }
   
   /**
@@@ -2827,22 -2961,38 +2961,38 @@@
    *    to all interfaces. Once it hits zero the device reverts back to normal
    *    filtering operation. A negative @inc value is used to drop the counter
    *    when releasing a resource needing all multicasts.
+  *    Return 0 if successful or a negative errno code on error.
    */
   
- void dev_set_allmulti(struct net_device *dev, int inc)
+ int dev_set_allmulti(struct net_device *dev, int inc)
   {
         unsigned short old_flags = dev->flags;
   
         ASSERT_RTNL();
   
         dev->flags |= IFF_ALLMULTI;
-       if ((dev->allmulti += inc) == 0)
-               dev->flags &= ~IFF_ALLMULTI;
+       dev->allmulti += inc;
+       if (dev->allmulti == 0) {
+               /*
+                * Avoid overflow.
+                * If inc causes overflow, untouch allmulti and return error.
+                */
+               if (inc < 0)
+                       dev->flags &= ~IFF_ALLMULTI;
+               else {
+                       dev->allmulti -= inc;
+                       printk(KERN_WARNING "%s: allmulti touches roof, "
+                               "set allmulti failed, allmulti feature of "
+                               "device might be broken.\n", dev->name);
+                       return -EOVERFLOW;
+               }
+       }
         if (dev->flags ^ old_flags) {
                 if (dev->change_rx_flags)
                         dev->change_rx_flags(dev, IFF_ALLMULTI);
                 dev_set_rx_mode(dev);
         }
+       return 0;
   }
   
   /*
@@@ -2881,9 -3031,9 +3031,9 @@@ void __dev_set_rx_mode(struct net_devic
   
   void dev_set_rx_mode(struct net_device *dev)
   {
-       netif_tx_lock_bh(dev);
+       netif_addr_lock_bh(dev);
         __dev_set_rx_mode(dev);
-       netif_tx_unlock_bh(dev);
+       netif_addr_unlock_bh(dev);
   }
   
   int __dev_addr_delete(struct dev_addr_list **list, int *count,
@@@ -2961,11 -3111,11 +3111,11 @@@ int dev_unicast_delete(struct net_devic
   
         ASSERT_RTNL();
   
-       netif_tx_lock_bh(dev);
+       netif_addr_lock_bh(dev);
         err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
         if (!err)
                 __dev_set_rx_mode(dev);
-       netif_tx_unlock_bh(dev);
+       netif_addr_unlock_bh(dev);
         return err;
   }
   EXPORT_SYMBOL(dev_unicast_delete);
@@@ -2987,11 -3137,11 +3137,11 @@@ int dev_unicast_add(struct net_device *
   
         ASSERT_RTNL();
   
-       netif_tx_lock_bh(dev);
+       netif_addr_lock_bh(dev);
         err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
         if (!err)
                 __dev_set_rx_mode(dev);
-       netif_tx_unlock_bh(dev);
+       netif_addr_unlock_bh(dev);
         return err;
   }
   EXPORT_SYMBOL(dev_unicast_add);
@@@ -3058,12 -3208,12 +3208,12 @@@ int dev_unicast_sync(struct net_device 
   {
         int err = 0;
   
-       netif_tx_lock_bh(to);
+       netif_addr_lock_bh(to);
         err = __dev_addr_sync(&to->uc_list, &to->uc_count,
                               &from->uc_list, &from->uc_count);
         if (!err)
                 __dev_set_rx_mode(to);
-       netif_tx_unlock_bh(to);
+       netif_addr_unlock_bh(to);
         return err;
   }
   EXPORT_SYMBOL(dev_unicast_sync);
@@@ -3079,15 -3229,15 +3229,15 @@@
    */
   void dev_unicast_unsync(struct net_device *to, struct net_device *from)
   {
-       netif_tx_lock_bh(from);
-       netif_tx_lock_bh(to);
+       netif_addr_lock_bh(from);
+       netif_addr_lock(to);
   
         __dev_addr_unsync(&to->uc_list, &to->uc_count,
                           &from->uc_list, &from->uc_count);
         __dev_set_rx_mode(to);
   
-       netif_tx_unlock_bh(to);
-       netif_tx_unlock_bh(from);
+       netif_addr_unlock(to);
+       netif_addr_unlock_bh(from);
   }
   EXPORT_SYMBOL(dev_unicast_unsync);
   
@@@ -3107,7 -3257,7 +3257,7 @@@ static void __dev_addr_discard(struct d
   
   static void dev_addr_discard(struct net_device *dev)
   {
-       netif_tx_lock_bh(dev);
+       netif_addr_lock_bh(dev);
   
         __dev_addr_discard(&dev->uc_list);
         dev->uc_count = 0;
@@@ -3115,7 -3265,7 +3265,7 @@@
         __dev_addr_discard(&dev->mc_list);
         dev->mc_count = 0;
   
-       netif_tx_unlock_bh(dev);
+       netif_addr_unlock_bh(dev);
   }
   
   unsigned dev_get_flags(const struct net_device *dev)
@@@ -3688,6 -3838,21 +3838,21 @@@ static void rollback_registered(struct 
         dev_put(dev);
   }
   
+ static void __netdev_init_queue_locks_one(struct net_device *dev,
+                                         struct netdev_queue *dev_queue,
+                                         void *_unused)
+ {
+       spin_lock_init(&dev_queue->_xmit_lock);
+       netdev_set_lockdep_class(&dev_queue->_xmit_lock, dev->type);
+       dev_queue->xmit_lock_owner = -1;
+ }
+ 
+ static void netdev_init_queue_locks(struct net_device *dev)
+ {
+       netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
+       __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
+ }
+ 
   /**
    *    register_netdevice      - register a network device
    *    @dev: device to register
@@@ -3722,11 -3887,8 +3887,8 @@@ int register_netdevice(struct net_devic
         BUG_ON(!dev_net(dev));
         net = dev_net(dev);
   
-       spin_lock_init(&dev->queue_lock);
-       spin_lock_init(&dev->_xmit_lock);
-       netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
-       dev->xmit_lock_owner = -1;
-       spin_lock_init(&dev->ingress_lock);
+       spin_lock_init(&dev->addr_list_lock);
+       netdev_init_queue_locks(dev);
   
         dev->iflink = -1;
   
@@@ -4007,6 -4169,19 +4169,19 @@@ static struct net_device_stats *interna
         return &dev->stats;
   }
   
+ static void netdev_init_one_queue(struct net_device *dev,
+                                 struct netdev_queue *queue,
+                                 void *_unused)
+ {
+       queue->dev = dev;
+ }
+ 
+ static void netdev_init_queues(struct net_device *dev)
+ {
+       netdev_init_one_queue(dev, &dev->rx_queue, NULL);
+       netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
+ }
+ 
   /**
    *    alloc_netdev_mq - allocate network device
    *    @sizeof_priv:   size of private data to allocate space for
@@@ -4021,14 -4196,14 +4196,14 @@@
   struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
                 void (*setup)(struct net_device *), unsigned int queue_count)
   {
-       void *p;
+       struct netdev_queue *tx;
         struct net_device *dev;
         int alloc_size;
+       void *p;
   
         BUG_ON(strlen(name) >= sizeof(dev->name));
   
-       alloc_size = sizeof(struct net_device) +
-                    sizeof(struct net_device_subqueue) * (queue_count - 1);
+       alloc_size = sizeof(struct net_device);
         if (sizeof_priv) {
                 /* ensure 32-byte alignment of private area */
                 alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
@@@ -4043,22 -4218,33 +4218,33 @@@
                 return NULL;
         }
   
+       tx = kzalloc(sizeof(struct netdev_queue) * queue_count, GFP_KERNEL);
+       if (!tx) {
+               printk(KERN_ERR "alloc_netdev: Unable to allocate "
+                      "tx qdiscs.\n");
+               kfree(p);
+               return NULL;
+       }
+ 
         dev = (struct net_device *)
                 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
         dev->padded = (char *)dev - (char *)p;
         dev_net_set(dev, &init_net);
   
+       dev->_tx = tx;
+       dev->num_tx_queues = queue_count;
+       dev->real_num_tx_queues = queue_count;
+ 
         if (sizeof_priv) {
                 dev->priv = ((char *)dev +
-                            ((sizeof(struct net_device) +
-                              (sizeof(struct net_device_subqueue) *
-                               (queue_count - 1)) + NETDEV_ALIGN_CONST)
+                            ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
                               & ~NETDEV_ALIGN_CONST));
         }
   
-       dev->egress_subqueue_count = queue_count;
         dev->gso_max_size = GSO_MAX_SIZE;
   
+       netdev_init_queues(dev);
+ 
         dev->get_stats = internal_stats;
         netpoll_netdev_init(dev);
         setup(dev);
@@@ -4079,6 -4265,8 +4265,8 @@@ void free_netdev(struct net_device *dev
   {
         release_net(dev_net(dev));
   
+       kfree(dev->_tx);
+ 
         /*  Compatibility with error handling in drivers */
         if (dev->reg_state == NETREG_UNINITIALIZED) {
                 kfree((char *)dev - dev->padded);
@@@ -4260,7 -4448,7 +4448,7 @@@ static int dev_cpu_callback(struct noti
                             void *ocpu)
   {
         struct sk_buff **list_skb;
-       struct net_device **list_net;
+       struct Qdisc **list_net;
         struct sk_buff *skb;
         unsigned int cpu, oldcpu = (unsigned long)ocpu;
         struct softnet_data *sd, *oldsd;
@@@ -4322,7 -4510,7 +4510,7 @@@ static void net_dma_rebalance(struct ne
         i = 0;
         cpu = first_cpu(cpu_online_map);
   
- -      for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
+ +      for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
                 chan = net_dma->channels[chan_idx];
   
                 n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
diff --combined net/iucv/iucv.c

index 411b339,265b1b2..705959b
--- 1/net/iucv/iucv.c
--- 2/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@@ -474,14 -474,14 +474,14 @@@ static void iucv_setmask_mp(void
   {
         int cpu;
   
-       preempt_disable();
+       get_online_cpus();
         for_each_online_cpu(cpu)
                 /* Enable all cpus with a declared buffer. */
                 if (cpu_isset(cpu, iucv_buffer_cpumask) &&
                     !cpu_isset(cpu, iucv_irq_cpumask))
                         smp_call_function_single(cpu, iucv_allow_cpu,
                                                  NULL, 1);
-       preempt_enable();
+       put_online_cpus();
   }
   
   /**
@@@ -497,7 -497,7 +497,7 @@@ static void iucv_setmask_up(void
         /* Disable all cpu but the first in cpu_irq_cpumask. */
         cpumask = iucv_irq_cpumask;
         cpu_clear(first_cpu(iucv_irq_cpumask), cpumask);
- -      for_each_cpu_mask(cpu, cpumask)
+ +      for_each_cpu_mask_nr(cpu, cpumask)
                 smp_call_function_single(cpu, iucv_block_cpu, NULL, 1);
   }
   
@@@ -521,16 -521,18 +521,18 @@@ static int iucv_enable(void
                 goto out;
         /* Declare per cpu buffers. */
         rc = -EIO;
-       preempt_disable();
+       get_online_cpus();
         for_each_online_cpu(cpu)
                 smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1);
         preempt_enable();
         if (cpus_empty(iucv_buffer_cpumask))
                 /* No cpu could declare an iucv buffer. */
                 goto out_path;
+       put_online_cpus();
         return 0;
   
   out_path:
+       put_online_cpus();
         kfree(iucv_path_table);
   out:
         return rc;
@@@ -564,8 -566,11 +566,11 @@@ static int __cpuinit iucv_cpu_notify(st
                         return NOTIFY_BAD;
                 iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param),
                                      GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
-               if (!iucv_param[cpu])
+               if (!iucv_param[cpu]) {
+                       kfree(iucv_irq_data[cpu]);
+                       iucv_irq_data[cpu] = NULL;
                         return NOTIFY_BAD;
+               }
                 break;
         case CPU_UP_CANCELED:
         case CPU_UP_CANCELED_FROZEN:
@@@ -598,7 -603,7 +603,7 @@@
         return NOTIFY_OK;
   }
   
- static struct notifier_block __cpuinitdata iucv_cpu_notifier = {
+ static struct notifier_block __refdata iucv_cpu_notifier = {
         .notifier_call = iucv_cpu_notify,
   };
   
diff --combined net/sunrpc/svc.c

index 083d126,5a32cb7..835d274
--- 1/net/sunrpc/svc.c
--- 2/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@@ -18,6 -18,7 +18,7 @@@
   #include <linux/mm.h>
   #include <linux/interrupt.h>
   #include <linux/module.h>
+ #include <linux/kthread.h>
   
   #include <linux/sunrpc/types.h>
   #include <linux/sunrpc/xdr.h>
@@@ -291,15 -292,14 +292,14 @@@ svc_pool_map_put(void
   
   
   /*
-  * Set the current thread's cpus_allowed mask so that it
+  * Set the given thread's cpus_allowed mask so that it
    * will only run on cpus in the given pool.
-  *
-  * Returns 1 and fills in oldmask iff a cpumask was applied.
    */
- static inline int
- svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
+ static inline void
+ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
   {
         struct svc_pool_map *m = &svc_pool_map;
+       unsigned int node = m->pool_to[pidx];
   
         /*
          * The caller checks for sv_nrpools > 1, which
@@@ -307,27 -307,17 +307,18 @@@
          */
         BUG_ON(m->count == 0);
   
-       switch (m->mode)
-       {
-       default:
-               return 0;
+       switch (m->mode) {
         case SVC_POOL_PERCPU:
         {
-               unsigned int cpu = m->pool_to[pidx];
-               cpumask_of_cpu_ptr(cpumask, cpu);
- 
-               *oldmask = current->cpus_allowed;
-               set_cpus_allowed_ptr(current, cpumask);
-               return 1;
- -              set_cpus_allowed_ptr(task, &cpumask_of_cpu(node));
++              cpumask_of_cpu_ptr(cpumask, node);
++              set_cpus_allowed_ptr(task, cpumask);
+               break;
         }
         case SVC_POOL_PERNODE:
         {
-               unsigned int node = m->pool_to[pidx];
                 node_to_cpumask_ptr(nodecpumask, node);
- 
-               *oldmask = current->cpus_allowed;
-               set_cpus_allowed_ptr(current, nodecpumask);
-               return 1;
+               set_cpus_allowed_ptr(task, nodecpumask);
+               break;
         }
         }
   }
@@@ -444,7 -434,7 +435,7 @@@ EXPORT_SYMBOL(svc_create)
   struct svc_serv *
   svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
                 void (*shutdown)(struct svc_serv *serv),
-                 svc_thread_fn func, int sig, struct module *mod)
+                 svc_thread_fn func, struct module *mod)
   {
         struct svc_serv *serv;
         unsigned int npools = svc_pool_map_get();
@@@ -453,7 -443,6 +444,6 @@@
   
         if (serv != NULL) {
                 serv->sv_function = func;
-               serv->sv_kill_signal = sig;
                 serv->sv_module = mod;
         }
   
@@@ -462,7 -451,8 +452,8 @@@
   EXPORT_SYMBOL(svc_create_pooled);
   
   /*
-  * Destroy an RPC service.  Should be called with the BKL held
+  * Destroy an RPC service. Should be called with appropriate locking to
+  * protect the sv_nrthreads, sv_permsocks and sv_tempsocks.
    */
   void
   svc_destroy(struct svc_serv *serv)
@@@ -578,46 -568,6 +569,6 @@@ out_enomem
   }
   EXPORT_SYMBOL(svc_prepare_thread);
   
- /*
-  * Create a thread in the given pool.  Caller must hold BKL.
-  * On a NUMA or SMP machine, with a multi-pool serv, the thread
-  * will be restricted to run on the cpus belonging to the pool.
-  */
- static int
- __svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
-                   struct svc_pool *pool)
- {
-       struct svc_rqst *rqstp;
-       int             error = -ENOMEM;
-       int             have_oldmask = 0;
-       cpumask_t       uninitialized_var(oldmask);
- 
-       rqstp = svc_prepare_thread(serv, pool);
-       if (IS_ERR(rqstp)) {
-               error = PTR_ERR(rqstp);
-               goto out;
-       }
- 
-       if (serv->sv_nrpools > 1)
-               have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
- 
-       error = kernel_thread((int (*)(void *)) func, rqstp, 0);
- 
-       if (have_oldmask)
-               set_cpus_allowed_ptr(current, &oldmask);
- 
-       if (error < 0)
-               goto out_thread;
-       svc_sock_update_bufs(serv);
-       error = 0;
- out:
-       return error;
- 
- out_thread:
-       svc_exit_thread(rqstp);
-       goto out;
- }
- 
   /*
    * Choose a pool in which to create a new thread, for svc_set_num_threads
    */
@@@ -675,7 -625,7 +626,7 @@@ found_pool
    * of threads the given number.  If `pool' is non-NULL, applies
    * only to threads in that pool, otherwise round-robins between
    * all pools.  Must be called with a svc_get() reference and
-  * the BKL held.
+  * the BKL or another lock to protect access to svc_serv fields.
    *
    * Destroying threads relies on the service threads filling in
    * rqstp->rq_task, which only the nfs ones do.  Assumes the serv
@@@ -687,7 -637,9 +638,9 @@@
   int
   svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
   {
-       struct task_struct *victim;
+       struct svc_rqst *rqstp;
+       struct task_struct *task;
+       struct svc_pool *chosen_pool;
         int error = 0;
         unsigned int state = serv->sv_nrthreads-1;
   
@@@ -703,18 -655,34 +656,34 @@@
         /* create new threads */
         while (nrservs > 0) {
                 nrservs--;
+               chosen_pool = choose_pool(serv, pool, &state);
+ 
+               rqstp = svc_prepare_thread(serv, chosen_pool);
+               if (IS_ERR(rqstp)) {
+                       error = PTR_ERR(rqstp);
+                       break;
+               }
+ 
                 __module_get(serv->sv_module);
-               error = __svc_create_thread(serv->sv_function, serv,
-                                           choose_pool(serv, pool, &state));
-               if (error < 0) {
+               task = kthread_create(serv->sv_function, rqstp, serv->sv_name);
+               if (IS_ERR(task)) {
+                       error = PTR_ERR(task);
                         module_put(serv->sv_module);
+                       svc_exit_thread(rqstp);
                         break;
                 }
+ 
+               rqstp->rq_task = task;
+               if (serv->sv_nrpools > 1)
+                       svc_pool_map_set_cpumask(task, chosen_pool->sp_id);
+ 
+               svc_sock_update_bufs(serv);
+               wake_up_process(task);
         }
         /* destroy old threads */
         while (nrservs < 0 &&
-              (victim = choose_victim(serv, pool, &state)) != NULL) {
-               send_sig(serv->sv_kill_signal, victim, 1);
+              (task = choose_victim(serv, pool, &state)) != NULL) {
+               send_sig(SIGINT, task, 1);
                 nrservs++;
         }
   
@@@ -723,7 -691,8 +692,8 @@@
   EXPORT_SYMBOL(svc_set_num_threads);
   
   /*
-  * Called from a server thread as it's exiting.  Caller must hold BKL.
+  * Called from a server thread as it's exiting. Caller must hold the BKL or
+  * the "service mutex", whichever is appropriate for the service.
    */
   void
   svc_exit_thread(struct svc_rqst *rqstp)
author	Ingo Molnar <mingo@elte.hu>
	Mon, 21 Jul 2008 15:19:50 +0000 (17:19 +0200)
committer	Ingo Molnar <mingo@elte.hu>
	Mon, 21 Jul 2008 15:19:50 +0000 (17:19 +0200)
		1	2
net/core/dev.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/iucv/iucv.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/sunrpc/svc.c	patch \|	diff1 \|	diff2 \|	blob \| history