#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
+ #include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
+ #include <linux/ip.h>
+ #include <linux/ipv6.h>
+ #include <linux/in.h>
#include "net-sysfs.h"
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
- * register_netdevice() inits dev->_xmit_lock and sets lockdep class
+ * register_netdevice() inits txq->_xmit_lock and sets lockdep class
* according to dev->type
*/
static const unsigned short netdev_lock_type[] =
}
}
+ void netdev_bonding_change(struct net_device *dev)
+ {
+ call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
+ }
+ EXPORT_SYMBOL(netdev_bonding_change);
+
/**
* dev_load - load a network module
* @net: the applicable net namespace
}
+ /**
+ * dev_disable_lro - disable Large Receive Offload on a device
+ * @dev: device
+ *
+ * Disable Large Receive Offload (LRO) on a net device. Must be
+ * called under RTNL. This is needed if received packets may be
+ * forwarded to another interface.
+ */
+ void dev_disable_lro(struct net_device *dev)
+ {
+ if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
+ dev->ethtool_ops->set_flags) {
+ u32 flags = dev->ethtool_ops->get_flags(dev);
+ if (flags & ETH_FLAG_LRO) {
+ flags &= ~ETH_FLAG_LRO;
+ dev->ethtool_ops->set_flags(dev, flags);
+ }
+ }
+ WARN_ON(dev->features & NETIF_F_LRO);
+ }
+ EXPORT_SYMBOL(dev_disable_lro);
+
+
static int dev_boot_phase = 1;
/*
}
- void __netif_schedule(struct net_device *dev)
+ void __netif_schedule(struct Qdisc *q)
{
- if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
- unsigned long flags;
+ BUG_ON(q == &noop_qdisc);
+
+ if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
struct softnet_data *sd;
+ unsigned long flags;
local_irq_save(flags);
sd = &__get_cpu_var(softnet_data);
- dev->next_sched = sd->output_queue;
- sd->output_queue = dev;
+ q->next_sched = sd->output_queue;
+ sd->output_queue = q;
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_restore(flags);
}
return 0;
}
- int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
+ struct netdev_queue *txq)
{
if (likely(!skb->next)) {
if (!list_empty(&ptype_all))
skb->next = nskb;
return rc;
}
- if (unlikely((netif_queue_stopped(dev) ||
- netif_subqueue_stopped(dev, skb)) &&
- skb->next))
+ if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
return NETDEV_TX_BUSY;
} while (skb->next);
* --BLG
*/
+ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
+ {
+ u32 *addr, *ports, hash, ihl;
+ u8 ip_proto;
+ int alen;
+
+ switch (skb->protocol) {
+ case __constant_htons(ETH_P_IP):
+ ip_proto = ip_hdr(skb)->protocol;
+ addr = &ip_hdr(skb)->saddr;
+ ihl = ip_hdr(skb)->ihl;
+ alen = 2;
+ break;
+ case __constant_htons(ETH_P_IPV6):
+ ip_proto = ipv6_hdr(skb)->nexthdr;
+ addr = &ipv6_hdr(skb)->saddr.s6_addr32[0];
+ ihl = (40 >> 2);
+ alen = 8;
+ break;
+ default:
+ return 0;
+ }
+
+ ports = (u32 *) (skb_network_header(skb) + (ihl * 4));
+
+ hash = 0;
+ while (alen--)
+ hash ^= *addr++;
+
+ switch (ip_proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_DCCP:
+ case IPPROTO_ESP:
+ case IPPROTO_AH:
+ case IPPROTO_SCTP:
+ case IPPROTO_UDPLITE:
+ hash ^= *ports;
+ break;
+
+ default:
+ break;
+ }
+
+ return hash % dev->real_num_tx_queues;
+ }
+
+ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
+ struct sk_buff *skb)
+ {
+ u16 queue_index = 0;
+
+ if (dev->select_queue)
+ queue_index = dev->select_queue(dev, skb);
+ else if (dev->real_num_tx_queues > 1)
+ queue_index = simple_tx_hash(dev, skb);
+
+ skb_set_queue_mapping(skb, queue_index);
+ return netdev_get_tx_queue(dev, queue_index);
+ }
+
int dev_queue_xmit(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
+ struct netdev_queue *txq;
struct Qdisc *q;
int rc = -ENOMEM;
}
gso:
- spin_lock_prefetch(&dev->queue_lock);
-
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
*/
rcu_read_lock_bh();
- /* Updates of qdisc are serialized by queue_lock.
- * The struct Qdisc which is pointed to by qdisc is now a
- * rcu structure - it may be accessed without acquiring
- * a lock (but the structure may be stale.) The freeing of the
- * qdisc will be deferred until it's known that there are no
- * more references to it.
- *
- * If the qdisc has an enqueue function, we still need to
- * hold the queue_lock before calling it, since queue_lock
- * also serializes access to the device queue.
- */
+ txq = dev_pick_tx(dev, skb);
+ q = rcu_dereference(txq->qdisc);
- q = rcu_dereference(dev->qdisc);
#ifdef CONFIG_NET_CLS_ACT
skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
#endif
if (q->enqueue) {
- /* Grab device queue */
- spin_lock(&dev->queue_lock);
- q = dev->qdisc;
- if (q->enqueue) {
- /* reset queue_mapping to zero */
- skb_set_queue_mapping(skb, 0);
- rc = q->enqueue(skb, q);
- qdisc_run(dev);
- spin_unlock(&dev->queue_lock);
-
- rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
- goto out;
- }
- spin_unlock(&dev->queue_lock);
+ spinlock_t *root_lock = qdisc_root_lock(q);
+
+ spin_lock(root_lock);
+
+ rc = qdisc_enqueue_root(skb, q);
+ qdisc_run(q);
+
+ spin_unlock(root_lock);
+
+ rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
+ goto out;
}
/* The device has no queue. Common case for software devices:
if (dev->flags & IFF_UP) {
int cpu = smp_processor_id(); /* ok because BHs are off */
- if (dev->xmit_lock_owner != cpu) {
+ if (txq->xmit_lock_owner != cpu) {
- HARD_TX_LOCK(dev, cpu);
+ HARD_TX_LOCK(dev, txq, cpu);
- if (!netif_queue_stopped(dev) &&
- !netif_subqueue_stopped(dev, skb)) {
+ if (!netif_tx_queue_stopped(txq)) {
rc = 0;
- if (!dev_hard_start_xmit(skb, dev)) {
- HARD_TX_UNLOCK(dev);
+ if (!dev_hard_start_xmit(skb, dev, txq)) {
+ HARD_TX_UNLOCK(dev, txq);
goto out;
}
}
- HARD_TX_UNLOCK(dev);
+ HARD_TX_UNLOCK(dev, txq);
if (net_ratelimit())
printk(KERN_CRIT "Virtual device %s asks to "
"queue packet!\n", dev->name);
}
if (sd->output_queue) {
- struct net_device *head;
+ struct Qdisc *head;
local_irq_disable();
head = sd->output_queue;
local_irq_enable();
while (head) {
- struct net_device *dev = head;
+ struct Qdisc *q = head;
+ spinlock_t *root_lock;
+
head = head->next_sched;
smp_mb__before_clear_bit();
- clear_bit(__LINK_STATE_SCHED, &dev->state);
+ clear_bit(__QDISC_STATE_SCHED, &q->state);
- if (spin_trylock(&dev->queue_lock)) {
- qdisc_run(dev);
- spin_unlock(&dev->queue_lock);
+ root_lock = qdisc_root_lock(q);
+ if (spin_trylock(root_lock)) {
+ qdisc_run(q);
+ spin_unlock(root_lock);
} else {
- netif_schedule(dev);
+ __netif_schedule(q);
}
}
}
*/
static int ing_filter(struct sk_buff *skb)
{
- struct Qdisc *q;
struct net_device *dev = skb->dev;
- int result = TC_ACT_OK;
u32 ttl = G_TC_RTTL(skb->tc_verd);
+ struct netdev_queue *rxq;
+ int result = TC_ACT_OK;
+ struct Qdisc *q;
if (MAX_RED_LOOP < ttl++) {
printk(KERN_WARNING
skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
- spin_lock(&dev->ingress_lock);
- if ((q = dev->qdisc_ingress) != NULL)
- result = q->enqueue(skb, q);
- spin_unlock(&dev->ingress_lock);
+ rxq = &dev->rx_queue;
+
+ q = rxq->qdisc;
+ if (q) {
+ spin_lock(qdisc_lock(q));
+ result = qdisc_enqueue_root(skb, q);
+ spin_unlock(qdisc_lock(q));
+ }
return result;
}
struct packet_type **pt_prev,
int *ret, struct net_device *orig_dev)
{
- if (!skb->dev->qdisc_ingress)
+ if (!skb->dev->rx_queue.qdisc)
goto out;
if (*pt_prev) {
}
#endif
+ /*
+ * netif_nit_deliver - deliver received packets to network taps
+ * @skb: buffer
+ *
+ * This function is used to deliver incoming packets to network
+ * taps. It should be used when the normal netif_receive_skb path
+ * is bypassed, for example because of VLAN acceleration.
+ */
+ void netif_nit_deliver(struct sk_buff *skb)
+ {
+ struct packet_type *ptype;
+
+ if (list_empty(&ptype_all))
+ return;
+
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb->mac_len = skb->network_header - skb->mac_header;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, &ptype_all, list) {
+ if (!ptype->dev || ptype->dev == skb->dev)
+ deliver_skb(skb, ptype, skb->dev);
+ }
+ rcu_read_unlock();
+ }
+
/**
* netif_receive_skb - process receive buffer from network
* @skb: buffer to process
*/
if (!cpus_empty(net_dma.channel_mask)) {
int chan_idx;
- for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
+ for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
struct dma_chan *chan = net_dma.channels[chan_idx];
if (chan)
dma_async_memcpy_issue_pending(chan);
return 0;
}
- static void __dev_set_promiscuity(struct net_device *dev, int inc)
+ static int __dev_set_promiscuity(struct net_device *dev, int inc)
{
unsigned short old_flags = dev->flags;
ASSERT_RTNL();
- if ((dev->promiscuity += inc) == 0)
- dev->flags &= ~IFF_PROMISC;
- else
- dev->flags |= IFF_PROMISC;
+ dev->flags |= IFF_PROMISC;
+ dev->promiscuity += inc;
+ if (dev->promiscuity == 0) {
+ /*
+ * Avoid overflow.
+ * If inc causes overflow, untouch promisc and return error.
+ */
+ if (inc < 0)
+ dev->flags &= ~IFF_PROMISC;
+ else {
+ dev->promiscuity -= inc;
+ printk(KERN_WARNING "%s: promiscuity touches roof, "
+ "set promiscuity failed, promiscuity feature "
+ "of device might be broken.\n", dev->name);
+ return -EOVERFLOW;
+ }
+ }
if (dev->flags != old_flags) {
printk(KERN_INFO "device %s %s promiscuous mode\n",
dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
if (dev->change_rx_flags)
dev->change_rx_flags(dev, IFF_PROMISC);
}
+ return 0;
}
/**
* remains above zero the interface remains promiscuous. Once it hits zero
* the device reverts back to normal filtering operation. A negative inc
* value is used to drop promiscuity on the device.
+ * Return 0 if successful or a negative errno code on error.
*/
- void dev_set_promiscuity(struct net_device *dev, int inc)
+ int dev_set_promiscuity(struct net_device *dev, int inc)
{
unsigned short old_flags = dev->flags;
+ int err;
- __dev_set_promiscuity(dev, inc);
+ err = __dev_set_promiscuity(dev, inc);
+ if (err < 0)
+ return err;
if (dev->flags != old_flags)
dev_set_rx_mode(dev);
+ return err;
}
/**
* to all interfaces. Once it hits zero the device reverts back to normal
* filtering operation. A negative @inc value is used to drop the counter
* when releasing a resource needing all multicasts.
+ * Return 0 if successful or a negative errno code on error.
*/
- void dev_set_allmulti(struct net_device *dev, int inc)
+ int dev_set_allmulti(struct net_device *dev, int inc)
{
unsigned short old_flags = dev->flags;
ASSERT_RTNL();
dev->flags |= IFF_ALLMULTI;
- if ((dev->allmulti += inc) == 0)
- dev->flags &= ~IFF_ALLMULTI;
+ dev->allmulti += inc;
+ if (dev->allmulti == 0) {
+ /*
+ * Avoid overflow.
+ * If inc causes overflow, untouch allmulti and return error.
+ */
+ if (inc < 0)
+ dev->flags &= ~IFF_ALLMULTI;
+ else {
+ dev->allmulti -= inc;
+ printk(KERN_WARNING "%s: allmulti touches roof, "
+ "set allmulti failed, allmulti feature of "
+ "device might be broken.\n", dev->name);
+ return -EOVERFLOW;
+ }
+ }
if (dev->flags ^ old_flags) {
if (dev->change_rx_flags)
dev->change_rx_flags(dev, IFF_ALLMULTI);
dev_set_rx_mode(dev);
}
+ return 0;
}
/*
void dev_set_rx_mode(struct net_device *dev)
{
- netif_tx_lock_bh(dev);
+ netif_addr_lock_bh(dev);
__dev_set_rx_mode(dev);
- netif_tx_unlock_bh(dev);
+ netif_addr_unlock_bh(dev);
}
int __dev_addr_delete(struct dev_addr_list **list, int *count,
ASSERT_RTNL();
- netif_tx_lock_bh(dev);
+ netif_addr_lock_bh(dev);
err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
if (!err)
__dev_set_rx_mode(dev);
- netif_tx_unlock_bh(dev);
+ netif_addr_unlock_bh(dev);
return err;
}
EXPORT_SYMBOL(dev_unicast_delete);
ASSERT_RTNL();
- netif_tx_lock_bh(dev);
+ netif_addr_lock_bh(dev);
err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
if (!err)
__dev_set_rx_mode(dev);
- netif_tx_unlock_bh(dev);
+ netif_addr_unlock_bh(dev);
return err;
}
EXPORT_SYMBOL(dev_unicast_add);
{
int err = 0;
- netif_tx_lock_bh(to);
+ netif_addr_lock_bh(to);
err = __dev_addr_sync(&to->uc_list, &to->uc_count,
&from->uc_list, &from->uc_count);
if (!err)
__dev_set_rx_mode(to);
- netif_tx_unlock_bh(to);
+ netif_addr_unlock_bh(to);
return err;
}
EXPORT_SYMBOL(dev_unicast_sync);
*/
void dev_unicast_unsync(struct net_device *to, struct net_device *from)
{
- netif_tx_lock_bh(from);
- netif_tx_lock_bh(to);
+ netif_addr_lock_bh(from);
+ netif_addr_lock(to);
__dev_addr_unsync(&to->uc_list, &to->uc_count,
&from->uc_list, &from->uc_count);
__dev_set_rx_mode(to);
- netif_tx_unlock_bh(to);
- netif_tx_unlock_bh(from);
+ netif_addr_unlock(to);
+ netif_addr_unlock_bh(from);
}
EXPORT_SYMBOL(dev_unicast_unsync);
static void dev_addr_discard(struct net_device *dev)
{
- netif_tx_lock_bh(dev);
+ netif_addr_lock_bh(dev);
__dev_addr_discard(&dev->uc_list);
dev->uc_count = 0;
__dev_addr_discard(&dev->mc_list);
dev->mc_count = 0;
- netif_tx_unlock_bh(dev);
+ netif_addr_unlock_bh(dev);
}
unsigned dev_get_flags(const struct net_device *dev)
dev_put(dev);
}
+ static void __netdev_init_queue_locks_one(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_unused)
+ {
+ spin_lock_init(&dev_queue->_xmit_lock);
+ netdev_set_lockdep_class(&dev_queue->_xmit_lock, dev->type);
+ dev_queue->xmit_lock_owner = -1;
+ }
+
+ static void netdev_init_queue_locks(struct net_device *dev)
+ {
+ netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
+ __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
+ }
+
/**
* register_netdevice - register a network device
* @dev: device to register
BUG_ON(!dev_net(dev));
net = dev_net(dev);
- spin_lock_init(&dev->queue_lock);
- spin_lock_init(&dev->_xmit_lock);
- netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
- dev->xmit_lock_owner = -1;
- spin_lock_init(&dev->ingress_lock);
+ spin_lock_init(&dev->addr_list_lock);
+ netdev_init_queue_locks(dev);
dev->iflink = -1;
return &dev->stats;
}
+ static void netdev_init_one_queue(struct net_device *dev,
+ struct netdev_queue *queue,
+ void *_unused)
+ {
+ queue->dev = dev;
+ }
+
+ static void netdev_init_queues(struct net_device *dev)
+ {
+ netdev_init_one_queue(dev, &dev->rx_queue, NULL);
+ netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
+ }
+
/**
* alloc_netdev_mq - allocate network device
* @sizeof_priv: size of private data to allocate space for
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
void (*setup)(struct net_device *), unsigned int queue_count)
{
- void *p;
+ struct netdev_queue *tx;
struct net_device *dev;
int alloc_size;
+ void *p;
BUG_ON(strlen(name) >= sizeof(dev->name));
- alloc_size = sizeof(struct net_device) +
- sizeof(struct net_device_subqueue) * (queue_count - 1);
+ alloc_size = sizeof(struct net_device);
if (sizeof_priv) {
/* ensure 32-byte alignment of private area */
alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
return NULL;
}
+ tx = kzalloc(sizeof(struct netdev_queue) * queue_count, GFP_KERNEL);
+ if (!tx) {
+ printk(KERN_ERR "alloc_netdev: Unable to allocate "
+ "tx qdiscs.\n");
+ kfree(p);
+ return NULL;
+ }
+
dev = (struct net_device *)
(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
dev->padded = (char *)dev - (char *)p;
dev_net_set(dev, &init_net);
+ dev->_tx = tx;
+ dev->num_tx_queues = queue_count;
+ dev->real_num_tx_queues = queue_count;
+
if (sizeof_priv) {
dev->priv = ((char *)dev +
- ((sizeof(struct net_device) +
- (sizeof(struct net_device_subqueue) *
- (queue_count - 1)) + NETDEV_ALIGN_CONST)
+ ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
& ~NETDEV_ALIGN_CONST));
}
- dev->egress_subqueue_count = queue_count;
dev->gso_max_size = GSO_MAX_SIZE;
+ netdev_init_queues(dev);
+
dev->get_stats = internal_stats;
netpoll_netdev_init(dev);
setup(dev);
{
release_net(dev_net(dev));
+ kfree(dev->_tx);
+
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
kfree((char *)dev - dev->padded);
void *ocpu)
{
struct sk_buff **list_skb;
- struct net_device **list_net;
+ struct Qdisc **list_net;
struct sk_buff *skb;
unsigned int cpu, oldcpu = (unsigned long)ocpu;
struct softnet_data *sd, *oldsd;
i = 0;
cpu = first_cpu(cpu_online_map);
- for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
+ for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
chan = net_dma->channels[chan_idx];
n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/module.h>
+ #include <linux/kthread.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
/*
- * Set the current thread's cpus_allowed mask so that it
+ * Set the given thread's cpus_allowed mask so that it
* will only run on cpus in the given pool.
- *
- * Returns 1 and fills in oldmask iff a cpumask was applied.
*/
- static inline int
- svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
+ static inline void
+ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
{
struct svc_pool_map *m = &svc_pool_map;
+ unsigned int node = m->pool_to[pidx];
/*
* The caller checks for sv_nrpools > 1, which
*/
BUG_ON(m->count == 0);
- switch (m->mode)
- {
- default:
- return 0;
+ switch (m->mode) {
case SVC_POOL_PERCPU:
{
- unsigned int cpu = m->pool_to[pidx];
- cpumask_of_cpu_ptr(cpumask, cpu);
-
- *oldmask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, cpumask);
- return 1;
- set_cpus_allowed_ptr(task, &cpumask_of_cpu(node));
++ cpumask_of_cpu_ptr(cpumask, node);
++ set_cpus_allowed_ptr(task, cpumask);
+ break;
}
case SVC_POOL_PERNODE:
{
- unsigned int node = m->pool_to[pidx];
node_to_cpumask_ptr(nodecpumask, node);
-
- *oldmask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, nodecpumask);
- return 1;
+ set_cpus_allowed_ptr(task, nodecpumask);
+ break;
}
}
}
struct svc_serv *
svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
void (*shutdown)(struct svc_serv *serv),
- svc_thread_fn func, int sig, struct module *mod)
+ svc_thread_fn func, struct module *mod)
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
if (serv != NULL) {
serv->sv_function = func;
- serv->sv_kill_signal = sig;
serv->sv_module = mod;
}
EXPORT_SYMBOL(svc_create_pooled);
/*
- * Destroy an RPC service. Should be called with the BKL held
+ * Destroy an RPC service. Should be called with appropriate locking to
+ * protect the sv_nrthreads, sv_permsocks and sv_tempsocks.
*/
void
svc_destroy(struct svc_serv *serv)
}
EXPORT_SYMBOL(svc_prepare_thread);
- /*
- * Create a thread in the given pool. Caller must hold BKL.
- * On a NUMA or SMP machine, with a multi-pool serv, the thread
- * will be restricted to run on the cpus belonging to the pool.
- */
- static int
- __svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
- struct svc_pool *pool)
- {
- struct svc_rqst *rqstp;
- int error = -ENOMEM;
- int have_oldmask = 0;
- cpumask_t uninitialized_var(oldmask);
-
- rqstp = svc_prepare_thread(serv, pool);
- if (IS_ERR(rqstp)) {
- error = PTR_ERR(rqstp);
- goto out;
- }
-
- if (serv->sv_nrpools > 1)
- have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
-
- error = kernel_thread((int (*)(void *)) func, rqstp, 0);
-
- if (have_oldmask)
- set_cpus_allowed_ptr(current, &oldmask);
-
- if (error < 0)
- goto out_thread;
- svc_sock_update_bufs(serv);
- error = 0;
- out:
- return error;
-
- out_thread:
- svc_exit_thread(rqstp);
- goto out;
- }
-
/*
* Choose a pool in which to create a new thread, for svc_set_num_threads
*/
* of threads the given number. If `pool' is non-NULL, applies
* only to threads in that pool, otherwise round-robins between
* all pools. Must be called with a svc_get() reference and
- * the BKL held.
+ * the BKL or another lock to protect access to svc_serv fields.
*
* Destroying threads relies on the service threads filling in
* rqstp->rq_task, which only the nfs ones do. Assumes the serv
int
svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
- struct task_struct *victim;
+ struct svc_rqst *rqstp;
+ struct task_struct *task;
+ struct svc_pool *chosen_pool;
int error = 0;
unsigned int state = serv->sv_nrthreads-1;
/* create new threads */
while (nrservs > 0) {
nrservs--;
+ chosen_pool = choose_pool(serv, pool, &state);
+
+ rqstp = svc_prepare_thread(serv, chosen_pool);
+ if (IS_ERR(rqstp)) {
+ error = PTR_ERR(rqstp);
+ break;
+ }
+
__module_get(serv->sv_module);
- error = __svc_create_thread(serv->sv_function, serv,
- choose_pool(serv, pool, &state));
- if (error < 0) {
+ task = kthread_create(serv->sv_function, rqstp, serv->sv_name);
+ if (IS_ERR(task)) {
+ error = PTR_ERR(task);
module_put(serv->sv_module);
+ svc_exit_thread(rqstp);
break;
}
+
+ rqstp->rq_task = task;
+ if (serv->sv_nrpools > 1)
+ svc_pool_map_set_cpumask(task, chosen_pool->sp_id);
+
+ svc_sock_update_bufs(serv);
+ wake_up_process(task);
}
/* destroy old threads */
while (nrservs < 0 &&
- (victim = choose_victim(serv, pool, &state)) != NULL) {
- send_sig(serv->sv_kill_signal, victim, 1);
+ (task = choose_victim(serv, pool, &state)) != NULL) {
+ send_sig(SIGINT, task, 1);
nrservs++;
}
EXPORT_SYMBOL(svc_set_num_threads);
/*
- * Called from a server thread as it's exiting. Caller must hold BKL.
+ * Called from a server thread as it's exiting. Caller must hold the BKL or
+ * the "service mutex", whichever is appropriate for the service.
*/
void
svc_exit_thread(struct svc_rqst *rqstp)