* smp_call_function() if an IPI is sent by the same process we are
* waiting to become inactive.
*/
-void wait_task_inactive(task_t * p)
+void wait_task_inactive(task_t *p)
{
unsigned long flags;
runqueue_t *rq;
int local_group;
int i;
+ /* Skip over this group if it has no CPUs allowed */
+ if (!cpus_intersects(group->cpumask, p->cpus_allowed))
+ goto nextgroup;
+
local_group = cpu_isset(this_cpu, group->cpumask);
- /* XXX: put a cpus allowed check */
/* Tally up the load of all CPUs in the group */
avg_load = 0;
min_load = avg_load;
idlest = group;
}
+nextgroup:
group = group->next;
} while (group != sd->groups);
/*
* find_idlest_queue - find the idlest runqueue among the cpus in group.
*/
-static int find_idlest_cpu(struct sched_group *group, int this_cpu)
+static int
+find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
{
+ cpumask_t tmp;
unsigned long load, min_load = ULONG_MAX;
int idlest = -1;
int i;
- for_each_cpu_mask(i, group->cpumask) {
+ /* Traverse only the allowed CPUs */
+ cpus_and(tmp, group->cpumask, p->cpus_allowed);
+
+ for_each_cpu_mask(i, tmp) {
load = source_load(i, 0);
if (load < min_load || (load == min_load && i == this_cpu)) {
if (!group)
goto nextlevel;
- new_cpu = find_idlest_cpu(group, cpu);
+ new_cpu = find_idlest_cpu(group, t, cpu);
if (new_cpu == -1 || new_cpu == cpu)
goto nextlevel;
*
* returns failure only if the task is already active.
*/
-static int try_to_wake_up(task_t * p, unsigned int state, int sync)
+static int try_to_wake_up(task_t *p, unsigned int state, int sync)
{
int cpu, this_cpu, success = 0;
unsigned long flags;
p->activated = -1;
}
+ /*
+ * Tasks that have marked their sleep as noninteractive get
+ * woken up without updating their sleep average. (i.e. their
+ * sleep is handled in a priority-neutral manner, no priority
+ * boost and no penalty.)
+ */
+ if (old_state & TASK_NONINTERACTIVE)
+ __activate_task(p, rq);
+ else
+ activate_task(p, rq, cpu == this_cpu);
/*
* Sync wakeups (i.e. those types of wakeups where the waker
* has indicated that it will leave the CPU in short order)
* the waker guarantees that the freshly woken up task is going
* to be considered on this CPU.)
*/
- activate_task(p, rq, cpu == this_cpu);
if (!sync || cpu != this_cpu) {
if (TASK_PREEMPTS_CURR(p, rq))
resched_task(rq->curr);
return success;
}
-int fastcall wake_up_process(task_t * p)
+int fastcall wake_up_process(task_t *p)
{
return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
* that must be done for every newly created context, then puts the task
* on the runqueue and wakes it.
*/
-void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags)
+void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
{
unsigned long flags;
int this_cpu, cpu;
* artificially, because any timeslice recovered here
* was given away by the parent in the first place.)
*/
-void fastcall sched_exit(task_t * p)
+void fastcall sched_exit(task_t *p)
{
unsigned long flags;
runqueue_t *rq;
/**
* finish_task_switch - clean up after a task-switch
+ * @rq: runqueue associated with task-switch
* @prev: the thread we just switched away from.
*
* finish_task_switch must be called after the context switch, paired
* Manfred Spraul <manfred@colorfullife.com>
*/
prev_task_flags = prev->flags;
+#ifdef CONFIG_DEBUG_SPINLOCK
+ /* this is a valid case when another task releases the spinlock */
+ rq->lock.owner = current;
+#endif
finish_arch_switch(prev);
finish_lock_switch(rq, prev);
if (mm)
*/
static inline
int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
- struct sched_domain *sd, enum idle_type idle, int *all_pinned)
+ struct sched_domain *sd, enum idle_type idle,
+ int *all_pinned)
{
/*
* We do not migrate tasks that are:
}
#ifdef CONFIG_SCHED_SMT
+static inline void wakeup_busy_runqueue(runqueue_t *rq)
+{
+ /* If an SMT runqueue is sleeping due to priority reasons wake it up */
+ if (rq->curr == rq->idle && rq->nr_running)
+ resched_task(rq->idle);
+}
+
static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
{
struct sched_domain *tmp, *sd = NULL;
for_each_cpu_mask(i, sibling_map) {
runqueue_t *smt_rq = cpu_rq(i);
- /*
- * If an SMT sibling task is sleeping due to priority
- * reasons wake it up now.
- */
- if (smt_rq->curr == smt_rq->idle && smt_rq->nr_running)
- resched_task(smt_rq->idle);
+ wakeup_busy_runqueue(smt_rq);
}
for_each_cpu_mask(i, sibling_map)
*/
}
+/*
+ * number of 'lost' timeslices this task wont be able to fully
+ * utilize, if another task runs on a sibling. This models the
+ * slowdown effect of other tasks running on siblings:
+ */
+static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)
+{
+ return p->time_slice * (100 - sd->per_cpu_gain) / 100;
+}
+
static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
{
struct sched_domain *tmp, *sd = NULL;
runqueue_t *smt_rq = cpu_rq(i);
task_t *smt_curr = smt_rq->curr;
+ /* Kernel threads do not participate in dependent sleeping */
+ if (!p->mm || !smt_curr->mm || rt_task(p))
+ goto check_smt_task;
+
/*
* If a user task with lower static priority than the
* running task on the SMT sibling is trying to schedule,
* task from using an unfair proportion of the
* physical cpu's resources. -ck
*/
- if (((smt_curr->time_slice * (100 - sd->per_cpu_gain) / 100) >
- task_timeslice(p) || rt_task(smt_curr)) &&
- p->mm && smt_curr->mm && !rt_task(p))
- ret = 1;
+ if (rt_task(smt_curr)) {
+ /*
+ * With real time tasks we run non-rt tasks only
+ * per_cpu_gain% of the time.
+ */
+ if ((jiffies % DEF_TIMESLICE) >
+ (sd->per_cpu_gain * DEF_TIMESLICE / 100))
+ ret = 1;
+ } else
+ if (smt_curr->static_prio < p->static_prio &&
+ !TASK_PREEMPTS_CURR(p, smt_rq) &&
+ smt_slice(smt_curr, sd) > task_timeslice(p))
+ ret = 1;
+
+check_smt_task:
+ if ((!smt_curr->mm && smt_curr != smt_rq->idle) ||
+ rt_task(smt_curr))
+ continue;
+ if (!p->mm) {
+ wakeup_busy_runqueue(smt_rq);
+ continue;
+ }
/*
- * Reschedule a lower priority task on the SMT sibling,
- * or wake it up if it has been put to sleep for priority
- * reasons.
+ * Reschedule a lower priority task on the SMT sibling for
+ * it to be put to sleep, or wake it up if it has been put to
+ * sleep for priority reasons to see if it should run now.
*/
- if ((((p->time_slice * (100 - sd->per_cpu_gain) / 100) >
- task_timeslice(smt_curr) || rt_task(p)) &&
- smt_curr->mm && p->mm && !rt_task(smt_curr)) ||
- (smt_curr == smt_rq->idle && smt_rq->nr_running))
- resched_task(smt_curr);
+ if (rt_task(p)) {
+ if ((jiffies % DEF_TIMESLICE) >
+ (sd->per_cpu_gain * DEF_TIMESLICE / 100))
+ resched_task(smt_curr);
+ } else {
+ if (TASK_PREEMPTS_CURR(p, smt_rq) &&
+ smt_slice(p, sd) > task_timeslice(smt_curr))
+ resched_task(smt_curr);
+ else
+ wakeup_busy_runqueue(smt_rq);
+ }
}
out_unlock:
for_each_cpu_mask(i, sibling_map)
if (next == rq->idle)
schedstat_inc(rq, sched_goidle);
prefetch(next);
+ prefetch_stack(next);
clear_tsk_need_resched(prev);
rcu_qsctr_inc(task_cpu(prev));
#endif /* CONFIG_PREEMPT */
-int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, void *key)
+int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,
+ void *key)
{
task_t *p = curr->private;
return try_to_wake_up(p, mode, sync);
* @key: is directly passed to the wakeup function
*/
void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode,
- int nr_exclusive, void *key)
+ int nr_exclusive, void *key)
{
unsigned long flags;
*
* On UP it can prevent extra preemption.
*/
-void fastcall __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+void fastcall
+__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
{
unsigned long flags;
int sync = 1;
EXPORT_SYMBOL(interruptible_sleep_on);
-long fastcall __sched interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
+long fastcall __sched
+interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
{
SLEEP_ON_VAR
* @policy: new policy.
* @param: structure containing the new RT priority.
*/
-int sched_setscheduler(struct task_struct *p, int policy, struct sched_param *param)
+int sched_setscheduler(struct task_struct *p, int policy,
+ struct sched_param *param)
{
int retval;
int oldprio, oldpolicy = -1;
* 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0.
*/
if (param->sched_priority < 0 ||
- (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
+ (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
(!p->mm && param->sched_priority > MAX_RT_PRIO-1))
return -EINVAL;
if ((policy == SCHED_NORMAL) != (param->sched_priority == 0))
}
EXPORT_SYMBOL_GPL(sched_setscheduler);
-static int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+static int
+do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
{
int retval;
struct sched_param lparam;
* operations here to prevent schedule() from being called twice (once via
* spin_unlock(), once by hand).
*/
-int cond_resched_lock(spinlock_t * lock)
+int cond_resched_lock(spinlock_t *lock)
{
int ret = 0;
return list_entry(p->sibling.next,struct task_struct,sibling);
}
-static void show_task(task_t * p)
+static void show_task(task_t *p)
{
task_t *relative;
unsigned state;
#endif
#ifdef CONFIG_DEBUG_STACK_USAGE
{
- unsigned long * n = (unsigned long *) (p->thread_info+1);
+ unsigned long *n = (unsigned long *) (p->thread_info+1);
while (!*n)
n++;
free = (unsigned long) n - (unsigned long)(p->thread_info+1);
* thread migration by bumping thread off CPU then 'pushing' onto
* another runqueue.
*/
-static int migration_thread(void * data)
+static int migration_thread(void *data)
{
runqueue_t *rq;
int cpu = (long)data;
* gets dynamically allocated.
*/
static DEFINE_PER_CPU(struct sched_domain, node_domains);
-static struct sched_group *sched_group_nodes[MAX_NUMNODES];
+static struct sched_group **sched_group_nodes_bycpu[NR_CPUS];
static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
-static struct sched_group sched_group_allnodes[MAX_NUMNODES];
+static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS];
static int cpu_to_allnodes_group(int cpu)
{
void build_sched_domains(const cpumask_t *cpu_map)
{
int i;
+#ifdef CONFIG_NUMA
+ struct sched_group **sched_group_nodes = NULL;
+ struct sched_group *sched_group_allnodes = NULL;
+
+ /*
+ * Allocate the per-node list of sched groups
+ */
+ sched_group_nodes = kmalloc(sizeof(struct sched_group*)*MAX_NUMNODES,
+ GFP_ATOMIC);
+ if (!sched_group_nodes) {
+ printk(KERN_WARNING "Can not alloc sched group node list\n");
+ return;
+ }
+ sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
+#endif
/*
* Set up domains for cpus specified by the cpu_map.
cpus_and(nodemask, nodemask, *cpu_map);
#ifdef CONFIG_NUMA
- if (num_online_cpus()
+ if (cpus_weight(*cpu_map)
> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
+ if (!sched_group_allnodes) {
+ sched_group_allnodes
+ = kmalloc(sizeof(struct sched_group)
+ * MAX_NUMNODES,
+ GFP_KERNEL);
+ if (!sched_group_allnodes) {
+ printk(KERN_WARNING
+ "Can not alloc allnodes sched group\n");
+ break;
+ }
+ sched_group_allnodes_bycpu[i]
+ = sched_group_allnodes;
+ }
sd = &per_cpu(allnodes_domains, i);
*sd = SD_ALLNODES_INIT;
sd->span = *cpu_map;
#ifdef CONFIG_NUMA
/* Set up node groups */
- init_sched_build_groups(sched_group_allnodes, *cpu_map,
- &cpu_to_allnodes_group);
+ if (sched_group_allnodes)
+ init_sched_build_groups(sched_group_allnodes, *cpu_map,
+ &cpu_to_allnodes_group);
for (i = 0; i < MAX_NUMNODES; i++) {
/* Set up node groups */
int j;
cpus_and(nodemask, nodemask, *cpu_map);
- if (cpus_empty(nodemask))
+ if (cpus_empty(nodemask)) {
+ sched_group_nodes[i] = NULL;
continue;
+ }
domainspan = sched_domain_node_span(i);
cpus_and(domainspan, domainspan, *cpu_map);
{
#ifdef CONFIG_NUMA
int i;
- for (i = 0; i < MAX_NUMNODES; i++) {
- cpumask_t nodemask = node_to_cpumask(i);
- struct sched_group *oldsg, *sg = sched_group_nodes[i];
+ int cpu;
- cpus_and(nodemask, nodemask, *cpu_map);
- if (cpus_empty(nodemask))
- continue;
+ for_each_cpu_mask(cpu, *cpu_map) {
+ struct sched_group *sched_group_allnodes
+ = sched_group_allnodes_bycpu[cpu];
+ struct sched_group **sched_group_nodes
+ = sched_group_nodes_bycpu[cpu];
- if (sg == NULL)
+ if (sched_group_allnodes) {
+ kfree(sched_group_allnodes);
+ sched_group_allnodes_bycpu[cpu] = NULL;
+ }
+
+ if (!sched_group_nodes)
continue;
- sg = sg->next;
+
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ cpumask_t nodemask = node_to_cpumask(i);
+ struct sched_group *oldsg, *sg = sched_group_nodes[i];
+
+ cpus_and(nodemask, nodemask, *cpu_map);
+ if (cpus_empty(nodemask))
+ continue;
+
+ if (sg == NULL)
+ continue;
+ sg = sg->next;
next_sg:
- oldsg = sg;
- sg = sg->next;
- kfree(oldsg);
- if (oldsg != sched_group_nodes[i])
- goto next_sg;
- sched_group_nodes[i] = NULL;
+ oldsg = sg;
+ sg = sg->next;
+ kfree(oldsg);
+ if (oldsg != sched_group_nodes[i])
+ goto next_sg;
+ }
+ kfree(sched_group_nodes);
+ sched_group_nodes_bycpu[cpu] = NULL;
}
#endif
}