[PATCH] do_signal_stop: don't take tasklist_lock
[pandora-kernel.git] / kernel / sched.c
index e82c99f..a9ecac3 100644 (file)
@@ -49,6 +49,7 @@
 #include <linux/syscalls.h>
 #include <linux/times.h>
 #include <linux/acct.h>
+#include <linux/kprobes.h>
 #include <asm/tlb.h>
 
 #include <asm/unistd.h>
        (v1) * (v2_max) / (v1_max)
 
 #define DELTA(p) \
-       (SCALE(TASK_NICE(p), 40, MAX_BONUS) + INTERACTIVE_DELTA)
+       (SCALE(TASK_NICE(p) + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 + \
+               INTERACTIVE_DELTA)
 
 #define TASK_INTERACTIVE(p) \
        ((p)->prio <= (p)->static_prio - DELTA(p))
@@ -178,13 +180,6 @@ static unsigned int task_timeslice(task_t *p)
 #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran)      \
                                < (long long) (sd)->cache_hot_time)
 
-void __put_task_struct_cb(struct rcu_head *rhp)
-{
-       __put_task_struct(container_of(rhp, struct task_struct, rcu));
-}
-
-EXPORT_SYMBOL_GPL(__put_task_struct_cb);
-
 /*
  * These are the runqueue data structures:
  */
@@ -244,6 +239,7 @@ struct runqueue {
 
        task_t *migration_thread;
        struct list_head migration_queue;
+       int cpu;
 #endif
 
 #ifdef CONFIG_SCHEDSTATS
@@ -713,12 +709,6 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
                                p->sleep_avg = JIFFIES_TO_NS(MAX_SLEEP_AVG -
                                                DEF_TIMESLICE);
                } else {
-                       /*
-                        * The lower the sleep avg a task has the more
-                        * rapidly it will rise with sleep time.
-                        */
-                       sleep_time *= (MAX_BONUS - CURRENT_BONUS(p)) ? : 1;
-
                        /*
                         * Tasks waking from uninterruptible sleep are
                         * limited in their sleep_avg rise as they
@@ -1558,8 +1548,14 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
        finish_lock_switch(rq, prev);
        if (mm)
                mmdrop(mm);
-       if (unlikely(prev_task_flags & PF_DEAD))
+       if (unlikely(prev_task_flags & PF_DEAD)) {
+               /*
+                * Remove function-return probe instances associated with this
+                * task and put them back on the free list.
+                */
+               kprobe_flush_task(prev);
                put_task_struct(prev);
+       }
 }
 
 /**
@@ -1629,7 +1625,7 @@ unsigned long nr_uninterruptible(void)
 {
        unsigned long i, sum = 0;
 
-       for_each_cpu(i)
+       for_each_possible_cpu(i)
                sum += cpu_rq(i)->nr_uninterruptible;
 
        /*
@@ -1646,7 +1642,7 @@ unsigned long long nr_context_switches(void)
 {
        unsigned long long i, sum = 0;
 
-       for_each_cpu(i)
+       for_each_possible_cpu(i)
                sum += cpu_rq(i)->nr_switches;
 
        return sum;
@@ -1656,7 +1652,7 @@ unsigned long nr_iowait(void)
 {
        unsigned long i, sum = 0;
 
-       for_each_cpu(i)
+       for_each_possible_cpu(i)
                sum += atomic_read(&cpu_rq(i)->nr_iowait);
 
        return sum;
@@ -1667,6 +1663,9 @@ unsigned long nr_iowait(void)
 /*
  * double_rq_lock - safely lock two runqueues
  *
+ * We must take them in cpu order to match code in
+ * dependent_sleeper and wake_dependent_sleeper.
+ *
  * Note this does not disable interrupts like task_rq_lock,
  * you need to do so manually before calling.
  */
@@ -1678,7 +1677,7 @@ static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
                spin_lock(&rq1->lock);
                __acquire(rq2->lock);   /* Fake it out ;) */
        } else {
-               if (rq1 < rq2) {
+               if (rq1->cpu < rq2->cpu) {
                        spin_lock(&rq1->lock);
                        spin_lock(&rq2->lock);
                } else {
@@ -1714,7 +1713,7 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
        __acquires(this_rq->lock)
 {
        if (unlikely(!spin_trylock(&busiest->lock))) {
-               if (busiest < this_rq) {
+               if (busiest->cpu < this_rq->cpu) {
                        spin_unlock(&this_rq->lock);
                        spin_lock(&busiest->lock);
                        spin_lock(&this_rq->lock);
@@ -2880,13 +2879,11 @@ asmlinkage void __sched schedule(void)
         * schedule() atomically, we ignore that path for now.
         * Otherwise, whine if we are scheduling when we should not be.
         */
-       if (likely(!current->exit_state)) {
-               if (unlikely(in_atomic())) {
-                       printk(KERN_ERR "scheduling while atomic: "
-                               "%s/0x%08x/%d\n",
-                               current->comm, preempt_count(), current->pid);
-                       dump_stack();
-               }
+       if (unlikely(in_atomic() && !current->exit_state)) {
+               printk(KERN_ERR "BUG: scheduling while atomic: "
+                       "%s/0x%08x/%d\n",
+                       current->comm, preempt_count(), current->pid);
+               dump_stack();
        }
        profile_hit(SCHED_PROFILING, __builtin_return_address(0));
 
@@ -5577,11 +5574,31 @@ static int cpu_to_cpu_group(int cpu)
 }
 #endif
 
+#ifdef CONFIG_SCHED_MC
+static DEFINE_PER_CPU(struct sched_domain, core_domains);
+static struct sched_group sched_group_core[NR_CPUS];
+#endif
+
+#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
+static int cpu_to_core_group(int cpu)
+{
+       return first_cpu(cpu_sibling_map[cpu]);
+}
+#elif defined(CONFIG_SCHED_MC)
+static int cpu_to_core_group(int cpu)
+{
+       return cpu;
+}
+#endif
+
 static DEFINE_PER_CPU(struct sched_domain, phys_domains);
 static struct sched_group sched_group_phys[NR_CPUS];
 static int cpu_to_phys_group(int cpu)
 {
-#ifdef CONFIG_SCHED_SMT
+#if defined(CONFIG_SCHED_MC)
+       cpumask_t mask = cpu_coregroup_map(cpu);
+       return first_cpu(mask);
+#elif defined(CONFIG_SCHED_SMT)
        return first_cpu(cpu_sibling_map[cpu]);
 #else
        return cpu;
@@ -5604,6 +5621,32 @@ static int cpu_to_allnodes_group(int cpu)
 {
        return cpu_to_node(cpu);
 }
+static void init_numa_sched_groups_power(struct sched_group *group_head)
+{
+       struct sched_group *sg = group_head;
+       int j;
+
+       if (!sg)
+               return;
+next_sg:
+       for_each_cpu_mask(j, sg->cpumask) {
+               struct sched_domain *sd;
+
+               sd = &per_cpu(phys_domains, j);
+               if (j != first_cpu(sd->groups->cpumask)) {
+                       /*
+                        * Only add "power" once for each
+                        * physical package.
+                        */
+                       continue;
+               }
+
+               sg->cpu_power += sd->groups->cpu_power;
+       }
+       sg = sg->next;
+       if (sg != group_head)
+               goto next_sg;
+}
 #endif
 
 /*
@@ -5679,6 +5722,17 @@ void build_sched_domains(const cpumask_t *cpu_map)
                sd->parent = p;
                sd->groups = &sched_group_phys[group];
 
+#ifdef CONFIG_SCHED_MC
+               p = sd;
+               sd = &per_cpu(core_domains, i);
+               group = cpu_to_core_group(i);
+               *sd = SD_MC_INIT;
+               sd->span = cpu_coregroup_map(i);
+               cpus_and(sd->span, sd->span, *cpu_map);
+               sd->parent = p;
+               sd->groups = &sched_group_core[group];
+#endif
+
 #ifdef CONFIG_SCHED_SMT
                p = sd;
                sd = &per_cpu(cpu_domains, i);
@@ -5704,6 +5758,19 @@ void build_sched_domains(const cpumask_t *cpu_map)
        }
 #endif
 
+#ifdef CONFIG_SCHED_MC
+       /* Set up multi-core groups */
+       for_each_cpu_mask(i, *cpu_map) {
+               cpumask_t this_core_map = cpu_coregroup_map(i);
+               cpus_and(this_core_map, this_core_map, *cpu_map);
+               if (i != first_cpu(this_core_map))
+                       continue;
+               init_sched_build_groups(sched_group_core, this_core_map,
+                                       &cpu_to_core_group);
+       }
+#endif
+
+
        /* Set up physical groups */
        for (i = 0; i < MAX_NUMNODES; i++) {
                cpumask_t nodemask = node_to_cpumask(i);
@@ -5800,51 +5867,38 @@ void build_sched_domains(const cpumask_t *cpu_map)
                power = SCHED_LOAD_SCALE;
                sd->groups->cpu_power = power;
 #endif
+#ifdef CONFIG_SCHED_MC
+               sd = &per_cpu(core_domains, i);
+               power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1)
+                                           * SCHED_LOAD_SCALE / 10;
+               sd->groups->cpu_power = power;
 
+               sd = &per_cpu(phys_domains, i);
+
+               /*
+                * This has to be < 2 * SCHED_LOAD_SCALE
+                * Lets keep it SCHED_LOAD_SCALE, so that
+                * while calculating NUMA group's cpu_power
+                * we can simply do
+                *  numa_group->cpu_power += phys_group->cpu_power;
+                *
+                * See "only add power once for each physical pkg"
+                * comment below
+                */
+               sd->groups->cpu_power = SCHED_LOAD_SCALE;
+#else
                sd = &per_cpu(phys_domains, i);
                power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
                                (cpus_weight(sd->groups->cpumask)-1) / 10;
                sd->groups->cpu_power = power;
-
-#ifdef CONFIG_NUMA
-               sd = &per_cpu(allnodes_domains, i);
-               if (sd->groups) {
-                       power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-                               (cpus_weight(sd->groups->cpumask)-1) / 10;
-                       sd->groups->cpu_power = power;
-               }
 #endif
        }
 
 #ifdef CONFIG_NUMA
-       for (i = 0; i < MAX_NUMNODES; i++) {
-               struct sched_group *sg = sched_group_nodes[i];
-               int j;
-
-               if (sg == NULL)
-                       continue;
-next_sg:
-               for_each_cpu_mask(j, sg->cpumask) {
-                       struct sched_domain *sd;
-                       int power;
+       for (i = 0; i < MAX_NUMNODES; i++)
+               init_numa_sched_groups_power(sched_group_nodes[i]);
 
-                       sd = &per_cpu(phys_domains, j);
-                       if (j != first_cpu(sd->groups->cpumask)) {
-                               /*
-                                * Only add "power" once for each
-                                * physical package.
-                                */
-                               continue;
-                       }
-                       power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-                               (cpus_weight(sd->groups->cpumask)-1) / 10;
-
-                       sg->cpu_power += power;
-               }
-               sg = sg->next;
-               if (sg != sched_group_nodes[i])
-                       goto next_sg;
-       }
+       init_numa_sched_groups_power(sched_group_allnodes);
 #endif
 
        /* Attach the domains */
@@ -5852,6 +5906,8 @@ next_sg:
                struct sched_domain *sd;
 #ifdef CONFIG_SCHED_SMT
                sd = &per_cpu(cpu_domains, i);
+#elif defined(CONFIG_SCHED_MC)
+               sd = &per_cpu(core_domains, i);
 #else
                sd = &per_cpu(phys_domains, i);
 #endif
@@ -6024,7 +6080,7 @@ void __init sched_init(void)
        runqueue_t *rq;
        int i, j, k;
 
-       for_each_cpu(i) {
+       for_each_possible_cpu(i) {
                prio_array_t *array;
 
                rq = cpu_rq(i);
@@ -6042,6 +6098,7 @@ void __init sched_init(void)
                rq->push_cpu = 0;
                rq->migration_thread = NULL;
                INIT_LIST_HEAD(&rq->migration_queue);
+               rq->cpu = i;
 #endif
                atomic_set(&rq->nr_iowait, 0);
 
@@ -6082,7 +6139,7 @@ void __might_sleep(char *file, int line)
                if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
                        return;
                prev_jiffy = jiffies;
-               printk(KERN_ERR "Debug: sleeping function called from invalid"
+               printk(KERN_ERR "BUG: sleeping function called from invalid"
                                " context at %s:%d\n", file, line);
                printk("in_atomic():%d, irqs_disabled():%d\n",
                        in_atomic(), irqs_disabled());