[Bluetooth] Add support for Canyon CN-BTU1 dongle
[pandora-kernel.git] / kernel / sched.c
index 4ee400f..5c848fd 100644 (file)
@@ -51,6 +51,7 @@
 #include <linux/times.h>
 #include <linux/acct.h>
 #include <linux/kprobes.h>
+#include <linux/delayacct.h>
 #include <asm/tlb.h>
 
 #include <asm/unistd.h>
@@ -237,6 +238,7 @@ struct rq {
        /* For active balancing */
        int active_balance;
        int push_cpu;
+       int cpu;                /* cpu of this runqueue */
 
        struct task_struct *migration_thread;
        struct list_head migration_queue;
@@ -266,6 +268,15 @@ struct rq {
 
 static DEFINE_PER_CPU(struct rq, runqueues);
 
+static inline int cpu_of(struct rq *rq)
+{
+#ifdef CONFIG_SMP
+       return rq->cpu;
+#else
+       return 0;
+#endif
+}
+
 /*
  * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
  * See detach_destroy_domains: synchronize_sched for details.
@@ -501,9 +512,36 @@ struct file_operations proc_schedstat_operations = {
        .release = single_release,
 };
 
+/*
+ * Expects runqueue lock to be held for atomicity of update
+ */
+static inline void
+rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
+{
+       if (rq) {
+               rq->rq_sched_info.run_delay += delta_jiffies;
+               rq->rq_sched_info.pcnt++;
+       }
+}
+
+/*
+ * Expects runqueue lock to be held for atomicity of update
+ */
+static inline void
+rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
+{
+       if (rq)
+               rq->rq_sched_info.cpu_time += delta_jiffies;
+}
 # define schedstat_inc(rq, field)      do { (rq)->field++; } while (0)
 # define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
 #else /* !CONFIG_SCHEDSTATS */
+static inline void
+rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
+{}
+static inline void
+rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
+{}
 # define schedstat_inc(rq, field)      do { } while (0)
 # define schedstat_add(rq, field, amt) do { } while (0)
 #endif
@@ -523,7 +561,7 @@ static inline struct rq *this_rq_lock(void)
        return rq;
 }
 
-#ifdef CONFIG_SCHEDSTATS
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 /*
  * Called when a process is dequeued from the active array and given
  * the cpu.  We should note that with the exception of interactive
@@ -551,21 +589,16 @@ static inline void sched_info_dequeued(struct task_struct *t)
  */
 static void sched_info_arrive(struct task_struct *t)
 {
-       unsigned long now = jiffies, diff = 0;
-       struct rq *rq = task_rq(t);
+       unsigned long now = jiffies, delta_jiffies = 0;
 
        if (t->sched_info.last_queued)
-               diff = now - t->sched_info.last_queued;
+               delta_jiffies = now - t->sched_info.last_queued;
        sched_info_dequeued(t);
-       t->sched_info.run_delay += diff;
+       t->sched_info.run_delay += delta_jiffies;
        t->sched_info.last_arrival = now;
        t->sched_info.pcnt++;
 
-       if (!rq)
-               return;
-
-       rq->rq_sched_info.run_delay += diff;
-       rq->rq_sched_info.pcnt++;
+       rq_sched_info_arrive(task_rq(t), delta_jiffies);
 }
 
 /*
@@ -585,8 +618,9 @@ static void sched_info_arrive(struct task_struct *t)
  */
 static inline void sched_info_queued(struct task_struct *t)
 {
-       if (!t->sched_info.last_queued)
-               t->sched_info.last_queued = jiffies;
+       if (unlikely(sched_info_on()))
+               if (!t->sched_info.last_queued)
+                       t->sched_info.last_queued = jiffies;
 }
 
 /*
@@ -595,13 +629,10 @@ static inline void sched_info_queued(struct task_struct *t)
  */
 static inline void sched_info_depart(struct task_struct *t)
 {
-       struct rq *rq = task_rq(t);
-       unsigned long diff = jiffies - t->sched_info.last_arrival;
-
-       t->sched_info.cpu_time += diff;
+       unsigned long delta_jiffies = jiffies - t->sched_info.last_arrival;
 
-       if (rq)
-               rq->rq_sched_info.cpu_time += diff;
+       t->sched_info.cpu_time += delta_jiffies;
+       rq_sched_info_depart(task_rq(t), delta_jiffies);
 }
 
 /*
@@ -610,7 +641,7 @@ static inline void sched_info_depart(struct task_struct *t)
  * the idle task.)  We are only called when prev != next.
  */
 static inline void
-sched_info_switch(struct task_struct *prev, struct task_struct *next)
+__sched_info_switch(struct task_struct *prev, struct task_struct *next)
 {
        struct rq *rq = task_rq(prev);
 
@@ -625,10 +656,16 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
        if (next != rq->idle)
                sched_info_arrive(next);
 }
+static inline void
+sched_info_switch(struct task_struct *prev, struct task_struct *next)
+{
+       if (unlikely(sched_info_on()))
+               __sched_info_switch(prev, next);
+}
 #else
 #define sched_info_queued(t)           do { } while (0)
 #define sched_info_switch(t, next)     do { } while (0)
-#endif /* CONFIG_SCHEDSTATS */
+#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
 
 /*
  * Adding/removing a task to/from a priority array:
@@ -1530,8 +1567,9 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags)
 
        INIT_LIST_HEAD(&p->run_list);
        p->array = NULL;
-#ifdef CONFIG_SCHEDSTATS
-       memset(&p->sched_info, 0, sizeof(p->sched_info));
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+       if (unlikely(sched_info_on()))
+               memset(&p->sched_info, 0, sizeof(p->sched_info));
 #endif
 #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
        p->oncpu = 0;
@@ -1788,7 +1826,15 @@ context_switch(struct rq *rq, struct task_struct *prev,
                WARN_ON(rq->prev_mm);
                rq->prev_mm = oldmm;
        }
+       /*
+        * The runqueue lock will be released by the next task
+        * (which is an invalid locking op, but in the case of
+        * the scheduler it's an obvious special-case), so we
+        * do an early lockdep release here:
+        */
+#ifndef __ARCH_WANT_UNLOCKED_CTXSW
        spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
+#endif
 
        /* Here we just switch the register state and the stack. */
        switch_to(prev, next, prev);
@@ -2175,7 +2221,8 @@ out:
  */
 static struct sched_group *
 find_busiest_group(struct sched_domain *sd, int this_cpu,
-                  unsigned long *imbalance, enum idle_type idle, int *sd_idle)
+                  unsigned long *imbalance, enum idle_type idle, int *sd_idle,
+                  cpumask_t *cpus)
 {
        struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
        unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -2212,7 +2259,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
                sum_weighted_load = sum_nr_running = avg_load = 0;
 
                for_each_cpu_mask(i, group->cpumask) {
-                       struct rq *rq = cpu_rq(i);
+                       struct rq *rq;
+
+                       if (!cpu_isset(i, *cpus))
+                               continue;
+
+                       rq = cpu_rq(i);
 
                        if (*sd_idle && !idle_cpu(i))
                                *sd_idle = 0;
@@ -2430,13 +2482,17 @@ ret:
  */
 static struct rq *
 find_busiest_queue(struct sched_group *group, enum idle_type idle,
-                  unsigned long imbalance)
+                  unsigned long imbalance, cpumask_t *cpus)
 {
        struct rq *busiest = NULL, *rq;
        unsigned long max_load = 0;
        int i;
 
        for_each_cpu_mask(i, group->cpumask) {
+
+               if (!cpu_isset(i, *cpus))
+                       continue;
+
                rq = cpu_rq(i);
 
                if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance)
@@ -2475,6 +2531,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
        struct sched_group *group;
        unsigned long imbalance;
        struct rq *busiest;
+       cpumask_t cpus = CPU_MASK_ALL;
 
        if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
            !sched_smt_power_savings)
@@ -2482,13 +2539,15 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 
        schedstat_inc(sd, lb_cnt[idle]);
 
-       group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle);
+redo:
+       group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
+                                                       &cpus);
        if (!group) {
                schedstat_inc(sd, lb_nobusyg[idle]);
                goto out_balanced;
        }
 
-       busiest = find_busiest_queue(group, idle, imbalance);
+       busiest = find_busiest_queue(group, idle, imbalance, &cpus);
        if (!busiest) {
                schedstat_inc(sd, lb_nobusyq[idle]);
                goto out_balanced;
@@ -2513,8 +2572,12 @@ static int load_balance(int this_cpu, struct rq *this_rq,
                double_rq_unlock(this_rq, busiest);
 
                /* All tasks on this runqueue were pinned by CPU affinity */
-               if (unlikely(all_pinned))
+               if (unlikely(all_pinned)) {
+                       cpu_clear(cpu_of(busiest), cpus);
+                       if (!cpus_empty(cpus))
+                               goto redo;
                        goto out_balanced;
+               }
        }
 
        if (!nr_moved) {
@@ -2603,18 +2666,22 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
        unsigned long imbalance;
        int nr_moved = 0;
        int sd_idle = 0;
+       cpumask_t cpus = CPU_MASK_ALL;
 
        if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
                sd_idle = 1;
 
        schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
-       group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle);
+redo:
+       group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE,
+                               &sd_idle, &cpus);
        if (!group) {
                schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
                goto out_balanced;
        }
 
-       busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance);
+       busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance,
+                               &cpus);
        if (!busiest) {
                schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
                goto out_balanced;
@@ -2632,6 +2699,12 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
                                        minus_1_or_zero(busiest->nr_running),
                                        imbalance, sd, NEWLY_IDLE, NULL);
                spin_unlock(&busiest->lock);
+
+               if (!nr_moved) {
+                       cpu_clear(cpu_of(busiest), cpus);
+                       if (!cpus_empty(cpus))
+                               goto redo;
+               }
        }
 
        if (!nr_moved) {
@@ -3384,7 +3457,7 @@ EXPORT_SYMBOL(schedule);
 
 #ifdef CONFIG_PREEMPT
 /*
- * this is is the entry point to schedule() from in-kernel preemption
+ * this is the entry point to schedule() from in-kernel preemption
  * off of preempt_enable.  Kernel preemptions off return from interrupt
  * occur there and call schedule directly.
  */
@@ -3427,7 +3500,7 @@ need_resched:
 EXPORT_SYMBOL(preempt_schedule);
 
 /*
- * this is is the entry point to schedule() from kernel preemption
+ * this is the entry point to schedule() from kernel preemption
  * off of irq context.
  * Note, that this is called and return with irqs disabled. This will
  * protect us against recursive calling from irq.
@@ -3439,7 +3512,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
        struct task_struct *task = current;
        int saved_lock_depth;
 #endif
-       /* Catch callers which need to be fixed*/
+       /* Catch callers which need to be fixed */
        BUG_ON(ti->preempt_count || !irqs_disabled());
 
 need_resched:
@@ -4126,10 +4199,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
                read_unlock_irq(&tasklist_lock);
                return -ESRCH;
        }
-       get_task_struct(p);
-       read_unlock_irq(&tasklist_lock);
        retval = sched_setscheduler(p, policy, &lparam);
-       put_task_struct(p);
+       read_unlock_irq(&tasklist_lock);
 
        return retval;
 }
@@ -4420,9 +4491,9 @@ asmlinkage long sys_sched_yield(void)
        return 0;
 }
 
-static inline int __resched_legal(void)
+static inline int __resched_legal(int expected_preempt_count)
 {
-       if (unlikely(preempt_count()))
+       if (unlikely(preempt_count() != expected_preempt_count))
                return 0;
        if (unlikely(system_state != SYSTEM_RUNNING))
                return 0;
@@ -4448,7 +4519,7 @@ static void __cond_resched(void)
 
 int __sched cond_resched(void)
 {
-       if (need_resched() && __resched_legal()) {
+       if (need_resched() && __resched_legal(0)) {
                __cond_resched();
                return 1;
        }
@@ -4474,7 +4545,7 @@ int cond_resched_lock(spinlock_t *lock)
                ret = 1;
                spin_lock(lock);
        }
-       if (need_resched() && __resched_legal()) {
+       if (need_resched() && __resched_legal(1)) {
                spin_release(&lock->dep_map, 1, _THIS_IP_);
                _raw_spin_unlock(lock);
                preempt_enable_no_resched();
@@ -4490,7 +4561,7 @@ int __sched cond_resched_softirq(void)
 {
        BUG_ON(!in_softirq());
 
-       if (need_resched() && __resched_legal()) {
+       if (need_resched() && __resched_legal(0)) {
                raw_local_irq_disable();
                _local_bh_enable();
                raw_local_irq_enable();
@@ -4526,9 +4597,11 @@ void __sched io_schedule(void)
 {
        struct rq *rq = &__raw_get_cpu_var(runqueues);
 
+       delayacct_blkio_start();
        atomic_inc(&rq->nr_iowait);
        schedule();
        atomic_dec(&rq->nr_iowait);
+       delayacct_blkio_end();
 }
 EXPORT_SYMBOL(io_schedule);
 
@@ -4537,9 +4610,11 @@ long __sched io_schedule_timeout(long timeout)
        struct rq *rq = &__raw_get_cpu_var(runqueues);
        long ret;
 
+       delayacct_blkio_start();
        atomic_inc(&rq->nr_iowait);
        ret = schedule_timeout(timeout);
        atomic_dec(&rq->nr_iowait);
+       delayacct_blkio_end();
        return ret;
 }
 
@@ -4650,7 +4725,7 @@ static inline struct task_struct *younger_sibling(struct task_struct *p)
        return list_entry(p->sibling.next,struct task_struct,sibling);
 }
 
-static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" };
+static const char stat_nam[] = "RSDTtZX";
 
 static void show_task(struct task_struct *p)
 {
@@ -4658,12 +4733,9 @@ static void show_task(struct task_struct *p)
        unsigned long free = 0;
        unsigned state;
 
-       printk("%-13.13s ", p->comm);
        state = p->state ? __ffs(p->state) + 1 : 0;
-       if (state < ARRAY_SIZE(stat_nam))
-               printk(stat_nam[state]);
-       else
-               printk("?");
+       printk("%-13.13s %c", p->comm,
+               state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
 #if (BITS_PER_LONG == 32)
        if (state == TASK_RUNNING)
                printk(" running ");
@@ -4877,7 +4949,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
                p->timestamp = p->timestamp - rq_src->timestamp_last_tick
                                + rq_dest->timestamp_last_tick;
                deactivate_task(p, rq_src);
-               activate_task(p, rq_dest, 0);
+               __activate_task(p, rq_dest);
                if (TASK_PREEMPTS_CURR(p, rq_dest))
                        resched_task(rq_dest->curr);
        }
@@ -5776,7 +5848,7 @@ static unsigned long long measure_migration_cost(int cpu1, int cpu2)
        cache = vmalloc(max_size);
        if (!cache) {
                printk("could not vmalloc %d bytes for cache!\n", 2*max_size);
-               return 1000000; // return 1 msec on very small boxen
+               return 1000000; /* return 1 msec on very small boxen */
        }
 
        while (size <= max_size) {
@@ -6457,7 +6529,12 @@ static int build_sched_domains(const cpumask_t *cpu_map)
        for (i = 0; i < MAX_NUMNODES; i++)
                init_numa_sched_groups_power(sched_group_nodes[i]);
 
-       init_numa_sched_groups_power(sched_group_allnodes);
+       if (sched_group_allnodes) {
+               int group = cpu_to_allnodes_group(first_cpu(*cpu_map));
+               struct sched_group *sg = &sched_group_allnodes[group];
+
+               init_numa_sched_groups_power(sg);
+       }
 #endif
 
        /* Attach the domains */
@@ -6707,6 +6784,7 @@ void __init sched_init(void)
                        rq->cpu_load[j] = 0;
                rq->active_balance = 0;
                rq->push_cpu = 0;
+               rq->cpu = i;
                rq->migration_thread = NULL;
                INIT_LIST_HEAD(&rq->migration_queue);
 #endif
@@ -6724,6 +6802,11 @@ void __init sched_init(void)
        }
 
        set_load_weight(&init_task);
+
+#ifdef CONFIG_RT_MUTEXES
+       plist_head_init(&init_task.pi_waiters, &init_task.pi_lock);
+#endif
+
        /*
         * The boot idle thread does lazy MMU switching as well:
         */