[NETLINK]: Extend netlink messaging interface
diff --git a/kernel/sched.c b/kernel/sched.c
index d714611..a234fbe 100644
@@ -51,6 +51,7 @@
 #include <linux/times.h>
 #include <linux/acct.h>
 #include <linux/kprobes.h>
+#include <linux/delayacct.h>
 #include <asm/tlb.h>
 
 #include <asm/unistd.h>
@@ -501,9 +502,36 @@ struct file_operations proc_schedstat_operations = {
        .release = single_release,
 };
 
+/*
+ * Expects runqueue lock to be held for atomicity of update
+ */
+static inline void
+rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
+{
+       if (rq) {
+               rq->rq_sched_info.run_delay += delta_jiffies;
+               rq->rq_sched_info.pcnt++;
+       }
+}
+
+/*
+ * Expects runqueue lock to be held for atomicity of update
+ */
+static inline void
+rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
+{
+       if (rq)
+               rq->rq_sched_info.cpu_time += delta_jiffies;
+}
 # define schedstat_inc(rq, field)      do { (rq)->field++; } while (0)
 # define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
 #else /* !CONFIG_SCHEDSTATS */
+static inline void
+rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
+{}
+static inline void
+rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
+{}
 # define schedstat_inc(rq, field)      do { } while (0)
 # define schedstat_add(rq, field, amt) do { } while (0)
 #endif
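
For reference, the counters these helpers touch live in struct sched_info (include/linux/sched.h). The sketch below is an approximation of that structure at this point in the series, not a verbatim copy; all three counters are kept in jiffies here:

        struct sched_info {
                /* cumulative counters */
                unsigned long   pcnt;           /* # of timeslices run on this cpu */
                unsigned long   run_delay;      /* time spent waiting on a runqueue */
                unsigned long   cpu_time;       /* time spent running on the cpu */

                /* timestamps */
                unsigned long   last_arrival;   /* when we last ran on a cpu */
                unsigned long   last_queued;    /* when we were last queued to run */
        };

Factoring the per-runqueue updates into rq_sched_info_arrive()/rq_sched_info_depart() lets the sched_info_* functions below be shared with delay accounting: the per-task half is compiled in whenever either option is set, while the per-runqueue half still collapses to empty stubs without CONFIG_SCHEDSTATS.
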
@@ -523,7 +551,7 @@ static inline struct rq *this_rq_lock(void)
        return rq;
 }
 
-#ifdef CONFIG_SCHEDSTATS
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 /*
  * Called when a process is dequeued from the active array and given
  * the cpu.  We should note that with the exception of interactive
@@ -551,21 +579,16 @@ static inline void sched_info_dequeued(struct task_struct *t)
  */
 static void sched_info_arrive(struct task_struct *t)
 {
-       unsigned long now = jiffies, diff = 0;
-       struct rq *rq = task_rq(t);
+       unsigned long now = jiffies, delta_jiffies = 0;
 
        if (t->sched_info.last_queued)
-               diff = now - t->sched_info.last_queued;
+               delta_jiffies = now - t->sched_info.last_queued;
        sched_info_dequeued(t);
-       t->sched_info.run_delay += diff;
+       t->sched_info.run_delay += delta_jiffies;
        t->sched_info.last_arrival = now;
        t->sched_info.pcnt++;
 
-       if (!rq)
-               return;
-
-       rq->rq_sched_info.run_delay += diff;
-       rq->rq_sched_info.pcnt++;
+       rq_sched_info_arrive(task_rq(t), delta_jiffies);
 }
 
 /*
@@ -585,8 +608,9 @@ static void sched_info_arrive(struct task_struct *t)
  */
 static inline void sched_info_queued(struct task_struct *t)
 {
-       if (!t->sched_info.last_queued)
-               t->sched_info.last_queued = jiffies;
+       if (unlikely(sched_info_on()))
+               if (!t->sched_info.last_queued)
+                       t->sched_info.last_queued = jiffies;
 }
 
 /*
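
The new sched_info_on() guard is not defined in this file; it comes from include/linux/sched.h in this patch series. The sketch below is a hedged approximation of its behaviour (schedstats always collect, delay accounting only when its runtime switch is enabled), not a verbatim copy:

        static inline int sched_info_on(void)
        {
        #ifdef CONFIG_SCHEDSTATS
                return 1;                       /* schedstats always want sched_info */
        #elif defined(CONFIG_TASK_DELAY_ACCT)
                extern int delayacct_on;        /* runtime switch for delay accounting */
                return delayacct_on;
        #else
                return 0;
        #endif
        }

Wrapping the timestamping in unlikely(sched_info_on()) keeps the common case cheap: when only CONFIG_TASK_DELAY_ACCT is built in, the bookkeeping is skipped entirely unless delay accounting has actually been switched on.
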
@@ -595,13 +619,10 @@ static inline void sched_info_queued(struct task_struct *t)
  */
 static inline void sched_info_depart(struct task_struct *t)
 {
-       struct rq *rq = task_rq(t);
-       unsigned long diff = jiffies - t->sched_info.last_arrival;
+       unsigned long delta_jiffies = jiffies - t->sched_info.last_arrival;
 
-       t->sched_info.cpu_time += diff;
-
-       if (rq)
-               rq->rq_sched_info.cpu_time += diff;
+       t->sched_info.cpu_time += delta_jiffies;
+       rq_sched_info_depart(task_rq(t), delta_jiffies);
 }
 
 /*
@@ -610,7 +631,7 @@ static inline void sched_info_depart(struct task_struct *t)
  * the idle task.)  We are only called when prev != next.
  */
 static inline void
-sched_info_switch(struct task_struct *prev, struct task_struct *next)
+__sched_info_switch(struct task_struct *prev, struct task_struct *next)
 {
        struct rq *rq = task_rq(prev);
 
@@ -625,10 +646,16 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
        if (next != rq->idle)
                sched_info_arrive(next);
 }
+static inline void
+sched_info_switch(struct task_struct *prev, struct task_struct *next)
+{
+       if (unlikely(sched_info_on()))
+               __sched_info_switch(prev, next);
+}
 #else
 #define sched_info_queued(t)           do { } while (0)
 #define sched_info_switch(t, next)     do { } while (0)
-#endif /* CONFIG_SCHEDSTATS */
+#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
 
 /*
  * Adding/removing a task to/from a priority array:
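
With CONFIG_SCHEDSTATS, the per-task totals maintained above are exported through /proc/<pid>/schedstat. A hypothetical userspace reader follows; the field order (cpu_time, run_delay, timeslice count) is an assumption to verify against fs/proc/base.c:

        #include <stdio.h>

        int main(void)
        {
                unsigned long cpu_time, run_delay, pcnt;
                FILE *f = fopen("/proc/self/schedstat", "r");

                if (!f)
                        return 1;
                /* assumed field order: on-cpu time, runqueue wait, timeslice count */
                if (fscanf(f, "%lu %lu %lu", &cpu_time, &run_delay, &pcnt) == 3)
                        printf("ran %lu, waited %lu, timeslices %lu\n",
                               cpu_time, run_delay, pcnt);
                fclose(f);
                return 0;
        }
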
@@ -1530,8 +1557,9 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags)
 
        INIT_LIST_HEAD(&p->run_list);
        p->array = NULL;
-#ifdef CONFIG_SCHEDSTATS
-       memset(&p->sched_info, 0, sizeof(p->sched_info));
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+       if (unlikely(sched_info_on()))
+               memset(&p->sched_info, 0, sizeof(p->sched_info));
 #endif
 #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
        p->oncpu = 0;
@@ -1788,7 +1816,15 @@ context_switch(struct rq *rq, struct task_struct *prev,
                WARN_ON(rq->prev_mm);
                rq->prev_mm = oldmm;
        }
+       /*
+        * The runqueue lock will be released by the next task
+        * (which is an invalid locking op, but in the case of
+        * the scheduler it's an obvious special case), so we
+        * do an early lockdep release here:
+        */
+#ifndef __ARCH_WANT_UNLOCKED_CTXSW
        spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
+#endif
 
        /* Here we just switch the register state and the stack. */
        switch_to(prev, next, prev);
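
The matching lockdep annotation sits on the other side of the switch: the incoming task adopts rq->lock's dependency map before finally unlocking it. A hedged sketch of the handoff (the names are taken from this file, the exact placement on the "next" side is an assumption, not a verbatim copy):

        /* prev's context: hand the lock over to whoever runs next */
        #ifndef __ARCH_WANT_UNLOCKED_CTXSW
                spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
        #endif
                switch_to(prev, next, prev);    /* stack and registers now belong to next */

        /* next's context, after the switch: adopt the lock, then drop it */
                spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
                spin_unlock_irq(&rq->lock);

On __ARCH_WANT_UNLOCKED_CTXSW architectures the runqueue lock is dropped before the context switch itself, so there is nothing to hand over and the early release is now compiled out there.
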
@@ -4126,10 +4162,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
                read_unlock_irq(&tasklist_lock);
                return -ESRCH;
        }
-       get_task_struct(p);
-       read_unlock_irq(&tasklist_lock);
        retval = sched_setscheduler(p, policy, &lparam);
-       put_task_struct(p);
+       read_unlock_irq(&tasklist_lock);
 
        return retval;
 }
@@ -4420,9 +4454,9 @@ asmlinkage long sys_sched_yield(void)
        return 0;
 }
 
-static inline int __resched_legal(void)
+static inline int __resched_legal(int expected_preempt_count)
 {
-       if (unlikely(preempt_count()))
+       if (unlikely(preempt_count() != expected_preempt_count))
                return 0;
        if (unlikely(system_state != SYSTEM_RUNNING))
                return 0;
@@ -4448,7 +4482,7 @@ static void __cond_resched(void)
 
 int __sched cond_resched(void)
 {
-       if (need_resched() && __resched_legal()) {
+       if (need_resched() && __resched_legal(0)) {
                __cond_resched();
                return 1;
        }
@@ -4474,7 +4508,7 @@ int cond_resched_lock(spinlock_t *lock)
                ret = 1;
                spin_lock(lock);
        }
-       if (need_resched() && __resched_legal()) {
+       if (need_resched() && __resched_legal(1)) {
                spin_release(&lock->dep_map, 1, _THIS_IP_);
                _raw_spin_unlock(lock);
                preempt_enable_no_resched();
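
__resched_legal() now takes the preempt count its caller expects because cond_resched_lock() runs the check while still holding a spinlock: on preemptible kernels that held lock contributes one preempt level, so the old blanket preempt_count() != 0 test could never allow a reschedule there. A hypothetical caller (scan_items(), struct item and process_item() are made up for illustration):

        static void scan_items(struct list_head *head, spinlock_t *lock)
        {
                struct item *it;                /* hypothetical element type with a list_head 'node' */

                spin_lock(lock);
                list_for_each_entry(it, head, node) {
                        process_item(it);       /* hypothetical per-item work */
                        /*
                         * Long loop under a spinlock: offer to drop the lock,
                         * schedule, and re-take it when a reschedule is due.
                         * Assumes the list is not modified while the lock is
                         * briefly dropped.
                         */
                        cond_resched_lock(lock);
                }
                spin_unlock(lock);
        }
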
@@ -4490,7 +4524,7 @@ int __sched cond_resched_softirq(void)
 {
        BUG_ON(!in_softirq());
 
-       if (need_resched() && __resched_legal()) {
+       if (need_resched() && __resched_legal(0)) {
                raw_local_irq_disable();
                _local_bh_enable();
                raw_local_irq_enable();
@@ -4526,9 +4560,11 @@ void __sched io_schedule(void)
 {
        struct rq *rq = &__raw_get_cpu_var(runqueues);
 
+       delayacct_blkio_start();
        atomic_inc(&rq->nr_iowait);
        schedule();
        atomic_dec(&rq->nr_iowait);
+       delayacct_blkio_end();
 }
 EXPORT_SYMBOL(io_schedule);
 
@@ -4537,9 +4573,11 @@ long __sched io_schedule_timeout(long timeout)
        struct rq *rq = &__raw_get_cpu_var(runqueues);
        long ret;
 
+       delayacct_blkio_start();
        atomic_inc(&rq->nr_iowait);
        ret = schedule_timeout(timeout);
        atomic_dec(&rq->nr_iowait);
+       delayacct_blkio_end();
        return ret;
 }
 
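
delayacct_blkio_start()/delayacct_blkio_end() come from the <linux/delayacct.h> header added at the top of this diff; they bracket both io_schedule() and io_schedule_timeout() so the time spent sleeping on block I/O can be charged to the task's delay totals. Roughly (a hedged sketch of the header-side wrappers, not a verbatim copy):

        static inline void delayacct_blkio_start(void)
        {
                if (current->delays)                    /* per-task accounting allocated? */
                        __delayacct_blkio_start();      /* timestamp the start of the wait */
        }

        static inline void delayacct_blkio_end(void)
        {
                if (current->delays)
                        __delayacct_blkio_end();        /* fold the elapsed wait into the totals */
        }

The accumulation itself lives in kernel/delayacct.c, outside this diff, and is reported to userspace via the taskstats interface added elsewhere in this series.
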
@@ -6454,7 +6492,12 @@ static int build_sched_domains(const cpumask_t *cpu_map)
        for (i = 0; i < MAX_NUMNODES; i++)
                init_numa_sched_groups_power(sched_group_nodes[i]);
 
-       init_numa_sched_groups_power(sched_group_allnodes);
+       if (sched_group_allnodes) {
+               int group = cpu_to_allnodes_group(first_cpu(*cpu_map));
+               struct sched_group *sg = &sched_group_allnodes[group];
+
+               init_numa_sched_groups_power(sg);
+       }
 #endif
 
        /* Attach the domains */
@@ -6721,6 +6764,11 @@ void __init sched_init(void)
        }
 
        set_load_weight(&init_task);
+
+#ifdef CONFIG_RT_MUTEXES
+       plist_head_init(&init_task.pi_waiters, &init_task.pi_lock);
+#endif
+
        /*
         * The boot idle thread does lazy MMU switching as well:
         */