Merge branch 'unlikely/sched' of git://git.kernel.org/pub/scm/linux/kernel/git/rosted...

[pandora-kernel.git] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index c62acf4..fd18f39 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -124,7 +124,7 @@
  
  static inline int rt_policy(int policy)
  {
-       if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR))
+       if (policy == SCHED_FIFO || policy == SCHED_RR)
                 return 1;
         return 0;
  }
@@ -293,7 +293,7 @@ static DEFINE_SPINLOCK(task_group_lock);
   *  limitation from this.)
   */
  #define MIN_SHARES     2
-#define MAX_SHARES     (1UL << 18)
+#define MAX_SHARES     (1UL << (18 + SCHED_LOAD_RESOLUTION))
  
  static int root_task_group_load = ROOT_TASK_GROUP_LOAD;
  #endif
@@ -1330,13 +1330,25 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
  {
         u64 tmp;
  
-       tmp = (u64)delta_exec * weight;
+       /*
+        * weight can be less than 2^SCHED_LOAD_RESOLUTION for task group sched
+        * entities since MIN_SHARES = 2. Treat weight as 1 if it is not
+        * greater than 2^SCHED_LOAD_RESOLUTION.
+        */
+       if (likely(weight > (1UL << SCHED_LOAD_RESOLUTION)))
+               tmp = (u64)delta_exec * scale_load_down(weight);
+       else
+               tmp = (u64)delta_exec;
  
         if (!lw->inv_weight) {
-               if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST))
+               unsigned long w = scale_load_down(lw->weight);
+
+               if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST))
                         lw->inv_weight = 1;
+               else if (unlikely(!w))
+                       lw->inv_weight = WMULT_CONST;
                 else
-                       lw->inv_weight = WMULT_CONST / lw->weight;
+                       lw->inv_weight = WMULT_CONST / w;
         }
  
         /*
@@ -1778,17 +1790,20 @@ static void dec_nr_running(struct rq *rq)
  
  static void set_load_weight(struct task_struct *p)
  {
+       int prio = p->static_prio - MAX_RT_PRIO;
+       struct load_weight *load = &p->se.load;
+
         /*
          * SCHED_IDLE tasks get minimal weight:
          */
         if (p->policy == SCHED_IDLE) {
-               p->se.load.weight = WEIGHT_IDLEPRIO;
-               p->se.load.inv_weight = WMULT_IDLEPRIO;
+               load->weight = scale_load(WEIGHT_IDLEPRIO);
+               load->inv_weight = WMULT_IDLEPRIO;
                 return;
         }
  
-       p->se.load.weight = prio_to_weight[p->static_prio - MAX_RT_PRIO];
-       p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
+       load->weight = scale_load(prio_to_weight[prio]);
+       load->inv_weight = prio_to_wmult[prio];
  }
  
  static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -2432,6 +2447,10 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
                 }
                 rcu_read_unlock();
         }
+
+       if (wake_flags & WF_MIGRATED)
+               schedstat_inc(p, se.statistics.nr_wakeups_migrate);
+
  #endif /* CONFIG_SMP */
  
         schedstat_inc(rq, ttwu_count);
@@ -2440,9 +2459,6 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
         if (wake_flags & WF_SYNC)
                 schedstat_inc(p, se.statistics.nr_wakeups_sync);
  
-       if (cpu != task_cpu(p))
-               schedstat_inc(p, se.statistics.nr_wakeups_migrate);
-
  #endif /* CONFIG_SCHEDSTATS */
  }
  
@@ -2470,7 +2486,7 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
         if (p->sched_class->task_woken)
                 p->sched_class->task_woken(rq, p);
  
-       if (unlikely(rq->idle_stamp)) {
+       if (rq->idle_stamp) {
                 u64 delta = rq->clock - rq->idle_stamp;
                 u64 max = 2*sysctl_sched_migration_cost;
  
@@ -2558,14 +2574,34 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu)
         if (!next)
                 smp_send_reschedule(cpu);
  }
-#endif
+
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+static int ttwu_activate_remote(struct task_struct *p, int wake_flags)
+{
+       struct rq *rq;
+       int ret = 0;
+
+       rq = __task_rq_lock(p);
+       if (p->on_cpu) {
+               ttwu_activate(rq, p, ENQUEUE_WAKEUP);
+               ttwu_do_wakeup(rq, p, wake_flags);
+               ret = 1;
+       }
+       __task_rq_unlock(rq);
+
+       return ret;
+
+}
+#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
+#endif /* CONFIG_SMP */
  
  static void ttwu_queue(struct task_struct *p, int cpu)
  {
         struct rq *rq = cpu_rq(cpu);
  
-#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE)
+#if defined(CONFIG_SMP)
         if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
+               sched_clock_cpu(cpu); /* sync clocks x-cpu */
                 ttwu_queue_remote(p, cpu);
                 return;
         }
@@ -2616,17 +2652,17 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
         while (p->on_cpu) {
  #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
                 /*
-                * If called from interrupt context we could have landed in the
-                * middle of schedule(), in this case we should take care not
-                * to spin on ->on_cpu if p is current, since that would
-                * deadlock.
+                * In case the architecture enables interrupts in
+                * context_switch(), we cannot busy wait, since that
+                * would lead to deadlocks when an interrupt hits and
+                * tries to wake up @prev. So bail and do a complete
+                * remote wakeup.
                  */
-               if (p == current) {
-                       ttwu_queue(p, cpu);
+               if (ttwu_activate_remote(p, wake_flags))
                         goto stat;
-               }
-#endif
+#else
                 cpu_relax();
+#endif
         }
         /*
          * Pairs with the smp_wmb() in finish_lock_switch().
@@ -2640,8 +2676,10 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
                 p->sched_class->task_waking(p);
  
         cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
-       if (task_cpu(p) != cpu)
+       if (task_cpu(p) != cpu) {
+               wake_flags |= WF_MIGRATED;
                 set_task_cpu(p, cpu);
+       }
  #endif /* CONFIG_SMP */
  
         ttwu_queue(p, cpu);
@@ -5826,7 +5864,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
         idle->state = TASK_RUNNING;
         idle->se.exec_start = sched_clock();
  
-       cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
+       do_set_cpus_allowed(idle, cpumask_of(cpu));
         /*
          * We're having a chicken and egg problem, even though we are
          * holding rq->lock, the cpu isn't yet set to this cpu so the
@@ -5914,6 +5952,16 @@ static inline void sched_init_granularity(void)
  }
  
  #ifdef CONFIG_SMP
+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+{
+       if (p->sched_class && p->sched_class->set_cpus_allowed)
+               p->sched_class->set_cpus_allowed(p, new_mask);
+       else {
+               cpumask_copy(&p->cpus_allowed, new_mask);
+               p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
+       }
+}
+
  /*
   * This is how migration works:
   *
@@ -5959,12 +6007,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
                 goto out;
         }
  
-       if (p->sched_class->set_cpus_allowed)
-               p->sched_class->set_cpus_allowed(p, new_mask);
-       else {
-               cpumask_copy(&p->cpus_allowed, new_mask);
-               p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
-       }
+       do_set_cpus_allowed(p, new_mask);
  
         /* Can the task run on the task's current CPU? If so, we're done */
         if (cpumask_test_cpu(task_cpu(p), new_mask))
@@ -6527,7 +6570,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
  
                 printk(KERN_CONT " %s", str);
-               if (group->cpu_power != SCHED_LOAD_SCALE) {
+               if (group->cpu_power != SCHED_POWER_SCALE) {
                         printk(KERN_CONT " (cpu_power = %d)",
                                 group->cpu_power);
                 }
@@ -7902,7 +7945,7 @@ void __init sched_init(void)
  #ifdef CONFIG_SMP
                 rq->sd = NULL;
                 rq->rd = NULL;
-               rq->cpu_power = SCHED_LOAD_SCALE;
+               rq->cpu_power = SCHED_POWER_SCALE;
                 rq->post_schedule = 0;
                 rq->active_balance = 0;
                 rq->next_balance = jiffies;
@@ -8749,42 +8792,10 @@ cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
         return 0;
  }
  
-static int
-cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                     struct task_struct *tsk, bool threadgroup)
-{
-       int retval = cpu_cgroup_can_attach_task(cgrp, tsk);
-       if (retval)
-               return retval;
-       if (threadgroup) {
-               struct task_struct *c;
-               rcu_read_lock();
-               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                       retval = cpu_cgroup_can_attach_task(cgrp, c);
-                       if (retval) {
-                               rcu_read_unlock();
-                               return retval;
-                       }
-               }
-               rcu_read_unlock();
-       }
-       return 0;
-}
-
  static void
-cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                 struct cgroup *old_cont, struct task_struct *tsk,
-                 bool threadgroup)
+cpu_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
  {
         sched_move_task(tsk);
-       if (threadgroup) {
-               struct task_struct *c;
-               rcu_read_lock();
-               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                       sched_move_task(c);
-               }
-               rcu_read_unlock();
-       }
  }
  
  static void
@@ -8806,14 +8817,14 @@ cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
  static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype,
                                 u64 shareval)
  {
-       return sched_group_set_shares(cgroup_tg(cgrp), shareval);
+       return sched_group_set_shares(cgroup_tg(cgrp), scale_load(shareval));
  }
  
  static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft)
  {
         struct task_group *tg = cgroup_tg(cgrp);
  
-       return (u64) tg->shares;
+       return (u64) scale_load_down(tg->shares);
  }
  #endif /* CONFIG_FAIR_GROUP_SCHED */
  
@@ -8872,8 +8883,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
         .name           = "cpu",
         .create         = cpu_cgroup_create,
         .destroy        = cpu_cgroup_destroy,
-       .can_attach     = cpu_cgroup_can_attach,
-       .attach         = cpu_cgroup_attach,
+       .can_attach_task = cpu_cgroup_can_attach_task,
+       .attach_task    = cpu_cgroup_attach_task,
         .exit           = cpu_cgroup_exit,
         .populate       = cpu_cgroup_populate,
         .subsys_id      = cpu_cgroup_subsys_id,