sched, cgroups: Fix MIN_SHARES on 64-bit boxen

[pandora-kernel.git] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index 5e43e9d..9769c75 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -292,8 +292,8 @@ static DEFINE_SPINLOCK(task_group_lock);
   * (The default weight is 1024 - so there's no practical
   *  limitation from this.)
   */
-#define MIN_SHARES     2
-#define MAX_SHARES     (1UL << (18 + SCHED_LOAD_RESOLUTION))
+#define MIN_SHARES     (1UL <<  1)
+#define MAX_SHARES     (1UL << 18)
  
  static int root_task_group_load = ROOT_TASK_GROUP_LOAD;
  #endif
@@ -605,10 +605,10 @@ static inline int cpu_of(struct rq *rq)
  /*
   * Return the group to which this tasks belongs.
   *
- * We use task_subsys_state_check() and extend the RCU verification
- * with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach()
- * holds that lock for each task it moves into the cgroup. Therefore
- * by holding that lock, we pin the task to the current cgroup.
+ * We use task_subsys_state_check() and extend the RCU verification with
+ * p->pi_lock and rq->lock because cpu_cgroup_attach() holds those locks for each
+ * task it moves into the cgroup. Therefore by holding either of those locks,
+ * we pin the task to the current cgroup.
   */
  static inline struct task_group *task_group(struct task_struct *p)
  {
@@ -616,7 +616,8 @@ static inline struct task_group *task_group(struct task_struct *p)
         struct cgroup_subsys_state *css;
  
         css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
-                       lockdep_is_held(&p->pi_lock));
+                       lockdep_is_held(&p->pi_lock) ||
+                       lockdep_is_held(&task_rq(p)->lock));
         tg = container_of(css, struct task_group, css);
  
         return autogroup_task_group(p, tg);
@@ -2200,6 +2201,16 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
                         !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
  
  #ifdef CONFIG_LOCKDEP
+       /*
+        * The caller should hold either p->pi_lock or rq->lock, when changing
+        * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks.
+        *
+        * sched_move_task() holds both and thus holding either pins the cgroup,
+        * see set_task_rq().
+        *
+        * Furthermore, all task_rq users should acquire both locks, see
+        * task_rq_lock().
+        */
         WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
                                       lockdep_is_held(&task_rq(p)->lock)));
  #endif
@@ -2447,6 +2458,10 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
                 }
                 rcu_read_unlock();
         }
+
+       if (wake_flags & WF_MIGRATED)
+               schedstat_inc(p, se.statistics.nr_wakeups_migrate);
+
  #endif /* CONFIG_SMP */
  
         schedstat_inc(rq, ttwu_count);
@@ -2455,9 +2470,6 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
         if (wake_flags & WF_SYNC)
                 schedstat_inc(p, se.statistics.nr_wakeups_sync);
  
-       if (cpu != task_cpu(p))
-               schedstat_inc(p, se.statistics.nr_wakeups_migrate);
-
  #endif /* CONFIG_SCHEDSTATS */
  }
  
@@ -2573,7 +2585,26 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu)
         if (!next)
                 smp_send_reschedule(cpu);
  }
-#endif
+
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+static int ttwu_activate_remote(struct task_struct *p, int wake_flags)
+{
+       struct rq *rq;
+       int ret = 0;
+
+       rq = __task_rq_lock(p);
+       if (p->on_cpu) {
+               ttwu_activate(rq, p, ENQUEUE_WAKEUP);
+               ttwu_do_wakeup(rq, p, wake_flags);
+               ret = 1;
+       }
+       __task_rq_unlock(rq);
+
+       return ret;
+
+}
+#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
+#endif /* CONFIG_SMP */
  
  static void ttwu_queue(struct task_struct *p, int cpu)
  {
@@ -2581,6 +2612,7 @@ static void ttwu_queue(struct task_struct *p, int cpu)
  
  #if defined(CONFIG_SMP)
         if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
+               sched_clock_cpu(cpu); /* sync clocks x-cpu */
                 ttwu_queue_remote(p, cpu);
                 return;
         }
@@ -2631,17 +2663,17 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
         while (p->on_cpu) {
  #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
                 /*
-                * If called from interrupt context we could have landed in the
-                * middle of schedule(), in this case we should take care not
-                * to spin on ->on_cpu if p is current, since that would
-                * deadlock.
+                * In case the architecture enables interrupts in
+                * context_switch(), we cannot busy wait, since that
+                * would lead to deadlocks when an interrupt hits and
+                * tries to wake up @prev. So bail and do a complete
+                * remote wakeup.
                  */
-               if (p == current) {
-                       ttwu_queue(p, cpu);
+               if (ttwu_activate_remote(p, wake_flags))
                         goto stat;
-               }
-#endif
+#else
                 cpu_relax();
+#endif
         }
         /*
          * Pairs with the smp_wmb() in finish_lock_switch().
@@ -2655,8 +2687,10 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
                 p->sched_class->task_waking(p);
  
         cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
-       if (task_cpu(p) != cpu)
+       if (task_cpu(p) != cpu) {
+               wake_flags |= WF_MIGRATED;
                 set_task_cpu(p, cpu);
+       }
  #endif /* CONFIG_SMP */
  
         ttwu_queue(p, cpu);
@@ -5841,7 +5875,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
         idle->state = TASK_RUNNING;
         idle->se.exec_start = sched_clock();
  
-       cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
+       do_set_cpus_allowed(idle, cpumask_of(cpu));
         /*
          * We're having a chicken and egg problem, even though we are
          * holding rq->lock, the cpu isn't yet set to this cpu so the
@@ -5929,6 +5963,16 @@ static inline void sched_init_granularity(void)
  }
  
  #ifdef CONFIG_SMP
+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+{
+       if (p->sched_class && p->sched_class->set_cpus_allowed)
+               p->sched_class->set_cpus_allowed(p, new_mask);
+       else {
+               cpumask_copy(&p->cpus_allowed, new_mask);
+               p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
+       }
+}
+
  /*
   * This is how migration works:
   *
@@ -5974,12 +6018,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
                 goto out;
         }
  
-       if (p->sched_class->set_cpus_allowed)
-               p->sched_class->set_cpus_allowed(p, new_mask);
-       else {
-               cpumask_copy(&p->cpus_allowed, new_mask);
-               p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
-       }
+       do_set_cpus_allowed(p, new_mask);
  
         /* Can the task run on the task's current CPU? If so, we're done */
         if (cpumask_test_cpu(task_cpu(p), new_mask))
@@ -8411,10 +8450,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
         if (!tg->se[0])
                 return -EINVAL;
  
-       if (shares < MIN_SHARES)
-               shares = MIN_SHARES;
-       else if (shares > MAX_SHARES)
-               shares = MAX_SHARES;
+       shares = clamp(shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES));
  
         mutex_lock(&shares_mutex);
         if (tg->shares == shares)