futex: Fix potential use-after-free in FUTEX_REQUEUE_PI
[pandora-kernel.git] / kernel / sched_fair.c
index 7e51b5b..98e1039 100644
@@ -1527,6 +1527,8 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
        cfs_rq->throttled_timestamp = rq->clock;
        raw_spin_lock(&cfs_b->lock);
        list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
+       if (!cfs_b->timer_active)
+               __start_cfs_bandwidth(cfs_b);
        raw_spin_unlock(&cfs_b->lock);
 }
 
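
The two added lines matter because quota replenishment and the walk of cfs_b->throttled_cfs_rq both happen from the bandwidth period timer, which is allowed to lapse (timer_active == 0) once the group has been idle for a while. A hypothetical invariant check, only meant to spell out what the hunk restores (assert_bandwidth_invariant is not an in-tree function):

/* Illustration only: the invariant throttle_cfs_rq() must re-establish. */
static void assert_bandwidth_invariant(struct cfs_bandwidth *cfs_b)
{
        assert_raw_spin_locked(&cfs_b->lock);

        /*
         * A throttled cfs_rq can only be unthrottled from the period timer,
         * so having one queued while the timer is dead would leave it
         * throttled forever.
         */
        WARN_ON_ONCE(!list_empty(&cfs_b->throttled_cfs_rq) &&
                     !cfs_b->timer_active);
}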
@@ -1756,7 +1758,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 
 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
-       if (!cfs_rq->runtime_enabled || !cfs_rq->nr_running)
+       if (!cfs_rq->runtime_enabled || cfs_rq->nr_running)
                return;
 
        __return_cfs_rq_runtime(cfs_rq);
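
The old test had the sense of nr_running inverted: unused local runtime was handed back to the global pool while tasks were still queued, and never when the queue actually emptied. An annotated copy of the fixed helper (comments added here, not in the tree):

static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
{
        if (!cfs_rq->runtime_enabled ||         /* no bandwidth limit set */
            cfs_rq->nr_running)                 /* still busy: keep the quota */
                return;

        /* last task just left: give the leftover runtime back to cfs_b */
        __return_cfs_rq_runtime(cfs_rq);
}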
@@ -2326,7 +2328,8 @@ static int select_idle_sibling(struct task_struct *p, int target)
        int cpu = smp_processor_id();
        int prev_cpu = task_cpu(p);
        struct sched_domain *sd;
-       int i;
+       struct sched_group *sg;
+       int i, smt = 0;
 
        /*
         * If the task is going to be woken-up on this cpu and if it is
@@ -2346,25 +2349,40 @@ static int select_idle_sibling(struct task_struct *p, int target)
         * Otherwise, iterate the domains and find an elegible idle cpu.
         */
        rcu_read_lock();
+again:
        for_each_domain(target, sd) {
+               if (!smt && (sd->flags & SD_SHARE_CPUPOWER))
+                       continue;
+
+               if (smt && !(sd->flags & SD_SHARE_CPUPOWER))
+                       break;
+
                if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
                        break;
 
-               for_each_cpu_and(i, sched_domain_span(sd), tsk_cpus_allowed(p)) {
-                       if (idle_cpu(i)) {
-                               target = i;
-                               break;
+               sg = sd->groups;
+               do {
+                       if (!cpumask_intersects(sched_group_cpus(sg),
+                                               tsk_cpus_allowed(p)))
+                               goto next;
+
+                       for_each_cpu(i, sched_group_cpus(sg)) {
+                               if (!idle_cpu(i))
+                                       goto next;
                        }
-               }
 
-               /*
-                * Lets stop looking for an idle sibling when we reached
-                * the domain that spans the current cpu and prev_cpu.
-                */
-               if (cpumask_test_cpu(cpu, sched_domain_span(sd)) &&
-                   cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
-                       break;
+                       target = cpumask_first_and(sched_group_cpus(sg),
+                                       tsk_cpus_allowed(p));
+                       goto done;
+next:
+                       sg = sg->next;
+               } while (sg != sd->groups);
+       }
+       if (!smt) {
+               smt = 1;
+               goto again;
        }
+done:
        rcu_read_unlock();
 
        return target;
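
Read as a diff the new loop is hard to follow; reconstructed from the hunk above, the resulting body looks roughly like this (comments added as a reading aid):

        rcu_read_lock();
again:
        for_each_domain(target, sd) {
                /* pass 1 (smt == 0): skip SMT levels, hunt for a fully idle core */
                if (!smt && (sd->flags & SD_SHARE_CPUPOWER))
                        continue;

                /* pass 2 (smt == 1): only the SMT level is of interest */
                if (smt && !(sd->flags & SD_SHARE_CPUPOWER))
                        break;

                if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
                        break;

                /* look for a group whose allowed cpus are all idle */
                sg = sd->groups;
                do {
                        if (!cpumask_intersects(sched_group_cpus(sg),
                                                tsk_cpus_allowed(p)))
                                goto next;

                        for_each_cpu(i, sched_group_cpus(sg)) {
                                if (!idle_cpu(i))
                                        goto next;
                        }

                        target = cpumask_first_and(sched_group_cpus(sg),
                                        tsk_cpus_allowed(p));
                        goto done;
next:
                        sg = sg->next;
                } while (sg != sd->groups);
        }
        if (!smt) {
                /* no completely idle core found: settle for an idle SMT sibling */
                smt = 1;
                goto again;
        }
done:
        rcu_read_unlock();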
@@ -2773,6 +2791,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
         * 1) running (obviously), or
         * 2) cannot be migrated to this CPU due to cpus_allowed, or
         * 3) are cache-hot on their current CPU.
+        * 4) p->pi_lock is held.
         */
        if (!cpumask_test_cpu(this_cpu, tsk_cpus_allowed(p))) {
                schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
@@ -2785,6 +2804,14 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
                return 0;
        }
 
+       /*
+        * An rt -> fair class change may be in progress.  If we sneak in while
+        * double_lock_balance() has released rq->lock and move the task, we
+        * will cause switched_to_fair() to see a stale, no longer valid rq.
+        */
+       if (raw_spin_is_locked(&p->pi_lock))
+               return 0;
+
        /*
         * Aggressive migration if:
         * 1) task is cache cold, or
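
The pi_lock test guards a narrow window: __sched_setscheduler() holds p->pi_lock (and the task's rq->lock) across the whole class change, but the rt-side fixups it triggers can call double_lock_balance(), which momentarily drops rq->lock. A hypothetical helper, only to spell the reasoning out (safe_to_pull is not an in-tree function):

static int safe_to_pull(struct task_struct *p)
{
        /*
         * While p is being switched from rt to fair, switched_from_rt() may
         * pull rt tasks via double_lock_balance() and thereby drop rq->lock
         * for a moment.  If the load balancer migrated p in that window,
         * switched_to_fair() would afterwards run against an rq that p no
         * longer belongs to.  p->pi_lock is held for the whole class change,
         * so seeing it locked means: hands off this task for now.
         */
        if (raw_spin_is_locked(&p->pi_lock))
                return 0;

        return 1;
}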
@@ -4719,7 +4746,7 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
 
                raw_spin_lock_irq(&this_rq->lock);
                update_rq_clock(this_rq);
-               update_cpu_load(this_rq);
+               update_idle_cpu_load(this_rq);
                raw_spin_unlock_irq(&this_rq->lock);
 
                rebalance_domains(balance_cpu, CPU_IDLE);
@@ -4830,6 +4857,9 @@ static void rq_online_fair(struct rq *rq)
 static void rq_offline_fair(struct rq *rq)
 {
        update_sysctl();
+
+       /* Ensure any throttled groups are reachable by pick_next_task */
+       unthrottle_offline_cfs_rqs(rq);
 }
 
 #else  /* CONFIG_SMP */
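
unthrottle_offline_cfs_rqs() itself is outside this diff; a sketch of what such a helper has to do, written against the 3.2-era bandwidth fields rather than copied from the tree:

static void unthrottle_offline_cfs_rqs(struct rq *rq)
{
        struct cfs_rq *cfs_rq;

        /* the rq is going offline; nobody will replenish runtime for it */
        for_each_leaf_cfs_rq(rq, cfs_rq) {
                struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);

                if (!cfs_rq->runtime_enabled)
                        continue;

                /* grant enough quota that unthrottling sticks ... */
                cfs_rq->runtime_remaining = cfs_b->quota;
                /*
                 * ... and re-enqueue the group so pick_next_task() and the
                 * migration code can still reach its tasks
                 */
                if (cfs_rq_throttled(cfs_rq))
                        unthrottle_cfs_rq(cfs_rq);
        }
}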
@@ -4874,11 +4904,15 @@ static void task_fork_fair(struct task_struct *p)
 
        update_rq_clock(rq);
 
-       if (unlikely(task_cpu(p) != this_cpu)) {
-               rcu_read_lock();
-               __set_task_cpu(p, this_cpu);
-               rcu_read_unlock();
-       }
+       /*
+        * Not only the cpu but also the task_group of the parent might have
+        * been changed after parent->se.parent,cfs_rq were copied to
+        * child->se.parent,cfs_rq. So call __set_task_cpu() to make those
+        * of child point to valid ones.
+        */
+       rcu_read_lock();
+       __set_task_cpu(p, this_cpu);
+       rcu_read_unlock();
 
        update_curr(cfs_rq);
 
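
__set_task_cpu() is now called even when the cpu did not change because, with CONFIG_FAIR_GROUP_SCHED, it funnels into set_task_rq(), which is what repoints the child's se.cfs_rq and se.parent at its current task_group and cpu. A trimmed sketch of that helper (rt fields omitted):

static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
{
#ifdef CONFIG_FAIR_GROUP_SCHED
        struct task_group *tg = task_group(p);

        /*
         * The child inherited se.cfs_rq/se.parent verbatim from its parent at
         * fork time; if the parent has since moved to another task_group (or
         * another cpu was chosen), those pointers are stale and must be
         * refreshed before the child is enqueued.
         */
        p->se.cfs_rq = tg->cfs_rq[cpu];
        p->se.parent = tg->se[cpu];
#endif
}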
@@ -4928,15 +4962,15 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
        struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
        /*
-        * Ensure the task's vruntime is normalized, so that when its
+        * Ensure the task's vruntime is normalized, so that when it's
         * switched back to the fair class the enqueue_entity(.flags=0) will
         * do the right thing.
         *
-        * If it was on_rq, then the dequeue_entity(.flags=0) will already
-        * have normalized the vruntime, if it was !on_rq, then only when
+        * If it's on_rq, then the dequeue_entity(.flags=0) will already
+        * have normalized the vruntime, if it's !on_rq, then only when
         * the task is sleeping will it still have non-normalized vruntime.
         */
-       if (!se->on_rq && p->state != TASK_RUNNING) {
+       if (!p->on_rq && p->state != TASK_RUNNING) {
                /*
                 * Fix up our vruntime so that the current sleep doesn't
                 * cause 'unlimited' sleep bonus.
@@ -5017,7 +5051,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
         * idle runqueue:
         */
        if (rq->cfs.load.weight)
-               rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
+               rr_interval = NS_TO_JIFFIES(sched_slice(cfs_rq_of(se), se));
 
        return rr_interval;
 }
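
The get_rr_interval_fair() fix only changes behaviour with CONFIG_FAIR_GROUP_SCHED: sched_slice() sizes the slice from the load of the cfs_rq it is given, and for a grouped task that is the group's per-cpu runqueue, not the root rq->cfs. Roughly, the two variants of the accessor (abbreviated sketch):

#ifdef CONFIG_FAIR_GROUP_SCHED
/* the runqueue the entity is (to be) queued on: its group's per-cpu cfs_rq */
static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
{
        return se->cfs_rq;
}
#else
/* no grouping: every entity sits directly on the root cfs_rq of its rq */
static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
{
        return &task_rq(task_of(se))->cfs;
}
#endif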