x86/mm: Fix {pmd,pud}_{set,clear}_flags()
[pandora-kernel.git] / kernel / sched_rt.c
index 78fcacf..bd4afa4 100644 (file)
@@ -384,7 +384,7 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
 static int do_balance_runtime(struct rt_rq *rt_rq)
 {
        struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
-       struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+       struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
        int i, weight, more = 0;
        u64 rt_period;
 
@@ -509,6 +509,7 @@ balanced:
                 * runtime - in which case borrowing doesn't make sense.
                 */
                rt_rq->rt_runtime = RUNTIME_INF;
+               rt_rq->rt_throttled = 0;
                raw_spin_unlock(&rt_rq->rt_runtime_lock);
                raw_spin_unlock(&rt_b->rt_runtime_lock);
        }
@@ -587,6 +588,19 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
                return 1;
 
        span = sched_rt_period_mask();
+#ifdef CONFIG_RT_GROUP_SCHED
+       /*
+        * FIXME: isolated CPUs should really leave the root task group,
+        * whether they are isolcpus or were isolated via cpusets, lest
+        * the timer run on a CPU which does not service all runqueues,
+        * potentially leaving other CPUs indefinitely throttled.  If
+        * isolation is really required, the user will turn the throttle
+        * off to kill the perturbations it causes anyway.  Meanwhile,
+        * this maintains functionality for boot and/or troubleshooting.
+        */
+       if (rt_b == &root_task_group.rt_bandwidth)
+               span = cpu_online_mask;
+#endif
        for_each_cpu(i, span) {
                int enqueue = 0;
                struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
@@ -719,6 +733,13 @@ inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 {
        struct rq *rq = rq_of_rt_rq(rt_rq);
 
+#ifdef CONFIG_RT_GROUP_SCHED
+       /*
+        * Change rq's cpupri only if rt_rq is the top queue.
+        */
+       if (&rq->rt != rt_rq)
+               return;
+#endif
        if (rq->online && prio < prev_prio)
                cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
 }
@@ -728,6 +749,13 @@ dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 {
        struct rq *rq = rq_of_rt_rq(rt_rq);
 
+#ifdef CONFIG_RT_GROUP_SCHED
+       /*
+        * Change rq's cpupri only if rt_rq is the top queue.
+        */
+       if (&rq->rt != rt_rq)
+               return;
+#endif
        if (rq->online && rt_rq->highest_prio.curr != prev_prio)
                cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
 }
@@ -1039,7 +1067,12 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
            (p->rt.nr_cpus_allowed > 1)) {
                int target = find_lowest_rq(p);
 
-               if (target != -1)
+               /*
+                * Don't bother moving it if the destination CPU is
+                * not running a lower priority task.
+                */
+               if (target != -1 &&
+                   p->prio < cpu_rq(target)->rt.highest_prio.curr)
                        cpu = target;
        }
        rcu_read_unlock();
@@ -1318,6 +1351,16 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 
                lowest_rq = cpu_rq(cpu);
 
+               if (lowest_rq->rt.highest_prio.curr <= task->prio) {
+                       /*
+                        * Target rq has tasks of equal or higher priority,
+                        * retrying does not release any lock and is unlikely
+                        * to yield a different result.
+                        */
+                       lowest_rq = NULL;
+                       break;
+               }
+
                /* if the prio of this runqueue changed, try again */
                if (double_lock_balance(rq, lowest_rq)) {
                        /*
@@ -1749,7 +1792,11 @@ static void watchdog(struct rq *rq, struct task_struct *p)
        if (soft != RLIM_INFINITY) {
                unsigned long next;
 
-               p->rt.timeout++;
+               if (p->rt.watchdog_stamp != jiffies) {
+                       p->rt.timeout++;
+                       p->rt.watchdog_stamp = jiffies;
+               }
+
                next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
                if (p->rt.timeout > next)
                        p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
@@ -1758,6 +1805,8 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 {
+       struct sched_rt_entity *rt_se = &p->rt;
+
        update_curr_rt(rq);
 
        watchdog(rq, p);
@@ -1775,12 +1824,15 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
        p->rt.time_slice = DEF_TIMESLICE;
 
        /*
-        * Requeue to the end of queue if we are not the only element
-        * on the queue:
+        * Requeue to the end of queue if we (and all of our ancestors) are the
+        * only element on the queue
         */
-       if (p->rt.run_list.prev != p->rt.run_list.next) {
-               requeue_task_rt(rq, p, 0);
-               set_tsk_need_resched(p);
+       for_each_sched_rt_entity(rt_se) {
+               if (rt_se->run_list.prev != rt_se->run_list.next) {
+                       requeue_task_rt(rq, p, 0);
+                       set_tsk_need_resched(p);
+                       return;
+               }
        }
 }