diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index af11778..bd4afa4 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -124,21 +124,33 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
        update_rt_migration(rt_rq);
 }
 
+static inline int has_pushable_tasks(struct rq *rq)
+{
+       return !plist_head_empty(&rq->rt.pushable_tasks);
+}
+
 static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
 {
        plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
        plist_node_init(&p->pushable_tasks, p->prio);
        plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
+
+       /* Update the highest prio pushable task */
+       if (p->prio < rq->rt.highest_prio.next)
+               rq->rt.highest_prio.next = p->prio;
 }
 
 static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
 {
        plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
-}
 
-static inline int has_pushable_tasks(struct rq *rq)
-{
-       return !plist_head_empty(&rq->rt.pushable_tasks);
+       /* Update the new highest prio pushable task */
+       if (has_pushable_tasks(rq)) {
+               p = plist_first_entry(&rq->rt.pushable_tasks,
+                                     struct task_struct, pushable_tasks);
+               rq->rt.highest_prio.next = p->prio;
+       } else
+               rq->rt.highest_prio.next = MAX_RT_PRIO;
 }
 
 #else
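
For illustration: because pushable_tasks is a priority-sorted list, highest_prio.next can be maintained incrementally on enqueue/dequeue instead of being recomputed by scanning (which is what lets next_prio() be removed further down). A minimal standalone C sketch of the same bookkeeping, using made-up types and a plain sorted list rather than the kernel's plist API:

/* Simplified model of the highest_prio.next bookkeeping (not kernel code). */
#define MAX_RT_PRIO	100

struct fake_task {
	int prio;			/* lower value = higher priority */
	struct fake_task *next;		/* list kept sorted by prio */
};

struct fake_rt_rq {
	struct fake_task *pushable;	/* head = best pushable task */
	int highest_prio_next;		/* cached prio of best pushable task */
};

static void fake_enqueue_pushable(struct fake_rt_rq *rq, struct fake_task *p)
{
	struct fake_task **pp = &rq->pushable;

	while (*pp && (*pp)->prio <= p->prio)	/* keep the list sorted */
		pp = &(*pp)->next;
	p->next = *pp;
	*pp = p;

	if (p->prio < rq->highest_prio_next)	/* cheap incremental update */
		rq->highest_prio_next = p->prio;
}

static void fake_dequeue_pushable(struct fake_rt_rq *rq, struct fake_task *p)
{
	struct fake_task **pp = &rq->pushable;

	while (*pp && *pp != p)
		pp = &(*pp)->next;
	if (*pp)
		*pp = p->next;

	/* The new best is simply the sorted head, or "none" if empty. */
	rq->highest_prio_next = rq->pushable ? rq->pushable->prio : MAX_RT_PRIO;
}
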
@@ -372,7 +384,7 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
 static int do_balance_runtime(struct rt_rq *rt_rq)
 {
        struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
-       struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+       struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
        int i, weight, more = 0;
        u64 rt_period;
 
@@ -497,6 +509,7 @@ balanced:
                 * runtime - in which case borrowing doesn't make sense.
                 */
                rt_rq->rt_runtime = RUNTIME_INF;
+               rt_rq->rt_throttled = 0;
                raw_spin_unlock(&rt_rq->rt_runtime_lock);
                raw_spin_unlock(&rt_b->rt_runtime_lock);
        }
@@ -548,6 +561,9 @@ static int balance_runtime(struct rt_rq *rt_rq)
 {
        int more = 0;
 
+       if (!sched_feat(RT_RUNTIME_SHARE))
+               return more;
+
        if (rt_rq->rt_time > rt_rq->rt_runtime) {
                raw_spin_unlock(&rt_rq->rt_runtime_lock);
                more = do_balance_runtime(rt_rq);
@@ -572,6 +588,19 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
                return 1;
 
        span = sched_rt_period_mask();
+#ifdef CONFIG_RT_GROUP_SCHED
+       /*
+        * FIXME: isolated CPUs should really leave the root task group,
+        * whether they are isolcpus or were isolated via cpusets, lest
+        * the timer run on a CPU which does not service all runqueues,
+        * potentially leaving other CPUs indefinitely throttled.  If
+        * isolation is really required, the user will turn the throttle
+        * off to kill the perturbations it causes anyway.  Meanwhile,
+        * this maintains functionality for boot and/or troubleshooting.
+        */
+       if (rt_b == &root_task_group.rt_bandwidth)
+               span = cpu_online_mask;
+#endif
        for_each_cpu(i, span) {
                int enqueue = 0;
                struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
@@ -643,6 +672,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 
        if (rt_rq->rt_time > runtime) {
                rt_rq->rt_throttled = 1;
+               printk_once(KERN_WARNING "sched: RT throttling activated\n");
                if (rt_rq_throttled(rt_rq)) {
                        sched_rt_rq_dequeue(rt_rq);
                        return 1;
@@ -698,47 +728,20 @@ static void update_curr_rt(struct rq *rq)
 
 #if defined CONFIG_SMP
 
-static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu);
-
-static inline int next_prio(struct rq *rq)
-{
-       struct task_struct *next = pick_next_highest_task_rt(rq, rq->cpu);
-
-       if (next && rt_prio(next->prio))
-               return next->prio;
-       else
-               return MAX_RT_PRIO;
-}
-
 static void
 inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 {
        struct rq *rq = rq_of_rt_rq(rt_rq);
 
-       if (prio < prev_prio) {
-
-               /*
-                * If the new task is higher in priority than anything on the
-                * run-queue, we know that the previous high becomes our
-                * next-highest.
-                */
-               rt_rq->highest_prio.next = prev_prio;
-
-               if (rq->online)
-                       cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
-
-       } else if (prio == rt_rq->highest_prio.curr)
-               /*
-                * If the next task is equal in priority to the highest on
-                * the run-queue, then we implicitly know that the next highest
-                * task cannot be any lower than current
-                */
-               rt_rq->highest_prio.next = prio;
-       else if (prio < rt_rq->highest_prio.next)
-               /*
-                * Otherwise, we need to recompute next-highest
-                */
-               rt_rq->highest_prio.next = next_prio(rq);
+#ifdef CONFIG_RT_GROUP_SCHED
+       /*
+        * Change rq's cpupri only if rt_rq is the top queue.
+        */
+       if (&rq->rt != rt_rq)
+               return;
+#endif
+       if (rq->online && prio < prev_prio)
+               cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
 }
 
 static void
@@ -746,9 +749,13 @@ dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 {
        struct rq *rq = rq_of_rt_rq(rt_rq);
 
-       if (rt_rq->rt_nr_running && (prio <= rt_rq->highest_prio.next))
-               rt_rq->highest_prio.next = next_prio(rq);
-
+#ifdef CONFIG_RT_GROUP_SCHED
+       /*
+        * Change rq's cpupri only if rt_rq is the top queue.
+        */
+       if (&rq->rt != rt_rq)
+               return;
+#endif
        if (rq->online && rt_rq->highest_prio.curr != prev_prio)
                cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
 }
@@ -961,6 +968,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 
        if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
                enqueue_pushable_task(rq, p);
+
+       inc_nr_running(rq);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
@@ -971,6 +980,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
        dequeue_rt_entity(rt_se);
 
        dequeue_pushable_task(rq, p);
+
+       dec_nr_running(rq);
 }
 
 /*
@@ -1017,10 +1028,12 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
        struct rq *rq;
        int cpu;
 
-       if (sd_flag != SD_BALANCE_WAKE)
-               return smp_processor_id();
-
        cpu = task_cpu(p);
+
+       /* For anything but wake ups, just return the task_cpu */
+       if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
+               goto out;
+
        rq = cpu_rq(cpu);
 
        rcu_read_lock();
@@ -1054,11 +1067,17 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
            (p->rt.nr_cpus_allowed > 1)) {
                int target = find_lowest_rq(p);
 
-               if (target != -1)
+               /*
+                * Don't bother moving it if the destination CPU is
+                * not running a lower priority task.
+                */
+               if (target != -1 &&
+                   p->prio < cpu_rq(target)->rt.highest_prio.curr)
                        cpu = target;
        }
        rcu_read_unlock();
 
+out:
        return cpu;
 }
 
@@ -1178,7 +1197,6 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
 static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 {
        update_curr_rt(rq);
-       p->se.exec_start = 0;
 
        /*
         * The previous task needs to be made eligible for pushing
@@ -1198,7 +1216,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
        if (!task_running(rq, p) &&
-           (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) &&
+           (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
            (p->rt.nr_cpus_allowed > 1))
                return 1;
        return 0;
@@ -1333,6 +1351,16 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 
                lowest_rq = cpu_rq(cpu);
 
+               if (lowest_rq->rt.highest_prio.curr <= task->prio) {
+                       /*
+                        * Target rq has tasks of equal or higher priority,
+                        * retrying does not release any lock and is unlikely
+                        * to yield a different result.
+                        */
+                       lowest_rq = NULL;
+                       break;
+               }
+
                /* if the prio of this runqueue changed, try again */
                if (double_lock_balance(rq, lowest_rq)) {
                        /*
@@ -1343,7 +1371,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
                         */
                        if (unlikely(task_rq(task) != rq ||
                                     !cpumask_test_cpu(lowest_rq->cpu,
-                                                      &task->cpus_allowed) ||
+                                                      tsk_cpus_allowed(task)) ||
                                     task_running(rq, task) ||
                                     !task->on_rq)) {
 
@@ -1394,6 +1422,7 @@ static int push_rt_task(struct rq *rq)
 {
        struct task_struct *next_task;
        struct rq *lowest_rq;
+       int ret = 0;
 
        if (!rq->rt.overloaded)
                return 0;
@@ -1402,6 +1431,11 @@ static int push_rt_task(struct rq *rq)
        if (!next_task)
                return 0;
 
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+       if (unlikely(task_running(rq, next_task)))
+               return 0;
+#endif
+
 retry:
        if (unlikely(next_task == rq->curr)) {
                WARN_ON(1);
@@ -1426,7 +1460,7 @@ retry:
        if (!lowest_rq) {
                struct task_struct *task;
                /*
-                * find lock_lowest_rq releases rq->lock
+                * find_lock_lowest_rq releases rq->lock
                 * so it is possible that next_task has migrated.
                 *
                 * We need to make sure that the task is still on the same
@@ -1436,12 +1470,11 @@ retry:
                task = pick_next_pushable_task(rq);
                if (task_cpu(next_task) == rq->cpu && task == next_task) {
                        /*
-                        * If we get here, the task hasn't moved at all, but
-                        * it has failed to push.  We will not try again,
-                        * since the other cpus will pull from us when they
-                        * are ready.
+                        * The task hasn't migrated, and is still the next
+                        * eligible task, but we failed to find a run-queue
+                        * to push it to.  Do not retry in this case, since
+                        * other cpus will pull from us when ready.
                         */
-                       dequeue_pushable_task(rq, next_task);
                        goto out;
                }
 
@@ -1460,6 +1493,7 @@ retry:
        deactivate_task(rq, next_task, 0);
        set_task_cpu(next_task, lowest_rq->cpu);
        activate_task(lowest_rq, next_task, 0);
+       ret = 1;
 
        resched_task(lowest_rq->curr);
 
@@ -1468,7 +1502,7 @@ retry:
 out:
        put_task_struct(next_task);
 
-       return 1;
+       return ret;
 }
 
 static void push_rt_tasks(struct rq *rq)
@@ -1626,9 +1660,6 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 
                update_rt_migration(&rq->rt);
        }
-
-       cpumask_copy(&p->cpus_allowed, new_mask);
-       p->rt.nr_cpus_allowed = weight;
 }
 
 /* Assumes rq->lock is held */
@@ -1761,7 +1792,11 @@ static void watchdog(struct rq *rq, struct task_struct *p)
        if (soft != RLIM_INFINITY) {
                unsigned long next;
 
-               p->rt.timeout++;
+               if (p->rt.watchdog_stamp != jiffies) {
+                       p->rt.timeout++;
+                       p->rt.watchdog_stamp = jiffies;
+               }
+
                next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
                if (p->rt.timeout > next)
                        p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
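
For illustration, the watchdog_stamp check above amounts to once-per-jiffy accounting: the RLIMIT_RTTIME counter advances at most once per tick even if the watchdog is invoked several times within the same jiffy. A minimal sketch with invented types (not kernel code):

/* Simplified model of the once-per-jiffy timeout accounting (not kernel code). */
struct fake_rt_se {
	unsigned long timeout;		/* ticks charged against RLIMIT_RTTIME */
	unsigned long watchdog_stamp;	/* last jiffy that was charged */
};

static void fake_watchdog_tick(struct fake_rt_se *se, unsigned long now_jiffies)
{
	/* Charge each jiffy at most once, however often we are called in it. */
	if (se->watchdog_stamp != now_jiffies) {
		se->timeout++;
		se->watchdog_stamp = now_jiffies;
	}
}
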
@@ -1770,6 +1805,8 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 {
+       struct sched_rt_entity *rt_se = &p->rt;
+
        update_curr_rt(rq);
 
        watchdog(rq, p);
@@ -1787,12 +1824,15 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
        p->rt.time_slice = DEF_TIMESLICE;
 
        /*
-        * Requeue to the end of queue if we are not the only element
-        * on the queue:
+        * Requeue to the end of queue if we (and all of our ancestors) are not
+        * the only element on the queue
         */
-       if (p->rt.run_list.prev != p->rt.run_list.next) {
-               requeue_task_rt(rq, p, 0);
-               set_tsk_need_resched(p);
+       for_each_sched_rt_entity(rt_se) {
+               if (rt_se->run_list.prev != rt_se->run_list.next) {
+                       requeue_task_rt(rq, p, 0);
+                       set_tsk_need_resched(p);
+                       return;
+               }
        }
 }
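
As an aside, the requeue decision above walks the group-scheduling hierarchy and round-robins the task only if some level actually has more than one queued entity. A simplified standalone sketch (invented types, not kernel code):

/* Illustrative model of the hierarchy-aware round-robin requeue (not kernel code). */
struct fake_rt_entity {
	int nr_on_my_list;		/* entities queued at this level */
	struct fake_rt_entity *parent;	/* group-scheduling ancestor, or NULL */
};

/* Return 1 if the task should be requeued (round-robined) on this tick. */
static int should_requeue(struct fake_rt_entity *se)
{
	/*
	 * Walk up the hierarchy: if any level has more than one entity
	 * queued, cycling the task to the tail can change who runs next.
	 */
	for (; se; se = se->parent)
		if (se->nr_on_my_list > 1)
			return 1;
	return 0;
}
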
 
@@ -1863,4 +1903,3 @@ static void print_rt_stats(struct seq_file *m, int cpu)
        rcu_read_unlock();
 }
 #endif /* CONFIG_SCHED_DEBUG */
-