diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index af11778..bd4afa4 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -124,21 +124,33 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
        update_rt_migration(rt_rq);
 }
 
+static inline int has_pushable_tasks(struct rq *rq)
+{
+       return !plist_head_empty(&rq->rt.pushable_tasks);
+}
+
 static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
 {
        plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
        plist_node_init(&p->pushable_tasks, p->prio);
        plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
+
+       /* Update the highest prio pushable task */
+       if (p->prio < rq->rt.highest_prio.next)
+               rq->rt.highest_prio.next = p->prio;
 }
 
 static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
 {
        plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
-}
 
-static inline int has_pushable_tasks(struct rq *rq)
-{
-       return !plist_head_empty(&rq->rt.pushable_tasks);
+       /* Update the new highest prio pushable task */
+       if (has_pushable_tasks(rq)) {
+               p = plist_first_entry(&rq->rt.pushable_tasks,
+                                     struct task_struct, pushable_tasks);
+               rq->rt.highest_prio.next = p->prio;
+       } else
+               rq->rt.highest_prio.next = MAX_RT_PRIO;
 }
 
 #else
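
For illustration: because pushable_tasks is a priority-sorted list, highest_prio.next can be maintained incrementally on enqueue/dequeue instead of being recomputed by scanning (which is what lets next_prio() be removed further down). A minimal standalone C sketch of the same bookkeeping, using made-up types and a plain sorted list rather than the kernel's plist API:

/* Simplified model of the highest_prio.next bookkeeping (not kernel code). */
#define MAX_RT_PRIO	100

struct fake_task {
	int prio;			/* lower value = higher priority */
	struct fake_task *next;		/* list kept sorted by prio */
};

struct fake_rt_rq {
	struct fake_task *pushable;	/* head = best pushable task */
	int highest_prio_next;		/* cached prio of best pushable task */
};

static void fake_enqueue_pushable(struct fake_rt_rq *rq, struct fake_task *p)
{
	struct fake_task **pp = &rq->pushable;

	while (*pp && (*pp)->prio <= p->prio)	/* keep the list sorted */
		pp = &(*pp)->next;
	p->next = *pp;
	*pp = p;

	if (p->prio < rq->highest_prio_next)	/* cheap incremental update */
		rq->highest_prio_next = p->prio;
}

static void fake_dequeue_pushable(struct fake_rt_rq *rq, struct fake_task *p)
{
	struct fake_task **pp = &rq->pushable;

	while (*pp && *pp != p)
		pp = &(*pp)->next;
	if (*pp)
		*pp = p->next;

	/* The new best is simply the sorted head, or "none" if empty. */
	rq->highest_prio_next = rq->pushable ? rq->pushable->prio : MAX_RT_PRIO;
}
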
@@ -372,7 +384,7 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
 static int do_balance_runtime(struct rt_rq *rt_rq)
 {
        struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
-       struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+       struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
        int i, weight, more = 0;
        u64 rt_period;
 
@@ -497,6 +509,7 @@ balanced:
                 * runtime - in which case borrowing doesn't make sense.
                 */
                rt_rq->rt_runtime = RUNTIME_INF;
+               rt_rq->rt_throttled = 0;
                raw_spin_unlock(&rt_rq->rt_runtime_lock);
                raw_spin_unlock(&rt_b->rt_runtime_lock);
        }
@@ -548,6 +561,9 @@ static int balance_runtime(struct rt_rq *rt_rq)
 {
        int more = 0;
 
+       if (!sched_feat(RT_RUNTIME_SHARE))
+               return more;
+
        if (rt_rq->rt_time > rt_rq->rt_runtime) {
                raw_spin_unlock(&rt_rq->rt_runtime_lock);
                more = do_balance_runtime(rt_rq);
@@ -572,6 +588,19 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
                return 1;
 
        span = sched_rt_period_mask();
+#ifdef CONFIG_RT_GROUP_SCHED
+       /*
+        * FIXME: isolated CPUs should really leave the root task group,
+        * whether they are isolcpus or were isolated via cpusets, lest
+        * the timer run on a CPU which does not service all runqueues,
+        * potentially leaving other CPUs indefinitely throttled.  If
+        * isolation is really required, the user will turn the throttle
+        * off to kill the perturbations it causes anyway.  Meanwhile,
+        * this maintains functionality for boot and/or troubleshooting.
+        */
+       if (rt_b == &root_task_group.rt_bandwidth)
+               span = cpu_online_mask;
+#endif
        for_each_cpu(i, span) {
                int enqueue = 0;
                struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
@@ -643,6 +672,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 
        if (rt_rq->rt_time > runtime) {
                rt_rq->rt_throttled = 1;
+               printk_once(KERN_WARNING "sched: RT throttling activated\n");
                if (rt_rq_throttled(rt_rq)) {
                        sched_rt_rq_dequeue(rt_rq);
                        return 1;
@@ -698,47 +728,20 @@ static void update_curr_rt(struct rq *rq)
 
 #if defined CONFIG_SMP
 
-static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu);
-
-static inline int next_prio(struct rq *rq)
-{
-       struct task_struct *next = pick_next_highest_task_rt(rq, rq->cpu);
-
-       if (next && rt_prio(next->prio))
-               return next->prio;
-       else
-               return MAX_RT_PRIO;
-}
-
 static void
 inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 {
        struct rq *rq = rq_of_rt_rq(rt_rq);
 
-       if (prio < prev_prio) {
-
-               /*
-                * If the new task is higher in priority than anything on the
-                * run-queue, we know that the previous high becomes our
-                * next-highest.
-                */
-               rt_rq->highest_prio.next = prev_prio;
-
-               if (rq->online)
-                       cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
-
-       } else if (prio == rt_rq->highest_prio.curr)
-               /*
-                * If the next task is equal in priority to the highest on
-                * the run-queue, then we implicitly know that the next highest
-                * task cannot be any lower than current
-                */
-               rt_rq->highest_prio.next = prio;
-       else if (prio < rt_rq->highest_prio.next)
-               /*
-                * Otherwise, we need to recompute next-highest
-                */
-               rt_rq->highest_prio.next = next_prio(rq);
+#ifdef CONFIG_RT_GROUP_SCHED
+       /*
+        * Change rq's cpupri only if rt_rq is the top queue.
+        */
+       if (&rq->rt != rt_rq)
+               return;
+#endif
+       if (rq->online && prio < prev_prio)
+               cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
 }
 
 static void
@@ -746,9 +749,13 @@ dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 {
        struct rq *rq = rq_of_rt_rq(rt_rq);
 
-       if (rt_rq->rt_nr_running && (prio <= rt_rq->highest_prio.next))
-               rt_rq->highest_prio.next = next_prio(rq);
-
+#ifdef CONFIG_RT_GROUP_SCHED
+       /*
+        * Change rq's cpupri only if rt_rq is the top queue.
+        */
+       if (&rq->rt != rt_rq)
+               return;
+#endif
        if (rq->online && rt_rq->highest_prio.curr != prev_prio)
                cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
 }
@@ -961,6 +968,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 
        if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
                enqueue_pushable_task(rq, p);
+
+       inc_nr_running(rq);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
@@ -971,6 +980,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
        dequeue_rt_entity(rt_se);
 
        dequeue_pushable_task(rq, p);
+
+       dec_nr_running(rq);
 }
 
 /*
@@ -1017,10 +1028,12 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
        struct rq *rq;
        int cpu;
 
-       if (sd_flag != SD_BALANCE_WAKE)
-               return smp_processor_id();
-
        cpu = task_cpu(p);
+
+       /* For anything but wake ups, just return the task_cpu */
+       if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
+               goto out;
+
        rq = cpu_rq(cpu);
 
        rcu_read_lock();
@@ -1054,11 +1067,17 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
            (p->rt.nr_cpus_allowed > 1)) {
                int target = find_lowest_rq(p);
 
-               if (target != -1)
+               /*
+                * Don't bother moving it if the destination CPU is
+                * not running a lower priority task.
+                */
+               if (target != -1 &&
+                   p->prio < cpu_rq(target)->rt.highest_prio.curr)
                        cpu = target;
        }
        rcu_read_unlock();
 
+out:
        return cpu;
 }
 
@@ -1178,7 +1197,6 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
 static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 {
        update_curr_rt(rq);
-       p->se.exec_start = 0;
 
        /*
         * The previous task needs to be made eligible for pushing
@@ -1198,7 +1216,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
        if (!task_running(rq, p) &&
-           (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) &&
+           (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
            (p->rt.nr_cpus_allowed > 1))
                return 1;
        return 0;
@@ -1333,6 +1351,16 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 
                lowest_rq = cpu_rq(cpu);
 
+               if (lowest_rq->rt.highest_prio.curr <= task->prio) {
+                       /*
+                        * Target rq has tasks of equal or higher priority,
+                        * retrying does not release any lock and is unlikely
+                        * to yield a different result.
+                        */
+                       lowest_rq = NULL;
+                       break;
+               }
+
                /* if the prio of this runqueue changed, try again */
                if (double_lock_balance(rq, lowest_rq)) {
                        /*
@@ -1343,7 +1371,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
                         */
                        if (unlikely(task_rq(task) != rq ||
                                     !cpumask_test_cpu(lowest_rq->cpu,
-                                                      &task->cpus_allowed) ||
+                                                      tsk_cpus_allowed(task)) ||
                                     task_running(rq, task) ||
                                     !task->on_rq)) {
 
@@ -1394,6 +1422,7 @@ static int push_rt_task(struct rq *rq)
 {
        struct task_struct *next_task;
        struct rq *lowest_rq;
+       int ret = 0;
 
        if (!rq->rt.overloaded)
                return 0;
@@ -1402,6 +1431,11 @@ static int push_rt_task(struct rq *rq)
        if (!next_task)
                return 0;
 
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+       if (unlikely(task_running(rq, next_task)))
+               return 0;
+#endif
+
 retry:
        if (unlikely(next_task == rq->curr)) {
                WARN_ON(1);
@@ -1426,7 +1460,7 @@ retry:
        if (!lowest_rq) {
                struct task_struct *task;
                /*
-                * find lock_lowest_rq releases rq->lock
+                * find_lock_lowest_rq releases rq->lock
                 * so it is possible that next_task has migrated.
                 *
                 * We need to make sure that the task is still on the same
@@ -1436,12 +1470,11 @@ retry:
                task = pick_next_pushable_task(rq);
                if (task_cpu(next_task) == rq->cpu && task == next_task) {
                        /*
-                        * If we get here, the task hasn't moved at all, but
-                        * it has failed to push.  We will not try again,
-                        * since the other cpus will pull from us when they
-                        * are ready.
+                        * The task hasn't migrated, and is still the next
+                        * eligible task, but we failed to find a run-queue
+                        * to push it to.  Do not retry in this case, since
+                        * other cpus will pull from us when ready.
                         */
-                       dequeue_pushable_task(rq, next_task);
                        goto out;
                }
 
@@ -1460,6 +1493,7 @@ retry:
        deactivate_task(rq, next_task, 0);
        set_task_cpu(next_task, lowest_rq->cpu);
        activate_task(lowest_rq, next_task, 0);
+       ret = 1;
 
        resched_task(lowest_rq->curr);
 
@@ -1468,7 +1502,7 @@ retry:
 out:
        put_task_struct(next_task);
 
-       return 1;
+       return ret;
 }
 
 static void push_rt_tasks(struct rq *rq)
@@ -1626,9 +1660,6 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 
                update_rt_migration(&rq->rt);
        }
-
-       cpumask_copy(&p->cpus_allowed, new_mask);
-       p->rt.nr_cpus_allowed = weight;
 }
 
 /* Assumes rq->lock is held */
@@ -1761,7 +1792,11 @@ static void watchdog(struct rq *rq, struct task_struct *p)
        if (soft != RLIM_INFINITY) {
                unsigned long next;
 
-               p->rt.timeout++;
+               if (p->rt.watchdog_stamp != jiffies) {
+                       p->rt.timeout++;
+                       p->rt.watchdog_stamp = jiffies;
+               }
+
                next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
                if (p->rt.timeout > next)
                        p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
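
For illustration, the watchdog_stamp check above amounts to once-per-jiffy accounting: the RLIMIT_RTTIME counter advances at most once per tick even if the watchdog is invoked several times within the same jiffy. A minimal sketch with invented types (not kernel code):

/* Simplified model of the once-per-jiffy timeout accounting (not kernel code). */
struct fake_rt_se {
	unsigned long timeout;		/* ticks charged against RLIMIT_RTTIME */
	unsigned long watchdog_stamp;	/* last jiffy that was charged */
};

static void fake_watchdog_tick(struct fake_rt_se *se, unsigned long now_jiffies)
{
	/* Charge each jiffy at most once, however often we are called in it. */
	if (se->watchdog_stamp != now_jiffies) {
		se->timeout++;
		se->watchdog_stamp = now_jiffies;
	}
}
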
@@ -1770,6 +1805,8 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 {
+       struct sched_rt_entity *rt_se = &p->rt;
+
        update_curr_rt(rq);
 
        watchdog(rq, p);
@@ -1787,12 +1824,15 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
        p->rt.time_slice = DEF_TIMESLICE;
 
        /*
-        * Requeue to the end of queue if we are not the only element
-        * on the queue:
+        * Requeue to the end of queue if we (and all of our ancestors) are not
+        * the only element on the queue
         */
-       if (p->rt.run_list.prev != p->rt.run_list.next) {
-               requeue_task_rt(rq, p, 0);
-               set_tsk_need_resched(p);
+       for_each_sched_rt_entity(rt_se) {
+               if (rt_se->run_list.prev != rt_se->run_list.next) {
+                       requeue_task_rt(rq, p, 0);
+                       set_tsk_need_resched(p);
+                       return;
+               }
        }
 }
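
As an aside, the requeue decision above walks the group-scheduling hierarchy and round-robins the task only if some level actually has more than one queued entity. A simplified standalone sketch (invented types, not kernel code):

/* Illustrative model of the hierarchy-aware round-robin requeue (not kernel code). */
struct fake_rt_entity {
	int nr_on_my_list;		/* entities queued at this level */
	struct fake_rt_entity *parent;	/* group-scheduling ancestor, or NULL */
};

/* Return 1 if the task should be requeued (round-robined) on this tick. */
static int should_requeue(struct fake_rt_entity *se)
{
	/*
	 * Walk up the hierarchy: if any level has more than one entity
	 * queued, cycling the task to the tail can change who runs next.
	 */
	for (; se; se = se->parent)
		if (se->nr_on_my_list > 1)
			return 1;
	return 0;
}
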
 
@@ -1863,4 +1903,3 @@ static void print_rt_stats(struct seq_file *m, int cpu)
        rcu_read_unlock();
 }
 #endif /* CONFIG_SCHED_DEBUG */
-