diff --git a/kernel/sched.c b/kernel/sched.c
index 6e52e0a..365f0b9 100644
@@ -664,12 +664,58 @@ static int effective_prio(task_t *p)
        return prio;
 }
 
+/*
+ * We place interactive tasks back into the active array, if possible.
+ *
+ * To guarantee that this does not starve expired tasks we ignore the
+ * interactivity of a task if the first expired task had to wait more
+ * than a 'reasonable' amount of time. This deadline timeout is
+ * load-dependent, as the frequency of array switches decreases with
+ * increasing number of running tasks. We also ignore the interactivity
+ * if a better static_prio task has expired, and switch periodically
+ * regardless, to ensure that highly interactive tasks do not starve
+ * the less fortunate for unreasonably long periods.
+ */
+static inline int expired_starving(runqueue_t *rq)
+{
+       int limit;
+
+       /*
+        * Arrays were recently switched, all is well
+        */
+       if (!rq->expired_timestamp)
+               return 0;
+
+       limit = STARVATION_LIMIT * rq->nr_running;
+
+       /*
+        * It's time to switch arrays
+        */
+       if (jiffies - rq->expired_timestamp >= limit)
+               return 1;
+
+       /*
+        * There's a better selection in the expired array
+        */
+       if (rq->curr->static_prio > rq->best_expired_prio)
+               return 1;
+
+       /*
+        * All is well
+        */
+       return 0;
+}
+
 /*
  * __activate_task - move a task to the runqueue.
  */
-static inline void __activate_task(task_t *p, runqueue_t *rq)
+static void __activate_task(task_t *p, runqueue_t *rq)
 {
-       enqueue_task(p, rq->active);
+       prio_array_t *target = rq->active;
+
+       if (unlikely(batch_task(p) || (expired_starving(rq) && !rt_task(p))))
+               target = rq->expired;
+       enqueue_task(p, target);
        rq->nr_running++;
 }
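
Two notes on this hunk. First, the numbers: assuming the usual 2.6.16-era
defaults, STARVATION_LIMIT equals MAX_SLEEP_AVG (one second's worth of
jiffies), so the deadline scales as roughly one second per runnable task;
with four tasks runnable, an array switch is forced at most ~4s after the
first task expired. The deadline deliberately grows with load, since array
switches naturally become rarer as nr_running rises.

Second, the two predicates __activate_task() now uses are one-line sched.h
helpers; a sketch of what they presumably look like (illustration, not part
of this diff):

	#define batch_task(p)	(unlikely((p)->policy == SCHED_BATCH))
	#define rt_task(p)	(unlikely((p)->prio < MAX_RT_PRIO))

So a SCHED_BATCH task always wakes into the expired array, and once the
expired array is starving, every non-realtime wakeup is diverted there too,
which stops refilling the active array and brings the switch forward.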
 
@@ -688,7 +734,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
        unsigned long long __sleep_time = now - p->timestamp;
        unsigned long sleep_time;
 
-       if (unlikely(p->policy == SCHED_BATCH))
+       if (batch_task(p))
                sleep_time = 0;
        else {
                if (__sleep_time > NS_MAX_SLEEP_AVG)
@@ -700,21 +746,25 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
        if (likely(sleep_time > 0)) {
                /*
                 * User tasks that sleep a long time are categorised as
-                * idle and will get just interactive status to stay active &
-                * prevent them suddenly becoming cpu hogs and starving
-                * other processes.
+                * idle. They will only have their sleep_avg increased to a
+                * level that leaves them at just-interactive priority: enough
+                * to stay active, yet preventing them from suddenly becoming
+                * cpu hogs and starving other processes.
                 */
-               if (p->mm && p->activated != -1 &&
-                       sleep_time > INTERACTIVE_SLEEP(p)) {
-                               p->sleep_avg = JIFFIES_TO_NS(MAX_SLEEP_AVG -
-                                               DEF_TIMESLICE);
+               if (p->mm && sleep_time > INTERACTIVE_SLEEP(p)) {
+                               unsigned long ceiling;
+
+                               ceiling = JIFFIES_TO_NS(MAX_SLEEP_AVG -
+                                       DEF_TIMESLICE);
+                               if (p->sleep_avg < ceiling)
+                                       p->sleep_avg = ceiling;
                } else {
                        /*
                         * Tasks waking from uninterruptible sleep are
                         * limited in their sleep_avg rise as they
                         * are likely to be waiting on I/O
                         */
-                       if (p->activated == -1 && p->mm) {
+                       if (p->sleep_type == SLEEP_NONINTERACTIVE && p->mm) {
                                if (p->sleep_avg >= INTERACTIVE_SLEEP(p))
                                        sleep_time = 0;
                                else if (p->sleep_avg + sleep_time >=
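
Worked numbers for the new ceiling, assuming the usual defaults
(DEF_TIMESLICE = 100 jiffies at HZ=1000, MAX_BONUS = 10, hence
MAX_SLEEP_AVG = 1000 jiffies):

	ceiling = JIFFIES_TO_NS(MAX_SLEEP_AVG - DEF_TIMESLICE)
	        = JIFFIES_TO_NS(900)	/* 900ms worth of sleep_avg */

The behavioural fix is the new comparison: sleep_avg is only raised to the
ceiling, never lowered to it, so a long sleep can no longer reduce an
already-better sleep_avg the way the removed unconditional assignment could.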
@@ -769,7 +819,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
         * This checks to make sure it's not an uninterruptible task
         * that is now waking up.
         */
-       if (!p->activated) {
+       if (p->sleep_type == SLEEP_NORMAL) {
                /*
                 * Tasks which were woken up by interrupts (ie. hw events)
                 * are most likely of interactive nature. So we give them
@@ -778,13 +828,13 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
                 * on a CPU, first time around:
                 */
                if (in_interrupt())
-                       p->activated = 2;
+                       p->sleep_type = SLEEP_INTERRUPTED;
                else {
                        /*
                         * Normal first-time wakeups get a credit too for
                         * on-runqueue time, but it will be weighted down:
                         */
-                       p->activated = 1;
+                       p->sleep_type = SLEEP_INTERACTIVE;
                }
        }
        p->timestamp = now;
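
The old activated flag's magic values (-1/0/1/2) become a named enum
throughout this file. The enum itself is added outside this diff; from the
substitutions visible here it presumably reads (illustration only):

	enum sleep_type {
		SLEEP_NORMAL,		/* was activated == 0 */
		SLEEP_NONINTERACTIVE,	/* was activated == -1 */
		SLEEP_INTERACTIVE,	/* was activated == 1 */
		SLEEP_INTERRUPTED,	/* was activated == 2 */
	};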
@@ -1272,19 +1322,19 @@ out_activate:
                 * Tasks on involuntary sleep don't earn
                 * sleep_avg beyond just interactive state.
                 */
-               p->activated = -1;
-       }
+               p->sleep_type = SLEEP_NONINTERACTIVE;
+       } else
 
        /*
         * Tasks that have marked their sleep as noninteractive get
-        * woken up without updating their sleep average. (i.e. their
-        * sleep is handled in a priority-neutral manner, no priority
-        * boost and no penalty.)
+        * woken up without any interactive weighting applied to
+        * their sleep average.
         */
-       if (old_state & TASK_NONINTERACTIVE)
-               __activate_task(p, rq);
-       else
-               activate_task(p, rq, cpu == this_cpu);
+               if (old_state & TASK_NONINTERACTIVE)
+                       p->sleep_type = SLEEP_NONINTERACTIVE;
+
+
+       activate_task(p, rq, cpu == this_cpu);
        /*
         * Sync wakeups (i.e. those types of wakeups where the waker
         * has indicated that it will leave the CPU in short order)
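
The new control flow above is easy to misread because the else is separated
from its if by a comment block. A de-obfuscated equivalent, for illustration:

	if (old_state == TASK_UNINTERRUPTIBLE) {
		rq->nr_uninterruptible--;
		p->sleep_type = SLEEP_NONINTERACTIVE;
	} else if (old_state & TASK_NONINTERACTIVE)
		p->sleep_type = SLEEP_NONINTERACTIVE;

	activate_task(p, rq, cpu == this_cpu);

The substantive change: noninteractive wakeups now go through activate_task()
like everyone else instead of bypassing it via __activate_task(); their sleep
credit is limited by the SLEEP_NONINTERACTIVE handling in recalc_task_prio()
rather than by skipping the accounting path entirely.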
@@ -2481,22 +2531,6 @@ unsigned long long current_sched_time(const task_t *tsk)
        return ns;
 }
 
-/*
- * We place interactive tasks back into the active array, if possible.
- *
- * To guarantee that this does not starve expired tasks we ignore the
- * interactivity of a task if the first expired task had to wait more
- * than a 'reasonable' amount of time. This deadline timeout is
- * load-dependent, as the frequency of array switched decreases with
- * increasing number of running tasks. We also ignore the interactivity
- * if a better static_prio task has expired:
- */
-#define EXPIRED_STARVING(rq) \
-       ((STARVATION_LIMIT && ((rq)->expired_timestamp && \
-               (jiffies - (rq)->expired_timestamp >= \
-                       STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
-                       ((rq)->curr->static_prio > (rq)->best_expired_prio))
-
 /*
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
@@ -2632,7 +2666,7 @@ void scheduler_tick(void)
 
                if (!rq->expired_timestamp)
                        rq->expired_timestamp = jiffies;
-               if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
+               if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
                        enqueue_task(p, rq->expired);
                        if (p->static_prio < rq->best_expired_prio)
                                rq->best_expired_prio = p->static_prio;
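
"Better" in expired_starving() means numerically lower static_prio. Assuming
the standard NICE_TO_PRIO(nice) = MAX_RT_PRIO + nice + 20 mapping: if the
running task is nice 0 (static_prio 120) while a nice -5 task (static_prio
115) waits in the expired array, expired_starving() returns 1 and the tick
above sends even an interactive rq->curr to the expired array, so the
higher-priority expired task is reached sooner.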
@@ -2875,6 +2909,12 @@ EXPORT_SYMBOL(sub_preempt_count);
 
 #endif
 
+static inline int interactive_sleep(enum sleep_type sleep_type)
+{
+       return (sleep_type == SLEEP_INTERACTIVE ||
+               sleep_type == SLEEP_INTERRUPTED);
+}
+
 /*
  * schedule() is the main scheduler function.
  */
@@ -2998,12 +3038,12 @@ go_idle:
        queue = array->queue + idx;
        next = list_entry(queue->next, task_t, run_list);
 
-       if (!rt_task(next) && next->activated > 0) {
+       if (!rt_task(next) && interactive_sleep(next->sleep_type)) {
                unsigned long long delta = now - next->timestamp;
                if (unlikely((long long)(now - next->timestamp) < 0))
                        delta = 0;
 
-               if (next->activated == 1)
+               if (next->sleep_type == SLEEP_INTERACTIVE)
                        delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128;
 
                array = next->array;
@@ -3013,10 +3053,9 @@ go_idle:
                        dequeue_task(next, array);
                        next->prio = new_prio;
                        enqueue_task(next, array);
-               } else
-                       requeue_task(next, array);
+               }
        }
-       next->activated = 0;
+       next->sleep_type = SLEEP_NORMAL;
 switch_tasks:
        if (next == rq->idle)
                schedstat_inc(rq, sched_goidle);
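
Two notes on this final hunk. The weighting, assuming the usual
ON_RUNQUEUE_WEIGHT of 30:

	delta = delta * (30 * 128 / 100) / 128;	/* = delta * 38 / 128, ~30% */

so a task whose "sleep" was really time spent waiting on the runqueue
(SLEEP_INTERACTIVE) has only ~30% of that wait credited when its priority is
recalculated on its way to the CPU, while a SLEEP_INTERRUPTED task keeps full
credit. Separately, the dropped requeue_task() call means a task whose
recalculated priority is unchanged now stays at the head of its queue rather
than being rotated to the tail.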