Merge tag 'sched-cputime-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel...
[pandora-kernel.git] kernel/sched/cputime.c
index 80b2fd5..293b202 100644
@@ -288,6 +288,34 @@ static __always_inline bool steal_account_process_tick(void)
        return false;
 }
 
+/*
+ * Accumulate raw cputime values of dead tasks (sig->[us]time) and of
+ * live tasks (summed while iterating over the group), all belonging to
+ * @tsk's group.
+ */
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
+{
+       struct signal_struct *sig = tsk->signal;
+       struct task_struct *t;
+
+       times->utime = sig->utime;
+       times->stime = sig->stime;
+       times->sum_exec_runtime = sig->sum_sched_runtime;
+
+       rcu_read_lock();
+       /* make sure we can trust tsk->thread_group list */
+       if (!likely(pid_alive(tsk)))
+               goto out;
+
+       t = tsk;
+       do {
+               times->utime += t->utime;
+               times->stime += t->stime;
+               times->sum_exec_runtime += task_sched_runtime(t);
+       } while_each_thread(tsk, t);
+out:
+       rcu_read_unlock();
+}
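+
+/*
+ * Illustrative usage (a sketch, not part of this change): a caller that
+ * wants the whole group's raw totals fills a task_cputime on its stack:
+ *
+ *	struct task_cputime times;
+ *
+ *	thread_group_cputime(current, &times);
+ *
+ * after which times.utime and times.stime hold the raw tick based sums
+ * and times.sum_exec_runtime the scheduler's nanosecond runtime total.
+ */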
+
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -417,13 +445,13 @@ void account_idle_ticks(unsigned long ticks)
  * Use precise platform statistics if available:
  */
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
-void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
        *ut = p->utime;
        *st = p->stime;
 }
 
-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
        struct task_cputime cputime;
 
@@ -495,14 +523,30 @@ static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
        return (__force cputime_t) temp;
 }
 
-void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+/*
+ * Adjust the randomly imprecise tick based cputime values against the
+ * precise runtime accounting done by the scheduler.
+ */
+static void cputime_adjust(struct task_cputime *curr,
+                          struct cputime *prev,
+                          cputime_t *ut, cputime_t *st)
 {
-       cputime_t rtime, utime = p->utime, total = utime + p->stime;
+       cputime_t rtime, utime, total;
+
+       utime = curr->utime;
+       total = utime + curr->stime;
 
        /*
-        * Use CFS's precise accounting:
+        * Tick based cputime accounting depends on whether the timer
+        * happens to interrupt a task's scheduling timeslices or not.
+        * Depending on these circumstances, the number of sampled ticks
+        * may over- or under-estimate the real user and system cputime,
+        * so the tick based values only match them with variable precision.
+        *
+        * Fix this by scaling these tick based values against the total
+        * runtime accounted by the CFS scheduler.
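+        *
+        * Illustrative numbers (not from this change): with tick based
+        * samples utime = 2 and stime = 6 (total = 8) but a scheduler
+        * runtime worth rtime = 4 ticks, scale_utime() yields
+        * utime = 2 * 4 / 8 = 1 and, while prev is still zero, stime
+        * becomes rtime - utime = 3, so both values shrink proportionally
+        * to fit the precise runtime.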
         */
-       rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
+       rtime = nsecs_to_cputime(curr->sum_exec_runtime);
 
        if (total)
                utime = scale_utime(utime, rtime, total);
@@ -510,38 +554,36 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
                utime = rtime;
 
        /*
-        * Compare with previous values, to keep monotonicity:
+        * If the tick based count grows faster than the scheduler one,
+        * the result of the scaling may go backward.
+        * Let's enforce monotonicity.
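+        *
+        * For instance (hypothetical values), if a previous call already
+        * reported prev->utime = 5 and a later, noisier sample scales
+        * down to utime = 4, max() keeps the 5 that userspace has seen,
+        * and the reported stime is likewise never allowed to shrink.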
         */
-       p->prev_utime = max(p->prev_utime, utime);
-       p->prev_stime = max(p->prev_stime, rtime - p->prev_utime);
+       prev->utime = max(prev->utime, utime);
+       prev->stime = max(prev->stime, rtime - prev->utime);
 
-       *ut = p->prev_utime;
-       *st = p->prev_stime;
+       *ut = prev->utime;
+       *st = prev->stime;
+}
+
+void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
+{
+       struct task_cputime cputime = {
+               .utime = p->utime,
+               .stime = p->stime,
+               .sum_exec_runtime = p->se.sum_exec_runtime,
+       };
+
+       cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 }
 
 /*
  * Must be called with siglock held.
  */
-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
-       struct signal_struct *sig = p->signal;
        struct task_cputime cputime;
-       cputime_t rtime, utime, total;
 
        thread_group_cputime(p, &cputime);
-
-       total = cputime.utime + cputime.stime;
-       rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
-
-       if (total)
-               utime = scale_utime(cputime.utime, rtime, total);
-       else
-               utime = rtime;
-
-       sig->prev_utime = max(sig->prev_utime, utime);
-       sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime);
-
-       *ut = sig->prev_utime;
-       *st = sig->prev_stime;
+       cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
 }
 #endif