*/
#define KVM_MEMSLOT_INVALID (1UL << 16)
-/*
- * If we support unaligned MMIO, at most one fragment will be split into two:
- */
-#ifdef KVM_UNALIGNED_MMIO
-# define KVM_EXTRA_MMIO_FRAGMENTS 1
-#else
-# define KVM_EXTRA_MMIO_FRAGMENTS 0
-#endif
-
-#define KVM_USER_MMIO_SIZE 8
-
-#define KVM_MAX_MMIO_FRAGMENTS \
- (KVM_MMIO_SIZE / KVM_USER_MMIO_SIZE + KVM_EXTRA_MMIO_FRAGMENTS)
+/* Two fragments for cross MMIO pages. */
+#define KVM_MAX_MMIO_FRAGMENTS 2
/*
* For the normal pfn, the highest 12 bits should be zero,
* This is running in ioctl context so we can avoid
* the call to vtime_account() with its unnecessary idle check.
*/
- vtime_account_system(current);
+ vtime_account_system_irqsafe(current);
current->flags |= PF_VCPU;
/* KVM does not hold any references to rcu protected data when it
* switches CPU into a guest mode. In fact switching to a guest mode
* This is running in ioctl context so we can avoid
* the call to vtime_account() with its unnecessary idle check.
*/
- vtime_account_system(current);
+ vtime_account_system_irqsafe(current);
current->flags &= ~PF_VCPU;
}
return false;
}
+/*
+ * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
+ * tasks (sum on group iteration) belonging to @tsk's group.
+ */
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
+{
+ struct signal_struct *sig = tsk->signal;
+ struct task_struct *t;
+
+ times->utime = sig->utime;
+ times->stime = sig->stime;
+ times->sum_exec_runtime = sig->sum_sched_runtime;
+
+ rcu_read_lock();
+ /* make sure we can trust tsk->thread_group list */
+ if (!likely(pid_alive(tsk)))
+ goto out;
+
+ t = tsk;
+ do {
+ times->utime += t->utime;
+ times->stime += t->stime;
+ times->sum_exec_runtime += task_sched_runtime(t);
+ } while_each_thread(tsk, t);
+out:
+ rcu_read_unlock();
+}
+
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
* Use precise platform statistics if available:
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
*ut = p->utime;
*st = p->stime;
}
-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
struct task_cputime cputime;
*st = cputime.stime;
}
- void vtime_account_system(struct task_struct *tsk)
+ void vtime_account_system_irqsafe(struct task_struct *tsk)
{
unsigned long flags;
local_irq_save(flags);
- __vtime_account_system(tsk);
+ vtime_account_system(tsk);
local_irq_restore(flags);
}
- EXPORT_SYMBOL_GPL(vtime_account_system);
+ EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe);
+
+ #ifndef __ARCH_HAS_VTIME_TASK_SWITCH
+ void vtime_task_switch(struct task_struct *prev)
+ {
+ if (is_idle_task(prev))
+ vtime_account_idle(prev);
+ else
+ vtime_account_system(prev);
+
+ vtime_account_user(prev);
+ arch_vtime_task_switch(prev);
+ }
+ #endif
/*
* Archs that account the whole time spent in the idle task
* (outside irq) as idle time can rely on this and just implement
- * __vtime_account_system() and __vtime_account_idle(). Archs that
+ * vtime_account_system() and vtime_account_idle(). Archs that
* have other meaning of the idle time (s390 only includes the
* time spent by the CPU when it's in low power mode) must override
* vtime_account().
#ifndef __ARCH_HAS_VTIME_ACCOUNT
void vtime_account(struct task_struct *tsk)
{
- unsigned long flags;
-
- local_irq_save(flags);
-
if (in_interrupt() || !is_idle_task(tsk))
- __vtime_account_system(tsk);
+ vtime_account_system(tsk);
else
- __vtime_account_idle(tsk);
-
- local_irq_restore(flags);
+ vtime_account_idle(tsk);
}
EXPORT_SYMBOL_GPL(vtime_account);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */
return (__force cputime_t) temp;
}
-void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+/*
+ * Adjust tick based cputime random precision against scheduler
+ * runtime accounting.
+ */
+static void cputime_adjust(struct task_cputime *curr,
+ struct cputime *prev,
+ cputime_t *ut, cputime_t *st)
{
- cputime_t rtime, utime = p->utime, total = utime + p->stime;
+ cputime_t rtime, utime, total;
+
+ utime = curr->utime;
+ total = utime + curr->stime;
/*
- * Use CFS's precise accounting:
+ * Tick based cputime accounting depend on random scheduling
+ * timeslices of a task to be interrupted or not by the timer.
+ * Depending on these circumstances, the number of these interrupts
+ * may be over or under-optimistic, matching the real user and system
+ * cputime with a variable precision.
+ *
+ * Fix this by scaling these tick based values against the total
+ * runtime accounted by the CFS scheduler.
*/
- rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
+ rtime = nsecs_to_cputime(curr->sum_exec_runtime);
if (total)
utime = scale_utime(utime, rtime, total);
utime = rtime;
/*
- * Compare with previous values, to keep monotonicity:
+ * If the tick based count grows faster than the scheduler one,
+ * the result of the scaling may go backward.
+ * Let's enforce monotonicity.
*/
- p->prev_utime = max(p->prev_utime, utime);
- p->prev_stime = max(p->prev_stime, rtime - p->prev_utime);
+ prev->utime = max(prev->utime, utime);
+ prev->stime = max(prev->stime, rtime - prev->utime);
- *ut = p->prev_utime;
- *st = p->prev_stime;
+ *ut = prev->utime;
+ *st = prev->stime;
+}
+
+void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
+{
+ struct task_cputime cputime = {
+ .utime = p->utime,
+ .stime = p->stime,
+ .sum_exec_runtime = p->se.sum_exec_runtime,
+ };
+
+ cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}
/*
* Must be called with siglock held.
*/
-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
- struct signal_struct *sig = p->signal;
struct task_cputime cputime;
- cputime_t rtime, utime, total;
thread_group_cputime(p, &cputime);
-
- total = cputime.utime + cputime.stime;
- rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
-
- if (total)
- utime = scale_utime(cputime.utime, rtime, total);
- else
- utime = rtime;
-
- sig->prev_utime = max(sig->prev_utime, utime);
- sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime);
-
- *ut = sig->prev_utime;
- *st = sig->prev_stime;
+ cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
#endif