sched/idle: Avoid spurious wakeup IPIs
authorPeter Zijlstra <peterz@infradead.org>
Wed, 9 Apr 2014 13:35:08 +0000 (15:35 +0200)
committerIngo Molnar <mingo@kernel.org>
Thu, 8 May 2014 07:16:58 +0000 (09:16 +0200)
Because mwait_idle_with_hints() gets called from !idle context it must
call current_clr_polling(). This however means that resched_task() is
very likely to send an IPI even when we were polling:

  CPU0 CPU1

  if (current_set_polling_and_test())
    goto out;

  __monitor(&ti->flags);
  if (!need_resched())
    __mwait(eax, ecx);
set_tsk_need_resched(p);
smp_mb();
out:
  current_clr_polling();
if (!tsk_is_polling(p))
  smp_send_reschedule(cpu);

So while it is correct (extra IPIs aren't a problem, whereas a missed
IPI would be) it is a performance problem (for some).

Avoid this issue by using fetch_or() to atomically set NEED_RESCHED
and test if POLLING_NRFLAG is set.

Since a CPU stuck in mwait is unlikely to modify the flags word,
contention on the cmpxchg is unlikely and thus we should mostly
succeed in a single go.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Nicolas Pitre <nico@linaro.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-kf5suce6njh5xf5d3od13rr0@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
kernel/sched/core.c

index 1c9c3b7..4b82622 100644 (file)
@@ -505,6 +505,39 @@ static inline void init_hrtick(void)
 }
 #endif /* CONFIG_SCHED_HRTICK */
 
+/*
+ * cmpxchg based fetch_or, macro so it works for different integer types
+ */
+#define fetch_or(ptr, val)                                             \
+({     typeof(*(ptr)) __old, __val = *(ptr);                           \
+       for (;;) {                                                      \
+               __old = cmpxchg((ptr), __val, __val | (val));           \
+               if (__old == __val)                                     \
+                       break;                                          \
+               __val = __old;                                          \
+       }                                                               \
+       __old;                                                          \
+})
+
+#ifdef TIF_POLLING_NRFLAG
+/*
+ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
+ * this avoids any races wrt polling state changes and thereby avoids
+ * spurious IPIs.
+ */
+static bool set_nr_and_not_polling(struct task_struct *p)
+{
+       struct thread_info *ti = task_thread_info(p);
+       return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG);
+}
+#else
+static bool set_nr_and_not_polling(struct task_struct *p)
+{
+       set_tsk_need_resched(p);
+       return true;
+}
+#endif
+
 /*
  * resched_task - mark a task 'to be rescheduled now'.
  *
@@ -521,17 +554,15 @@ void resched_task(struct task_struct *p)
        if (test_tsk_need_resched(p))
                return;
 
-       set_tsk_need_resched(p);
-
        cpu = task_cpu(p);
+
        if (cpu == smp_processor_id()) {
+               set_tsk_need_resched(p);
                set_preempt_need_resched();
                return;
        }
 
-       /* NEED_RESCHED must be visible before we test polling */
-       smp_mb();
-       if (!tsk_is_polling(p))
+       if (set_nr_and_not_polling(p))
                smp_send_reschedule(cpu);
 }