Merge branch 'timers-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 15 Jul 2009 01:35:24 +0000 (18:35 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 15 Jul 2009 01:35:24 +0000 (18:35 -0700)
* 'timers-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  hrtimer: Fix migration expiry check
  hrtimer: migration: do not check expiry time on current CPU
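
In essence, these patches stop reading the target CPU's clock event device for the migration check and instead compare, under the target cpu_base->lock, against the expiry that CPU has already programmed (cpu_base->expires_next). A minimal userspace sketch of that check follows; the real code is the hrtimer_check_target() hunk in kernel/hrtimer.c below, and the migrate_ok() name and plain int64_t nanosecond values here are illustrative only:

#include <stdio.h>
#include <stdint.h>

/*
 * Userspace model of the new hrtimer_check_target() logic: a timer may
 * only be moved to another CPU if it expires after the event that CPU
 * has already programmed, because the remote event device cannot be
 * reprogrammed from here and the timer would otherwise fire late.
 */
static int migrate_ok(int64_t timer_expires, int64_t base_offset,
                      int64_t target_expires_next)
{
        /* Convert the expiry time to the target clock base (monotonic). */
        int64_t expires = timer_expires - base_offset;

        /* Safe only if we expire strictly after the target's next event. */
        return expires > target_expires_next;
}

int main(void)
{
        /* Timer due at 2ms, target CPU already fires at 1ms: safe. */
        printf("%d\n", migrate_ok(2000000, 0, 1000000));

        /* Timer due at 500us, target's next event is at 1ms: it would
         * be serviced late there, so keep it on the current CPU. */
        printf("%d\n", migrate_ok(500000, 0, 1000000));
        return 0;
}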

include/linux/clockchips.h
kernel/hrtimer.c
kernel/time/clockevents.c

include/linux/clockchips.h
index 20a100f..3a1dbba 100644
@@ -143,12 +143,3 @@ extern void clockevents_notify(unsigned long reason, void *arg);
 #endif
 
 #endif
-
-#ifdef CONFIG_GENERIC_CLOCKEVENTS
-extern ktime_t clockevents_get_next_event(int cpu);
-#else
-static inline ktime_t clockevents_get_next_event(int cpu)
-{
-       return (ktime_t) { .tv64 = KTIME_MAX };
-}
-#endif
kernel/hrtimer.c
index 9002958..49da79a 100644
@@ -191,6 +191,46 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
        }
 }
 
+
+/*
+ * Get the preferred target CPU for NOHZ
+ */
+static int hrtimer_get_target(int this_cpu, int pinned)
+{
+#ifdef CONFIG_NO_HZ
+       if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) {
+               int preferred_cpu = get_nohz_load_balancer();
+
+               if (preferred_cpu >= 0)
+                       return preferred_cpu;
+       }
+#endif
+       return this_cpu;
+}
+
+/*
+ * With HIGHRES=y we do not migrate the timer when it is expiring
+ * before the next event on the target cpu because we cannot reprogram
+ * the target cpu hardware and we would cause it to fire late.
+ *
+ * Called with cpu_base->lock of target cpu held.
+ */
+static int
+hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
+{
+#ifdef CONFIG_HIGH_RES_TIMERS
+       ktime_t expires;
+
+       if (!new_base->cpu_base->hres_active)
+               return 0;
+
+       expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
+       return expires.tv64 <= new_base->cpu_base->expires_next.tv64;
+#else
+       return 0;
+#endif
+}
+
 /*
  * Switch the timer base to the current CPU when possible.
  */
@@ -200,16 +240,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
 {
        struct hrtimer_clock_base *new_base;
        struct hrtimer_cpu_base *new_cpu_base;
-       int cpu, preferred_cpu = -1;
-
-       cpu = smp_processor_id();
-#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
-       if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
-               preferred_cpu = get_nohz_load_balancer();
-               if (preferred_cpu >= 0)
-                       cpu = preferred_cpu;
-       }
-#endif
+       int this_cpu = smp_processor_id();
+       int cpu = hrtimer_get_target(this_cpu, pinned);
 
 again:
        new_cpu_base = &per_cpu(hrtimer_bases, cpu);
@@ -217,7 +249,7 @@ again:
 
        if (base != new_base) {
                /*
-                * We are trying to schedule the timer on the local CPU.
+                * We are trying to move timer to new_base.
                 * However we can't change timer's base while it is running,
                 * so we keep it on the same CPU. No hassle vs. reprogramming
                 * the event source in the high resolution case. The softirq
@@ -233,38 +265,12 @@ again:
                spin_unlock(&base->cpu_base->lock);
                spin_lock(&new_base->cpu_base->lock);
 
-               /* Optimized away for NOHZ=n SMP=n */
-               if (cpu == preferred_cpu) {
-                       /* Calculate clock monotonic expiry time */
-#ifdef CONFIG_HIGH_RES_TIMERS
-                       ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
-                                                       new_base->offset);
-#else
-                       ktime_t expires = hrtimer_get_expires(timer);
-#endif
-
-                       /*
-                        * Get the next event on target cpu from the
-                        * clock events layer.
-                        * This covers the highres=off nohz=on case as well.
-                        */
-                       ktime_t next = clockevents_get_next_event(cpu);
-
-                       ktime_t delta = ktime_sub(expires, next);
-
-                       /*
-                        * We do not migrate the timer when it is expiring
-                        * before the next event on the target cpu because
-                        * we cannot reprogram the target cpu hardware and
-                        * we would cause it to fire late.
-                        */
-                       if (delta.tv64 < 0) {
-                               cpu = smp_processor_id();
-                               spin_unlock(&new_base->cpu_base->lock);
-                               spin_lock(&base->cpu_base->lock);
-                               timer->base = base;
-                               goto again;
-                       }
+               if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
+                       cpu = this_cpu;
+                       spin_unlock(&new_base->cpu_base->lock);
+                       spin_lock(&base->cpu_base->lock);
+                       timer->base = base;
+                       goto again;
                }
                timer->base = new_base;
        }
@@ -1276,14 +1282,22 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 
        expires_next.tv64 = KTIME_MAX;
 
+       spin_lock(&cpu_base->lock);
+       /*
+        * We set expires_next to KTIME_MAX here with cpu_base->lock
+        * held to prevent that a timer is enqueued in our queue via
+        * the migration code. This does not affect enqueueing of
+        * timers which run their callback and need to be requeued on
+        * this CPU.
+        */
+       cpu_base->expires_next.tv64 = KTIME_MAX;
+
        base = cpu_base->clock_base;
 
        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
                ktime_t basenow;
                struct rb_node *node;
 
-               spin_lock(&cpu_base->lock);
-
                basenow = ktime_add(now, base->offset);
 
                while ((node = base->first)) {
@@ -1316,11 +1330,15 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 
                        __run_hrtimer(timer);
                }
-               spin_unlock(&cpu_base->lock);
                base++;
        }
 
+       /*
+        * Store the new expiry value so the migration code can verify
+        * against it.
+        */
        cpu_base->expires_next = expires_next;
+       spin_unlock(&cpu_base->lock);
 
        /* Reprogramming necessary ? */
        if (expires_next.tv64 != KTIME_MAX) {
kernel/time/clockevents.c
index 1ad6dd4..a6dcd67 100644
@@ -254,15 +254,4 @@ void clockevents_notify(unsigned long reason, void *arg)
        spin_unlock(&clockevents_lock);
 }
 EXPORT_SYMBOL_GPL(clockevents_notify);
-
-ktime_t clockevents_get_next_event(int cpu)
-{
-       struct tick_device *td;
-       struct clock_event_device *dev;
-
-       td = &per_cpu(tick_cpu_device, cpu);
-       dev = td->evtdev;
-
-       return dev->next_event;
-}
 #endif