Merge branch 'linux-next' of git://git.infradead.org/ubifs-2.6
[pandora-kernel.git] / kernel / hrtimer.c
index cb8a15c..9002958 100644 (file)
@@ -43,6 +43,8 @@
 #include <linux/seq_file.h>
 #include <linux/err.h>
 #include <linux/debugobjects.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
 
 #include <asm/uaccess.h>
 
@@ -193,12 +195,24 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
  * Switch the timer base to the current CPU when possible.
  */
 static inline struct hrtimer_clock_base *
-switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
+switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
+                   int pinned)
 {
        struct hrtimer_clock_base *new_base;
        struct hrtimer_cpu_base *new_cpu_base;
+       int cpu, preferred_cpu = -1;
+
+       cpu = smp_processor_id();
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+       if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+               preferred_cpu = get_nohz_load_balancer();
+               if (preferred_cpu >= 0)
+                       cpu = preferred_cpu;
+       }
+#endif
 
-       new_cpu_base = &__get_cpu_var(hrtimer_bases);
+again:
+       new_cpu_base = &per_cpu(hrtimer_bases, cpu);
        new_base = &new_cpu_base->clock_base[base->index];
 
        if (base != new_base) {
@@ -218,6 +232,40 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
                timer->base = NULL;
                spin_unlock(&base->cpu_base->lock);
                spin_lock(&new_base->cpu_base->lock);
+
+               /* Optimized away for NOHZ=n SMP=n */
+               if (cpu == preferred_cpu) {
+                       /* Calculate clock monotonic expiry time */
+#ifdef CONFIG_HIGH_RES_TIMERS
+                       ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
+                                                       new_base->offset);
+#else
+                       ktime_t expires = hrtimer_get_expires(timer);
+#endif
+
+                       /*
+                        * Get the next event on target cpu from the
+                        * clock events layer.
+                        * This covers the highres=off nohz=on case as well.
+                        */
+                       ktime_t next = clockevents_get_next_event(cpu);
+
+                       ktime_t delta = ktime_sub(expires, next);
+
+                       /*
+                        * We do not migrate the timer when it is expiring
+                        * before the next event on the target cpu because
+                        * we cannot reprogram the target cpu hardware and
+                        * we would cause it to fire late.
+                        */
+                       if (delta.tv64 < 0) {
+                               cpu = smp_processor_id();
+                               spin_unlock(&new_base->cpu_base->lock);
+                               spin_lock(&base->cpu_base->lock);
+                               timer->base = base;
+                               goto again;
+                       }
+               }
                timer->base = new_base;
        }
        return new_base;
@@ -235,7 +283,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
        return base;
 }
 
-# define switch_hrtimer_base(t, b)     (b)
+# define switch_hrtimer_base(t, b, p)  (b)
 
 #endif /* !CONFIG_SMP */
 
@@ -332,6 +380,8 @@ ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
        return res;
 }
 
+EXPORT_SYMBOL_GPL(ktime_add_safe);
+
 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 
 static struct debug_obj_descr hrtimer_debug_descr;
@@ -907,9 +957,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
        ret = remove_hrtimer(timer, base);
 
        /* Switch the timer base, if necessary: */
-       new_base = switch_hrtimer_base(timer, base);
+       new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
 
-       if (mode == HRTIMER_MODE_REL) {
+       if (mode & HRTIMER_MODE_REL) {
                tim = ktime_add_safe(tim, new_base->get_time());
                /*
                 * CONFIG_TIME_LOW_RES is a temporary way for architectures