Merge branch 'upstream-linus' of master.kernel.org:/pub/scm/linux/kernel/git/jgarzik...
[pandora-kernel.git] kernel/timer.c
index 8533c37..440048a 100644
@@ -34,6 +34,8 @@
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
 #include <linux/delay.h>
+#include <linux/tick.h>
+#include <linux/kallsyms.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -262,6 +264,18 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
        list_add_tail(&timer->entry, vec);
 }
 
+#ifdef CONFIG_TIMER_STATS
+void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
+{
+       if (timer->start_site)
+               return;
+
+       timer->start_site = addr;
+       memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
+       timer->start_pid = current->pid;
+}
+#endif
+
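/*
 * Editor's note: callers never invoke __timer_stats_timer_set_start_info()
 * directly; they go through an inline wrapper that captures the call site.
 * A minimal sketch of that wrapper (normally living in <linux/timer.h>),
 * assuming the CONFIG_TIMER_STATS split:
 */
#ifdef CONFIG_TIMER_STATS
static inline void timer_stats_timer_set_start_info(struct timer_list *timer)
{
        __timer_stats_timer_set_start_info(timer, __builtin_return_address(0));
}
#else
static inline void timer_stats_timer_set_start_info(struct timer_list *timer) { }
#endif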
 /**
  * init_timer - initialize a timer.
  * @timer: the timer to be initialized
@@ -273,11 +287,16 @@ void fastcall init_timer(struct timer_list *timer)
 {
        timer->entry.next = NULL;
        timer->base = __raw_get_cpu_var(tvec_bases);
+#ifdef CONFIG_TIMER_STATS
+       timer->start_site = NULL;
+       timer->start_pid = -1;
+       memset(timer->start_comm, 0, TASK_COMM_LEN);
+#endif
 }
 EXPORT_SYMBOL(init_timer);
 
 static inline void detach_timer(struct timer_list *timer,
-                                       int clear_pending)
+                               int clear_pending)
 {
        struct list_head *entry = &timer->entry;
 
@@ -324,6 +343,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
        unsigned long flags;
        int ret = 0;
 
+       timer_stats_timer_set_start_info(timer);
        BUG_ON(!timer->function);
 
        base = lock_timer_base(timer, &flags);
@@ -374,6 +394,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
        tvec_base_t *base = per_cpu(tvec_bases, cpu);
        unsigned long flags;
 
+       timer_stats_timer_set_start_info(timer);
        BUG_ON(timer_pending(timer) || !timer->function);
        spin_lock_irqsave(&base->lock, flags);
        timer->base = base;
@@ -406,6 +427,7 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
 {
        BUG_ON(!timer->function);
 
+       timer_stats_timer_set_start_info(timer);
        /*
         * This is a common optimization triggered by the
         * networking code - if the timer is re-modified
@@ -436,6 +458,7 @@ int del_timer(struct timer_list *timer)
        unsigned long flags;
        int ret = 0;
 
+       timer_stats_timer_clear_start_info(timer);
        if (timer_pending(timer)) {
                base = lock_timer_base(timer, &flags);
                if (timer_pending(timer)) {
@@ -569,6 +592,8 @@ static inline void __run_timers(tvec_base_t *base)
                        fn = timer->function;
                        data = timer->data;
 
+                       timer_stats_account_timer(timer);
+
                        set_running_timer(base, timer);
                        detach_timer(timer, 1);
                        spin_unlock_irq(&base->lock);
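/*
 * Editor's note: timer_stats_account_timer() is the consumer of the
 * start-info recorded above; it feeds one expiry event into the stats
 * collector.  A sketch of the <linux/timer.h> side, assuming a
 * timer_stats_update_stats() collector API:
 */
static inline void timer_stats_account_timer(struct timer_list *timer)
{
        timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
                                 timer->function, timer->start_comm);
}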
@@ -591,105 +616,138 @@ static inline void __run_timers(tvec_base_t *base)
        spin_unlock_irq(&base->lock);
 }
 
-#ifdef CONFIG_NO_IDLE_HZ
+#if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ)
 /*
  * Find out when the next timer event is due to happen. This
  * is used on S/390 to stop all activity when a CPU is idle.
  * This function needs to be called with interrupts disabled.
  */
-unsigned long next_timer_interrupt(void)
+static unsigned long __next_timer_interrupt(tvec_base_t *base)
 {
-       tvec_base_t *base;
-       struct list_head *list;
+       unsigned long timer_jiffies = base->timer_jiffies;
+       unsigned long expires = timer_jiffies + (LONG_MAX >> 1);
+       int index, slot, array, found = 0;
        struct timer_list *nte;
-       unsigned long expires;
-       unsigned long hr_expires = MAX_JIFFY_OFFSET;
-       ktime_t hr_delta;
        tvec_t *varray[4];
-       int i, j;
-
-       hr_delta = hrtimer_get_next_event();
-       if (hr_delta.tv64 != KTIME_MAX) {
-               struct timespec tsdelta;
-               tsdelta = ktime_to_timespec(hr_delta);
-               hr_expires = timespec_to_jiffies(&tsdelta);
-               if (hr_expires < 3)
-                       return hr_expires + jiffies;
-       }
-       hr_expires += jiffies;
-
-       base = __get_cpu_var(tvec_bases);
-       spin_lock(&base->lock);
-       expires = base->timer_jiffies + (LONG_MAX >> 1);
-       list = NULL;
 
        /* Look for timer events in tv1. */
-       j = base->timer_jiffies & TVR_MASK;
+       index = slot = timer_jiffies & TVR_MASK;
        do {
-               list_for_each_entry(nte, base->tv1.vec + j, entry) {
+               list_for_each_entry(nte, base->tv1.vec + slot, entry) {
+                       found = 1;
                        expires = nte->expires;
-                       if (j < (base->timer_jiffies & TVR_MASK))
-                               list = base->tv2.vec + (INDEX(0));
-                       goto found;
+                       /* Look at the cascade bucket(s)? */
+                       if (!index || slot < index)
+                               goto cascade;
+                       return expires;
                }
-               j = (j + 1) & TVR_MASK;
-       } while (j != (base->timer_jiffies & TVR_MASK));
+               slot = (slot + 1) & TVR_MASK;
+       } while (slot != index);
+
+cascade:
+       /* Calculate the next cascade event */
+       if (index)
+               timer_jiffies += TVR_SIZE - index;
+       timer_jiffies >>= TVR_BITS;
 
        /* Check tv2-tv5. */
        varray[0] = &base->tv2;
        varray[1] = &base->tv3;
        varray[2] = &base->tv4;
        varray[3] = &base->tv5;
-       for (i = 0; i < 4; i++) {
-               j = INDEX(i);
+
+       for (array = 0; array < 4; array++) {
+               tvec_t *varp = varray[array];
+
+               index = slot = timer_jiffies & TVN_MASK;
                do {
-                       if (list_empty(varray[i]->vec + j)) {
-                               j = (j + 1) & TVN_MASK;
-                               continue;
-                       }
-                       list_for_each_entry(nte, varray[i]->vec + j, entry)
+                       list_for_each_entry(nte, varp->vec + slot, entry) {
+                               found = 1;
                                if (time_before(nte->expires, expires))
                                        expires = nte->expires;
-                       if (j < (INDEX(i)) && i < 3)
-                               list = varray[i + 1]->vec + (INDEX(i + 1));
-                       goto found;
-               } while (j != (INDEX(i)));
-       }
-found:
-       if (list) {
-               /*
-                * The search wrapped. We need to look at the next list
-                * from next tv element that would cascade into tv element
-                * where we found the timer element.
-                */
-               list_for_each_entry(nte, list, entry) {
-                       if (time_before(nte->expires, expires))
-                               expires = nte->expires;
-               }
+                       }
+                       /*
+                        * Are we still searching for the first timer, or
+                        * are we looking at the cascade buckets?
+                        */
+                       if (found) {
+                               /* Look at the cascade bucket(s)? */
+                               if (!index || slot < index)
+                                       break;
+                               return expires;
+                       }
+                       slot = (slot + 1) & TVN_MASK;
+               } while (slot != index);
+
+               if (index)
+                       timer_jiffies += TVN_SIZE - index;
+               timer_jiffies >>= TVN_BITS;
        }
-       spin_unlock(&base->lock);
+       return expires;
+}
+
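/*
 * Editor's note: a standalone illustration (not kernel code) of the bucket
 * arithmetic used by __next_timer_interrupt() above, with the
 * !CONFIG_BASE_SMALL sizes (TVR_BITS = 8, TVN_BITS = 6):
 */
#include <stdio.h>

#define TVR_BITS 8
#define TVN_BITS 6
#define TVR_SIZE (1 << TVR_BITS)
#define TVR_MASK (TVR_SIZE - 1)
#define TVN_MASK ((1 << TVN_BITS) - 1)

int main(void)
{
        unsigned long timer_jiffies = 0x12345678UL;
        unsigned long index = timer_jiffies & TVR_MASK;

        printf("tv1 scan starts at slot %#lx\n", index);        /* 0x78 */

        /* same cascade step as above: round up to the next tv1 wrap,
         * then shift down into tv2 index space */
        if (index)
                timer_jiffies += TVR_SIZE - index;
        timer_jiffies >>= TVR_BITS;
        printf("tv2 scan starts at slot %#lx\n",
               timer_jiffies & TVN_MASK);                        /* 0x17 */
        return 0;
}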
+/*
+ * Check whether the next hrtimer event is due before the next
+ * timer wheel event:
+ */
+static unsigned long cmp_next_hrtimer_event(unsigned long now,
+                                           unsigned long expires)
+{
+       ktime_t hr_delta = hrtimer_get_next_event();
+       struct timespec tsdelta;
+       unsigned long delta;
+
+       if (hr_delta.tv64 == KTIME_MAX)
+               return expires;
 
        /*
-        * It can happen that other CPUs service timer IRQs and increment
-        * jiffies, but we have not yet got a local timer tick to process
-        * the timer wheels.  In that case, the expiry time can be before
-        * jiffies, but since the high-resolution timer here is relative to
-        * jiffies, the default expression when high-resolution timers are
-        * not active,
-        *
-        *   time_before(MAX_JIFFY_OFFSET + jiffies, expires)
-        *
-        * would falsely evaluate to true.  If that is the case, just
-        * return jiffies so that we can immediately fire the local timer
+        * An expired timer is available, let it expire in the next tick.
         */
-       if (time_before(expires, jiffies))
-               return jiffies;
-
-       if (time_before(hr_expires, expires))
-               return hr_expires;
+       if (hr_delta.tv64 <= 0)
+               return now + 1;
 
+       tsdelta = ktime_to_timespec(hr_delta);
+       delta = timespec_to_jiffies(&tsdelta);
+       /*
+        * Take rounding errors into account and make sure that it
+        * expires in the next tick. Otherwise we go into an endless
+        * ping-pong due to tick_nohz_stop_sched_tick() retriggering
+        * the timer softirq.
+        */
+       if (delta < 1)
+               delta = 1;
+       now += delta;
+       if (time_before(now, expires))
+               return now;
        return expires;
 }
+
+/**
+ * get_next_timer_interrupt - return the time (in jiffies) of the next pending timer
+ * @now: current time (in jiffies)
+ */
+unsigned long get_next_timer_interrupt(unsigned long now)
+{
+       tvec_base_t *base = __get_cpu_var(tvec_bases);
+       unsigned long expires;
+
+       spin_lock(&base->lock);
+       expires = __next_timer_interrupt(base);
+       spin_unlock(&base->lock);
+
+       if (time_before_eq(expires, now))
+               return now;
+
+       return cmp_next_hrtimer_event(now, expires);
+}
+
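/*
 * Editor's note: a hedged sketch of how a dynticks (NO_HZ) idle path might
 * consume this interface; the real caller is the tick-sched code
 * (tick_nohz_stop_sched_tick()), and the names below are illustrative only:
 */
static void nohz_stop_tick_sketch(void)
{
        unsigned long now = jiffies;
        unsigned long next_event = get_next_timer_interrupt(now);

        if (next_event - now > 1) {
                /* no timer due on the next tick: stop the periodic tick
                 * and program the clock event device for next_event */
        }
}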
+#ifdef CONFIG_NO_IDLE_HZ
+unsigned long next_timer_interrupt(void)
+{
+       return get_next_timer_interrupt(jiffies);
+}
+#endif
+
 #endif
 
 /******************************************************************/
@@ -817,6 +875,8 @@ int do_settimeofday(struct timespec *tv)
        clock->error = 0;
        ntp_clear();
 
+       update_vsyscall(&xtime, clock);
+
        write_sequnlock_irqrestore(&xtime_lock, flags);
 
        /* signal hrtimers about time change */
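/*
 * Editor's note: update_vsyscall() publishes the new time to any user-space
 * gettimeofday fast path; on architectures without one it compiles away.
 * A sketch of the usual declaration/stub pair, assuming the
 * CONFIG_GENERIC_TIME_VSYSCALL split in <linux/clocksource.h>:
 */
#ifdef CONFIG_GENERIC_TIME_VSYSCALL
extern void update_vsyscall(struct timespec *ts, struct clocksource *c);
#else
static inline void update_vsyscall(struct timespec *ts, struct clocksource *c) { }
#endif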
@@ -832,36 +892,39 @@ EXPORT_SYMBOL(do_settimeofday);
  *
  * Accumulates current time interval and initializes new clocksource
  */
-static int change_clocksource(void)
+static void change_clocksource(void)
 {
        struct clocksource *new;
        cycle_t now;
        u64 nsec;
+
        new = clocksource_get_next();
-       if (clock != new) {
-               now = clocksource_read(new);
-               nsec =  __get_nsec_offset();
-               timespec_add_ns(&xtime, nsec);
-
-               clock = new;
-               clock->cycle_last = now;
-               printk(KERN_INFO "Time: %s clocksource has been installed.\n",
-                      clock->name);
-               return 1;
-       } else if (clock->update_callback) {
-               return clock->update_callback();
-       }
-       return 0;
+
+       if (clock == new)
+               return;
+
+       now = clocksource_read(new);
+       nsec = __get_nsec_offset();
+       timespec_add_ns(&xtime, nsec);
+
+       clock = new;
+       clock->cycle_last = now;
+
+       clock->error = 0;
+       clock->xtime_nsec = 0;
+       clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
+
+       tick_clock_notify();
+
+       printk(KERN_INFO "Time: %s clocksource has been installed.\n",
+              clock->name);
 }
 #else
-static inline int change_clocksource(void)
-{
-       return 0;
-}
+static inline void change_clocksource(void) { }
 #endif
 
 /**
- * timeofday_is_continuous - check to see if timekeeping is free running
+ * timekeeping_is_continuous - check to see if timekeeping is free running
  */
 int timekeeping_is_continuous(void)
 {
@@ -871,33 +934,56 @@ int timekeeping_is_continuous(void)
        do {
                seq = read_seqbegin(&xtime_lock);
 
-               ret = clock->is_continuous;
+               ret = clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
 
        } while (read_seqretry(&xtime_lock, seq));
 
        return ret;
 }
 
+/**
+ * read_persistent_clock - Return time in seconds from the persistent clock.
+ *
+ * Weak dummy function for arches that do not yet support it.
+ * Returns seconds from the epoch using the battery-backed persistent clock.
+ * Returns zero if unsupported.
+ *
+ * XXX - Be sure to remove this once all arches implement it.
+ */
+unsigned long __attribute__((weak)) read_persistent_clock(void)
+{
+       return 0;
+}
+
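/*
 * Editor's note: a hypothetical architecture override of the weak symbol
 * above.  mach_rtc_read_seconds() is an invented board helper, shown only to
 * illustrate the contract (seconds since the epoch, or 0 if unavailable):
 */
unsigned long read_persistent_clock(void)
{
        return mach_rtc_read_seconds();  /* hypothetical RTC accessor */
}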
 /*
  * timekeeping_init - Initializes the clocksource and common timekeeping values
  */
 void __init timekeeping_init(void)
 {
        unsigned long flags;
+       unsigned long sec = read_persistent_clock();
 
        write_seqlock_irqsave(&xtime_lock, flags);
 
        ntp_clear();
 
        clock = clocksource_get_next();
-       clocksource_calculate_interval(clock, tick_nsec);
+       clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
        clock->cycle_last = clocksource_read(clock);
 
+       xtime.tv_sec = sec;
+       xtime.tv_nsec = 0;
+       set_normalized_timespec(&wall_to_monotonic,
+               -xtime.tv_sec, -xtime.tv_nsec);
+
        write_sequnlock_irqrestore(&xtime_lock, flags);
 }
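/*
 * Editor's note: with the persistent-clock seconds folded in, wall time
 * starts at the RTC value while CLOCK_MONOTONIC still starts at zero:
 * monotonic = xtime + wall_to_monotonic = sec + (-sec) = 0 at boot.
 */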
 
-
+/* flag indicating whether timekeeping is suspended */
 static int timekeeping_suspended;
+/* time in seconds when suspend began */
+static unsigned long timekeeping_suspend_time;
+
 /**
  * timekeeping_resume - Resumes the generic timekeeping subsystem.
  * @dev:       unused
@@ -909,13 +995,29 @@ static int timekeeping_suspended;
 static int timekeeping_resume(struct sys_device *dev)
 {
        unsigned long flags;
+       unsigned long now = read_persistent_clock();
 
        write_seqlock_irqsave(&xtime_lock, flags);
-       /* restart the last cycle value */
+
+       if (now && (now > timekeeping_suspend_time)) {
+               unsigned long sleep_length = now - timekeeping_suspend_time;
+
+               xtime.tv_sec += sleep_length;
+               wall_to_monotonic.tv_sec -= sleep_length;
+       }
+       /* re-base the last cycle value */
        clock->cycle_last = clocksource_read(clock);
        clock->error = 0;
        timekeeping_suspended = 0;
        write_sequnlock_irqrestore(&xtime_lock, flags);
+
+       touch_softlockup_watchdog();
+
+       clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
+
+       /* Resume hrtimers */
+       clock_was_set();
+
        return 0;
 }
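/*
 * Editor's note: a worked example of the sleep adjustment above.  If the
 * persistent clock read 1000 s when timekeeping_suspend() ran and reads
 * 1007 s here, sleep_length is 7 s: xtime (wall time) jumps forward by 7 s
 * while wall_to_monotonic moves back by 7 s, so CLOCK_MONOTONIC does not
 * advance across the suspended period.
 */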
 
@@ -925,7 +1027,11 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
 
        write_seqlock_irqsave(&xtime_lock, flags);
        timekeeping_suspended = 1;
+       timekeeping_suspend_time = read_persistent_clock();
        write_sequnlock_irqrestore(&xtime_lock, flags);
+
+       clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
+
        return 0;
 }
 
@@ -1089,11 +1195,8 @@ static void update_wall_time(void)
        clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
 
        /* check to see if there is a new clocksource to use */
-       if (change_clocksource()) {
-               clock->error = 0;
-               clock->xtime_nsec = 0;
-               clocksource_calculate_interval(clock, tick_nsec);
-       }
+       change_clocksource();
+       update_vsyscall(&xtime, clock);
 }
 
 /*
@@ -1162,11 +1265,9 @@ static inline void calc_load(unsigned long ticks)
  * This read-write spinlock protects us from races in SMP while
  * playing with xtime and avenrun.
  */
-#ifndef ARCH_HAVE_XTIME_LOCK
-__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
+__attribute__((weak)) __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
 
 EXPORT_SYMBOL(xtime_lock);
-#endif
 
 /*
  * This function runs timers and the timer-tq in bottom half context.
@@ -1175,7 +1276,8 @@ static void run_timer_softirq(struct softirq_action *h)
 {
        tvec_base_t *base = __get_cpu_var(tvec_bases);
 
-       hrtimer_run_queues();
+       hrtimer_run_queues();
+
        if (time_after_eq(jiffies, base->timer_jiffies))
                __run_timers(base);
 }
@@ -1570,8 +1672,8 @@ static void __devinit migrate_timers(int cpu)
        new_base = get_cpu_var(tvec_bases);
 
        local_irq_disable();
-       spin_lock(&new_base->lock);
-       spin_lock(&old_base->lock);
+       double_spin_lock(&new_base->lock, &old_base->lock,
+                        smp_processor_id() < cpu);
 
        BUG_ON(old_base->running_timer);
 
@@ -1584,8 +1686,8 @@ static void __devinit migrate_timers(int cpu)
                migrate_timer_list(new_base, old_base->tv5.vec + i);
        }
 
-       spin_unlock(&old_base->lock);
-       spin_unlock(&new_base->lock);
+       double_spin_unlock(&new_base->lock, &old_base->lock,
+                          smp_processor_id() < cpu);
        local_irq_enable();
        put_cpu_var(tvec_bases);
 }
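/*
 * Editor's note: double_spin_lock() exists so that both timer bases are
 * always taken in one globally consistent order (decided here by CPU
 * number), which keeps two concurrent migrations from deadlocking on each
 * other.  A sketch of the helper, assuming the <linux/spinlock.h> form:
 */
static inline void
double_spin_lock(spinlock_t *l1, spinlock_t *l2, bool l1_first)
{
        if (l1_first) {
                spin_lock(l1);
                spin_lock(l2);
        } else {
                spin_lock(l2);
                spin_lock(l1);
        }
}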
@@ -1621,6 +1723,8 @@ void __init init_timers(void)
        int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
                                (void *)(long)smp_processor_id());
 
+       init_timer_stats();
+
        BUG_ON(err == NOTIFY_BAD);
        register_cpu_notifier(&timers_nb);
        open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL);