Merge branches 'release', 'cpuidle-2.6.25' and 'idle' into release
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index bc99b7b..32003fd 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -38,7 +38,7 @@
 #include <linux/dmi.h>
 #include <linux/moduleparam.h>
 #include <linux/sched.h>       /* need_resched() */
-#include <linux/latency.h>
+#include <linux/pm_qos_params.h>
 #include <linux/clockchips.h>
 #include <linux/cpuidle.h>
 
@@ -76,7 +76,11 @@ static void (*pm_idle_save) (void) __read_mostly;
 #define PM_TIMER_TICKS_TO_US(p)                (((p) * 1000)/(PM_TIMER_FREQUENCY/1000))
 
 static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER;
+#ifdef CONFIG_CPU_IDLE
 module_param(max_cstate, uint, 0000);
+#else
+module_param(max_cstate, uint, 0644);
+#endif
 static unsigned int nocst __read_mostly;
 module_param(nocst, uint, 0000);
 
@@ -200,6 +204,23 @@ static inline u32 ticks_elapsed_in_us(u32 t1, u32 t2)
                return PM_TIMER_TICKS_TO_US((0xFFFFFFFF - t1) + t2);
 }
 
+/*
+ * Callers should disable interrupts before the call and enable
+ * interrupts after return.
+ */
+static void acpi_safe_halt(void)
+{
+       current_thread_info()->status &= ~TS_POLLING;
+       /*
+        * TS_POLLING-cleared state must be visible before we
+        * test NEED_RESCHED:
+        */
+       smp_mb();
+       if (!need_resched())
+               safe_halt();
+       current_thread_info()->status |= TS_POLLING;
+}
+
 #ifndef CONFIG_CPU_IDLE
 
 static void
@@ -242,25 +263,12 @@ acpi_processor_power_activate(struct acpi_processor *pr,
        return;
 }
 
-static void acpi_safe_halt(void)
-{
-       current_thread_info()->status &= ~TS_POLLING;
-       /*
-        * TS_POLLING-cleared state must be visible before we
-        * test NEED_RESCHED:
-        */
-       smp_mb();
-       if (!need_resched())
-               safe_halt();
-       current_thread_info()->status |= TS_POLLING;
-}
-
 static atomic_t c3_cpu_count;
 
 /* Common C-state entry for C2, C3, .. */
 static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
 {
-       if (cstate->space_id == ACPI_CSTATE_FFH) {
+       if (cstate->entry_method == ACPI_CSTATE_FFH) {
                /* Call into architectural FFH based C-state */
                acpi_processor_ffh_cstate_enter(cstate);
        } else {
@@ -356,6 +364,26 @@ int acpi_processor_resume(struct acpi_device * device)
        return 0;
 }
 
+#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
+static int tsc_halts_in_c(int state)
+{
+       switch (boot_cpu_data.x86_vendor) {
+       case X86_VENDOR_AMD:
+               /*
+                * AMD Fam10h TSC will tick in all
+                * C/P/S0/S1 states when this bit is set.
+                */
+               if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+                       return 0;
+               /*FALL THROUGH*/
+       case X86_VENDOR_INTEL:
+               /* Several cases known where TSC halts in C2 too */
+       default:
+               return state > ACPI_STATE_C1;
+       }
+}
+#endif
+
 #ifndef CONFIG_CPU_IDLE
 static void acpi_processor_idle(void)
 {
@@ -392,6 +420,8 @@ static void acpi_processor_idle(void)
                        pm_idle_save();
                else
                        acpi_safe_halt();
+
+               local_irq_enable();
                return;
        }
 
@@ -500,6 +530,7 @@ static void acpi_processor_idle(void)
                 *       skew otherwise.
                 */
                sleep_ticks = 0xFFFFFFFF;
+               local_irq_enable();
                break;
 
        case ACPI_STATE_C2:
@@ -515,7 +546,8 @@ static void acpi_processor_idle(void)
 
 #if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
                /* TSC halts in C2, so notify users */
-               mark_tsc_unstable("possible TSC halt in C2");
+               if (tsc_halts_in_c(ACPI_STATE_C2))
+                       mark_tsc_unstable("possible TSC halt in C2");
 #endif
                /* Compute time (ticks) that we were actually asleep */
                sleep_ticks = ticks_elapsed(t1, t2);
@@ -533,6 +565,12 @@ static void acpi_processor_idle(void)
                break;
 
        case ACPI_STATE_C3:
+               acpi_unlazy_tlb(smp_processor_id());
+               /*
+                * Must be done before busmaster disable as we might
+                * need to access HPET !
+                */
+               acpi_state_timer_broadcast(pr, cx, 1);
                /*
                 * disable bus master
                 * bm_check implies we need ARB_DIS
@@ -560,7 +598,6 @@ static void acpi_processor_idle(void)
                /* Get start time (ticks) */
                t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
                /* Invoke C3 */
-               acpi_state_timer_broadcast(pr, cx, 1);
                /* Tell the scheduler that we are going deep-idle: */
                sched_clock_idle_sleep_event();
                acpi_cstate_enter(cx);
@@ -574,7 +611,8 @@ static void acpi_processor_idle(void)
 
 #if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
                /* TSC halts in C3, so notify users */
-               mark_tsc_unstable("TSC halts in C3");
+               if (tsc_halts_in_c(ACPI_STATE_C3))
+                       mark_tsc_unstable("TSC halts in C3");
 #endif
                /* Compute time (ticks) that we were actually asleep */
                sleep_ticks = ticks_elapsed(t1, t2);
@@ -620,7 +658,8 @@ static void acpi_processor_idle(void)
        if (cx->promotion.state &&
            ((cx->promotion.state - pr->power.states) <= max_cstate)) {
                if (sleep_ticks > cx->promotion.threshold.ticks &&
-                 cx->promotion.state->latency <= system_latency_constraint()) {
+                 cx->promotion.state->latency <=
+                               pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) {
                        cx->promotion.count++;
                        cx->demotion.count = 0;
                        if (cx->promotion.count >=
@@ -664,7 +703,8 @@ static void acpi_processor_idle(void)
         * or if the latency of the current state is unacceptable
         */
        if ((pr->power.state - pr->power.states) > max_cstate ||
-               pr->power.state->latency > system_latency_constraint()) {
+               pr->power.state->latency >
+                               pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) {
                if (cx->demotion.state)
                        next_state = cx->demotion.state;
        }
@@ -892,20 +932,20 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
                cx.address = reg->address;
                cx.index = current_count + 1;
 
-               cx.space_id = ACPI_CSTATE_SYSTEMIO;
+               cx.entry_method = ACPI_CSTATE_SYSTEMIO;
                if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
                        if (acpi_processor_ffh_cstate_probe
                                        (pr->id, &cx, reg) == 0) {
-                               cx.space_id = ACPI_CSTATE_FFH;
-                       } else if (cx.type != ACPI_STATE_C1) {
+                               cx.entry_method = ACPI_CSTATE_FFH;
+                       } else if (cx.type == ACPI_STATE_C1) {
                                /*
                                 * C1 is a special case where FIXED_HARDWARE
                                 * can be handled in non-MWAIT way as well.
                                 * In that case, save this _CST entry info.
-                                * That is, we retain space_id of SYSTEM_IO for
-                                * halt based C1.
                                 * Otherwise, ignore this info and continue.
                                 */
+                               cx.entry_method = ACPI_CSTATE_HALT;
+                       } else {
                                continue;
                        }
                }
@@ -1172,7 +1212,7 @@ static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset)
                   "maximum allowed latency: %d usec\n",
                   pr->power.state ? pr->power.state - pr->power.states : 0,
                   max_cstate, (unsigned)pr->power.bm_activity,
-                  system_latency_constraint());
+                  pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY));
 
        seq_puts(seq, "states:\n");
 
@@ -1339,12 +1379,16 @@ static inline void acpi_idle_update_bm_rld(struct acpi_processor *pr,
 /**
  * acpi_idle_do_entry - a helper function that does C2 and C3 type entry
  * @cx: cstate data
+ *
+ * Caller disables interrupt before call and enables interrupt after return.
  */
 static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 {
-       if (cx->space_id == ACPI_CSTATE_FFH) {
+       if (cx->entry_method == ACPI_CSTATE_FFH) {
                /* Call into architectural FFH based C-state */
                acpi_processor_ffh_cstate_enter(cx);
+       } else if (cx->entry_method == ACPI_CSTATE_HALT) {
+               acpi_safe_halt();
        } else {
                int unused;
                /* IO port based C-state */
@@ -1366,29 +1410,27 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 static int acpi_idle_enter_c1(struct cpuidle_device *dev,
                              struct cpuidle_state *state)
 {
+       u32 t1, t2;
        struct acpi_processor *pr;
        struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
+
        pr = processors[smp_processor_id()];
 
        if (unlikely(!pr))
                return 0;
 
+       local_irq_disable();
        if (pr->flags.bm_check)
                acpi_idle_update_bm_rld(pr, cx);
 
-       current_thread_info()->status &= ~TS_POLLING;
-       /*
-        * TS_POLLING-cleared state must be visible before we test
-        * NEED_RESCHED:
-        */
-       smp_mb();
-       if (!need_resched())
-               safe_halt();
-       current_thread_info()->status |= TS_POLLING;
+       t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+       acpi_idle_do_entry(cx);
+       t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
 
+       local_irq_enable();
        cx->usage++;
 
-       return 0;
+       return ticks_elapsed_in_us(t1, t2);
 }
 
 /**
@@ -1402,6 +1444,8 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
        struct acpi_processor *pr;
        struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
        u32 t1, t2;
+       int sleep_ticks = 0;
+
        pr = processors[smp_processor_id()];
 
        if (unlikely(!pr))
@@ -1410,9 +1454,6 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
        if (acpi_idle_suspend)
                return(acpi_idle_enter_c1(dev, state));
 
-       if (pr->flags.bm_check)
-               acpi_idle_update_bm_rld(pr, cx);
-
        local_irq_disable();
        current_thread_info()->status &= ~TS_POLLING;
        /*
@@ -1427,18 +1468,34 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
                return 0;
        }
 
+       acpi_unlazy_tlb(smp_processor_id());
+       /*
+        * Must be done before busmaster disable as we might need to
+        * access HPET !
+        */
+       acpi_state_timer_broadcast(pr, cx, 1);
+
+       if (pr->flags.bm_check)
+               acpi_idle_update_bm_rld(pr, cx);
+
        if (cx->type == ACPI_STATE_C3)
                ACPI_FLUSH_CPU_CACHE();
 
        t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
-       acpi_state_timer_broadcast(pr, cx, 1);
+       /* Tell the scheduler that we are going deep-idle: */
+       sched_clock_idle_sleep_event();
        acpi_idle_do_entry(cx);
        t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
 
 #if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
        /* TSC could halt in idle, so notify users */
-       mark_tsc_unstable("TSC halts in idle");;
+       if (tsc_halts_in_c(cx->type))
+               mark_tsc_unstable("TSC halts in idle");
 #endif
+       sleep_ticks = ticks_elapsed(t1, t2);
+
+       /* Tell the scheduler how much we idled: */
+       sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
 
        local_irq_enable();
        current_thread_info()->status |= TS_POLLING;
@@ -1446,7 +1503,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
        cx->usage++;
 
        acpi_state_timer_broadcast(pr, cx, 0);
-       cx->time += ticks_elapsed(t1, t2);
+       cx->time += sleep_ticks;
        return ticks_elapsed_in_us(t1, t2);
 }
 
@@ -1466,6 +1523,8 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
        struct acpi_processor *pr;
        struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
        u32 t1, t2;
+       int sleep_ticks = 0;
+
        pr = processors[smp_processor_id()];
 
        if (unlikely(!pr))
@@ -1474,6 +1533,17 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
        if (acpi_idle_suspend)
                return(acpi_idle_enter_c1(dev, state));
 
+       if (acpi_idle_bm_check()) {
+               if (dev->safe_state) {
+                       return dev->safe_state->enter(dev, dev->safe_state);
+               } else {
+                       local_irq_disable();
+                       acpi_safe_halt();
+                       local_irq_enable();
+                       return 0;
+               }
+       }
+
        local_irq_disable();
        current_thread_info()->status &= ~TS_POLLING;
        /*
@@ -1488,46 +1558,57 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
                return 0;
        }
 
+       /* Tell the scheduler that we are going deep-idle: */
+       sched_clock_idle_sleep_event();
        /*
         * Must be done before busmaster disable as we might need to
         * access HPET !
         */
        acpi_state_timer_broadcast(pr, cx, 1);
 
-       if (acpi_idle_bm_check()) {
-               cx = pr->power.bm_state;
-
-               acpi_idle_update_bm_rld(pr, cx);
-
-               t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
-               acpi_idle_do_entry(cx);
-               t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
-       } else {
-               acpi_idle_update_bm_rld(pr, cx);
+       acpi_idle_update_bm_rld(pr, cx);
 
+       /*
+        * disable bus master
+        * bm_check implies we need ARB_DIS
+        * !bm_check implies we need cache flush
+        * bm_control implies whether we can do ARB_DIS
+        *
+        * That leaves a case where bm_check is set and bm_control is
+        * not set. In that case we cannot do much, we enter C3
+        * without doing anything.
+        */
+       if (pr->flags.bm_check && pr->flags.bm_control) {
                spin_lock(&c3_lock);
                c3_cpu_count++;
                /* Disable bus master arbitration when all CPUs are in C3 */
                if (c3_cpu_count == num_online_cpus())
                        acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
                spin_unlock(&c3_lock);
+       } else if (!pr->flags.bm_check) {
+               ACPI_FLUSH_CPU_CACHE();
+       }
 
-               t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
-               acpi_idle_do_entry(cx);
-               t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+       t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+       acpi_idle_do_entry(cx);
+       t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
 
+       /* Re-enable bus master arbitration */
+       if (pr->flags.bm_check && pr->flags.bm_control) {
                spin_lock(&c3_lock);
-               /* Re-enable bus master arbitration */
-               if (c3_cpu_count == num_online_cpus())
-                       acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
+               acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
                c3_cpu_count--;
                spin_unlock(&c3_lock);
        }
 
 #if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
        /* TSC could halt in idle, so notify users */
-       mark_tsc_unstable("TSC halts in idle");
+       if (tsc_halts_in_c(ACPI_STATE_C3))
+               mark_tsc_unstable("TSC halts in idle");
 #endif
+       sleep_ticks = ticks_elapsed(t1, t2);
+       /* Tell the scheduler how much we idled: */
+       sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
 
        local_irq_enable();
        current_thread_info()->status |= TS_POLLING;
@@ -1535,7 +1616,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
        cx->usage++;
 
        acpi_state_timer_broadcast(pr, cx, 0);
-       cx->time += ticks_elapsed(t1, t2);
+       cx->time += sleep_ticks;
        return ticks_elapsed_in_us(t1, t2);
 }
 
@@ -1550,7 +1631,7 @@ struct cpuidle_driver acpi_idle_driver = {
  */
 static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
 {
-       int i, count = 0;
+       int i, count = CPUIDLE_DRIVER_STATE_START;
        struct acpi_processor_cx *cx;
        struct cpuidle_state *state;
        struct cpuidle_device *dev = &pr->power.dev;
@@ -1586,13 +1667,16 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
                switch (cx->type) {
                        case ACPI_STATE_C1:
                        state->flags |= CPUIDLE_FLAG_SHALLOW;
+                       state->flags |= CPUIDLE_FLAG_TIME_VALID;
                        state->enter = acpi_idle_enter_c1;
+                       dev->safe_state = state;
                        break;
 
                        case ACPI_STATE_C2:
                        state->flags |= CPUIDLE_FLAG_BALANCED;
                        state->flags |= CPUIDLE_FLAG_TIME_VALID;
                        state->enter = acpi_idle_enter_simple;
+                       dev->safe_state = state;
                        break;
 
                        case ACPI_STATE_C3:
@@ -1606,6 +1690,8 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
                }
 
                count++;
+               if (count == CPUIDLE_STATE_MAX)
+                       break;
        }
 
        dev->state_count = count;
@@ -1613,14 +1699,6 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
        if (!count)
                return -EINVAL;
 
-       /* find the deepest state that can handle active BM */
-       if (pr->flags.bm_check) {
-               for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++)
-                       if (pr->power.states[i].type == ACPI_STATE_C3)
-                               break;
-               pr->power.bm_state = &pr->power.states[i-1];
-       }
-
        return 0;
 }
 
@@ -1661,13 +1739,15 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
 
        if (!first_run) {
                dmi_check_system(processor_power_dmi_table);
+               max_cstate = acpi_processor_cstate_check(max_cstate);
                if (max_cstate < ACPI_C_STATES_MAX)
                        printk(KERN_NOTICE
                               "ACPI: processor limited to max C-state %d\n",
                               max_cstate);
                first_run++;
-#if !defined (CONFIG_CPU_IDLE) && defined (CONFIG_SMP)
-               register_latency_notifier(&acpi_processor_latency_notifier);
+#if !defined(CONFIG_CPU_IDLE) && defined(CONFIG_SMP)
+               pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY,
+                               &acpi_processor_latency_notifier);
 #endif
        }
 
@@ -1754,7 +1834,8 @@ int acpi_processor_power_exit(struct acpi_processor *pr,
                 */
                cpu_idle_wait();
 #ifdef CONFIG_SMP
-               unregister_latency_notifier(&acpi_processor_latency_notifier);
+               pm_qos_remove_notifier(PM_QOS_CPU_DMA_LATENCY,
+                               &acpi_processor_latency_notifier);
 #endif
        }
 #endif
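
For context, a minimal sketch of the pm_qos interface this patch migrates to, replacing the old <linux/latency.h> calls (assuming the 2.6.25-era <linux/pm_qos_params.h> API; the example_* names are illustrative and not part of the patch):

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/pm_qos_params.h>

/* Illustrative notifier: called when the CPU/DMA latency target changes. */
static int example_latency_notify(struct notifier_block *nb,
				  unsigned long new_latency_us, void *data)
{
	/* e.g. re-evaluate which C-states the new constraint still allows */
	return NOTIFY_OK;
}

static struct notifier_block example_latency_nb = {
	.notifier_call = example_latency_notify,
};

static int __init example_init(void)
{
	int max_us;

	/*
	 * pm_qos_requirement() replaces the old system_latency_constraint():
	 * it returns the strictest latency (in usec) any requester allows.
	 */
	max_us = pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY);
	printk(KERN_INFO "CPU/DMA latency constraint: %d usec\n", max_us);

	/* replaces register_latency_notifier() */
	return pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY,
				   &example_latency_nb);
}

static void __exit example_exit(void)
{
	/* replaces unregister_latency_notifier() */
	pm_qos_remove_notifier(PM_QOS_CPU_DMA_LATENCY, &example_latency_nb);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");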