/*
 * processor_idle - idle state submodule to the ACPI processor driver
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2004  Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *                      - Added processor hotplug support
 *  Copyright (C) 2005  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *                      - Added support for C3 on SMP
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/dmi.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>        /* need_resched() */
#include <linux/pm_qos_params.h>
#include <linux/clockchips.h>
#include <linux/cpuidle.h>
#include <linux/irqflags.h>

/*
 * Include the apic definitions for x86 to have the APIC timer related defines
 * available also for UP (on SMP it gets magically included via linux/smp.h).
 * asm/acpi.h is not an option, as it would require more include magic. Also
 * creating an empty asm-ia64/apic.h would just trade pest vs. cholera.
 */
#ifdef CONFIG_X86
#include <asm/apic.h>
#endif

#include <asm/io.h>
#include <asm/uaccess.h>

#include <acpi/acpi_bus.h>
#include <acpi/processor.h>
#include <asm/processor.h>

#define PREFIX "ACPI: "

#define ACPI_PROCESSOR_CLASS            "processor"
#define _COMPONENT              ACPI_PROCESSOR_COMPONENT
ACPI_MODULE_NAME("processor_idle");
#define PM_TIMER_TICK_NS                (1000000000ULL/PM_TIMER_FREQUENCY)
#define C2_OVERHEAD                     1       /* 1us */
#define C3_OVERHEAD                     1       /* 1us */
#define PM_TIMER_TICKS_TO_US(p)         (((p) * 1000)/(PM_TIMER_FREQUENCY/1000))
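/*
 * The ACPI PM timer ticks at PM_TIMER_FREQUENCY (3.579545 MHz), i.e.
 * roughly 279 ns per tick, so e.g. PM_TIMER_TICKS_TO_US(3580) works out
 * to about 1000 us.
 */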

static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER;
module_param(max_cstate, uint, 0000);
static unsigned int nocst __read_mostly;
module_param(nocst, uint, 0000);
static unsigned int bm_check_disable __read_mostly;
module_param(bm_check_disable, uint, 0000);

static unsigned int latency_factor __read_mostly = 2;
module_param(latency_factor, uint, 0644);

/*
 * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3.
 * For now disable this. Probably a bug somewhere else.
 *
 * To skip this limit, boot/load with a large max_cstate limit.
 */
static int set_max_cstate(const struct dmi_system_id *id)
{
        if (max_cstate > ACPI_PROCESSOR_MAX_POWER)
                return 0;

        printk(KERN_NOTICE PREFIX "%s detected - limiting to C%ld max_cstate."
               " Override with \"processor.max_cstate=%d\"\n", id->ident,
               (long)id->driver_data, ACPI_PROCESSOR_MAX_POWER + 1);

        max_cstate = (long)id->driver_data;

        return 0;
}

/* Actually this shouldn't be __cpuinitdata, would be better to fix the
   callers to only run once -AK */
static struct dmi_system_id __cpuinitdata processor_power_dmi_table[] = {
        { set_max_cstate, "Clevo 5600D", {
          DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
          DMI_MATCH(DMI_BIOS_VERSION,"SHE845M0.86C.0013.D.0302131307")},
         (void *)2},
        { set_max_cstate, "Pavilion zv5000", {
          DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
          DMI_MATCH(DMI_PRODUCT_NAME,"Pavilion zv5000 (DS502A#ABA)")},
         (void *)1},
        { set_max_cstate, "Asus L8400B", {
          DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
          DMI_MATCH(DMI_PRODUCT_NAME,"L8400B series Notebook PC")},
         (void *)1},
        {},
};
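/*
 * Each entry above caps max_cstate via its driver_data: e.g. (void *)1
 * limits the matched machine to C1 unless the user overrides it with
 * processor.max_cstate on the command line.
 */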

/*
 * Callers should disable interrupts before the call and enable
 * interrupts after return.
 */
static void acpi_safe_halt(void)
{
        current_thread_info()->status &= ~TS_POLLING;
        /*
         * TS_POLLING-cleared state must be visible before we
         * test NEED_RESCHED:
         */
        smp_mb();
        if (!need_resched()) {
                safe_halt();
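                /*
                 * safe_halt() re-enables interrupts before halting
                 * (STI;HLT on x86), so disable them again to restore
                 * the state the caller expects.
                 */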
                local_irq_disable();
        }
        current_thread_info()->status |= TS_POLLING;
}

#ifdef ARCH_APICTIMER_STOPS_ON_C3

/*
 * Some BIOS implementations switch to C3 in the published C2 state.
 * This seems to be a common problem on AMD boxen, but other vendors
 * are affected too. We pick the most conservative approach: we assume
 * that the local APIC stops in both C2 and C3.
 */
static void lapic_timer_check_state(int state, struct acpi_processor *pr,
                                   struct acpi_processor_cx *cx)
{
        struct acpi_processor_power *pwr = &pr->power;
        u8 type = local_apic_timer_c2_ok ? ACPI_STATE_C3 : ACPI_STATE_C2;

        if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT))
                return;

        if (c1e_detected)
                type = ACPI_STATE_C1;

        /*
         * Check if one of the previous states already marked the lapic
         * as unstable
         */
        if (pwr->timer_broadcast_on_state < state)
                return;

        if (cx->type >= type)
                pwr->timer_broadcast_on_state = state;
}

static void __lapic_timer_propagate_broadcast(void *arg)
{
        struct acpi_processor *pr = (struct acpi_processor *) arg;
        unsigned long reason;

        reason = pr->power.timer_broadcast_on_state < INT_MAX ?
                CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF;

        clockevents_notify(reason, &pr->id);
}

static void lapic_timer_propagate_broadcast(struct acpi_processor *pr)
{
        smp_call_function_single(pr->id, __lapic_timer_propagate_broadcast,
                                 (void *)pr, 1);
}

/* Power(C) State timer broadcast control */
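/*
 * Entering a C-state deep enough to stop the local APIC timer hands
 * tick duties over to the broadcast clockevent device (e.g. HPET or
 * PIT) via CLOCK_EVT_NOTIFY_BROADCAST_ENTER, and takes them back with
 * _EXIT on the way out.
 */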
static void lapic_timer_state_broadcast(struct acpi_processor *pr,
                                       struct acpi_processor_cx *cx,
                                       int broadcast)
{
        int state = cx - pr->power.states;

        if (state >= pr->power.timer_broadcast_on_state) {
                unsigned long reason;

                reason = broadcast ? CLOCK_EVT_NOTIFY_BROADCAST_ENTER :
                        CLOCK_EVT_NOTIFY_BROADCAST_EXIT;
                clockevents_notify(reason, &pr->id);
        }
}

#else

static void lapic_timer_check_state(int state, struct acpi_processor *pr,
                                   struct acpi_processor_cx *cstate) { }
static void lapic_timer_propagate_broadcast(struct acpi_processor *pr) { }
static void lapic_timer_state_broadcast(struct acpi_processor *pr,
                                       struct acpi_processor_cx *cx,
                                       int broadcast)
{
}

#endif

/*
 * Suspend / resume control
 */
static int acpi_idle_suspend;
static u32 saved_bm_rld;

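/*
 * Some BIOSes clear BM_RLD across S3, which would keep bus-master
 * activity from waking the CPU out of C3 after resume, so save it on
 * suspend and put it back if it changed.
 */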
static void acpi_idle_bm_rld_save(void)
{
        acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &saved_bm_rld);
}
static void acpi_idle_bm_rld_restore(void)
{
        u32 resumed_bm_rld;

        acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &resumed_bm_rld);

        if (resumed_bm_rld != saved_bm_rld)
                acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, saved_bm_rld);
}

int acpi_processor_suspend(struct acpi_device *device, pm_message_t state)
{
        if (acpi_idle_suspend == 1)
                return 0;

        acpi_idle_bm_rld_save();
        acpi_idle_suspend = 1;
        return 0;
}

int acpi_processor_resume(struct acpi_device *device)
{
        if (acpi_idle_suspend == 0)
                return 0;

        acpi_idle_bm_rld_restore();
        acpi_idle_suspend = 0;
        return 0;
}

#if defined(CONFIG_X86)
static void tsc_check_state(int state)
{
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
        case X86_VENDOR_INTEL:
                /*
                 * AMD Fam10h TSC will tick in all
                 * C/P/S0/S1 states when this bit is set.
                 */
                if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
                        return;

                /*FALL THROUGH*/
        default:
                /* TSC could halt in idle, so notify users */
                if (state > ACPI_STATE_C1)
                        mark_tsc_unstable("TSC halts in idle");
        }
}
#else
static void tsc_check_state(int state) { return; }
#endif

280
281 static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
282 {
283
284         if (!pr)
285                 return -EINVAL;
286
287         if (!pr->pblk)
288                 return -ENODEV;
289
290         /* if info is obtained from pblk/fadt, type equals state */
291         pr->power.states[ACPI_STATE_C2].type = ACPI_STATE_C2;
292         pr->power.states[ACPI_STATE_C3].type = ACPI_STATE_C3;
293
294 #ifndef CONFIG_HOTPLUG_CPU
295         /*
296          * Check for P_LVL2_UP flag before entering C2 and above on
297          * an SMP system.
298          */
299         if ((num_online_cpus() > 1) &&
300             !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
301                 return -ENODEV;
302 #endif
303
304         /* determine C2 and C3 address from pblk */
305         pr->power.states[ACPI_STATE_C2].address = pr->pblk + 4;
306         pr->power.states[ACPI_STATE_C3].address = pr->pblk + 5;
307
308         /* determine latencies from FADT */
309         pr->power.states[ACPI_STATE_C2].latency = acpi_gbl_FADT.C2latency;
310         pr->power.states[ACPI_STATE_C3].latency = acpi_gbl_FADT.C3latency;
311
312         /*
313          * FADT specified C2 latency must be less than or equal to
314          * 100 microseconds.
315          */
316         if (acpi_gbl_FADT.C2latency > ACPI_PROCESSOR_MAX_C2_LATENCY) {
317                 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
318                         "C2 latency too large [%d]\n", acpi_gbl_FADT.C2latency));
319                 /* invalidate C2 */
320                 pr->power.states[ACPI_STATE_C2].address = 0;
321         }
322
323         /*
324          * FADT supplied C3 latency must be less than or equal to
325          * 1000 microseconds.
326          */
327         if (acpi_gbl_FADT.C3latency > ACPI_PROCESSOR_MAX_C3_LATENCY) {
328                 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
329                         "C3 latency too large [%d]\n", acpi_gbl_FADT.C3latency));
330                 /* invalidate C3 */
331                 pr->power.states[ACPI_STATE_C3].address = 0;
332         }
333
334         ACPI_DEBUG_PRINT((ACPI_DB_INFO,
335                           "lvl2[0x%08x] lvl3[0x%08x]\n",
336                           pr->power.states[ACPI_STATE_C2].address,
337                           pr->power.states[ACPI_STATE_C3].address));
338
339         return 0;
340 }
341
342 static int acpi_processor_get_power_info_default(struct acpi_processor *pr)
343 {
344         if (!pr->power.states[ACPI_STATE_C1].valid) {
345                 /* set the first C-State to C1 */
346                 /* all processors need to support C1 */
347                 pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
348                 pr->power.states[ACPI_STATE_C1].valid = 1;
349                 pr->power.states[ACPI_STATE_C1].entry_method = ACPI_CSTATE_HALT;
350         }
351         /* the C0 state only exists as a filler in our array */
352         pr->power.states[ACPI_STATE_C0].valid = 1;
353         return 0;
354 }
355
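/*
 * Sketch of the _CST package layout this parser expects (per the ACPI
 * spec):
 *
 *      Package {
 *              Count,                  // number of C-state entries
 *              Package {
 *                      Register,       // buffer: struct acpi_power_register
 *                      Type,           // integer: ACPI_STATE_Cn
 *                      Latency,        // integer: worst-case latency, us
 *                      Power           // integer: average power, mW
 *              },
 *              ...
 *      }
 */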
static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
{
        acpi_status status = 0;
        u64 count;
        int current_count;
        int i;
        struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
        union acpi_object *cst;

        if (nocst)
                return -ENODEV;

        current_count = 0;

        status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer);
        if (ACPI_FAILURE(status)) {
                ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _CST, giving up\n"));
                return -ENODEV;
        }

        cst = buffer.pointer;

        /* There must be at least 2 elements */
        if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) {
                printk(KERN_ERR PREFIX "not enough elements in _CST\n");
                status = -EFAULT;
                goto end;
        }

        count = cst->package.elements[0].integer.value;

        /* Validate number of power states. */
        if (count < 1 || count != cst->package.count - 1) {
                printk(KERN_ERR PREFIX "count given by _CST is not valid\n");
                status = -EFAULT;
                goto end;
        }

        /* Tell driver that at least _CST is supported. */
        pr->flags.has_cst = 1;

        for (i = 1; i <= count; i++) {
                union acpi_object *element;
                union acpi_object *obj;
                struct acpi_power_register *reg;
                struct acpi_processor_cx cx;

                memset(&cx, 0, sizeof(cx));

                element = &(cst->package.elements[i]);
                if (element->type != ACPI_TYPE_PACKAGE)
                        continue;

                if (element->package.count != 4)
                        continue;

                obj = &(element->package.elements[0]);

                if (obj->type != ACPI_TYPE_BUFFER)
                        continue;

                reg = (struct acpi_power_register *)obj->buffer.pointer;

                if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO &&
                    reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)
                        continue;

                /* There should be an easy way to extract an integer... */
                obj = &(element->package.elements[1]);
                if (obj->type != ACPI_TYPE_INTEGER)
                        continue;

                cx.type = obj->integer.value;
                /*
                 * Some buggy BIOSes won't list C1 in _CST -
                 * Let acpi_processor_get_power_info_default() handle them later
                 */
                if (i == 1 && cx.type != ACPI_STATE_C1)
                        current_count++;

                cx.address = reg->address;
                cx.index = current_count + 1;

                cx.entry_method = ACPI_CSTATE_SYSTEMIO;
                if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
                        if (acpi_processor_ffh_cstate_probe
                                        (pr->id, &cx, reg) == 0) {
                                cx.entry_method = ACPI_CSTATE_FFH;
                        } else if (cx.type == ACPI_STATE_C1) {
                                /*
                                 * C1 is a special case where FIXED_HARDWARE
                                 * can be handled in non-MWAIT way as well.
                                 * In that case, save this _CST entry info.
                                 * Otherwise, ignore this info and continue.
                                 */
                                cx.entry_method = ACPI_CSTATE_HALT;
                                snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
                        } else {
                                continue;
                        }
                        if (cx.type == ACPI_STATE_C1 &&
                                        (idle_halt || idle_nomwait)) {
                                /*
                                 * In most cases the C1 space_id obtained from
                                 * _CST object is FIXED_HARDWARE access mode.
                                 * But when the option of idle=halt is added,
                                 * the entry_method type should be changed from
                                 * CSTATE_FFH to CSTATE_HALT.
                                 * When the option of idle=nomwait is added,
                                 * the C1 entry_method type should be
                                 * CSTATE_HALT.
                                 */
                                cx.entry_method = ACPI_CSTATE_HALT;
                                snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
                        }
                } else {
                        snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI IOPORT 0x%x",
                                 cx.address);
                }

                if (cx.type == ACPI_STATE_C1)
                        cx.valid = 1;

                obj = &(element->package.elements[2]);
                if (obj->type != ACPI_TYPE_INTEGER)
                        continue;

                cx.latency = obj->integer.value;

                obj = &(element->package.elements[3]);
                if (obj->type != ACPI_TYPE_INTEGER)
                        continue;

                cx.power = obj->integer.value;

                current_count++;
                memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));

                /*
                 * We support a total of ACPI_PROCESSOR_MAX_POWER - 1 states
                 * (indices 1 through ACPI_PROCESSOR_MAX_POWER - 1)
                 */
                if (current_count >= (ACPI_PROCESSOR_MAX_POWER - 1)) {
                        printk(KERN_WARNING
                               "Limiting number of power states to max (%d)\n",
                               ACPI_PROCESSOR_MAX_POWER);
                        printk(KERN_WARNING
                               "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n");
                        break;
                }
        }

        ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d power states\n",
                          current_count));

        /* Validate number of power states discovered */
        if (current_count < 2)
                status = -EFAULT;

end:
        kfree(buffer.pointer);

        return status;
}

static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
                                           struct acpi_processor_cx *cx)
{
        static int bm_check_flag = -1;
        static int bm_control_flag = -1;

        if (!cx->address)
                return;

        /*
         * PIIX4 Erratum #18: We don't support C3 when Type-F (fast)
         * DMA transfers are used by any ISA device to avoid livelock.
         * Note that we could disable Type-F DMA (as recommended by
         * the erratum), but this is known to disrupt certain ISA
         * devices thus we take the conservative approach.
         */
        if (errata.piix4.fdma) {
                ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                                  "C3 not supported on PIIX4 with Type-F DMA\n"));
                return;
        }

        /* All the logic here assumes flags.bm_check is the same across all CPUs */
        if (bm_check_flag == -1) {
                /* Determine whether bm_check is needed based on CPU */
                acpi_processor_power_init_bm_check(&(pr->flags), pr->id);
                bm_check_flag = pr->flags.bm_check;
                bm_control_flag = pr->flags.bm_control;
        } else {
                pr->flags.bm_check = bm_check_flag;
                pr->flags.bm_control = bm_control_flag;
        }

        if (pr->flags.bm_check) {
                if (!pr->flags.bm_control) {
                        if (pr->flags.has_cst != 1) {
                                /* bus mastering control is necessary */
                                ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                                        "C3 support requires BM control\n"));
                                return;
                        } else {
                                /* Here we enter C3 without bus mastering */
                                ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                                        "C3 support without BM control\n"));
                        }
                }
        } else {
                /*
                 * WBINVD must be supported (set in the FADT) for C3 to
                 * be usable when bm_check is not required.
                 */
                if (!(acpi_gbl_FADT.flags & ACPI_FADT_WBINVD)) {
                        ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                                          "Cache invalidation should work properly"
                                          " for C3 to be enabled on SMP systems\n"));
                        return;
                }
        }

        /*
         * Otherwise we've met all of our C3 requirements.
         * Normalize the C3 latency to expedite policy.  Enable
         * checking of bus mastering status (bm_check) so we can
         * use this in our C3 policy
         */
        cx->valid = 1;

        cx->latency_ticks = cx->latency;
        /*
         * On older chipsets, BM_RLD needs to be set
         * in order for Bus Master activity to wake the
         * system from C3.  Newer chipsets handle DMA
         * during C3 automatically and BM_RLD is a NOP.
         * In either case, the proper way to
         * handle BM_RLD is to set it and leave it set.
         */
        acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 1);

        return;
}

static int acpi_processor_power_verify(struct acpi_processor *pr)
{
        unsigned int i;
        unsigned int working = 0;

        pr->power.timer_broadcast_on_state = INT_MAX;

        for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
                struct acpi_processor_cx *cx = &pr->power.states[i];

                switch (cx->type) {
                case ACPI_STATE_C1:
                        cx->valid = 1;
                        break;

                case ACPI_STATE_C2:
                        if (!cx->address)
                                break;
                        cx->valid = 1;
                        cx->latency_ticks = cx->latency; /* Normalize latency */
                        break;

                case ACPI_STATE_C3:
                        acpi_processor_power_verify_c3(pr, cx);
                        break;
                }
                if (!cx->valid)
                        continue;

                lapic_timer_check_state(i, pr, cx);
                tsc_check_state(cx->type);
                working++;
        }

        lapic_timer_propagate_broadcast(pr);

        return working;
}

static int acpi_processor_get_power_info(struct acpi_processor *pr)
{
        unsigned int i;
        int result;

        /* NOTE: the idle thread may not be running while calling
         * this function */

        /* Zero initialize all the C-states info. */
        memset(pr->power.states, 0, sizeof(pr->power.states));

        result = acpi_processor_get_power_info_cst(pr);
        if (result == -ENODEV)
                result = acpi_processor_get_power_info_fadt(pr);

        if (result)
                return result;

        acpi_processor_get_power_info_default(pr);

        pr->power.count = acpi_processor_power_verify(pr);

        /*
         * if one state of type C2 or C3 is available, mark this
         * CPU as being "idle manageable"
         */
        for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
                if (pr->power.states[i].valid) {
                        pr->power.count = i;
                        if (pr->power.states[i].type >= ACPI_STATE_C2)
                                pr->flags.power = 1;
                }
        }

        return 0;
}

/**
 * acpi_idle_bm_check - checks if bus master activity was detected
 */
static int acpi_idle_bm_check(void)
{
        u32 bm_status = 0;

        if (bm_check_disable)
                return 0;

        acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
        if (bm_status)
                acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
        /*
         * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
         * the true state of bus mastering activity, forcing us to
         * manually check the BMIDEA bit of each IDE channel.
         */
        else if (errata.piix4.bmisx) {
                if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
                    || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
                        bm_status = 1;
        }
        return bm_status;
}

/**
 * acpi_idle_do_entry - a helper function that does C2 and C3 type entry
 * @cx: cstate data
 *
 * Caller disables interrupt before call and enables interrupt after return.
 */
static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
{
        /* Don't trace irqs off for idle */
        stop_critical_timings();
        if (cx->entry_method == ACPI_CSTATE_FFH) {
                /* Call into architectural FFH based C-state */
                acpi_processor_ffh_cstate_enter(cx);
        } else if (cx->entry_method == ACPI_CSTATE_HALT) {
                acpi_safe_halt();
        } else {
                /* IO port based C-state */
                inb(cx->address);
                /* Dummy wait op - must do something useless after P_LVL2 read
                   because chipsets cannot guarantee that the STPCLK# signal
                   gets asserted in time to freeze execution properly. */
                inl(acpi_gbl_FADT.xpm_timer_block.address);
        }
        start_critical_timings();
}

/**
 * acpi_idle_enter_c1 - enters an ACPI C1 state-type
 * @dev: the target CPU
 * @state: the state data
 *
 * This is equivalent to the HALT instruction.
 */
static int acpi_idle_enter_c1(struct cpuidle_device *dev,
                              struct cpuidle_state *state)
{
        ktime_t  kt1, kt2;
        s64 idle_time;
        struct acpi_processor *pr;
        struct acpi_processor_cx *cx = cpuidle_get_statedata(state);

        pr = __get_cpu_var(processors);

        if (unlikely(!pr))
                return 0;

        local_irq_disable();

        /* Do not access any ACPI IO ports in suspend path */
        if (acpi_idle_suspend) {
                local_irq_enable();
                cpu_relax();
                return 0;
        }

        lapic_timer_state_broadcast(pr, cx, 1);
        kt1 = ktime_get_real();
        acpi_idle_do_entry(cx);
        kt2 = ktime_get_real();
        idle_time = ktime_to_us(ktime_sub(kt2, kt1));

        local_irq_enable();
        cx->usage++;
        lapic_timer_state_broadcast(pr, cx, 0);

        return idle_time;
}

/**
 * acpi_idle_enter_simple - enters an ACPI state without BM handling
 * @dev: the target CPU
 * @state: the state data
 */
static int acpi_idle_enter_simple(struct cpuidle_device *dev,
                                  struct cpuidle_state *state)
{
        struct acpi_processor *pr;
        struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
        ktime_t  kt1, kt2;
        s64 idle_time_ns;
        s64 idle_time;

        pr = __get_cpu_var(processors);

        if (unlikely(!pr))
                return 0;

        if (acpi_idle_suspend)
                return acpi_idle_enter_c1(dev, state);

        local_irq_disable();

        if (cx->entry_method != ACPI_CSTATE_FFH) {
                current_thread_info()->status &= ~TS_POLLING;
                /*
                 * TS_POLLING-cleared state must be visible before we test
                 * NEED_RESCHED:
                 */
                smp_mb();

                if (unlikely(need_resched())) {
                        current_thread_info()->status |= TS_POLLING;
                        local_irq_enable();
                        return 0;
                }
        }

        /*
         * Must be done before busmaster disable as we might need to
         * access HPET !
         */
        lapic_timer_state_broadcast(pr, cx, 1);

        if (cx->type == ACPI_STATE_C3)
                ACPI_FLUSH_CPU_CACHE();

        kt1 = ktime_get_real();
        /* Tell the scheduler that we are going deep-idle: */
        sched_clock_idle_sleep_event();
        acpi_idle_do_entry(cx);
        kt2 = ktime_get_real();
        idle_time_ns = ktime_to_ns(ktime_sub(kt2, kt1));
        idle_time = idle_time_ns;
        do_div(idle_time, NSEC_PER_USEC);

        /* Tell the scheduler how much we idled: */
        sched_clock_idle_wakeup_event(idle_time_ns);

        local_irq_enable();
        if (cx->entry_method != ACPI_CSTATE_FFH)
                current_thread_info()->status |= TS_POLLING;

        cx->usage++;

        lapic_timer_state_broadcast(pr, cx, 0);
        cx->time += idle_time;
        return idle_time;
}

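/*
 * c3_lock serializes c3_cpu_count, which tracks how many CPUs are in
 * C3 so that bus-master arbitration is only disabled once every online
 * CPU has entered C3.
 */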
static int c3_cpu_count;
static DEFINE_SPINLOCK(c3_lock);

/**
 * acpi_idle_enter_bm - enters C3 with proper BM handling
 * @dev: the target CPU
 * @state: the state data
 *
 * If BM is detected, the deepest non-C3 idle state is entered instead.
 */
static int acpi_idle_enter_bm(struct cpuidle_device *dev,
                              struct cpuidle_state *state)
{
        struct acpi_processor *pr;
        struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
        ktime_t  kt1, kt2;
        s64 idle_time_ns;
        s64 idle_time;

        pr = __get_cpu_var(processors);

        if (unlikely(!pr))
                return 0;

        if (acpi_idle_suspend)
                return acpi_idle_enter_c1(dev, state);

        if (!cx->bm_sts_skip && acpi_idle_bm_check()) {
                if (dev->safe_state) {
                        dev->last_state = dev->safe_state;
                        return dev->safe_state->enter(dev, dev->safe_state);
                } else {
                        local_irq_disable();
                        acpi_safe_halt();
                        local_irq_enable();
                        return 0;
                }
        }

        local_irq_disable();

        if (cx->entry_method != ACPI_CSTATE_FFH) {
                current_thread_info()->status &= ~TS_POLLING;
                /*
                 * TS_POLLING-cleared state must be visible before we test
                 * NEED_RESCHED:
                 */
                smp_mb();

                if (unlikely(need_resched())) {
                        current_thread_info()->status |= TS_POLLING;
                        local_irq_enable();
                        return 0;
                }
        }

        acpi_unlazy_tlb(smp_processor_id());

        /* Tell the scheduler that we are going deep-idle: */
        sched_clock_idle_sleep_event();
        /*
         * Must be done before busmaster disable as we might need to
         * access HPET !
         */
        lapic_timer_state_broadcast(pr, cx, 1);

        kt1 = ktime_get_real();
        /*
         * disable bus master
         * bm_check implies we need ARB_DIS
         * !bm_check implies we need cache flush
         * bm_control implies whether we can do ARB_DIS
         *
         * That leaves a case where bm_check is set and bm_control is
         * not set. In that case we cannot do much, we enter C3
         * without doing anything.
         */
        if (pr->flags.bm_check && pr->flags.bm_control) {
                spin_lock(&c3_lock);
                c3_cpu_count++;
                /* Disable bus master arbitration when all CPUs are in C3 */
                if (c3_cpu_count == num_online_cpus())
                        acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1);
                spin_unlock(&c3_lock);
        } else if (!pr->flags.bm_check) {
                ACPI_FLUSH_CPU_CACHE();
        }

        acpi_idle_do_entry(cx);

        /* Re-enable bus master arbitration */
        if (pr->flags.bm_check && pr->flags.bm_control) {
                spin_lock(&c3_lock);
                acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0);
                c3_cpu_count--;
                spin_unlock(&c3_lock);
        }
        kt2 = ktime_get_real();
        idle_time_ns = ktime_to_ns(ktime_sub(kt2, kt1));
        idle_time = idle_time_ns;
        do_div(idle_time, NSEC_PER_USEC);

        /* Tell the scheduler how much we idled: */
        sched_clock_idle_wakeup_event(idle_time_ns);

        local_irq_enable();
        if (cx->entry_method != ACPI_CSTATE_FFH)
                current_thread_info()->status |= TS_POLLING;

        cx->usage++;

        lapic_timer_state_broadcast(pr, cx, 0);
        cx->time += idle_time;
        return idle_time;
}

struct cpuidle_driver acpi_idle_driver = {
        .name =         "acpi_idle",
        .owner =        THIS_MODULE,
};

/**
 * acpi_processor_setup_cpuidle - prepares and configures CPUIDLE
 * @pr: the ACPI processor
 */
static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
{
        int i, count = CPUIDLE_DRIVER_STATE_START;
        struct acpi_processor_cx *cx;
        struct cpuidle_state *state;
        struct cpuidle_device *dev = &pr->power.dev;

        if (!pr->flags.power_setup_done)
                return -EINVAL;

        if (pr->flags.power == 0)
                return -EINVAL;

        dev->cpu = pr->id;
        for (i = 0; i < CPUIDLE_STATE_MAX; i++) {
                dev->states[i].name[0] = '\0';
                dev->states[i].desc[0] = '\0';
        }

        if (max_cstate == 0)
                max_cstate = 1;

        for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
                cx = &pr->power.states[i];
                state = &dev->states[count];

                if (!cx->valid)
                        continue;

#ifdef CONFIG_HOTPLUG_CPU
                if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
                    !pr->flags.has_cst &&
                    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
                        continue;
#endif
                cpuidle_set_statedata(state, cx);

                snprintf(state->name, CPUIDLE_NAME_LEN, "C%d", i);
                strncpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
                state->exit_latency = cx->latency;
                state->target_residency = cx->latency * latency_factor;
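                /*
                 * e.g. a C3 exit latency of 100 us with the default
                 * latency_factor of 2 gives a 200 us target residency.
                 */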

                state->flags = 0;
                switch (cx->type) {
                case ACPI_STATE_C1:
                        state->flags |= CPUIDLE_FLAG_SHALLOW;
                        if (cx->entry_method == ACPI_CSTATE_FFH)
                                state->flags |= CPUIDLE_FLAG_TIME_VALID;

                        state->enter = acpi_idle_enter_c1;
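                        /*
                         * safe_state is the fallback acpi_idle_enter_bm()
                         * drops to when bus-master activity makes C3
                         * unattractive.
                         */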
                        dev->safe_state = state;
                        break;

                case ACPI_STATE_C2:
                        state->flags |= CPUIDLE_FLAG_BALANCED;
                        state->flags |= CPUIDLE_FLAG_TIME_VALID;
                        state->enter = acpi_idle_enter_simple;
                        dev->safe_state = state;
                        break;

                case ACPI_STATE_C3:
                        state->flags |= CPUIDLE_FLAG_DEEP;
                        state->flags |= CPUIDLE_FLAG_TIME_VALID;
                        state->flags |= CPUIDLE_FLAG_CHECK_BM;
                        state->enter = pr->flags.bm_check ?
                                        acpi_idle_enter_bm :
                                        acpi_idle_enter_simple;
                        break;
                }

                count++;
                if (count == CPUIDLE_STATE_MAX)
                        break;
        }

        dev->state_count = count;

        if (!count)
                return -EINVAL;

        return 0;
}

int acpi_processor_cst_has_changed(struct acpi_processor *pr)
{
        int ret = 0;

        if (boot_option_idle_override)
                return 0;

        if (!pr)
                return -EINVAL;

        if (nocst)
                return -ENODEV;

        if (!pr->flags.power_setup_done)
                return -ENODEV;

        cpuidle_pause_and_lock();
        cpuidle_disable_device(&pr->power.dev);
        acpi_processor_get_power_info(pr);
        if (pr->flags.power) {
                acpi_processor_setup_cpuidle(pr);
                ret = cpuidle_enable_device(&pr->power.dev);
        }
        cpuidle_resume_and_unlock();

        return ret;
}

int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
                              struct acpi_device *device)
{
        acpi_status status = 0;
        static int first_run;

        if (boot_option_idle_override)
                return 0;

        if (!first_run) {
                if (idle_halt) {
                        /*
                         * When the "idle=halt" boot option is used, HLT
                         * is used for CPU idle and C2/C3 are meaningless,
                         * so cap max_cstate at one.
                         */
                        max_cstate = 1;
                }
                dmi_check_system(processor_power_dmi_table);
                max_cstate = acpi_processor_cstate_check(max_cstate);
                if (max_cstate < ACPI_C_STATES_MAX)
                        printk(KERN_NOTICE
                               "ACPI: processor limited to max C-state %d\n",
                               max_cstate);
                first_run++;
        }

        if (!pr)
                return -EINVAL;

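        /*
         * If the FADT provides a CST_CNT SMI command, issue it to tell
         * the BIOS that the OS is going to evaluate _CST before _CST is
         * first used below.
         */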
        if (acpi_gbl_FADT.cst_control && !nocst) {
                status =
                    acpi_os_write_port(acpi_gbl_FADT.smi_command, acpi_gbl_FADT.cst_control, 8);
                if (ACPI_FAILURE(status)) {
                        ACPI_EXCEPTION((AE_INFO, status,
                                        "Notifying BIOS of _CST ability failed"));
                }
        }

        acpi_processor_get_power_info(pr);
        pr->flags.power_setup_done = 1;

        /*
         * Install the idle handler if processor power management is
         * supported. Note that the previously set idle handler will be
         * used on platforms that only support C1.
         */
        if (pr->flags.power) {
                acpi_processor_setup_cpuidle(pr);
                if (cpuidle_register_device(&pr->power.dev))
                        return -EIO;
        }
        return 0;
}

int acpi_processor_power_exit(struct acpi_processor *pr,
                              struct acpi_device *device)
{
        if (boot_option_idle_override)
                return 0;

        cpuidle_unregister_device(&pr->power.dev);
        pr->flags.power_setup_done = 0;

        return 0;
}