drivers/acpi/processor_idle.c
/*
 * processor_idle - idle state submodule to the ACPI processor driver
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2004  Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *                      - Added processor hotplug support
 *  Copyright (C) 2005  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *                      - Added support for C3 on SMP
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/acpi.h>
#include <linux/dmi.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>        /* need_resched() */
#include <linux/latency.h>

/*
 * Include the apic definitions for x86 to have the APIC timer related defines
 * available also for UP (on SMP it gets magically included via linux/smp.h).
 */
#ifdef CONFIG_X86
#include <asm/apic.h>
#endif

#include <asm/io.h>
#include <asm/uaccess.h>

#include <acpi/acpi_bus.h>
#include <acpi/processor.h>

#define ACPI_PROCESSOR_COMPONENT        0x01000000
#define ACPI_PROCESSOR_CLASS            "processor"
#define ACPI_PROCESSOR_DRIVER_NAME      "ACPI Processor Driver"
#define _COMPONENT              ACPI_PROCESSOR_COMPONENT
ACPI_MODULE_NAME("acpi_processor")
#define ACPI_PROCESSOR_FILE_POWER       "power"
#define US_TO_PM_TIMER_TICKS(t)         ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
#define C2_OVERHEAD                     4       /* 1us (3.579 ticks per us) */
#define C3_OVERHEAD                     4       /* 1us (3.579 ticks per us) */
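
/*
 * The ACPI PM timer ticks at PM_TIMER_FREQUENCY (3.579545 MHz), i.e. about
 * 3.58 ticks per microsecond; US_TO_PM_TIMER_TICKS() and the two overhead
 * constants above are expressed in those ticks.
 */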
static void (*pm_idle_save) (void) __read_mostly;
module_param(max_cstate, uint, 0644);

static unsigned int nocst __read_mostly;
module_param(nocst, uint, 0000);

/*
 * bm_history -- bit-mask with a bit per jiffy of bus-master activity
 * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
 * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
 * 100 HZ: 0x0000000F: 4 jiffies = 40ms
 * reduce history for more aggressive entry into C3
 */
static unsigned int bm_history __read_mostly =
    (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
module_param(bm_history, uint, 0644);
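
/*
 * A worked example for an intermediate tick rate (assuming HZ == 250):
 * the initializer evaluates to (1U << (250 / 25)) - 1 = 0x000003FF,
 * i.e. 10 jiffies = 40ms of history, consistent with the table above.
 */
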
/* --------------------------------------------------------------------------
                                Power Management
   -------------------------------------------------------------------------- */

/*
 * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3.
 * For now disable this. Probably a bug somewhere else.
 *
 * To skip this limit, boot/load with a large max_cstate limit.
 */
static int set_max_cstate(struct dmi_system_id *id)
{
        if (max_cstate > ACPI_PROCESSOR_MAX_POWER)
                return 0;

        printk(KERN_NOTICE PREFIX "%s detected - limiting to C%ld max_cstate."
               " Override with \"processor.max_cstate=%d\"\n", id->ident,
               (long)id->driver_data, ACPI_PROCESSOR_MAX_POWER + 1);

        max_cstate = (long)id->driver_data;

        return 0;
}

/* Actually this shouldn't be __cpuinitdata, would be better to fix the
   callers to only run once -AK */
static struct dmi_system_id __cpuinitdata processor_power_dmi_table[] = {
        { set_max_cstate, "IBM ThinkPad R40e", {
          DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
          DMI_MATCH(DMI_BIOS_VERSION,"1SET70WW")}, (void *)1},
        { set_max_cstate, "IBM ThinkPad R40e", {
          DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
          DMI_MATCH(DMI_BIOS_VERSION,"1SET60WW")}, (void *)1},
        { set_max_cstate, "IBM ThinkPad R40e", {
          DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
          DMI_MATCH(DMI_BIOS_VERSION,"1SET43WW")}, (void *)1},
        { set_max_cstate, "IBM ThinkPad R40e", {
          DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
          DMI_MATCH(DMI_BIOS_VERSION,"1SET45WW")}, (void *)1},
        { set_max_cstate, "IBM ThinkPad R40e", {
          DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
          DMI_MATCH(DMI_BIOS_VERSION,"1SET47WW")}, (void *)1},
        { set_max_cstate, "IBM ThinkPad R40e", {
          DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
          DMI_MATCH(DMI_BIOS_VERSION,"1SET50WW")}, (void *)1},
        { set_max_cstate, "IBM ThinkPad R40e", {
          DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
          DMI_MATCH(DMI_BIOS_VERSION,"1SET52WW")}, (void *)1},
        { set_max_cstate, "IBM ThinkPad R40e", {
          DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
          DMI_MATCH(DMI_BIOS_VERSION,"1SET55WW")}, (void *)1},
        { set_max_cstate, "IBM ThinkPad R40e", {
          DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
          DMI_MATCH(DMI_BIOS_VERSION,"1SET56WW")}, (void *)1},
        { set_max_cstate, "IBM ThinkPad R40e", {
          DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
          DMI_MATCH(DMI_BIOS_VERSION,"1SET59WW")}, (void *)1},
        { set_max_cstate, "IBM ThinkPad R40e", {
          DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
          DMI_MATCH(DMI_BIOS_VERSION,"1SET60WW")}, (void *)1},
        { set_max_cstate, "IBM ThinkPad R40e", {
          DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
          DMI_MATCH(DMI_BIOS_VERSION,"1SET61WW")}, (void *)1},
        { set_max_cstate, "IBM ThinkPad R40e", {
          DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
          DMI_MATCH(DMI_BIOS_VERSION,"1SET62WW")}, (void *)1},
        { set_max_cstate, "IBM ThinkPad R40e", {
          DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
          DMI_MATCH(DMI_BIOS_VERSION,"1SET64WW")}, (void *)1},
        { set_max_cstate, "IBM ThinkPad R40e", {
          DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
          DMI_MATCH(DMI_BIOS_VERSION,"1SET65WW")}, (void *)1},
        { set_max_cstate, "IBM ThinkPad R40e", {
          DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
          DMI_MATCH(DMI_BIOS_VERSION,"1SET68WW")}, (void *)1},
        { set_max_cstate, "Medion 41700", {
          DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
          DMI_MATCH(DMI_BIOS_VERSION,"R01-A1J")}, (void *)1},
        { set_max_cstate, "Clevo 5600D", {
          DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
          DMI_MATCH(DMI_BIOS_VERSION,"SHE845M0.86C.0013.D.0302131307")},
         (void *)2},
        {},
};

static inline u32 ticks_elapsed(u32 t1, u32 t2)
{
        if (t2 >= t1)
                return (t2 - t1);
        else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
                return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
        else
                return ((0xFFFFFFFF - t1) + t2);
}
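
/*
 * Worked example of the wraparound path, assuming a 24-bit PM timer:
 * with t1 = 0x00FFFFF0 and t2 = 0x00000010 the middle branch returns
 * (0x00FFFFFF - 0x00FFFFF0) + 0x00000010 = 0x1F ticks.  The exact
 * modulo-2^24 difference is 0x20, so the result is one tick (~0.28us)
 * short -- negligible against C2/C3 latencies.
 */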

static void
acpi_processor_power_activate(struct acpi_processor *pr,
                              struct acpi_processor_cx *new)
{
        struct acpi_processor_cx *old;

        if (!pr || !new)
                return;

        old = pr->power.state;

        if (old)
                old->promotion.count = 0;
        new->demotion.count = 0;

        /* Cleanup from old state. */
        if (old) {
                switch (old->type) {
                case ACPI_STATE_C3:
                        /* Disable bus master reload */
                        if (new->type != ACPI_STATE_C3 && pr->flags.bm_check)
                                acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
                        break;
                }
        }

        /* Prepare to use new state. */
        switch (new->type) {
        case ACPI_STATE_C3:
                /* Enable bus master reload (guard against a NULL previous
                   state, for symmetry with the cleanup above) */
                if ((!old || old->type != ACPI_STATE_C3) && pr->flags.bm_check)
                        acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
                break;
        }

        pr->power.state = new;

        return;
}

static void acpi_safe_halt(void)
{
        current_thread_info()->status &= ~TS_POLLING;
        /*
         * TS_POLLING-cleared state must be visible before we
         * test NEED_RESCHED:
         */
        smp_mb();
        if (!need_resched())
                safe_halt();
        current_thread_info()->status |= TS_POLLING;
}
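
/*
 * Note that safe_halt() re-enables interrupts immediately before halting
 * ("sti; hlt" on x86, where sti's one-instruction interrupt shadow covers
 * the hlt), so a wakeup interrupt arriving after the need_resched() check
 * above cannot be lost before the halt takes effect.
 */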

static atomic_t c3_cpu_count;

/* Common C-state entry for C2, C3, .. */
static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
{
        if (cstate->space_id == ACPI_CSTATE_FFH) {
                /* Call into architectural FFH based C-state */
                acpi_processor_ffh_cstate_enter(cstate);
        } else {
                int unused;
                /* IO port based C-state */
                inb(cstate->address);
                /* Dummy wait op - must do something useless after P_LVL2 read
                   because chipsets cannot guarantee that STPCLK# signal
                   gets asserted in time to freeze execution properly. */
                unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
        }
}

static void acpi_processor_idle(void)
{
        struct acpi_processor *pr = NULL;
        struct acpi_processor_cx *cx = NULL;
        struct acpi_processor_cx *next_state = NULL;
        int sleep_ticks = 0;
        u32 t1, t2 = 0;

        pr = processors[smp_processor_id()];
        if (!pr)
                return;

        /*
         * Interrupts must be disabled during bus mastering calculations and
         * for C2/C3 transitions.
         */
        local_irq_disable();

        /*
         * Check whether we truly need to go idle, or should
         * reschedule:
         */
        if (unlikely(need_resched())) {
                local_irq_enable();
                return;
        }

        cx = pr->power.state;
        if (!cx) {
                if (pm_idle_save)
                        pm_idle_save();
                else
                        acpi_safe_halt();
                return;
        }

        /*
         * Check BM Activity
         * -----------------
         * Check for bus mastering activity (if required), record, and check
         * for demotion.
         */
        if (pr->flags.bm_check) {
                u32 bm_status = 0;
                unsigned long diff = jiffies - pr->power.bm_check_timestamp;

                if (diff > 31)
                        diff = 31;

                pr->power.bm_activity <<= diff;

                acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
                if (bm_status) {
                        pr->power.bm_activity |= 0x1;
                        acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
                }
                /*
                 * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
                 * the true state of bus mastering activity, forcing us to
                 * manually check the BMIDEA bit of each IDE channel.
                 */
                else if (errata.piix4.bmisx) {
                        if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
                            || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
                                pr->power.bm_activity |= 0x1;
                }

                pr->power.bm_check_timestamp = jiffies;

                /*
                 * If bus mastering is or was active this jiffy, demote
                 * to avoid a faulty transition.  Note that the processor
                 * won't enter a low-power state during this call (to this
                 * function) but should upon the next.
                 *
                 * TBD: A better policy might be to fall back to the demotion
                 *      state (use it for this quantum only) instead of
                 *      demoting -- and rely on duration as our sole demotion
                 *      qualification.  This may, however, introduce DMA
                 *      issues (e.g. floppy DMA transfer overrun/underrun).
                 */
                if ((pr->power.bm_activity & 0x1) &&
                    cx->demotion.threshold.bm) {
                        local_irq_enable();
                        next_state = cx->demotion.state;
                        goto end;
                }
        }

#ifdef CONFIG_HOTPLUG_CPU
        /*
         * Check for P_LVL2_UP flag before entering C2 and above on
         * an SMP system. We do it here instead of doing it at _CST/P_LVL
         * detection phase, to work cleanly with logical CPU hotplug.
         */
        if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
            !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
                cx = &pr->power.states[ACPI_STATE_C1];
#endif

        /*
         * Sleep:
         * ------
         * Invoke the current Cx state to put the processor to sleep.
         */
        if (cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3) {
                current_thread_info()->status &= ~TS_POLLING;
                /*
                 * TS_POLLING-cleared state must be visible before we
                 * test NEED_RESCHED:
                 */
                smp_mb();
                if (need_resched()) {
                        current_thread_info()->status |= TS_POLLING;
                        local_irq_enable();
                        return;
                }
        }

        switch (cx->type) {

        case ACPI_STATE_C1:
                /*
                 * Invoke C1.
                 * Use the appropriate idle routine, the one that would
                 * be used without acpi C-states.
                 */
                if (pm_idle_save)
                        pm_idle_save();
                else
                        acpi_safe_halt();

                /*
                 * TBD: Can't get time duration while in C1, as resumes
                 *      go to an ISR rather than here.  Need to instrument
                 *      base interrupt handler.
                 */
                sleep_ticks = 0xFFFFFFFF;
                break;

        case ACPI_STATE_C2:
                /* Get start time (ticks) */
                t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
                /* Invoke C2 */
                acpi_cstate_enter(cx);
                /* Get end time (ticks) */
                t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);

#ifdef CONFIG_GENERIC_TIME
                /* TSC halts in C2, so notify users */
                mark_tsc_unstable();
#endif
                /* Re-enable interrupts */
                local_irq_enable();
                current_thread_info()->status |= TS_POLLING;
                /* Compute time (ticks) that we were actually asleep */
                sleep_ticks =
                    ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
                break;

        case ACPI_STATE_C3:

                if (pr->flags.bm_check) {
                        if (atomic_inc_return(&c3_cpu_count) ==
                            num_online_cpus()) {
                                /*
                                 * All CPUs are trying to go to C3
                                 * Disable bus master arbitration
                                 */
                                acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
                        }
                } else {
                        /* SMP with no shared cache... Invalidate cache */
                        ACPI_FLUSH_CPU_CACHE();
                }

                /* Get start time (ticks) */
                t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
                /* Invoke C3 */
                acpi_cstate_enter(cx);
                /* Get end time (ticks) */
                t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
                if (pr->flags.bm_check) {
                        /* Enable bus master arbitration */
                        atomic_dec(&c3_cpu_count);
                        acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
                }

#ifdef CONFIG_GENERIC_TIME
                /* TSC halts in C3, so notify users */
                mark_tsc_unstable();
#endif
                /* Re-enable interrupts */
                local_irq_enable();
                current_thread_info()->status |= TS_POLLING;
                /* Compute time (ticks) that we were actually asleep */
                sleep_ticks =
                    ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD;
                break;

        default:
                local_irq_enable();
                return;
        }
        cx->usage++;
        if ((cx->type != ACPI_STATE_C1) && (sleep_ticks > 0))
                cx->time += sleep_ticks;

        next_state = pr->power.state;

#ifdef CONFIG_HOTPLUG_CPU
        /* Don't do promotion/demotion */
        if ((cx->type == ACPI_STATE_C1) && (num_online_cpus() > 1) &&
            !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED)) {
                next_state = cx;
                goto end;
        }
#endif

        /*
         * Promotion?
         * ----------
         * Track the number of longs (time asleep is greater than threshold)
         * and promote when the count threshold is reached.  Note that bus
         * mastering activity may prevent promotions.
         * Do not promote above max_cstate.
         */
        if (cx->promotion.state &&
            ((cx->promotion.state - pr->power.states) <= max_cstate)) {
                if (sleep_ticks > cx->promotion.threshold.ticks &&
                  cx->promotion.state->latency <= system_latency_constraint()) {
                        cx->promotion.count++;
                        cx->demotion.count = 0;
                        if (cx->promotion.count >=
                            cx->promotion.threshold.count) {
                                if (pr->flags.bm_check) {
                                        if (!(pr->power.bm_activity &
                                              cx->promotion.threshold.bm)) {
                                                next_state =
                                                    cx->promotion.state;
                                                goto end;
                                        }
                                } else {
                                        next_state = cx->promotion.state;
                                        goto end;
                                }
                        }
                }
        }

        /*
         * Demotion?
         * ---------
         * Track the number of shorts (time asleep is less than time threshold)
         * and demote when the usage threshold is reached.
         */
        if (cx->demotion.state) {
                if (sleep_ticks < cx->demotion.threshold.ticks) {
                        cx->demotion.count++;
                        cx->promotion.count = 0;
                        if (cx->demotion.count >= cx->demotion.threshold.count) {
                                next_state = cx->demotion.state;
                                goto end;
                        }
                }
        }

      end:
        /*
         * Demote if current state exceeds max_cstate
         * or if the latency of the current state is unacceptable
         */
        if ((pr->power.state - pr->power.states) > max_cstate ||
                pr->power.state->latency > system_latency_constraint()) {
                if (cx->demotion.state)
                        next_state = cx->demotion.state;
        }

        /*
         * New Cx State?
         * -------------
         * If we're going to start using a new Cx state we must clean up
         * from the previous and prepare to use the new.
         */
        if (next_state != pr->power.state)
                acpi_processor_power_activate(pr, next_state);
}

static int acpi_processor_set_power_policy(struct acpi_processor *pr)
{
        unsigned int i;
        unsigned int state_is_set = 0;
        struct acpi_processor_cx *lower = NULL;
        struct acpi_processor_cx *higher = NULL;
        struct acpi_processor_cx *cx;


        if (!pr)
                return -EINVAL;

        /*
         * This function sets the default Cx state policy (OS idle handler).
         * Our scheme is to promote quickly to C2 but more conservatively
         * to C3.  We're favoring C2 for its characteristics of low latency
         * (quick response), good power savings, and ability to allow bus
         * mastering activity.  Note that the Cx state policy is completely
         * customizable and can be altered dynamically.
         */

        /* startup state */
        for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
                cx = &pr->power.states[i];
                if (!cx->valid)
                        continue;

                if (!state_is_set)
                        pr->power.state = cx;
                state_is_set++;
                break;
        }

        if (!state_is_set)
                return -ENODEV;

        /* demotion */
        for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
                cx = &pr->power.states[i];
                if (!cx->valid)
                        continue;

                if (lower) {
                        cx->demotion.state = lower;
                        cx->demotion.threshold.ticks = cx->latency_ticks;
                        cx->demotion.threshold.count = 1;
                        if (cx->type == ACPI_STATE_C3)
                                cx->demotion.threshold.bm = bm_history;
                }

                lower = cx;
        }

        /* promotion */
        for (i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i--) {
                cx = &pr->power.states[i];
                if (!cx->valid)
                        continue;

                if (higher) {
                        cx->promotion.state = higher;
                        cx->promotion.threshold.ticks = cx->latency_ticks;
                        if (cx->type >= ACPI_STATE_C2)
                                cx->promotion.threshold.count = 4;
                        else
                                cx->promotion.threshold.count = 10;
                        if (higher->type == ACPI_STATE_C3)
                                cx->promotion.threshold.bm = bm_history;
                }

                higher = cx;
        }

        return 0;
}
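
/*
 * For a processor where C1, C2 and C3 are all valid, the two loops above
 * build the chains C1 <- C2 <- C3 for demotion and C1 -> C2 -> C3 for
 * promotion: e.g. C2 gets demotion.state = &states[C1] and
 * promotion.state = &states[C3], with 10 consecutive "long" sleeps needed
 * to promote out of C1 and 4 to promote out of C2.
 */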

static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
{

        if (!pr)
                return -EINVAL;

        if (!pr->pblk)
                return -ENODEV;

        /* if info is obtained from pblk/fadt, type equals state */
        pr->power.states[ACPI_STATE_C2].type = ACPI_STATE_C2;
        pr->power.states[ACPI_STATE_C3].type = ACPI_STATE_C3;

#ifndef CONFIG_HOTPLUG_CPU
        /*
         * Check for P_LVL2_UP flag before entering C2 and above on
         * an SMP system.
         */
        if ((num_online_cpus() > 1) &&
            !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
                return -ENODEV;
#endif

        /* determine C2 and C3 address from pblk */
        pr->power.states[ACPI_STATE_C2].address = pr->pblk + 4;
        pr->power.states[ACPI_STATE_C3].address = pr->pblk + 5;

        /* determine latencies from FADT */
        pr->power.states[ACPI_STATE_C2].latency = acpi_gbl_FADT.C2latency;
        pr->power.states[ACPI_STATE_C3].latency = acpi_gbl_FADT.C3latency;

        ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                          "lvl2[0x%08x] lvl3[0x%08x]\n",
                          pr->power.states[ACPI_STATE_C2].address,
                          pr->power.states[ACPI_STATE_C3].address));

        return 0;
}

static int acpi_processor_get_power_info_default(struct acpi_processor *pr)
{
        if (!pr->power.states[ACPI_STATE_C1].valid) {
                /* set the first C-State to C1 */
                /* all processors need to support C1 */
                pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
                pr->power.states[ACPI_STATE_C1].valid = 1;
        }
        /* the C0 state only exists as a filler in our array */
        pr->power.states[ACPI_STATE_C0].valid = 1;
        return 0;
}

static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
{
        acpi_status status = 0;
        acpi_integer count;
        int current_count;
        int i;
        struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
        union acpi_object *cst;


        if (nocst)
                return -ENODEV;

        current_count = 0;

        status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer);
        if (ACPI_FAILURE(status)) {
                ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _CST, giving up\n"));
                return -ENODEV;
        }

        cst = buffer.pointer;

        /* There must be at least 2 elements */
        if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) {
                printk(KERN_ERR PREFIX "not enough elements in _CST\n");
                status = -EFAULT;
                goto end;
        }

        count = cst->package.elements[0].integer.value;

        /* Validate number of power states. */
        if (count < 1 || count != cst->package.count - 1) {
                printk(KERN_ERR PREFIX "count given by _CST is not valid\n");
                status = -EFAULT;
                goto end;
        }
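
        /*
         * Per the ACPI specification, each remaining _CST element is itself
         * a Package of the form { Register (Buffer), Type (1 = C1 .. 3 = C3),
         * Latency (us), Power (mW) }; the loop below unpacks exactly those
         * four fields.
         */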

        /* Tell driver that at least _CST is supported. */
        pr->flags.has_cst = 1;

        for (i = 1; i <= count; i++) {
                union acpi_object *element;
                union acpi_object *obj;
                struct acpi_power_register *reg;
                struct acpi_processor_cx cx;

                memset(&cx, 0, sizeof(cx));

                element = &(cst->package.elements[i]);
                if (element->type != ACPI_TYPE_PACKAGE)
                        continue;

                if (element->package.count != 4)
                        continue;

                obj = &(element->package.elements[0]);

                if (obj->type != ACPI_TYPE_BUFFER)
                        continue;

                reg = (struct acpi_power_register *)obj->buffer.pointer;

                if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO &&
                    (reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE))
                        continue;

                /* There should be an easy way to extract an integer... */
                obj = &(element->package.elements[1]);
                if (obj->type != ACPI_TYPE_INTEGER)
                        continue;

                cx.type = obj->integer.value;
                /*
                 * Some buggy BIOSes won't list C1 in _CST -
                 * Let acpi_processor_get_power_info_default() handle them later
                 */
                if (i == 1 && cx.type != ACPI_STATE_C1)
                        current_count++;

                cx.address = reg->address;
                cx.index = current_count + 1;

                cx.space_id = ACPI_CSTATE_SYSTEMIO;
                if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
                        if (acpi_processor_ffh_cstate_probe
                                        (pr->id, &cx, reg) == 0) {
                                cx.space_id = ACPI_CSTATE_FFH;
                        } else if (cx.type != ACPI_STATE_C1) {
                                /*
                                 * C1 is a special case where FIXED_HARDWARE
                                 * can be handled in non-MWAIT way as well.
                                 * In that case, save this _CST entry info.
                                 * That is, we retain space_id of SYSTEM_IO for
                                 * halt based C1.
                                 * Otherwise, ignore this info and continue.
                                 */
                                continue;
                        }
                }

                obj = &(element->package.elements[2]);
                if (obj->type != ACPI_TYPE_INTEGER)
                        continue;

                cx.latency = obj->integer.value;

                obj = &(element->package.elements[3]);
                if (obj->type != ACPI_TYPE_INTEGER)
                        continue;

                cx.power = obj->integer.value;

                current_count++;
                memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));

                /*
                 * We support a total of ACPI_PROCESSOR_MAX_POWER - 1 states
                 * (from 1 through ACPI_PROCESSOR_MAX_POWER - 1).
                 */
                if (current_count >= (ACPI_PROCESSOR_MAX_POWER - 1)) {
                        printk(KERN_WARNING
                               "Limiting number of power states to max (%d)\n",
                               ACPI_PROCESSOR_MAX_POWER);
                        printk(KERN_WARNING
                               "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n");
                        break;
                }
        }

        ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d power states\n",
                          current_count));

        /* Validate number of power states discovered */
        if (current_count < 2)
                status = -EFAULT;

      end:
        kfree(buffer.pointer);

        return status;
}

static void acpi_processor_power_verify_c2(struct acpi_processor_cx *cx)
{

        if (!cx->address)
                return;

        /*
         * C2 latency must be less than or equal to 100
         * microseconds.
         */
        else if (cx->latency > ACPI_PROCESSOR_MAX_C2_LATENCY) {
                ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                                  "latency too large [%d]\n", cx->latency));
                return;
        }

        /*
         * Otherwise we've met all of our C2 requirements.
         * Normalize the C2 latency to expedite policy
         */
        cx->valid = 1;
        cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);

        return;
}

static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
                                           struct acpi_processor_cx *cx)
{
        static int bm_check_flag;


        if (!cx->address)
                return;

        /*
         * C3 latency must be less than or equal to 1000
         * microseconds.
         */
        else if (cx->latency > ACPI_PROCESSOR_MAX_C3_LATENCY) {
                ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                                  "latency too large [%d]\n", cx->latency));
                return;
        }

        /*
         * PIIX4 Erratum #18: We don't support C3 when Type-F (fast)
         * DMA transfers are used by any ISA device to avoid livelock.
         * Note that we could disable Type-F DMA (as recommended by
         * the erratum), but this is known to disrupt certain ISA
         * devices thus we take the conservative approach.
         */
        else if (errata.piix4.fdma) {
                ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                                  "C3 not supported on PIIX4 with Type-F DMA\n"));
                return;
        }

        /* All the logic here assumes flags.bm_check is the same across all CPUs */
        if (!bm_check_flag) {
                /* Determine whether bm_check is needed based on CPU */
                acpi_processor_power_init_bm_check(&(pr->flags), pr->id);
                bm_check_flag = pr->flags.bm_check;
        } else {
                pr->flags.bm_check = bm_check_flag;
        }

        if (pr->flags.bm_check) {
                /* bus mastering control is necessary */
                if (!pr->flags.bm_control) {
                        ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                                          "C3 support requires bus mastering control\n"));
                        return;
                }
        } else {
                /*
                 * WBINVD should be set in the FADT for C3 to be supported
                 * when bm_check is not required.
                 */
                if (!(acpi_gbl_FADT.flags & ACPI_FADT_WBINVD)) {
                        ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                                          "Cache invalidation should work properly"
                                          " for C3 to be enabled on SMP systems\n"));
                        return;
                }
                acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
        }

        /*
         * Otherwise we've met all of our C3 requirements.
         * Normalize the C3 latency to expedite policy.  Enable
         * checking of bus mastering status (bm_check) so we can
         * use this in our C3 policy
         */
        cx->valid = 1;
        cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);

        return;
}

static int acpi_processor_power_verify(struct acpi_processor *pr)
{
        unsigned int i;
        unsigned int working = 0;

#ifdef ARCH_APICTIMER_STOPS_ON_C3
        int timer_broadcast = 0;
        cpumask_t mask = cpumask_of_cpu(pr->id);
        on_each_cpu(switch_ipi_to_APIC_timer, &mask, 1, 1);
#endif

        for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
                struct acpi_processor_cx *cx = &pr->power.states[i];

                switch (cx->type) {
                case ACPI_STATE_C1:
                        cx->valid = 1;
                        break;

                case ACPI_STATE_C2:
                        acpi_processor_power_verify_c2(cx);
#ifdef ARCH_APICTIMER_STOPS_ON_C3
                        /* Some AMD systems fake C3 as C2, but still
                           have timer troubles */
                        if (cx->valid &&
                                boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
                                timer_broadcast++;
#endif
                        break;

                case ACPI_STATE_C3:
                        acpi_processor_power_verify_c3(pr, cx);
#ifdef ARCH_APICTIMER_STOPS_ON_C3
                        if (cx->valid)
                                timer_broadcast++;
#endif
                        break;
                }

                if (cx->valid)
                        working++;
        }

#ifdef ARCH_APICTIMER_STOPS_ON_C3
        if (timer_broadcast)
                on_each_cpu(switch_APIC_timer_to_ipi, &mask, 1, 1);
#endif

        return (working);
}

static int acpi_processor_get_power_info(struct acpi_processor *pr)
{
        unsigned int i;
        int result;


        /* NOTE: the idle thread may not be running while calling
         * this function */

        /* Zero initialize all the C-states info. */
        memset(pr->power.states, 0, sizeof(pr->power.states));

        result = acpi_processor_get_power_info_cst(pr);
        if (result == -ENODEV)
                result = acpi_processor_get_power_info_fadt(pr);

        if (result)
                return result;

        acpi_processor_get_power_info_default(pr);

        pr->power.count = acpi_processor_power_verify(pr);

        /*
         * Set Default Policy
         * ------------------
         * Now that we know which states are supported, set the default
         * policy.  Note that this policy can be changed dynamically
         * (e.g. encourage deeper sleeps to conserve battery life when
         * not on AC).
         */
        result = acpi_processor_set_power_policy(pr);
        if (result)
                return result;

        /*
         * If at least one state of type C2 or C3 is available, mark this
         * CPU as being "idle manageable".
         */
        for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
                if (pr->power.states[i].valid) {
                        pr->power.count = i;
                        if (pr->power.states[i].type >= ACPI_STATE_C2)
                                pr->flags.power = 1;
                }
        }

        return 0;
}

int acpi_processor_cst_has_changed(struct acpi_processor *pr)
{
        int result = 0;


        if (!pr)
                return -EINVAL;

        if (nocst)
                return -ENODEV;

        if (!pr->flags.power_setup_done)
                return -ENODEV;

        /* Fall back to the default idle loop */
        pm_idle = pm_idle_save;
        synchronize_sched();    /* Relies on interrupts forcing exit from idle. */

        pr->flags.power = 0;
        result = acpi_processor_get_power_info(pr);
        if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
                pm_idle = acpi_processor_idle;

        return result;
}

/* proc interface */

static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset)
{
        struct acpi_processor *pr = seq->private;
        unsigned int i;


        if (!pr)
                goto end;

        seq_printf(seq, "active state:            C%zd\n"
                   "max_cstate:              C%d\n"
                   "bus master activity:     %08x\n"
                   "maximum allowed latency: %d usec\n",
                   pr->power.state ? pr->power.state - pr->power.states : 0,
                   max_cstate, (unsigned)pr->power.bm_activity,
                   system_latency_constraint());

        seq_puts(seq, "states:\n");

        for (i = 1; i <= pr->power.count; i++) {
                seq_printf(seq, "   %cC%d:                  ",
                           (&pr->power.states[i] ==
                            pr->power.state ? '*' : ' '), i);

                if (!pr->power.states[i].valid) {
                        seq_puts(seq, "<not supported>\n");
                        continue;
                }

                switch (pr->power.states[i].type) {
                case ACPI_STATE_C1:
                        seq_printf(seq, "type[C1] ");
                        break;
                case ACPI_STATE_C2:
                        seq_printf(seq, "type[C2] ");
                        break;
                case ACPI_STATE_C3:
                        seq_printf(seq, "type[C3] ");
                        break;
                default:
                        seq_printf(seq, "type[--] ");
                        break;
                }

                if (pr->power.states[i].promotion.state)
                        seq_printf(seq, "promotion[C%zd] ",
                                   (pr->power.states[i].promotion.state -
                                    pr->power.states));
                else
                        seq_puts(seq, "promotion[--] ");

                if (pr->power.states[i].demotion.state)
                        seq_printf(seq, "demotion[C%zd] ",
                                   (pr->power.states[i].demotion.state -
                                    pr->power.states));
                else
                        seq_puts(seq, "demotion[--] ");

                seq_printf(seq, "latency[%03d] usage[%08d] duration[%020llu]\n",
                           pr->power.states[i].latency,
                           pr->power.states[i].usage,
                           (unsigned long long)pr->power.states[i].time);
        }

      end:
        return 0;
}

static int acpi_processor_power_open_fs(struct inode *inode, struct file *file)
{
        return single_open(file, acpi_processor_power_seq_show,
                           PDE(inode)->data);
}

static const struct file_operations acpi_processor_power_fops = {
        .open = acpi_processor_power_open_fs,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = single_release,
};

#ifdef CONFIG_SMP
static void smp_callback(void *v)
{
        /* we already woke the CPU up, nothing more to do */
}

/*
 * This function gets called when a part of the kernel has a new latency
 * requirement.  This means we need to get all processors out of their C-state,
 * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
 * wakes them all right up.
 */
static int acpi_processor_latency_notify(struct notifier_block *b,
                unsigned long l, void *v)
{
        smp_call_function(smp_callback, NULL, 0, 1);
        return NOTIFY_OK;
}

static struct notifier_block acpi_processor_latency_notifier = {
        .notifier_call = acpi_processor_latency_notify,
};
#endif

int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
                              struct acpi_device *device)
{
        acpi_status status = 0;
        static int first_run;
        struct proc_dir_entry *entry = NULL;
        unsigned int i;


        if (!first_run) {
                dmi_check_system(processor_power_dmi_table);
                if (max_cstate < ACPI_C_STATES_MAX)
                        printk(KERN_NOTICE
                               "ACPI: processor limited to max C-state %d\n",
                               max_cstate);
                first_run++;
#ifdef CONFIG_SMP
                register_latency_notifier(&acpi_processor_latency_notifier);
#endif
        }

        if (!pr)
                return -EINVAL;

        if (acpi_gbl_FADT.cst_control && !nocst) {
                status =
                    acpi_os_write_port(acpi_gbl_FADT.smi_command, acpi_gbl_FADT.cst_control, 8);
                if (ACPI_FAILURE(status)) {
                        ACPI_EXCEPTION((AE_INFO, status,
                                        "Notifying BIOS of _CST ability failed"));
                }
        }

        acpi_processor_get_power_info(pr);

        /*
         * Install the idle handler if processor power management is supported.
         * Note that the previously set idle handler will be used on
         * platforms that only support C1.
         */
        if ((pr->flags.power) && (!boot_option_idle_override)) {
                printk(KERN_INFO PREFIX "CPU%d (power states:", pr->id);
                for (i = 1; i <= pr->power.count; i++)
                        if (pr->power.states[i].valid)
                                printk(" C%d[C%d]", i,
                                       pr->power.states[i].type);
                printk(")\n");

                if (pr->id == 0) {
                        pm_idle_save = pm_idle;
                        pm_idle = acpi_processor_idle;
                }
        }
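
        /*
         * pm_idle is a single global hook shared by all processors, so it is
         * saved and swapped exactly once, when CPU0 registers, rather than
         * once per processor.
         */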

        /* 'power' [R] */
        entry = create_proc_entry(ACPI_PROCESSOR_FILE_POWER,
                                  S_IRUGO, acpi_device_dir(device));
        if (!entry)
                return -EIO;
        else {
                entry->proc_fops = &acpi_processor_power_fops;
                entry->data = acpi_driver_data(device);
                entry->owner = THIS_MODULE;
        }

        pr->flags.power_setup_done = 1;

        return 0;
}

int acpi_processor_power_exit(struct acpi_processor *pr,
                              struct acpi_device *device)
{

        pr->flags.power_setup_done = 0;

        if (acpi_device_dir(device))
                remove_proc_entry(ACPI_PROCESSOR_FILE_POWER,
                                  acpi_device_dir(device));

        /* Unregister the idle handler when processor #0 is removed. */
        if (pr->id == 0) {
                pm_idle = pm_idle_save;

                /*
                 * We are about to unload the current idle thread pm callback
                 * (pm_idle).  Wait for all processors to update cached/local
                 * copies of pm_idle before proceeding.
                 */
                cpu_idle_wait();
#ifdef CONFIG_SMP
                unregister_latency_notifier(&acpi_processor_latency_notifier);
#endif
        }

        return 0;
}