Merge commit 'v2.6.26-rc8' into x86/mce
[pandora-kernel.git] / drivers / cpufreq / cpufreq.c
1 /*
2  *  linux/drivers/cpufreq/cpufreq.c
3  *
4  *  Copyright (C) 2001 Russell King
5  *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
6  *
7  *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
8  *      Added handling for CPU hotplug
9  *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
10  *      Fix handling for CPU hotplug -- affected CPUs
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License version 2 as
14  * published by the Free Software Foundation.
15  *
16  */
17
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/init.h>
21 #include <linux/notifier.h>
22 #include <linux/cpufreq.h>
23 #include <linux/delay.h>
24 #include <linux/interrupt.h>
25 #include <linux/spinlock.h>
26 #include <linux/device.h>
27 #include <linux/slab.h>
28 #include <linux/cpu.h>
29 #include <linux/completion.h>
30 #include <linux/mutex.h>
31
32 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \
33                                                 "cpufreq-core", msg)
34
35 /**
36  * The "cpufreq driver" - the arch- or hardware-dependent low
37  * level driver of CPUFreq support, and its spinlock. This lock
38  * also protects the cpufreq_cpu_data array.
39  */
40 static struct cpufreq_driver *cpufreq_driver;
41 static struct cpufreq_policy *cpufreq_cpu_data[NR_CPUS];
42 #ifdef CONFIG_HOTPLUG_CPU
43 /* This one keeps track of the previously set governor of a removed CPU */
44 static struct cpufreq_governor *cpufreq_cpu_governor[NR_CPUS];
45 #endif
46 static DEFINE_SPINLOCK(cpufreq_driver_lock);
47
48 /*
49  * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
50  * all cpufreq/hotplug/workqueue/etc related lock issues.
51  *
52  * The rules for this semaphore:
53  * - Any routine that wants to read from the policy structure will
54  *   do a down_read on this semaphore.
55  * - Any routine that will write to the policy structure and/or may take away
56  *   the policy altogether (eg. CPU hotplug), will hold this lock in write
57  *   mode before doing so.
58  *
59  * Additional rules:
60  * - All holders of the lock should check to make sure that the CPU they
61  *   are concerned with is online after they get the lock.
62  * - Governor routines that can be called in cpufreq hotplug path should not
63  *   take this sem as top level hotplug notifier handler takes this.
64  */
65 static DEFINE_PER_CPU(int, policy_cpu);
66 static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
67
68 #define lock_policy_rwsem(mode, cpu)                                    \
69 int lock_policy_rwsem_##mode                                            \
70 (int cpu)                                                               \
71 {                                                                       \
72         int policy_cpu = per_cpu(policy_cpu, cpu);                      \
73         BUG_ON(policy_cpu == -1);                                       \
74         down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));            \
75         if (unlikely(!cpu_online(cpu))) {                               \
76                 up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));      \
77                 return -1;                                              \
78         }                                                               \
79                                                                         \
80         return 0;                                                       \
81 }
82
83 lock_policy_rwsem(read, cpu);
84 EXPORT_SYMBOL_GPL(lock_policy_rwsem_read);
85
86 lock_policy_rwsem(write, cpu);
87 EXPORT_SYMBOL_GPL(lock_policy_rwsem_write);
88
89 void unlock_policy_rwsem_read(int cpu)
90 {
91         int policy_cpu = per_cpu(policy_cpu, cpu);
92         BUG_ON(policy_cpu == -1);
93         up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
94 }
95 EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read);
96
97 void unlock_policy_rwsem_write(int cpu)
98 {
99         int policy_cpu = per_cpu(policy_cpu, cpu);
100         BUG_ON(policy_cpu == -1);
101         up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
102 }
103 EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write);
104
105
106 /* internal prototypes */
107 static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
108 static unsigned int __cpufreq_get(unsigned int cpu);
109 static void handle_update(struct work_struct *work);
110
111 /**
112  * Two notifier lists: the "policy" list is involved in the
113  * validation process for a new CPU frequency policy; the
114  * "transition" list for kernel code that needs to handle
115  * changes to devices when the CPU clock speed changes.
116  * The mutex locks both lists.
117  */
118 static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
119 static struct srcu_notifier_head cpufreq_transition_notifier_list;
120
121 static bool init_cpufreq_transition_notifier_list_called;
122 static int __init init_cpufreq_transition_notifier_list(void)
123 {
124         srcu_init_notifier_head(&cpufreq_transition_notifier_list);
125         init_cpufreq_transition_notifier_list_called = true;
126         return 0;
127 }
128 pure_initcall(init_cpufreq_transition_notifier_list);
129
130 static LIST_HEAD(cpufreq_governor_list);
131 static DEFINE_MUTEX (cpufreq_governor_mutex);
132
133 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
134 {
135         struct cpufreq_policy *data;
136         unsigned long flags;
137
138         if (cpu >= NR_CPUS)
139                 goto err_out;
140
141         /* get the cpufreq driver */
142         spin_lock_irqsave(&cpufreq_driver_lock, flags);
143
144         if (!cpufreq_driver)
145                 goto err_out_unlock;
146
147         if (!try_module_get(cpufreq_driver->owner))
148                 goto err_out_unlock;
149
150
151         /* get the CPU */
152         data = cpufreq_cpu_data[cpu];
153
154         if (!data)
155                 goto err_out_put_module;
156
157         if (!kobject_get(&data->kobj))
158                 goto err_out_put_module;
159
160         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
161         return data;
162
163 err_out_put_module:
164         module_put(cpufreq_driver->owner);
165 err_out_unlock:
166         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
167 err_out:
168         return NULL;
169 }
170 EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
171
172
173 void cpufreq_cpu_put(struct cpufreq_policy *data)
174 {
175         kobject_put(&data->kobj);
176         module_put(cpufreq_driver->owner);
177 }
178 EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
179
180
181 /*********************************************************************
182  *                     UNIFIED DEBUG HELPERS                         *
183  *********************************************************************/
184 #ifdef CONFIG_CPU_FREQ_DEBUG
185
186 /* what part(s) of the CPUfreq subsystem are debugged? */
187 static unsigned int debug;
188
189 /* is the debug output ratelimit'ed using printk_ratelimit? User can
190  * set or modify this value.
191  */
192 static unsigned int debug_ratelimit = 1;
193
194 /* is the printk_ratelimit'ing enabled? It's enabled after a successful
195  * loading of a cpufreq driver, temporarily disabled when a new policy
196  * is set, and disabled upon cpufreq driver removal
197  */
198 static unsigned int disable_ratelimit = 1;
199 static DEFINE_SPINLOCK(disable_ratelimit_lock);
200
201 static void cpufreq_debug_enable_ratelimit(void)
202 {
203         unsigned long flags;
204
205         spin_lock_irqsave(&disable_ratelimit_lock, flags);
206         if (disable_ratelimit)
207                 disable_ratelimit--;
208         spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
209 }
210
211 static void cpufreq_debug_disable_ratelimit(void)
212 {
213         unsigned long flags;
214
215         spin_lock_irqsave(&disable_ratelimit_lock, flags);
216         disable_ratelimit++;
217         spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
218 }
219
220 void cpufreq_debug_printk(unsigned int type, const char *prefix,
221                         const char *fmt, ...)
222 {
223         char s[256];
224         va_list args;
225         unsigned int len;
226         unsigned long flags;
227
228         WARN_ON(!prefix);
229         if (type & debug) {
230                 spin_lock_irqsave(&disable_ratelimit_lock, flags);
231                 if (!disable_ratelimit && debug_ratelimit
232                                         && !printk_ratelimit()) {
233                         spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
234                         return;
235                 }
236                 spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
237
238                 len = snprintf(s, 256, KERN_DEBUG "%s: ", prefix);
239
240                 va_start(args, fmt);
241                 len += vsnprintf(&s[len], (256 - len), fmt, args);
242                 va_end(args);
243
244                 printk(s);
245
246                 WARN_ON(len < 5);
247         }
248 }
249 EXPORT_SYMBOL(cpufreq_debug_printk);
250
251
252 module_param(debug, uint, 0644);
253 MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core,"
254                         " 2 to debug drivers, and 4 to debug governors.");
255
256 module_param(debug_ratelimit, uint, 0644);
257 MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging:"
258                                         " set to 0 to disable ratelimiting.");
259
260 #else /* !CONFIG_CPU_FREQ_DEBUG */
261
/* Debugging is compiled out: the ratelimit toggles do nothing. */
static inline void cpufreq_debug_enable_ratelimit(void)
{
}

static inline void cpufreq_debug_disable_ratelimit(void)
{
}
264
265 #endif /* CONFIG_CPU_FREQ_DEBUG */
266
267
268 /*********************************************************************
269  *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
270  *********************************************************************/
271
272 /**
273  * adjust_jiffies - adjust the system "loops_per_jiffy"
274  *
275  * This function alters the system "loops_per_jiffy" for the clock
276  * speed change. Note that loops_per_jiffy cannot be updated on SMP
277  * systems as each CPU might be scaled differently. So, use the arch
278  * per-CPU loops_per_jiffy value wherever possible.
279  */
280 #ifndef CONFIG_SMP
281 static unsigned long l_p_j_ref;
282 static unsigned int  l_p_j_ref_freq;
283
284 static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
285 {
286         if (ci->flags & CPUFREQ_CONST_LOOPS)
287                 return;
288
289         if (!l_p_j_ref_freq) {
290                 l_p_j_ref = loops_per_jiffy;
291                 l_p_j_ref_freq = ci->old;
292                 dprintk("saving %lu as reference value for loops_per_jiffy; "
293                         "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
294         }
295         if ((val == CPUFREQ_PRECHANGE  && ci->old < ci->new) ||
296             (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) ||
297             (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
298                 loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
299                                                                 ci->new);
300                 dprintk("scaling loops_per_jiffy to %lu "
301                         "for frequency %u kHz\n", loops_per_jiffy, ci->new);
302         }
303 }
304 #else
/* SMP build: the global loops_per_jiffy is left untouched. */
static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
}
309 #endif
310
311
312 /**
313  * cpufreq_notify_transition - call notifier chain and adjust_jiffies
314  * on frequency transition.
315  *
316  * This function calls the transition notifiers and the "adjust_jiffies"
317  * function. It is called twice on all CPU frequency changes that have
318  * external effects.
319  */
320 void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
321 {
322         struct cpufreq_policy *policy;
323
324         BUG_ON(irqs_disabled());
325
326         freqs->flags = cpufreq_driver->flags;
327         dprintk("notification %u of frequency transition to %u kHz\n",
328                 state, freqs->new);
329
330         policy = cpufreq_cpu_data[freqs->cpu];
331         switch (state) {
332
333         case CPUFREQ_PRECHANGE:
334                 /* detect if the driver reported a value as "old frequency"
335                  * which is not equal to what the cpufreq core thinks is
336                  * "old frequency".
337                  */
338                 if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
339                         if ((policy) && (policy->cpu == freqs->cpu) &&
340                             (policy->cur) && (policy->cur != freqs->old)) {
341                                 dprintk("Warning: CPU frequency is"
342                                         " %u, cpufreq assumed %u kHz.\n",
343                                         freqs->old, policy->cur);
344                                 freqs->old = policy->cur;
345                         }
346                 }
347                 srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
348                                 CPUFREQ_PRECHANGE, freqs);
349                 adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
350                 break;
351
352         case CPUFREQ_POSTCHANGE:
353                 adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
354                 srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
355                                 CPUFREQ_POSTCHANGE, freqs);
356                 if (likely(policy) && likely(policy->cpu == freqs->cpu))
357                         policy->cur = freqs->new;
358                 break;
359         }
360 }
361 EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
362
363
364
365 /*********************************************************************
366  *                          SYSFS INTERFACE                          *
367  *********************************************************************/
368
369 static struct cpufreq_governor *__find_governor(const char *str_governor)
370 {
371         struct cpufreq_governor *t;
372
373         list_for_each_entry(t, &cpufreq_governor_list, governor_list)
374                 if (!strnicmp(str_governor,t->name,CPUFREQ_NAME_LEN))
375                         return t;
376
377         return NULL;
378 }
379
380 /**
381  * cpufreq_parse_governor - parse a governor string
382  */
383 static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
384                                 struct cpufreq_governor **governor)
385 {
386         int err = -EINVAL;
387
388         if (!cpufreq_driver)
389                 goto out;
390
391         if (cpufreq_driver->setpolicy) {
392                 if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
393                         *policy = CPUFREQ_POLICY_PERFORMANCE;
394                         err = 0;
395                 } else if (!strnicmp(str_governor, "powersave",
396                                                 CPUFREQ_NAME_LEN)) {
397                         *policy = CPUFREQ_POLICY_POWERSAVE;
398                         err = 0;
399                 }
400         } else if (cpufreq_driver->target) {
401                 struct cpufreq_governor *t;
402
403                 mutex_lock(&cpufreq_governor_mutex);
404
405                 t = __find_governor(str_governor);
406
407                 if (t == NULL) {
408                         char *name = kasprintf(GFP_KERNEL, "cpufreq_%s",
409                                                                 str_governor);
410
411                         if (name) {
412                                 int ret;
413
414                                 mutex_unlock(&cpufreq_governor_mutex);
415                                 ret = request_module("%s", name);
416                                 mutex_lock(&cpufreq_governor_mutex);
417
418                                 if (ret == 0)
419                                         t = __find_governor(str_governor);
420                         }
421
422                         kfree(name);
423                 }
424
425                 if (t != NULL) {
426                         *governor = t;
427                         err = 0;
428                 }
429
430                 mutex_unlock(&cpufreq_governor_mutex);
431         }
432   out:
433         return err;
434 }
435
436
437 /* drivers/base/cpu.c */
438 extern struct sysdev_class cpu_sysdev_class;
439
440
441 /**
442  * cpufreq_per_cpu_attr_read() / show_##file_name() -
443  * print out cpufreq information
444  *
445  * Write out information from cpufreq_driver->policy[cpu]; object must be
446  * "unsigned int".
447  */
448
449 #define show_one(file_name, object)                     \
450 static ssize_t show_##file_name                         \
451 (struct cpufreq_policy *policy, char *buf)              \
452 {                                                       \
453         return sprintf (buf, "%u\n", policy->object);   \
454 }
455
456 show_one(cpuinfo_min_freq, cpuinfo.min_freq);
457 show_one(cpuinfo_max_freq, cpuinfo.max_freq);
458 show_one(scaling_min_freq, min);
459 show_one(scaling_max_freq, max);
460 show_one(scaling_cur_freq, cur);
461
462 static int __cpufreq_set_policy(struct cpufreq_policy *data,
463                                 struct cpufreq_policy *policy);
464
465 /**
466  * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
467  */
468 #define store_one(file_name, object)                    \
469 static ssize_t store_##file_name                                        \
470 (struct cpufreq_policy *policy, const char *buf, size_t count)          \
471 {                                                                       \
472         unsigned int ret = -EINVAL;                                     \
473         struct cpufreq_policy new_policy;                               \
474                                                                         \
475         ret = cpufreq_get_policy(&new_policy, policy->cpu);             \
476         if (ret)                                                        \
477                 return -EINVAL;                                         \
478                                                                         \
479         ret = sscanf (buf, "%u", &new_policy.object);                   \
480         if (ret != 1)                                                   \
481                 return -EINVAL;                                         \
482                                                                         \
483         ret = __cpufreq_set_policy(policy, &new_policy);                \
484         policy->user_policy.object = policy->object;                    \
485                                                                         \
486         return ret ? ret : count;                                       \
487 }
488
489 store_one(scaling_min_freq,min);
490 store_one(scaling_max_freq,max);
491
492 /**
493  * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
494  */
495 static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
496                                         char *buf)
497 {
498         unsigned int cur_freq = __cpufreq_get(policy->cpu);
499         if (!cur_freq)
500                 return sprintf(buf, "<unknown>");
501         return sprintf(buf, "%u\n", cur_freq);
502 }
503
504
505 /**
506  * show_scaling_governor - show the current policy for the specified CPU
507  */
508 static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
509 {
510         if(policy->policy == CPUFREQ_POLICY_POWERSAVE)
511                 return sprintf(buf, "powersave\n");
512         else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
513                 return sprintf(buf, "performance\n");
514         else if (policy->governor)
515                 return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", policy->governor->name);
516         return -EINVAL;
517 }
518
519
520 /**
521  * store_scaling_governor - store policy for the specified CPU
522  */
523 static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
524                                         const char *buf, size_t count)
525 {
526         unsigned int ret = -EINVAL;
527         char    str_governor[16];
528         struct cpufreq_policy new_policy;
529
530         ret = cpufreq_get_policy(&new_policy, policy->cpu);
531         if (ret)
532                 return ret;
533
534         ret = sscanf (buf, "%15s", str_governor);
535         if (ret != 1)
536                 return -EINVAL;
537
538         if (cpufreq_parse_governor(str_governor, &new_policy.policy,
539                                                 &new_policy.governor))
540                 return -EINVAL;
541
542         /* Do not use cpufreq_set_policy here or the user_policy.max
543            will be wrongly overridden */
544         ret = __cpufreq_set_policy(policy, &new_policy);
545
546         policy->user_policy.policy = policy->policy;
547         policy->user_policy.governor = policy->governor;
548
549         if (ret)
550                 return ret;
551         else
552                 return count;
553 }
554
555 /**
556  * show_scaling_driver - show the cpufreq driver currently loaded
557  */
558 static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
559 {
560         return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
561 }
562
563 /**
564  * show_scaling_available_governors - show the available CPUfreq governors
565  */
566 static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
567                                                 char *buf)
568 {
569         ssize_t i = 0;
570         struct cpufreq_governor *t;
571
572         if (!cpufreq_driver->target) {
573                 i += sprintf(buf, "performance powersave");
574                 goto out;
575         }
576
577         list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
578                 if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) - (CPUFREQ_NAME_LEN + 2)))
579                         goto out;
580                 i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
581         }
582 out:
583         i += sprintf(&buf[i], "\n");
584         return i;
585 }
586
587 static ssize_t show_cpus(cpumask_t mask, char *buf)
588 {
589         ssize_t i = 0;
590         unsigned int cpu;
591
592         for_each_cpu_mask(cpu, mask) {
593                 if (i)
594                         i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
595                 i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
596                 if (i >= (PAGE_SIZE - 5))
597                     break;
598         }
599         i += sprintf(&buf[i], "\n");
600         return i;
601 }
602
603 /**
604  * show_related_cpus - show the CPUs affected by each transition even if
605  * hw coordination is in use
606  */
607 static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
608 {
609         if (cpus_empty(policy->related_cpus))
610                 return show_cpus(policy->cpus, buf);
611         return show_cpus(policy->related_cpus, buf);
612 }
613
614 /**
615  * show_affected_cpus - show the CPUs affected by each transition
616  */
617 static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
618 {
619         return show_cpus(policy->cpus, buf);
620 }
621
622 static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
623                                         const char *buf, size_t count)
624 {
625         unsigned int freq = 0;
626         unsigned int ret;
627
628         if (!policy->governor || !policy->governor->store_setspeed)
629                 return -EINVAL;
630
631         ret = sscanf(buf, "%u", &freq);
632         if (ret != 1)
633                 return -EINVAL;
634
635         policy->governor->store_setspeed(policy, freq);
636
637         return count;
638 }
639
640 static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
641 {
642         if (!policy->governor || !policy->governor->show_setspeed)
643                 return sprintf(buf, "<unsupported>\n");
644
645         return policy->governor->show_setspeed(policy, buf);
646 }
647
648 #define define_one_ro(_name) \
649 static struct freq_attr _name = \
650 __ATTR(_name, 0444, show_##_name, NULL)
651
652 #define define_one_ro0400(_name) \
653 static struct freq_attr _name = \
654 __ATTR(_name, 0400, show_##_name, NULL)
655
656 #define define_one_rw(_name) \
657 static struct freq_attr _name = \
658 __ATTR(_name, 0644, show_##_name, store_##_name)
659
660 define_one_ro0400(cpuinfo_cur_freq);
661 define_one_ro(cpuinfo_min_freq);
662 define_one_ro(cpuinfo_max_freq);
663 define_one_ro(scaling_available_governors);
664 define_one_ro(scaling_driver);
665 define_one_ro(scaling_cur_freq);
666 define_one_ro(related_cpus);
667 define_one_ro(affected_cpus);
668 define_one_rw(scaling_min_freq);
669 define_one_rw(scaling_max_freq);
670 define_one_rw(scaling_governor);
671 define_one_rw(scaling_setspeed);
672
673 static struct attribute *default_attrs[] = {
674         &cpuinfo_min_freq.attr,
675         &cpuinfo_max_freq.attr,
676         &scaling_min_freq.attr,
677         &scaling_max_freq.attr,
678         &affected_cpus.attr,
679         &related_cpus.attr,
680         &scaling_governor.attr,
681         &scaling_driver.attr,
682         &scaling_available_governors.attr,
683         &scaling_setspeed.attr,
684         NULL
685 };
686
/* Map an embedded kobject back to its enclosing struct cpufreq_policy. */
#define to_policy(k)	container_of(k, struct cpufreq_policy, kobj)
/* Map an embedded struct attribute back to its enclosing struct freq_attr. */
#define to_attr(a)	container_of(a, struct freq_attr, attr)
689
690 static ssize_t show(struct kobject *kobj, struct attribute *attr ,char *buf)
691 {
692         struct cpufreq_policy *policy = to_policy(kobj);
693         struct freq_attr *fattr = to_attr(attr);
694         ssize_t ret = -EINVAL;
695         policy = cpufreq_cpu_get(policy->cpu);
696         if (!policy)
697                 goto no_policy;
698
699         if (lock_policy_rwsem_read(policy->cpu) < 0)
700                 goto fail;
701
702         if (fattr->show)
703                 ret = fattr->show(policy, buf);
704         else
705                 ret = -EIO;
706
707         unlock_policy_rwsem_read(policy->cpu);
708 fail:
709         cpufreq_cpu_put(policy);
710 no_policy:
711         return ret;
712 }
713
714 static ssize_t store(struct kobject *kobj, struct attribute *attr,
715                      const char *buf, size_t count)
716 {
717         struct cpufreq_policy *policy = to_policy(kobj);
718         struct freq_attr *fattr = to_attr(attr);
719         ssize_t ret = -EINVAL;
720         policy = cpufreq_cpu_get(policy->cpu);
721         if (!policy)
722                 goto no_policy;
723
724         if (lock_policy_rwsem_write(policy->cpu) < 0)
725                 goto fail;
726
727         if (fattr->store)
728                 ret = fattr->store(policy, buf, count);
729         else
730                 ret = -EIO;
731
732         unlock_policy_rwsem_write(policy->cpu);
733 fail:
734         cpufreq_cpu_put(policy);
735 no_policy:
736         return ret;
737 }
738
739 static void cpufreq_sysfs_release(struct kobject *kobj)
740 {
741         struct cpufreq_policy *policy = to_policy(kobj);
742         dprintk("last reference is dropped\n");
743         complete(&policy->kobj_unregister);
744 }
745
746 static struct sysfs_ops sysfs_ops = {
747         .show   = show,
748         .store  = store,
749 };
750
751 static struct kobj_type ktype_cpufreq = {
752         .sysfs_ops      = &sysfs_ops,
753         .default_attrs  = default_attrs,
754         .release        = cpufreq_sysfs_release,
755 };
756
757
758 /**
759  * cpufreq_add_dev - add a CPU device
760  *
761  * Adds the cpufreq interface for a CPU device.
762  */
763 static int cpufreq_add_dev(struct sys_device *sys_dev)
764 {
765         unsigned int cpu = sys_dev->id;
766         int ret = 0;
767         struct cpufreq_policy new_policy;
768         struct cpufreq_policy *policy;
769         struct freq_attr **drv_attr;
770         struct sys_device *cpu_sys_dev;
771         unsigned long flags;
772         unsigned int j;
773 #ifdef CONFIG_SMP
774         struct cpufreq_policy *managed_policy;
775 #endif
776
777         if (cpu_is_offline(cpu))
778                 return 0;
779
780         cpufreq_debug_disable_ratelimit();
781         dprintk("adding CPU %u\n", cpu);
782
783 #ifdef CONFIG_SMP
784         /* check whether a different CPU already registered this
785          * CPU because it is in the same boat. */
786         policy = cpufreq_cpu_get(cpu);
787         if (unlikely(policy)) {
788                 cpufreq_cpu_put(policy);
789                 cpufreq_debug_enable_ratelimit();
790                 return 0;
791         }
792 #endif
793
794         if (!try_module_get(cpufreq_driver->owner)) {
795                 ret = -EINVAL;
796                 goto module_out;
797         }
798
799         policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
800         if (!policy) {
801                 ret = -ENOMEM;
802                 goto nomem_out;
803         }
804
805         policy->cpu = cpu;
806         policy->cpus = cpumask_of_cpu(cpu);
807
808         /* Initially set CPU itself as the policy_cpu */
809         per_cpu(policy_cpu, cpu) = cpu;
810         lock_policy_rwsem_write(cpu);
811
812         init_completion(&policy->kobj_unregister);
813         INIT_WORK(&policy->update, handle_update);
814
815         /* Set governor before ->init, so that driver could check it */
816         policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
817         /* call driver. From then on the cpufreq must be able
818          * to accept all calls to ->verify and ->setpolicy for this CPU
819          */
820         ret = cpufreq_driver->init(policy);
821         if (ret) {
822                 dprintk("initialization failed\n");
823                 goto err_out;
824         }
825         policy->user_policy.min = policy->cpuinfo.min_freq;
826         policy->user_policy.max = policy->cpuinfo.max_freq;
827
828 #ifdef CONFIG_SMP
829
830 #ifdef CONFIG_HOTPLUG_CPU
831         if (cpufreq_cpu_governor[cpu]){
832                 policy->governor = cpufreq_cpu_governor[cpu];
833                 dprintk("Restoring governor %s for cpu %d\n",
834                        policy->governor->name, cpu);
835         }
836 #endif
837
838         for_each_cpu_mask(j, policy->cpus) {
839                 if (cpu == j)
840                         continue;
841
842                 /* check for existing affected CPUs.  They may not be aware
843                  * of it due to CPU Hotplug.
844                  */
845                 managed_policy = cpufreq_cpu_get(j);            // FIXME: Where is this released?  What about error paths?
846                 if (unlikely(managed_policy)) {
847
848                         /* Set proper policy_cpu */
849                         unlock_policy_rwsem_write(cpu);
850                         per_cpu(policy_cpu, cpu) = managed_policy->cpu;
851
852                         if (lock_policy_rwsem_write(cpu) < 0)
853                                 goto err_out_driver_exit;
854
855                         spin_lock_irqsave(&cpufreq_driver_lock, flags);
856                         managed_policy->cpus = policy->cpus;
857                         cpufreq_cpu_data[cpu] = managed_policy;
858                         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
859
860                         dprintk("CPU already managed, adding link\n");
861                         ret = sysfs_create_link(&sys_dev->kobj,
862                                                 &managed_policy->kobj,
863                                                 "cpufreq");
864                         if (ret)
865                                 goto err_out_driver_exit;
866
867                         cpufreq_debug_enable_ratelimit();
868                         ret = 0;
869                         goto err_out_driver_exit; /* call driver->exit() */
870                 }
871         }
872 #endif
873         memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
874
875         /* prepare interface data */
876         ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj,
877                                    "cpufreq");
878         if (ret)
879                 goto err_out_driver_exit;
880
881         /* set up files for this cpu device */
882         drv_attr = cpufreq_driver->attr;
883         while ((drv_attr) && (*drv_attr)) {
884                 ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
885                 if (ret)
886                         goto err_out_driver_exit;
887                 drv_attr++;
888         }
889         if (cpufreq_driver->get) {
890                 ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
891                 if (ret)
892                         goto err_out_driver_exit;
893         }
894         if (cpufreq_driver->target) {
895                 ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
896                 if (ret)
897                         goto err_out_driver_exit;
898         }
899
900         spin_lock_irqsave(&cpufreq_driver_lock, flags);
901         for_each_cpu_mask(j, policy->cpus) {
902                 cpufreq_cpu_data[j] = policy;
903                 per_cpu(policy_cpu, j) = policy->cpu;
904         }
905         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
906
907         /* symlink affected CPUs */
908         for_each_cpu_mask(j, policy->cpus) {
909                 if (j == cpu)
910                         continue;
911                 if (!cpu_online(j))
912                         continue;
913
914                 dprintk("CPU %u already managed, adding link\n", j);
915                 cpufreq_cpu_get(cpu);
916                 cpu_sys_dev = get_cpu_sysdev(j);
917                 ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
918                                         "cpufreq");
919                 if (ret)
920                         goto err_out_unregister;
921         }
922
923         policy->governor = NULL; /* to assure that the starting sequence is
924                                   * run in cpufreq_set_policy */
925
926         /* set default policy */
927         ret = __cpufreq_set_policy(policy, &new_policy);
928         policy->user_policy.policy = policy->policy;
929         policy->user_policy.governor = policy->governor;
930
931         if (ret) {
932                 dprintk("setting policy failed\n");
933                 goto err_out_unregister;
934         }
935
936         unlock_policy_rwsem_write(cpu);
937
938         kobject_uevent(&policy->kobj, KOBJ_ADD);
939         module_put(cpufreq_driver->owner);
940         dprintk("initialization complete\n");
941         cpufreq_debug_enable_ratelimit();
942
943         return 0;
944
945
946 err_out_unregister:
947         spin_lock_irqsave(&cpufreq_driver_lock, flags);
948         for_each_cpu_mask(j, policy->cpus)
949                 cpufreq_cpu_data[j] = NULL;
950         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
951
952         kobject_put(&policy->kobj);
953         wait_for_completion(&policy->kobj_unregister);
954
955 err_out_driver_exit:
956         if (cpufreq_driver->exit)
957                 cpufreq_driver->exit(policy);
958
959 err_out:
960         unlock_policy_rwsem_write(cpu);
961         kfree(policy);
962
963 nomem_out:
964         module_put(cpufreq_driver->owner);
965 module_out:
966         cpufreq_debug_enable_ratelimit();
967         return ret;
968 }
969
970
971 /**
972  * __cpufreq_remove_dev - remove a CPU device
973  *
974  * Removes the cpufreq interface for a CPU device.
975  * Caller should already have policy_rwsem in write mode for this CPU.
976  * This routine frees the rwsem before returning.
977  */
978 static int __cpufreq_remove_dev(struct sys_device *sys_dev)
979 {
980         unsigned int cpu = sys_dev->id;
981         unsigned long flags;
982         struct cpufreq_policy *data;
983 #ifdef CONFIG_SMP
984         struct sys_device *cpu_sys_dev;
985         unsigned int j;
986 #endif
987
988         cpufreq_debug_disable_ratelimit();
989         dprintk("unregistering CPU %u\n", cpu);
990
991         spin_lock_irqsave(&cpufreq_driver_lock, flags);
992         data = cpufreq_cpu_data[cpu];
993
994         if (!data) {
995                 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
996                 cpufreq_debug_enable_ratelimit();
997                 unlock_policy_rwsem_write(cpu);
998                 return -EINVAL;
999         }
1000         cpufreq_cpu_data[cpu] = NULL;
1001
1002
1003 #ifdef CONFIG_SMP
1004         /* if this isn't the CPU which is the parent of the kobj, we
1005          * only need to unlink, put and exit
1006          */
1007         if (unlikely(cpu != data->cpu)) {
1008                 dprintk("removing link\n");
1009                 cpu_clear(cpu, data->cpus);
1010                 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1011                 sysfs_remove_link(&sys_dev->kobj, "cpufreq");
1012                 cpufreq_cpu_put(data);
1013                 cpufreq_debug_enable_ratelimit();
1014                 unlock_policy_rwsem_write(cpu);
1015                 return 0;
1016         }
1017 #endif
1018
1019 #ifdef CONFIG_SMP
1020
1021 #ifdef CONFIG_HOTPLUG_CPU
1022         cpufreq_cpu_governor[cpu] = data->governor;
1023 #endif
1024
1025         /* if we have other CPUs still registered, we need to unlink them,
1026          * or else wait_for_completion below will lock up. Clean the
1027          * cpufreq_cpu_data[] while holding the lock, and remove the sysfs
1028          * links afterwards.
1029          */
1030         if (unlikely(cpus_weight(data->cpus) > 1)) {
1031                 for_each_cpu_mask(j, data->cpus) {
1032                         if (j == cpu)
1033                                 continue;
1034                         cpufreq_cpu_data[j] = NULL;
1035                 }
1036         }
1037
1038         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1039
1040         if (unlikely(cpus_weight(data->cpus) > 1)) {
1041                 for_each_cpu_mask(j, data->cpus) {
1042                         if (j == cpu)
1043                                 continue;
1044                         dprintk("removing link for cpu %u\n", j);
1045 #ifdef CONFIG_HOTPLUG_CPU
1046                         cpufreq_cpu_governor[j] = data->governor;
1047 #endif
1048                         cpu_sys_dev = get_cpu_sysdev(j);
1049                         sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq");
1050                         cpufreq_cpu_put(data);
1051                 }
1052         }
1053 #else
1054         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1055 #endif
1056
1057         if (cpufreq_driver->target)
1058                 __cpufreq_governor(data, CPUFREQ_GOV_STOP);
1059
1060         unlock_policy_rwsem_write(cpu);
1061
1062         kobject_put(&data->kobj);
1063
1064         /* we need to make sure that the underlying kobj is actually
1065          * not referenced anymore by anybody before we proceed with
1066          * unloading.
1067          */
1068         dprintk("waiting for dropping of refcount\n");
1069         wait_for_completion(&data->kobj_unregister);
1070         dprintk("wait complete\n");
1071
1072         if (cpufreq_driver->exit)
1073                 cpufreq_driver->exit(data);
1074
1075         kfree(data);
1076
1077         cpufreq_debug_enable_ratelimit();
1078         return 0;
1079 }
1080
1081
1082 static int cpufreq_remove_dev(struct sys_device *sys_dev)
1083 {
1084         unsigned int cpu = sys_dev->id;
1085         int retval;
1086
1087         if (cpu_is_offline(cpu))
1088                 return 0;
1089
1090         if (unlikely(lock_policy_rwsem_write(cpu)))
1091                 BUG();
1092
1093         retval = __cpufreq_remove_dev(sys_dev);
1094         return retval;
1095 }
1096
1097
1098 static void handle_update(struct work_struct *work)
1099 {
1100         struct cpufreq_policy *policy =
1101                 container_of(work, struct cpufreq_policy, update);
1102         unsigned int cpu = policy->cpu;
1103         dprintk("handle_update for cpu %u called\n", cpu);
1104         cpufreq_update_policy(cpu);
1105 }
1106
1107 /**
1108  *      cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're in deep trouble.
1109  *      @cpu: cpu number
1110  *      @old_freq: CPU frequency the kernel thinks the CPU runs at
1111  *      @new_freq: CPU frequency the CPU actually runs at
1112  *
1113  *      We adjust to current frequency first, and need to clean up later. So either call
1114  *      to cpufreq_update_policy() or schedule handle_update()).
1115  */
1116 static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
1117                                 unsigned int new_freq)
1118 {
1119         struct cpufreq_freqs freqs;
1120
1121         dprintk("Warning: CPU frequency out of sync: cpufreq and timing "
1122                "core thinks of %u, is %u kHz.\n", old_freq, new_freq);
1123
1124         freqs.cpu = cpu;
1125         freqs.old = old_freq;
1126         freqs.new = new_freq;
1127         cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1128         cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1129 }
1130
1131
1132 /**
1133  * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
1134  * @cpu: CPU number
1135  *
1136  * This is the last known freq, without actually getting it from the driver.
1137  * Return value will be same as what is shown in scaling_cur_freq in sysfs.
1138  */
1139 unsigned int cpufreq_quick_get(unsigned int cpu)
1140 {
1141         struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1142         unsigned int ret_freq = 0;
1143
1144         if (policy) {
1145                 ret_freq = policy->cur;
1146                 cpufreq_cpu_put(policy);
1147         }
1148
1149         return ret_freq;
1150 }
1151 EXPORT_SYMBOL(cpufreq_quick_get);
1152
1153
1154 static unsigned int __cpufreq_get(unsigned int cpu)
1155 {
1156         struct cpufreq_policy *policy = cpufreq_cpu_data[cpu];
1157         unsigned int ret_freq = 0;
1158
1159         if (!cpufreq_driver->get)
1160                 return ret_freq;
1161
1162         ret_freq = cpufreq_driver->get(cpu);
1163
1164         if (ret_freq && policy->cur &&
1165                 !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1166                 /* verify no discrepancy between actual and
1167                                         saved value exists */
1168                 if (unlikely(ret_freq != policy->cur)) {
1169                         cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
1170                         schedule_work(&policy->update);
1171                 }
1172         }
1173
1174         return ret_freq;
1175 }
1176
/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Get the CPU current (static) CPU frequency.  The policy is pinned and
 * its rwsem is held for reading while the driver is queried.
 * Returns 0 if no policy exists for @cpu or the rwsem cannot be taken.
 */
unsigned int cpufreq_get(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	unsigned int freq;

	if (!policy)
		return 0;

	if (unlikely(lock_policy_rwsem_read(cpu))) {
		cpufreq_cpu_put(policy);
		return 0;
	}

	freq = __cpufreq_get(cpu);
	unlock_policy_rwsem_read(cpu);

	cpufreq_cpu_put(policy);
	return freq;
}
EXPORT_SYMBOL(cpufreq_get);
1204
1205
1206 /**
1207  *      cpufreq_suspend - let the low level driver prepare for suspend
1208  */
1209
1210 static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg)
1211 {
1212         int cpu = sysdev->id;
1213         int ret = 0;
1214         unsigned int cur_freq = 0;
1215         struct cpufreq_policy *cpu_policy;
1216
1217         dprintk("suspending cpu %u\n", cpu);
1218
1219         if (!cpu_online(cpu))
1220                 return 0;
1221
1222         /* we may be lax here as interrupts are off. Nonetheless
1223          * we need to grab the correct cpu policy, as to check
1224          * whether we really run on this CPU.
1225          */
1226
1227         cpu_policy = cpufreq_cpu_get(cpu);
1228         if (!cpu_policy)
1229                 return -EINVAL;
1230
1231         /* only handle each CPU group once */
1232         if (unlikely(cpu_policy->cpu != cpu))
1233                 goto out;
1234
1235         if (cpufreq_driver->suspend) {
1236                 ret = cpufreq_driver->suspend(cpu_policy, pmsg);
1237                 if (ret) {
1238                         printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
1239                                         "step on CPU %u\n", cpu_policy->cpu);
1240                         goto out;
1241                 }
1242         }
1243
1244         if (cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)
1245                 goto out;
1246
1247         if (cpufreq_driver->get)
1248                 cur_freq = cpufreq_driver->get(cpu_policy->cpu);
1249
1250         if (!cur_freq || !cpu_policy->cur) {
1251                 printk(KERN_ERR "cpufreq: suspend failed to assert current "
1252                        "frequency is what timing core thinks it is.\n");
1253                 goto out;
1254         }
1255
1256         if (unlikely(cur_freq != cpu_policy->cur)) {
1257                 struct cpufreq_freqs freqs;
1258
1259                 if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
1260                         dprintk("Warning: CPU frequency is %u, "
1261                                "cpufreq assumed %u kHz.\n",
1262                                cur_freq, cpu_policy->cur);
1263
1264                 freqs.cpu = cpu;
1265                 freqs.old = cpu_policy->cur;
1266                 freqs.new = cur_freq;
1267
1268                 srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
1269                                     CPUFREQ_SUSPENDCHANGE, &freqs);
1270                 adjust_jiffies(CPUFREQ_SUSPENDCHANGE, &freqs);
1271
1272                 cpu_policy->cur = cur_freq;
1273         }
1274
1275 out:
1276         cpufreq_cpu_put(cpu_policy);
1277         return ret;
1278 }
1279
1280 /**
1281  *      cpufreq_resume -  restore proper CPU frequency handling after resume
1282  *
1283  *      1.) resume CPUfreq hardware support (cpufreq_driver->resume())
1284  *      2.) if ->target and !CPUFREQ_CONST_LOOPS: verify we're in sync
1285  *      3.) schedule call cpufreq_update_policy() ASAP as interrupts are
1286  *          restored.
1287  */
1288 static int cpufreq_resume(struct sys_device *sysdev)
1289 {
1290         int cpu = sysdev->id;
1291         int ret = 0;
1292         struct cpufreq_policy *cpu_policy;
1293
1294         dprintk("resuming cpu %u\n", cpu);
1295
1296         if (!cpu_online(cpu))
1297                 return 0;
1298
1299         /* we may be lax here as interrupts are off. Nonetheless
1300          * we need to grab the correct cpu policy, as to check
1301          * whether we really run on this CPU.
1302          */
1303
1304         cpu_policy = cpufreq_cpu_get(cpu);
1305         if (!cpu_policy)
1306                 return -EINVAL;
1307
1308         /* only handle each CPU group once */
1309         if (unlikely(cpu_policy->cpu != cpu))
1310                 goto fail;
1311
1312         if (cpufreq_driver->resume) {
1313                 ret = cpufreq_driver->resume(cpu_policy);
1314                 if (ret) {
1315                         printk(KERN_ERR "cpufreq: resume failed in ->resume "
1316                                         "step on CPU %u\n", cpu_policy->cpu);
1317                         goto fail;
1318                 }
1319         }
1320
1321         if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1322                 unsigned int cur_freq = 0;
1323
1324                 if (cpufreq_driver->get)
1325                         cur_freq = cpufreq_driver->get(cpu_policy->cpu);
1326
1327                 if (!cur_freq || !cpu_policy->cur) {
1328                         printk(KERN_ERR "cpufreq: resume failed to assert "
1329                                         "current frequency is what timing core "
1330                                         "thinks it is.\n");
1331                         goto out;
1332                 }
1333
1334                 if (unlikely(cur_freq != cpu_policy->cur)) {
1335                         struct cpufreq_freqs freqs;
1336
1337                         if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
1338                                 dprintk("Warning: CPU frequency "
1339                                        "is %u, cpufreq assumed %u kHz.\n",
1340                                        cur_freq, cpu_policy->cur);
1341
1342                         freqs.cpu = cpu;
1343                         freqs.old = cpu_policy->cur;
1344                         freqs.new = cur_freq;
1345
1346                         srcu_notifier_call_chain(
1347                                         &cpufreq_transition_notifier_list,
1348                                         CPUFREQ_RESUMECHANGE, &freqs);
1349                         adjust_jiffies(CPUFREQ_RESUMECHANGE, &freqs);
1350
1351                         cpu_policy->cur = cur_freq;
1352                 }
1353         }
1354
1355 out:
1356         schedule_work(&cpu_policy->update);
1357 fail:
1358         cpufreq_cpu_put(cpu_policy);
1359         return ret;
1360 }
1361
1362 static struct sysdev_driver cpufreq_sysdev_driver = {
1363         .add            = cpufreq_add_dev,
1364         .remove         = cpufreq_remove_dev,
1365         .suspend        = cpufreq_suspend,
1366         .resume         = cpufreq_resume,
1367 };
1368
1369
1370 /*********************************************************************
1371  *                     NOTIFIER LISTS INTERFACE                      *
1372  *********************************************************************/
1373
1374 /**
1375  *      cpufreq_register_notifier - register a driver with cpufreq
1376  *      @nb: notifier function to register
1377  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1378  *
1379  *      Add a driver to one of two lists: either a list of drivers that
1380  *      are notified about clock rate changes (once before and once after
1381  *      the transition), or a list of drivers that are notified about
1382  *      changes in cpufreq policy.
1383  *
1384  *      This function may sleep, and has the same return conditions as
1385  *      blocking_notifier_chain_register.
1386  */
1387 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
1388 {
1389         int ret;
1390
1391         WARN_ON(!init_cpufreq_transition_notifier_list_called);
1392
1393         switch (list) {
1394         case CPUFREQ_TRANSITION_NOTIFIER:
1395                 ret = srcu_notifier_chain_register(
1396                                 &cpufreq_transition_notifier_list, nb);
1397                 break;
1398         case CPUFREQ_POLICY_NOTIFIER:
1399                 ret = blocking_notifier_chain_register(
1400                                 &cpufreq_policy_notifier_list, nb);
1401                 break;
1402         default:
1403                 ret = -EINVAL;
1404         }
1405
1406         return ret;
1407 }
1408 EXPORT_SYMBOL(cpufreq_register_notifier);
1409
1410
1411 /**
1412  *      cpufreq_unregister_notifier - unregister a driver with cpufreq
1413  *      @nb: notifier block to be unregistered
1414  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1415  *
1416  *      Remove a driver from the CPU frequency notifier list.
1417  *
1418  *      This function may sleep, and has the same return conditions as
1419  *      blocking_notifier_chain_unregister.
1420  */
1421 int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
1422 {
1423         int ret;
1424
1425         switch (list) {
1426         case CPUFREQ_TRANSITION_NOTIFIER:
1427                 ret = srcu_notifier_chain_unregister(
1428                                 &cpufreq_transition_notifier_list, nb);
1429                 break;
1430         case CPUFREQ_POLICY_NOTIFIER:
1431                 ret = blocking_notifier_chain_unregister(
1432                                 &cpufreq_policy_notifier_list, nb);
1433                 break;
1434         default:
1435                 ret = -EINVAL;
1436         }
1437
1438         return ret;
1439 }
1440 EXPORT_SYMBOL(cpufreq_unregister_notifier);
1441
1442
1443 /*********************************************************************
1444  *                              GOVERNORS                            *
1445  *********************************************************************/
1446
1447
1448 int __cpufreq_driver_target(struct cpufreq_policy *policy,
1449                             unsigned int target_freq,
1450                             unsigned int relation)
1451 {
1452         int retval = -EINVAL;
1453
1454         dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
1455                 target_freq, relation);
1456         if (cpu_online(policy->cpu) && cpufreq_driver->target)
1457                 retval = cpufreq_driver->target(policy, target_freq, relation);
1458
1459         return retval;
1460 }
1461 EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
1462
1463 int cpufreq_driver_target(struct cpufreq_policy *policy,
1464                           unsigned int target_freq,
1465                           unsigned int relation)
1466 {
1467         int ret;
1468
1469         policy = cpufreq_cpu_get(policy->cpu);
1470         if (!policy)
1471                 return -EINVAL;
1472
1473         if (unlikely(lock_policy_rwsem_write(policy->cpu)))
1474                 return -EINVAL;
1475
1476         ret = __cpufreq_driver_target(policy, target_freq, relation);
1477
1478         unlock_policy_rwsem_write(policy->cpu);
1479
1480         cpufreq_cpu_put(policy);
1481         return ret;
1482 }
1483 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
1484
1485 int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
1486 {
1487         int ret = 0;
1488
1489         policy = cpufreq_cpu_get(policy->cpu);
1490         if (!policy)
1491                 return -EINVAL;
1492
1493         if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
1494                 ret = cpufreq_driver->getavg(policy->cpu);
1495
1496         cpufreq_cpu_put(policy);
1497         return ret;
1498 }
1499 EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
1500
1501 /*
1502  * when "event" is CPUFREQ_GOV_LIMITS
1503  */
1504
1505 static int __cpufreq_governor(struct cpufreq_policy *policy,
1506                                         unsigned int event)
1507 {
1508         int ret;
1509
1510         /* Only must be defined when default governor is known to have latency
1511            restrictions, like e.g. conservative or ondemand.
1512            That this is the case is already ensured in Kconfig
1513         */
1514 #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
1515         struct cpufreq_governor *gov = &cpufreq_gov_performance;
1516 #else
1517         struct cpufreq_governor *gov = NULL;
1518 #endif
1519
1520         if (policy->governor->max_transition_latency &&
1521             policy->cpuinfo.transition_latency >
1522             policy->governor->max_transition_latency) {
1523                 if (!gov)
1524                         return -EINVAL;
1525                 else {
1526                         printk(KERN_WARNING "%s governor failed, too long"
1527                                " transition latency of HW, fallback"
1528                                " to %s governor\n",
1529                                policy->governor->name,
1530                                gov->name);
1531                         policy->governor = gov;
1532                 }
1533         }
1534
1535         if (!try_module_get(policy->governor->owner))
1536                 return -EINVAL;
1537
1538         dprintk("__cpufreq_governor for CPU %u, event %u\n",
1539                                                 policy->cpu, event);
1540         ret = policy->governor->governor(policy, event);
1541
1542         /* we keep one module reference alive for
1543                         each CPU governed by this CPU */
1544         if ((event != CPUFREQ_GOV_START) || ret)
1545                 module_put(policy->governor->owner);
1546         if ((event == CPUFREQ_GOV_STOP) && !ret)
1547                 module_put(policy->governor->owner);
1548
1549         return ret;
1550 }
1551
1552
1553 int cpufreq_register_governor(struct cpufreq_governor *governor)
1554 {
1555         int err;
1556
1557         if (!governor)
1558                 return -EINVAL;
1559
1560         mutex_lock(&cpufreq_governor_mutex);
1561
1562         err = -EBUSY;
1563         if (__find_governor(governor->name) == NULL) {
1564                 err = 0;
1565                 list_add(&governor->governor_list, &cpufreq_governor_list);
1566         }
1567
1568         mutex_unlock(&cpufreq_governor_mutex);
1569         return err;
1570 }
1571 EXPORT_SYMBOL_GPL(cpufreq_register_governor);
1572
1573
1574 void cpufreq_unregister_governor(struct cpufreq_governor *governor)
1575 {
1576         if (!governor)
1577                 return;
1578
1579         mutex_lock(&cpufreq_governor_mutex);
1580         list_del(&governor->governor_list);
1581         mutex_unlock(&cpufreq_governor_mutex);
1582         return;
1583 }
1584 EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
1585
1586
1587
1588 /*********************************************************************
1589  *                          POLICY INTERFACE                         *
1590  *********************************************************************/
1591
1592 /**
1593  * cpufreq_get_policy - get the current cpufreq_policy
1594  * @policy: struct cpufreq_policy into which the current cpufreq_policy is written
1595  *
1596  * Reads the current cpufreq policy.
1597  */
1598 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
1599 {
1600         struct cpufreq_policy *cpu_policy;
1601         if (!policy)
1602                 return -EINVAL;
1603
1604         cpu_policy = cpufreq_cpu_get(cpu);
1605         if (!cpu_policy)
1606                 return -EINVAL;
1607
1608         memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
1609
1610         cpufreq_cpu_put(cpu_policy);
1611         return 0;
1612 }
1613 EXPORT_SYMBOL(cpufreq_get_policy);
1614
1615
1616 /*
1617  * data   : current policy.
1618  * policy : policy to be set.
1619  */
1620 static int __cpufreq_set_policy(struct cpufreq_policy *data,
1621                                 struct cpufreq_policy *policy)
1622 {
1623         int ret = 0;
1624
1625         cpufreq_debug_disable_ratelimit();
1626         dprintk("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
1627                 policy->min, policy->max);
1628
1629         memcpy(&policy->cpuinfo, &data->cpuinfo,
1630                                 sizeof(struct cpufreq_cpuinfo));
1631
1632         if (policy->min > data->max || policy->max < data->min) {
1633                 ret = -EINVAL;
1634                 goto error_out;
1635         }
1636
1637         /* verify the cpu speed can be set within this limit */
1638         ret = cpufreq_driver->verify(policy);
1639         if (ret)
1640                 goto error_out;
1641
1642         /* adjust if necessary - all reasons */
1643         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1644                         CPUFREQ_ADJUST, policy);
1645
1646         /* adjust if necessary - hardware incompatibility*/
1647         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1648                         CPUFREQ_INCOMPATIBLE, policy);
1649
1650         /* verify the cpu speed can be set within this limit,
1651            which might be different to the first one */
1652         ret = cpufreq_driver->verify(policy);
1653         if (ret)
1654                 goto error_out;
1655
1656         /* notification of the new policy */
1657         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1658                         CPUFREQ_NOTIFY, policy);
1659
1660         data->min = policy->min;
1661         data->max = policy->max;
1662
1663         dprintk("new min and max freqs are %u - %u kHz\n",
1664                                         data->min, data->max);
1665
1666         if (cpufreq_driver->setpolicy) {
1667                 data->policy = policy->policy;
1668                 dprintk("setting range\n");
1669                 ret = cpufreq_driver->setpolicy(policy);
1670         } else {
1671                 if (policy->governor != data->governor) {
1672                         /* save old, working values */
1673                         struct cpufreq_governor *old_gov = data->governor;
1674
1675                         dprintk("governor switch\n");
1676
1677                         /* end old governor */
1678                         if (data->governor)
1679                                 __cpufreq_governor(data, CPUFREQ_GOV_STOP);
1680
1681                         /* start new governor */
1682                         data->governor = policy->governor;
1683                         if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
1684                                 /* new governor failed, so re-start old one */
1685                                 dprintk("starting governor %s failed\n",
1686                                                         data->governor->name);
1687                                 if (old_gov) {
1688                                         data->governor = old_gov;
1689                                         __cpufreq_governor(data,
1690                                                            CPUFREQ_GOV_START);
1691                                 }
1692                                 ret = -EINVAL;
1693                                 goto error_out;
1694                         }
1695                         /* might be a policy change, too, so fall through */
1696                 }
1697                 dprintk("governor: change or update limits\n");
1698                 __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
1699         }
1700
1701 error_out:
1702         cpufreq_debug_enable_ratelimit();
1703         return ret;
1704 }
1705
1706 /**
1707  *      cpufreq_update_policy - re-evaluate an existing cpufreq policy
1708  *      @cpu: CPU which shall be re-evaluated
1709  *
1710  *      Usefull for policy notifiers which have different necessities
1711  *      at different times.
1712  */
1713 int cpufreq_update_policy(unsigned int cpu)
1714 {
1715         struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
1716         struct cpufreq_policy policy;
1717         int ret = 0;
1718
1719         if (!data)
1720                 return -ENODEV;
1721
1722         if (unlikely(lock_policy_rwsem_write(cpu)))
1723                 return -EINVAL;
1724
1725         dprintk("updating policy for CPU %u\n", cpu);
1726         memcpy(&policy, data, sizeof(struct cpufreq_policy));
1727         policy.min = data->user_policy.min;
1728         policy.max = data->user_policy.max;
1729         policy.policy = data->user_policy.policy;
1730         policy.governor = data->user_policy.governor;
1731
1732         /* BIOS might change freq behind our back
1733           -> ask driver for current freq and notify governors about a change */
1734         if (cpufreq_driver->get) {
1735                 policy.cur = cpufreq_driver->get(cpu);
1736                 if (!data->cur) {
1737                         dprintk("Driver did not initialize current freq");
1738                         data->cur = policy.cur;
1739                 } else {
1740                         if (data->cur != policy.cur)
1741                                 cpufreq_out_of_sync(cpu, data->cur,
1742                                                                 policy.cur);
1743                 }
1744         }
1745
1746         ret = __cpufreq_set_policy(data, &policy);
1747
1748         unlock_policy_rwsem_write(cpu);
1749
1750         cpufreq_cpu_put(data);
1751         return ret;
1752 }
1753 EXPORT_SYMBOL(cpufreq_update_policy);
1754
1755 static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
1756                                         unsigned long action, void *hcpu)
1757 {
1758         unsigned int cpu = (unsigned long)hcpu;
1759         struct sys_device *sys_dev;
1760
1761         sys_dev = get_cpu_sysdev(cpu);
1762         if (sys_dev) {
1763                 switch (action) {
1764                 case CPU_ONLINE:
1765                 case CPU_ONLINE_FROZEN:
1766                         cpufreq_add_dev(sys_dev);
1767                         break;
1768                 case CPU_DOWN_PREPARE:
1769                 case CPU_DOWN_PREPARE_FROZEN:
1770                         if (unlikely(lock_policy_rwsem_write(cpu)))
1771                                 BUG();
1772
1773                         __cpufreq_remove_dev(sys_dev);
1774                         break;
1775                 case CPU_DOWN_FAILED:
1776                 case CPU_DOWN_FAILED_FROZEN:
1777                         cpufreq_add_dev(sys_dev);
1778                         break;
1779                 }
1780         }
1781         return NOTIFY_OK;
1782 }
1783
1784 static struct notifier_block __refdata cpufreq_cpu_notifier =
1785 {
1786     .notifier_call = cpufreq_cpu_callback,
1787 };
1788
1789 /*********************************************************************
1790  *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
1791  *********************************************************************/
1792
1793 /**
1794  * cpufreq_register_driver - register a CPU Frequency driver
1795  * @driver_data: A struct cpufreq_driver containing the values#
1796  * submitted by the CPU Frequency driver.
1797  *
1798  *   Registers a CPU Frequency driver to this core code. This code
1799  * returns zero on success, -EBUSY when another driver got here first
1800  * (and isn't unregistered in the meantime).
1801  *
1802  */
1803 int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1804 {
1805         unsigned long flags;
1806         int ret;
1807
1808         if (!driver_data || !driver_data->verify || !driver_data->init ||
1809             ((!driver_data->setpolicy) && (!driver_data->target)))
1810                 return -EINVAL;
1811
1812         dprintk("trying to register driver %s\n", driver_data->name);
1813
1814         if (driver_data->setpolicy)
1815                 driver_data->flags |= CPUFREQ_CONST_LOOPS;
1816
1817         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1818         if (cpufreq_driver) {
1819                 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1820                 return -EBUSY;
1821         }
1822         cpufreq_driver = driver_data;
1823         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1824
1825         ret = sysdev_driver_register(&cpu_sysdev_class,&cpufreq_sysdev_driver);
1826
1827         if ((!ret) && !(cpufreq_driver->flags & CPUFREQ_STICKY)) {
1828                 int i;
1829                 ret = -ENODEV;
1830
1831                 /* check for at least one working CPU */
1832                 for (i=0; i<NR_CPUS; i++)
1833                         if (cpufreq_cpu_data[i])
1834                                 ret = 0;
1835
1836                 /* if all ->init() calls failed, unregister */
1837                 if (ret) {
1838                         dprintk("no CPU initialized for driver %s\n",
1839                                                         driver_data->name);
1840                         sysdev_driver_unregister(&cpu_sysdev_class,
1841                                                 &cpufreq_sysdev_driver);
1842
1843                         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1844                         cpufreq_driver = NULL;
1845                         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1846                 }
1847         }
1848
1849         if (!ret) {
1850                 register_hotcpu_notifier(&cpufreq_cpu_notifier);
1851                 dprintk("driver %s up and running\n", driver_data->name);
1852                 cpufreq_debug_enable_ratelimit();
1853         }
1854
1855         return ret;
1856 }
1857 EXPORT_SYMBOL_GPL(cpufreq_register_driver);
1858
1859
1860 /**
1861  * cpufreq_unregister_driver - unregister the current CPUFreq driver
1862  *
1863  *    Unregister the current CPUFreq driver. Only call this if you have
1864  * the right to do so, i.e. if you have succeeded in initialising before!
1865  * Returns zero if successful, and -EINVAL if the cpufreq_driver is
1866  * currently not initialised.
1867  */
1868 int cpufreq_unregister_driver(struct cpufreq_driver *driver)
1869 {
1870         unsigned long flags;
1871
1872         cpufreq_debug_disable_ratelimit();
1873
1874         if (!cpufreq_driver || (driver != cpufreq_driver)) {
1875                 cpufreq_debug_enable_ratelimit();
1876                 return -EINVAL;
1877         }
1878
1879         dprintk("unregistering driver %s\n", driver->name);
1880
1881         sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver);
1882         unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
1883
1884         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1885         cpufreq_driver = NULL;
1886         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1887
1888         return 0;
1889 }
1890 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
1891
1892 static int __init cpufreq_core_init(void)
1893 {
1894         int cpu;
1895
1896         for_each_possible_cpu(cpu) {
1897                 per_cpu(policy_cpu, cpu) = -1;
1898                 init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
1899         }
1900         return 0;
1901 }
1902
1903 core_initcall(cpufreq_core_init);