/*
 *  linux/drivers/cpufreq/cpufreq.c
 *
 *  Copyright (C) 2001 Russell King
 *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
 *
 *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
 *      Added handling for CPU hotplug
 *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
 *      Fix handling for CPU hotplug -- affected CPUs
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/completion.h>
#include <linux/mutex.h>
#include <linux/syscore_ops.h>

#include <trace/events/power.h>

/**
 * The "cpufreq driver" - the arch- or hardware-dependent low
 * level driver of CPUFreq support, and its spinlock. This lock
 * also protects the cpufreq_cpu_data array.
 */
static struct cpufreq_driver *cpufreq_driver;
static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
#ifdef CONFIG_HOTPLUG_CPU
/* This one keeps track of the previously set governor of a removed CPU */
static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
#endif
static DEFINE_SPINLOCK(cpufreq_driver_lock);
/*
 * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
 * all cpufreq/hotplug/workqueue/etc related lock issues.
 *
 * The rules for this semaphore:
 * - Any routine that wants to read from the policy structure will
 *   do a down_read on this semaphore.
 * - Any routine that will write to the policy structure and/or may take away
 *   the policy altogether (eg. CPU hotplug), will hold this lock in write
 *   mode before doing so.
 *
 * Additional rules:
 * - All holders of the lock should check to make sure that the CPU they
 *   are concerned with is online after they get the lock.
 * - Governor routines that can be called in the cpufreq hotplug path should
 *   not take this semaphore, as the top-level hotplug notifier handler
 *   takes it.
 * - The lock should not be held across
 *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
 */
static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);

#define lock_policy_rwsem(mode, cpu)                                    \
static int lock_policy_rwsem_##mode                                     \
(int cpu)                                                               \
{                                                                       \
        int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);              \
        BUG_ON(policy_cpu == -1);                                       \
        down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));            \
        if (unlikely(!cpu_online(cpu))) {                               \
                up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));      \
                return -1;                                              \
        }                                                               \
                                                                        \
        return 0;                                                       \
}

lock_policy_rwsem(read, cpu);

lock_policy_rwsem(write, cpu);

static void unlock_policy_rwsem_read(int cpu)
{
        int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
        BUG_ON(policy_cpu == -1);
        up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
}

static void unlock_policy_rwsem_write(int cpu)
{
        int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
        BUG_ON(policy_cpu == -1);
        up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
}
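
/*
 * Typical usage of the helpers above (a hypothetical sketch, not a caller
 * that exists in this file): take the per-policy semaphore, bail out if
 * the CPU went offline while we waited, and always pair lock with unlock:
 *
 *      if (lock_policy_rwsem_read(cpu) < 0)
 *              return -EINVAL;
 *      ret = do_something_with(per_cpu(cpufreq_cpu_data, cpu));
 *      unlock_policy_rwsem_read(cpu);
 *
 * do_something_with() is a placeholder for the caller's read-side work.
 */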


/* internal prototypes */
static int __cpufreq_governor(struct cpufreq_policy *policy,
                unsigned int event);
static unsigned int __cpufreq_get(unsigned int cpu);
static void handle_update(struct work_struct *work);

/**
 * Two notifier lists: the "policy" list is involved in the
 * validation process for a new CPU frequency policy; the
 * "transition" list is for kernel code that needs to handle
 * changes to devices when the CPU clock speed changes.
 * Each list is protected by its own notifier head (blocking
 * for the policy list, SRCU for the transition list).
 */
static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
static struct srcu_notifier_head cpufreq_transition_notifier_list;

static bool init_cpufreq_transition_notifier_list_called;
static int __init init_cpufreq_transition_notifier_list(void)
{
        srcu_init_notifier_head(&cpufreq_transition_notifier_list);
        init_cpufreq_transition_notifier_list_called = true;
        return 0;
}
pure_initcall(init_cpufreq_transition_notifier_list);

static int off __read_mostly;
int cpufreq_disabled(void)
{
        return off;
}
void disable_cpufreq(void)
{
        off = 1;
}
static LIST_HEAD(cpufreq_governor_list);
static DEFINE_MUTEX(cpufreq_governor_mutex);

static struct cpufreq_policy *__cpufreq_cpu_get(unsigned int cpu, bool sysfs)
{
        struct cpufreq_policy *data;
        unsigned long flags;

        if (cpu >= nr_cpu_ids)
                goto err_out;

        /* get the cpufreq driver */
        spin_lock_irqsave(&cpufreq_driver_lock, flags);

        if (!cpufreq_driver)
                goto err_out_unlock;

        if (!try_module_get(cpufreq_driver->owner))
                goto err_out_unlock;


        /* get the CPU */
        data = per_cpu(cpufreq_cpu_data, cpu);

        if (!data)
                goto err_out_put_module;

        if (!sysfs && !kobject_get(&data->kobj))
                goto err_out_put_module;

        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
        return data;

err_out_put_module:
        module_put(cpufreq_driver->owner);
err_out_unlock:
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
err_out:
        return NULL;
}

struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
{
        return __cpufreq_cpu_get(cpu, false);
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_get);

static struct cpufreq_policy *cpufreq_cpu_get_sysfs(unsigned int cpu)
{
        return __cpufreq_cpu_get(cpu, true);
}

static void __cpufreq_cpu_put(struct cpufreq_policy *data, bool sysfs)
{
        if (!sysfs)
                kobject_put(&data->kobj);
        module_put(cpufreq_driver->owner);
}

void cpufreq_cpu_put(struct cpufreq_policy *data)
{
        __cpufreq_cpu_put(data, false);
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_put);

static void cpufreq_cpu_put_sysfs(struct cpufreq_policy *data)
{
        __cpufreq_cpu_put(data, true);
}
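
/*
 * A hypothetical sketch of the cpufreq_cpu_get()/cpufreq_cpu_put() pattern
 * used throughout this file: every successful get pins both the policy
 * kobject and the driver module, so it must be balanced by exactly one put.
 *
 *      struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 *      if (!policy)
 *              return -ENODEV;
 *      ...use policy->cur, policy->min, policy->max...
 *      cpufreq_cpu_put(policy);
 */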

/*********************************************************************
 *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
 *********************************************************************/

/**
 * adjust_jiffies - adjust the system "loops_per_jiffy"
 *
 * This function alters the system "loops_per_jiffy" for the clock
 * speed change. Note that loops_per_jiffy cannot be updated on SMP
 * systems as each CPU might be scaled differently. So, use the arch
 * per-CPU loops_per_jiffy value wherever possible.
 */
#ifndef CONFIG_SMP
static unsigned long l_p_j_ref;
static unsigned int  l_p_j_ref_freq;

static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
        if (ci->flags & CPUFREQ_CONST_LOOPS)
                return;

        if (!l_p_j_ref_freq) {
                l_p_j_ref = loops_per_jiffy;
                l_p_j_ref_freq = ci->old;
                pr_debug("saving %lu as reference value for loops_per_jiffy; "
                        "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
        }
        if ((val == CPUFREQ_POSTCHANGE  && ci->old != ci->new) ||
            (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
                loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
                                                                ci->new);
                pr_debug("scaling loops_per_jiffy to %lu "
                        "for frequency %u kHz\n", loops_per_jiffy, ci->new);
        }
}
#else
static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
        return;
}
#endif
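
/*
 * Worked example for the scaling above (the numbers are illustrative only):
 * with a reference loops_per_jiffy of 4997120 saved at 1000000 kHz, a
 * transition to 2000000 kHz yields
 *
 *      cpufreq_scale(4997120, 1000000, 2000000) == 9994240
 *
 * i.e. loops_per_jiffy scales linearly with the CPU frequency.
 */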


/**
 * cpufreq_notify_transition - call notifier chain and adjust_jiffies
 * on frequency transition.
 *
 * This function calls the transition notifiers and the "adjust_jiffies"
 * function. It is called twice on all CPU frequency changes that have
 * external effects.
 */
void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
{
        struct cpufreq_policy *policy;

        BUG_ON(irqs_disabled());

        freqs->flags = cpufreq_driver->flags;
        pr_debug("notification %u of frequency transition to %u kHz\n",
                state, freqs->new);

        policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
        switch (state) {

        case CPUFREQ_PRECHANGE:
                /* detect if the driver reported a value as "old frequency"
                 * which is not equal to what the cpufreq core thinks is
                 * "old frequency".
                 */
                if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
                        if ((policy) && (policy->cpu == freqs->cpu) &&
                            (policy->cur) && (policy->cur != freqs->old)) {
                                pr_debug("Warning: CPU frequency is"
                                        " %u, cpufreq assumed %u kHz.\n",
                                        freqs->old, policy->cur);
                                freqs->old = policy->cur;
                        }
                }
                srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                CPUFREQ_PRECHANGE, freqs);
                adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
                break;

        case CPUFREQ_POSTCHANGE:
                adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
                pr_debug("FREQ: %lu - CPU: %lu\n", (unsigned long)freqs->new,
                        (unsigned long)freqs->cpu);
                trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
                trace_cpu_frequency(freqs->new, freqs->cpu);
                srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                CPUFREQ_POSTCHANGE, freqs);
                if (likely(policy) && likely(policy->cpu == freqs->cpu))
                        policy->cur = freqs->new;
                break;
        }
}
EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
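
/*
 * A hedged sketch of how a scaling driver is expected to bracket a speed
 * change with this function (write_frequency_to_hardware() is a
 * placeholder, not a real helper):
 *
 *      struct cpufreq_freqs freqs = {
 *              .cpu = policy->cpu,
 *              .old = policy->cur,
 *              .new = target,
 *      };
 *
 *      cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 *      write_frequency_to_hardware(target);
 *      cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 */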



/*********************************************************************
 *                          SYSFS INTERFACE                          *
 *********************************************************************/

static struct cpufreq_governor *__find_governor(const char *str_governor)
{
        struct cpufreq_governor *t;

        list_for_each_entry(t, &cpufreq_governor_list, governor_list)
                if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
                        return t;

        return NULL;
}

/**
 * cpufreq_parse_governor - parse a governor string
 */
static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
                                struct cpufreq_governor **governor)
{
        int err = -EINVAL;

        if (!cpufreq_driver)
                goto out;

        if (cpufreq_driver->setpolicy) {
                if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
                        *policy = CPUFREQ_POLICY_PERFORMANCE;
                        err = 0;
                } else if (!strnicmp(str_governor, "powersave",
                                                CPUFREQ_NAME_LEN)) {
                        *policy = CPUFREQ_POLICY_POWERSAVE;
                        err = 0;
                }
        } else if (cpufreq_driver->target) {
                struct cpufreq_governor *t;

                mutex_lock(&cpufreq_governor_mutex);

                t = __find_governor(str_governor);

                if (t == NULL) {
                        int ret;

                        mutex_unlock(&cpufreq_governor_mutex);
                        ret = request_module("cpufreq_%s", str_governor);
                        mutex_lock(&cpufreq_governor_mutex);

                        if (ret == 0)
                                t = __find_governor(str_governor);
                }

                if (t != NULL) {
                        *governor = t;
                        err = 0;
                }

                mutex_unlock(&cpufreq_governor_mutex);
        }
out:
        return err;
}


/**
 * cpufreq_per_cpu_attr_read() / show_##file_name() -
 * print out cpufreq information
 *
 * Write out information from cpufreq_driver->policy[cpu]; object must be
 * "unsigned int".
 */

#define show_one(file_name, object)                     \
static ssize_t show_##file_name                         \
(struct cpufreq_policy *policy, char *buf)              \
{                                                       \
        return sprintf(buf, "%u\n", policy->object);    \
}

show_one(cpuinfo_min_freq, cpuinfo.min_freq);
show_one(cpuinfo_max_freq, cpuinfo.max_freq);
show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
show_one(scaling_min_freq, min);
show_one(scaling_max_freq, max);
show_one(scaling_cur_freq, cur);
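
/*
 * For illustration, show_one(scaling_cur_freq, cur) above expands to:
 *
 *      static ssize_t show_scaling_cur_freq
 *      (struct cpufreq_policy *policy, char *buf)
 *      {
 *              return sprintf(buf, "%u\n", policy->cur);
 *      }
 */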

static int __cpufreq_set_policy(struct cpufreq_policy *data,
                                struct cpufreq_policy *policy);

/**
 * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
 */
#define store_one(file_name, object)                    \
static ssize_t store_##file_name                                        \
(struct cpufreq_policy *policy, const char *buf, size_t count)          \
{                                                                       \
        int ret = -EINVAL;                                              \
        struct cpufreq_policy new_policy;                               \
                                                                        \
        ret = cpufreq_get_policy(&new_policy, policy->cpu);             \
        if (ret)                                                        \
                return -EINVAL;                                         \
                                                                        \
        ret = sscanf(buf, "%u", &new_policy.object);                    \
        if (ret != 1)                                                   \
                return -EINVAL;                                         \
                                                                        \
        ret = __cpufreq_set_policy(policy, &new_policy);                \
        policy->user_policy.object = policy->object;                    \
                                                                        \
        return ret ? ret : count;                                       \
}

store_one(scaling_min_freq, min);
store_one(scaling_max_freq, max);

/**
 * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
 */
static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
                                        char *buf)
{
        unsigned int cur_freq = __cpufreq_get(policy->cpu);
        if (!cur_freq)
                return sprintf(buf, "<unknown>\n");
        return sprintf(buf, "%u\n", cur_freq);
}


/**
 * show_scaling_governor - show the current policy for the specified CPU
 */
static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
{
        if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
                return sprintf(buf, "powersave\n");
        else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
                return sprintf(buf, "performance\n");
        else if (policy->governor)
                return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n",
                                policy->governor->name);
        return -EINVAL;
}


/**
 * store_scaling_governor - store policy for the specified CPU
 */
static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
                                        const char *buf, size_t count)
{
        int ret = -EINVAL;
        char    str_governor[16];
        struct cpufreq_policy new_policy;

        ret = cpufreq_get_policy(&new_policy, policy->cpu);
        if (ret)
                return ret;

        ret = sscanf(buf, "%15s", str_governor);
        if (ret != 1)
                return -EINVAL;

        if (cpufreq_parse_governor(str_governor, &new_policy.policy,
                                                &new_policy.governor))
                return -EINVAL;

        /* Do not use cpufreq_set_policy here or the user_policy.max
           will be wrongly overridden */
        ret = __cpufreq_set_policy(policy, &new_policy);

        policy->user_policy.policy = policy->policy;
        policy->user_policy.governor = policy->governor;

        if (ret)
                return ret;
        else
                return count;
}

/**
 * show_scaling_driver - show the cpufreq driver currently loaded
 */
static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
{
        return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
}

/**
 * show_scaling_available_governors - show the available CPUfreq governors
 */
static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
                                                char *buf)
{
        ssize_t i = 0;
        struct cpufreq_governor *t;

        if (!cpufreq_driver->target) {
                i += sprintf(buf, "performance powersave");
                goto out;
        }

        list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
                if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
                    - (CPUFREQ_NAME_LEN + 2)))
                        goto out;
                i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
        }
out:
        i += sprintf(&buf[i], "\n");
        return i;
}

static ssize_t show_cpus(const struct cpumask *mask, char *buf)
{
        ssize_t i = 0;
        unsigned int cpu;

        for_each_cpu(cpu, mask) {
                if (i)
                        i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
                i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
                if (i >= (PAGE_SIZE - 5))
                        break;
        }
        i += sprintf(&buf[i], "\n");
        return i;
}

/**
 * show_related_cpus - show the CPUs affected by each transition even if
 * hw coordination is in use
 */
static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
{
        if (cpumask_empty(policy->related_cpus))
                return show_cpus(policy->cpus, buf);
        return show_cpus(policy->related_cpus, buf);
}

/**
 * show_affected_cpus - show the CPUs affected by each transition
 */
static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
{
        return show_cpus(policy->cpus, buf);
}

static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
                                        const char *buf, size_t count)
{
        unsigned int freq = 0;
        unsigned int ret;

        if (!policy->governor || !policy->governor->store_setspeed)
                return -EINVAL;

        ret = sscanf(buf, "%u", &freq);
        if (ret != 1)
                return -EINVAL;

        policy->governor->store_setspeed(policy, freq);

        return count;
}

static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
{
        if (!policy->governor || !policy->governor->show_setspeed)
                return sprintf(buf, "<unsupported>\n");

        return policy->governor->show_setspeed(policy, buf);
}

/**
 * show_bios_limit - show the current cpufreq HW/BIOS limitation
 */
static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
{
        unsigned int limit;
        int ret;
        if (cpufreq_driver->bios_limit) {
                ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
                if (!ret)
                        return sprintf(buf, "%u\n", limit);
        }
        return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
}

cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
cpufreq_freq_attr_ro(cpuinfo_min_freq);
cpufreq_freq_attr_ro(cpuinfo_max_freq);
cpufreq_freq_attr_ro(cpuinfo_transition_latency);
cpufreq_freq_attr_ro(scaling_available_governors);
cpufreq_freq_attr_ro(scaling_driver);
cpufreq_freq_attr_ro(scaling_cur_freq);
cpufreq_freq_attr_ro(bios_limit);
cpufreq_freq_attr_ro(related_cpus);
cpufreq_freq_attr_ro(affected_cpus);
cpufreq_freq_attr_rw(scaling_min_freq);
cpufreq_freq_attr_rw(scaling_max_freq);
cpufreq_freq_attr_rw(scaling_governor);
cpufreq_freq_attr_rw(scaling_setspeed);

static struct attribute *default_attrs[] = {
        &cpuinfo_min_freq.attr,
        &cpuinfo_max_freq.attr,
        &cpuinfo_transition_latency.attr,
        &scaling_min_freq.attr,
        &scaling_max_freq.attr,
        &affected_cpus.attr,
        &related_cpus.attr,
        &scaling_governor.attr,
        &scaling_driver.attr,
        &scaling_available_governors.attr,
        &scaling_setspeed.attr,
        NULL
};

struct kobject *cpufreq_global_kobject;
EXPORT_SYMBOL(cpufreq_global_kobject);

#define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
#define to_attr(a) container_of(a, struct freq_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
        struct cpufreq_policy *policy = to_policy(kobj);
        struct freq_attr *fattr = to_attr(attr);
        ssize_t ret = -EINVAL;
        policy = cpufreq_cpu_get_sysfs(policy->cpu);
        if (!policy)
                goto no_policy;

        if (lock_policy_rwsem_read(policy->cpu) < 0)
                goto fail;

        if (fattr->show)
                ret = fattr->show(policy, buf);
        else
                ret = -EIO;

        unlock_policy_rwsem_read(policy->cpu);
fail:
        cpufreq_cpu_put_sysfs(policy);
no_policy:
        return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
                     const char *buf, size_t count)
{
        struct cpufreq_policy *policy = to_policy(kobj);
        struct freq_attr *fattr = to_attr(attr);
        ssize_t ret = -EINVAL;
        policy = cpufreq_cpu_get_sysfs(policy->cpu);
        if (!policy)
                goto no_policy;

        if (lock_policy_rwsem_write(policy->cpu) < 0)
                goto fail;

        if (fattr->store)
                ret = fattr->store(policy, buf, count);
        else
                ret = -EIO;

        unlock_policy_rwsem_write(policy->cpu);
fail:
        cpufreq_cpu_put_sysfs(policy);
no_policy:
        return ret;
}

static void cpufreq_sysfs_release(struct kobject *kobj)
{
        struct cpufreq_policy *policy = to_policy(kobj);
        pr_debug("last reference is dropped\n");
        complete(&policy->kobj_unregister);
}

static const struct sysfs_ops sysfs_ops = {
        .show   = show,
        .store  = store,
};

static struct kobj_type ktype_cpufreq = {
        .sysfs_ops      = &sysfs_ops,
        .default_attrs  = default_attrs,
        .release        = cpufreq_sysfs_release,
};

/*
 * Returns:
 *   Negative: Failure
 *   0:        Success
 *   Positive: When we have a managed CPU and the sysfs got symlinked
 */
static int cpufreq_add_dev_policy(unsigned int cpu,
                                  struct cpufreq_policy *policy,
                                  struct device *dev)
{
        int ret = 0;
#ifdef CONFIG_SMP
        unsigned long flags;
        unsigned int j;
#ifdef CONFIG_HOTPLUG_CPU
        struct cpufreq_governor *gov;

        gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
        if (gov) {
                policy->governor = gov;
                pr_debug("Restoring governor %s for cpu %d\n",
                       policy->governor->name, cpu);
        }
#endif

        for_each_cpu(j, policy->cpus) {
                struct cpufreq_policy *managed_policy;

                if (cpu == j)
                        continue;

                /* Check for existing affected CPUs.
                 * They may not be aware of it due to CPU Hotplug.
                 * cpufreq_cpu_put is called when the device is removed
                 * in __cpufreq_remove_dev()
                 */
                managed_policy = cpufreq_cpu_get(j);
                if (unlikely(managed_policy)) {

                        /* Set proper policy_cpu */
                        unlock_policy_rwsem_write(cpu);
                        per_cpu(cpufreq_policy_cpu, cpu) = managed_policy->cpu;

                        if (lock_policy_rwsem_write(cpu) < 0) {
                                /* Should not go through policy unlock path */
                                if (cpufreq_driver->exit)
                                        cpufreq_driver->exit(policy);
                                cpufreq_cpu_put(managed_policy);
                                return -EBUSY;
                        }

                        spin_lock_irqsave(&cpufreq_driver_lock, flags);
                        cpumask_copy(managed_policy->cpus, policy->cpus);
                        per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
                        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

                        pr_debug("CPU already managed, adding link\n");
                        ret = sysfs_create_link(&dev->kobj,
                                                &managed_policy->kobj,
                                                "cpufreq");
                        if (ret)
                                cpufreq_cpu_put(managed_policy);
                        /*
                         * Success. We only needed to be added to the mask.
                         * Call driver->exit() because only the cpu parent of
                         * the kobj needed to call init().
                         */
                        if (cpufreq_driver->exit)
                                cpufreq_driver->exit(policy);

                        if (!ret)
                                return 1;
                        else
                                return ret;
                }
        }
#endif
        return ret;
}


/* symlink affected CPUs */
static int cpufreq_add_dev_symlink(unsigned int cpu,
                                   struct cpufreq_policy *policy)
{
        unsigned int j;
        int ret = 0;

        for_each_cpu(j, policy->cpus) {
                struct cpufreq_policy *managed_policy;
                struct device *cpu_dev;

                if (j == cpu)
                        continue;
                if (!cpu_online(j))
                        continue;

                pr_debug("CPU %u already managed, adding link\n", j);
                managed_policy = cpufreq_cpu_get(cpu);
                cpu_dev = get_cpu_device(j);
                ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
                                        "cpufreq");
                if (ret) {
                        cpufreq_cpu_put(managed_policy);
                        return ret;
                }
        }
        return ret;
}

static int cpufreq_add_dev_interface(unsigned int cpu,
                                     struct cpufreq_policy *policy,
                                     struct device *dev)
{
        struct cpufreq_policy new_policy;
        struct freq_attr **drv_attr;
        unsigned long flags;
        int ret = 0;
        unsigned int j;

        /* prepare interface data */
        ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
                                   &dev->kobj, "cpufreq");
        if (ret)
                return ret;

        /* set up files for this cpu device */
        drv_attr = cpufreq_driver->attr;
        while ((drv_attr) && (*drv_attr)) {
                ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
                if (ret)
                        goto err_out_kobj_put;
                drv_attr++;
        }
        if (cpufreq_driver->get) {
                ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
                if (ret)
                        goto err_out_kobj_put;
        }
        if (cpufreq_driver->target) {
                ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
                if (ret)
                        goto err_out_kobj_put;
        }
        if (cpufreq_driver->bios_limit) {
                ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
                if (ret)
                        goto err_out_kobj_put;
        }

        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        for_each_cpu(j, policy->cpus) {
                if (!cpu_online(j))
                        continue;
                per_cpu(cpufreq_cpu_data, j) = policy;
                per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
        }
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        ret = cpufreq_add_dev_symlink(cpu, policy);
        if (ret)
                goto err_out_kobj_put;

        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
        /* assure that the starting sequence is run in __cpufreq_set_policy */
        policy->governor = NULL;

        /* set default policy */
        ret = __cpufreq_set_policy(policy, &new_policy);
        policy->user_policy.policy = policy->policy;
        policy->user_policy.governor = policy->governor;

        if (ret) {
                pr_debug("setting policy failed\n");
                if (cpufreq_driver->exit)
                        cpufreq_driver->exit(policy);
        }
        return ret;

err_out_kobj_put:
        kobject_put(&policy->kobj);
        wait_for_completion(&policy->kobj_unregister);
        return ret;
}


/**
 * cpufreq_add_dev - add a CPU device
 *
 * Adds the cpufreq interface for a CPU device.
 *
 * The Oracle says: try running cpufreq registration/unregistration
 * concurrently with CPU hotplugging and all hell will break loose. Tried
 * to clean this mess up, but more thorough testing is needed. - Mathieu
 */
static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
{
        unsigned int cpu = dev->id;
        int ret = 0, found = 0;
        struct cpufreq_policy *policy;
        unsigned long flags;
        unsigned int j;
#ifdef CONFIG_HOTPLUG_CPU
        int sibling;
#endif

        if (cpu_is_offline(cpu))
                return 0;

        pr_debug("adding CPU %u\n", cpu);

#ifdef CONFIG_SMP
        /* check whether a different CPU already registered this
         * CPU because it is in the same boat. */
        policy = cpufreq_cpu_get(cpu);
        if (unlikely(policy)) {
                cpufreq_cpu_put(policy);
                return 0;
        }
#endif

        if (!try_module_get(cpufreq_driver->owner)) {
                ret = -EINVAL;
                goto module_out;
        }

        ret = -ENOMEM;
        policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
        if (!policy)
                goto nomem_out;

        if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
                goto err_free_policy;

        if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
                goto err_free_cpumask;

        policy->cpu = cpu;
        cpumask_copy(policy->cpus, cpumask_of(cpu));

        /* Initially set CPU itself as the policy_cpu */
        per_cpu(cpufreq_policy_cpu, cpu) = cpu;
        ret = (lock_policy_rwsem_write(cpu) < 0);
        WARN_ON(ret);

        init_completion(&policy->kobj_unregister);
        INIT_WORK(&policy->update, handle_update);

        /* Set governor before ->init, so that driver could check it */
#ifdef CONFIG_HOTPLUG_CPU
        for_each_online_cpu(sibling) {
                struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
                if (cp && cp->governor &&
                    (cpumask_test_cpu(cpu, cp->related_cpus))) {
                        policy->governor = cp->governor;
                        found = 1;
                        break;
                }
        }
#endif
        if (!found)
                policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
        /* call driver. From then on the cpufreq must be able
         * to accept all calls to ->verify and ->setpolicy for this CPU
         */
        ret = cpufreq_driver->init(policy);
        if (ret) {
                pr_debug("initialization failed\n");
                goto err_unlock_policy;
        }
        policy->user_policy.min = policy->min;
        policy->user_policy.max = policy->max;

        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
                                     CPUFREQ_START, policy);

        ret = cpufreq_add_dev_policy(cpu, policy, dev);
        if (ret) {
                if (ret > 0)
                        /* This is a managed cpu, symlink created,
                           exit with 0 */
                        ret = 0;
                goto err_unlock_policy;
        }

        ret = cpufreq_add_dev_interface(cpu, policy, dev);
        if (ret)
                goto err_out_unregister;

        unlock_policy_rwsem_write(cpu);

        kobject_uevent(&policy->kobj, KOBJ_ADD);
        module_put(cpufreq_driver->owner);
        pr_debug("initialization complete\n");

        return 0;


err_out_unregister:
        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        for_each_cpu(j, policy->cpus)
                per_cpu(cpufreq_cpu_data, j) = NULL;
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        kobject_put(&policy->kobj);
        wait_for_completion(&policy->kobj_unregister);

err_unlock_policy:
        unlock_policy_rwsem_write(cpu);
        free_cpumask_var(policy->related_cpus);
err_free_cpumask:
        free_cpumask_var(policy->cpus);
err_free_policy:
        kfree(policy);
nomem_out:
        module_put(cpufreq_driver->owner);
module_out:
        return ret;
}


/**
 * __cpufreq_remove_dev - remove a CPU device
 *
 * Removes the cpufreq interface for a CPU device.
 * Caller should already have policy_rwsem in write mode for this CPU.
 * This routine frees the rwsem before returning.
 */
static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
        unsigned int cpu = dev->id;
        unsigned long flags;
        struct cpufreq_policy *data;
        struct kobject *kobj;
        struct completion *cmp;
#ifdef CONFIG_SMP
        struct device *cpu_dev;
        unsigned int j;
#endif

        pr_debug("unregistering CPU %u\n", cpu);

        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        data = per_cpu(cpufreq_cpu_data, cpu);

        if (!data) {
                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
                unlock_policy_rwsem_write(cpu);
                return -EINVAL;
        }
        per_cpu(cpufreq_cpu_data, cpu) = NULL;


#ifdef CONFIG_SMP
        /* if this isn't the CPU which is the parent of the kobj, we
         * only need to unlink, put and exit
         */
        if (unlikely(cpu != data->cpu)) {
                pr_debug("removing link\n");
                cpumask_clear_cpu(cpu, data->cpus);
                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
                kobj = &dev->kobj;
                cpufreq_cpu_put(data);
                unlock_policy_rwsem_write(cpu);
                sysfs_remove_link(kobj, "cpufreq");
                return 0;
        }
#endif

#ifdef CONFIG_SMP

#ifdef CONFIG_HOTPLUG_CPU
        strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
                        CPUFREQ_NAME_LEN);
#endif

        /* if we have other CPUs still registered, we need to unlink them,
         * or else wait_for_completion below will lock up. Clean the
         * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
         * the sysfs links afterwards.
         */
        if (unlikely(cpumask_weight(data->cpus) > 1)) {
                for_each_cpu(j, data->cpus) {
                        if (j == cpu)
                                continue;
                        per_cpu(cpufreq_cpu_data, j) = NULL;
                }
        }

        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        if (unlikely(cpumask_weight(data->cpus) > 1)) {
                for_each_cpu(j, data->cpus) {
                        if (j == cpu)
                                continue;
                        pr_debug("removing link for cpu %u\n", j);
#ifdef CONFIG_HOTPLUG_CPU
                        strncpy(per_cpu(cpufreq_cpu_governor, j),
                                data->governor->name, CPUFREQ_NAME_LEN);
#endif
                        cpu_dev = get_cpu_device(j);
                        kobj = &cpu_dev->kobj;
                        unlock_policy_rwsem_write(cpu);
                        sysfs_remove_link(kobj, "cpufreq");
                        lock_policy_rwsem_write(cpu);
                        cpufreq_cpu_put(data);
                }
        }
#else
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
#endif

        if (cpufreq_driver->target)
                __cpufreq_governor(data, CPUFREQ_GOV_STOP);

        kobj = &data->kobj;
        cmp = &data->kobj_unregister;
        unlock_policy_rwsem_write(cpu);
        kobject_put(kobj);

        /* we need to make sure that the underlying kobj is actually
         * not referenced anymore by anybody before we proceed with
         * unloading.
         */
        pr_debug("waiting for dropping of refcount\n");
        wait_for_completion(cmp);
        pr_debug("wait complete\n");

        lock_policy_rwsem_write(cpu);
        if (cpufreq_driver->exit)
                cpufreq_driver->exit(data);
        unlock_policy_rwsem_write(cpu);

#ifdef CONFIG_HOTPLUG_CPU
        /* when the CPU which is the parent of the kobj is hotplugged
         * offline, check for siblings, and create cpufreq sysfs interface
         * and symlinks
         */
        if (unlikely(cpumask_weight(data->cpus) > 1)) {
                /* first sibling now owns the new sysfs dir */
                cpumask_clear_cpu(cpu, data->cpus);
                cpufreq_add_dev(get_cpu_device(cpumask_first(data->cpus)), NULL);

                /* finally remove our own symlink */
                lock_policy_rwsem_write(cpu);
                __cpufreq_remove_dev(dev, sif);
        }
#endif

        free_cpumask_var(data->related_cpus);
        free_cpumask_var(data->cpus);
        kfree(data);

        return 0;
}


static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
        unsigned int cpu = dev->id;
        int retval;

        if (cpu_is_offline(cpu))
                return 0;

        if (unlikely(lock_policy_rwsem_write(cpu)))
                BUG();

        retval = __cpufreq_remove_dev(dev, sif);
        return retval;
}


static void handle_update(struct work_struct *work)
{
        struct cpufreq_policy *policy =
                container_of(work, struct cpufreq_policy, update);
        unsigned int cpu = policy->cpu;
        pr_debug("handle_update for cpu %u called\n", cpu);
        cpufreq_update_policy(cpu);
}

/**
 *      cpufreq_out_of_sync - If actual and saved CPU frequency differ, we're in deep trouble.
 *      @cpu: cpu number
 *      @old_freq: CPU frequency the kernel thinks the CPU runs at
 *      @new_freq: CPU frequency the CPU actually runs at
 *
 *      We adjust to the current frequency first, and need to clean up later.
 *      So either call cpufreq_update_policy() or schedule handle_update().
 */
static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
                                unsigned int new_freq)
{
        struct cpufreq_freqs freqs;

        pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
               "core thinks of %u, is %u kHz.\n", old_freq, new_freq);

        freqs.cpu = cpu;
        freqs.old = old_freq;
        freqs.new = new_freq;
        cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
        cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}


/**
 * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
 * @cpu: CPU number
 *
 * This is the last known freq, without actually getting it from the driver.
 * Return value will be same as what is shown in scaling_cur_freq in sysfs.
 */
unsigned int cpufreq_quick_get(unsigned int cpu)
{
        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
        unsigned int ret_freq = 0;

        if (policy) {
                ret_freq = policy->cur;
                cpufreq_cpu_put(policy);
        }

        return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get);
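
/*
 * Hypothetical caller of the helper above: code that only wants the cached
 * frequency and must not touch the hardware, e.g.
 *
 *      unsigned int khz = cpufreq_quick_get(cpu);
 *      if (!khz)
 *              khz = cpu_khz;  (x86-specific fallback, shown for
 *                               illustration only; not code from this file)
 */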

/**
 * cpufreq_quick_get_max - get the max reported CPU frequency for this CPU
 * @cpu: CPU number
 *
 * Just return the max possible frequency for a given CPU.
 */
unsigned int cpufreq_quick_get_max(unsigned int cpu)
{
        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
        unsigned int ret_freq = 0;

        if (policy) {
                ret_freq = policy->max;
                cpufreq_cpu_put(policy);
        }

        return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get_max);


static unsigned int __cpufreq_get(unsigned int cpu)
{
        struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
        unsigned int ret_freq = 0;

        if (!cpufreq_driver->get)
                return ret_freq;

        ret_freq = cpufreq_driver->get(cpu);

        if (ret_freq && policy->cur &&
                !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
                /* verify no discrepancy between actual and
                 * saved value exists */
                if (unlikely(ret_freq != policy->cur)) {
                        cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
                        schedule_work(&policy->update);
                }
        }

        return ret_freq;
}

/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Get the current frequency of the given CPU as reported by its driver.
 */
unsigned int cpufreq_get(unsigned int cpu)
{
        unsigned int ret_freq = 0;
        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);

        if (!policy)
                goto out;

        if (unlikely(lock_policy_rwsem_read(cpu)))
                goto out_policy;

        ret_freq = __cpufreq_get(cpu);

        unlock_policy_rwsem_read(cpu);

out_policy:
        cpufreq_cpu_put(policy);
out:
        return ret_freq;
}
EXPORT_SYMBOL(cpufreq_get);

static struct subsys_interface cpufreq_interface = {
        .name           = "cpufreq",
        .subsys         = &cpu_subsys,
        .add_dev        = cpufreq_add_dev,
        .remove_dev     = cpufreq_remove_dev,
};


/**
 * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
 *
 * This function is only executed for the boot processor.  The other CPUs
 * have been put offline by means of CPU hotplug.
 */
static int cpufreq_bp_suspend(void)
{
        int ret = 0;

        int cpu = smp_processor_id();
        struct cpufreq_policy *cpu_policy;

        pr_debug("suspending cpu %u\n", cpu);

        /* If there's no policy for the boot CPU, we have nothing to do. */
        cpu_policy = cpufreq_cpu_get(cpu);
        if (!cpu_policy)
                return 0;

        if (cpufreq_driver->suspend) {
                ret = cpufreq_driver->suspend(cpu_policy);
                if (ret)
                        printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
                                        "step on CPU %u\n", cpu_policy->cpu);
        }

        cpufreq_cpu_put(cpu_policy);
        return ret;
}

/**
 * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
 *
 *      1.) resume CPUfreq hardware support (cpufreq_driver->resume())
 *      2.) schedule a call to cpufreq_update_policy() ASAP once interrupts
 *          are restored. It will verify that the current freq is in sync
 *          with what we believe it to be. This is a bit later than when it
 *          should be, but nonetheless it's better than calling
 *          cpufreq_driver->get() here which might re-enable interrupts...
 *
 * This function is only executed for the boot CPU.  The other CPUs have not
 * been turned on yet.
 */
static void cpufreq_bp_resume(void)
{
        int ret = 0;

        int cpu = smp_processor_id();
        struct cpufreq_policy *cpu_policy;

        pr_debug("resuming cpu %u\n", cpu);

        /* If there's no policy for the boot CPU, we have nothing to do. */
        cpu_policy = cpufreq_cpu_get(cpu);
        if (!cpu_policy)
                return;

        if (cpufreq_driver->resume) {
                ret = cpufreq_driver->resume(cpu_policy);
                if (ret) {
                        printk(KERN_ERR "cpufreq: resume failed in ->resume "
                                        "step on CPU %u\n", cpu_policy->cpu);
                        goto fail;
                }
        }

        schedule_work(&cpu_policy->update);

fail:
        cpufreq_cpu_put(cpu_policy);
}

static struct syscore_ops cpufreq_syscore_ops = {
        .suspend        = cpufreq_bp_suspend,
        .resume         = cpufreq_bp_resume,
};


/*********************************************************************
 *                     NOTIFIER LISTS INTERFACE                      *
 *********************************************************************/

/**
 *      cpufreq_register_notifier - register a driver with cpufreq
 *      @nb: notifier function to register
 *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *      Add a driver to one of two lists: either a list of drivers that
 *      are notified about clock rate changes (once before and once after
 *      the transition), or a list of drivers that are notified about
 *      changes in cpufreq policy.
 *
 *      This function may sleep, and has the same return conditions as
 *      blocking_notifier_chain_register.
 */
int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
{
        int ret;

        WARN_ON(!init_cpufreq_transition_notifier_list_called);

        switch (list) {
        case CPUFREQ_TRANSITION_NOTIFIER:
                ret = srcu_notifier_chain_register(
                                &cpufreq_transition_notifier_list, nb);
                break;
        case CPUFREQ_POLICY_NOTIFIER:
                ret = blocking_notifier_chain_register(
                                &cpufreq_policy_notifier_list, nb);
                break;
        default:
                ret = -EINVAL;
        }

        return ret;
}
EXPORT_SYMBOL(cpufreq_register_notifier);
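
/*
 * A minimal sketch of a transition-notifier client (not a user that exists
 * in this file; my_freq_notify and my_nb are hypothetical names):
 *
 *      static int my_freq_notify(struct notifier_block *nb,
 *                                unsigned long state, void *data)
 *      {
 *              struct cpufreq_freqs *freqs = data;
 *
 *              if (state == CPUFREQ_POSTCHANGE)
 *                      pr_debug("cpu %u now at %u kHz\n",
 *                               freqs->cpu, freqs->new);
 *              return NOTIFY_OK;
 *      }
 *
 *      static struct notifier_block my_nb = {
 *              .notifier_call = my_freq_notify,
 *      };
 *
 *      cpufreq_register_notifier(&my_nb, CPUFREQ_TRANSITION_NOTIFIER);
 */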


/**
 *      cpufreq_unregister_notifier - unregister a driver with cpufreq
 *      @nb: notifier block to be unregistered
 *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *      Remove a driver from the CPU frequency notifier list.
 *
 *      This function may sleep, and has the same return conditions as
 *      blocking_notifier_chain_unregister.
 */
int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
{
        int ret;

        switch (list) {
        case CPUFREQ_TRANSITION_NOTIFIER:
                ret = srcu_notifier_chain_unregister(
                                &cpufreq_transition_notifier_list, nb);
                break;
        case CPUFREQ_POLICY_NOTIFIER:
                ret = blocking_notifier_chain_unregister(
                                &cpufreq_policy_notifier_list, nb);
                break;
        default:
                ret = -EINVAL;
        }

        return ret;
}
EXPORT_SYMBOL(cpufreq_unregister_notifier);


/*********************************************************************
 *                              GOVERNORS                            *
 *********************************************************************/


int __cpufreq_driver_target(struct cpufreq_policy *policy,
                            unsigned int target_freq,
                            unsigned int relation)
{
        int retval = -EINVAL;

        if (cpufreq_disabled())
                return -ENODEV;

        pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
                target_freq, relation);
        if (cpu_online(policy->cpu) && cpufreq_driver->target)
                retval = cpufreq_driver->target(policy, target_freq, relation);

        return retval;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_target);

int cpufreq_driver_target(struct cpufreq_policy *policy,
                          unsigned int target_freq,
                          unsigned int relation)
{
        int ret = -EINVAL;

        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
                goto no_policy;

        if (unlikely(lock_policy_rwsem_write(policy->cpu)))
                goto fail;

        ret = __cpufreq_driver_target(policy, target_freq, relation);

        unlock_policy_rwsem_write(policy->cpu);

fail:
        cpufreq_cpu_put(policy);
no_policy:
        return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_driver_target);
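
/*
 * Sketch of how a governor typically drives the two helpers above: the
 * locked wrapper from process context, the __ variant when the policy
 * rwsem is already held. target_khz is a hypothetical value.
 *
 *      ret = cpufreq_driver_target(policy, target_khz, CPUFREQ_RELATION_L);
 *
 * CPUFREQ_RELATION_L asks for the lowest frequency at or above target_khz;
 * CPUFREQ_RELATION_H for the highest frequency at or below it.
 */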

int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
{
        int ret = 0;

        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
                return -EINVAL;

        if (cpu_online(cpu) && cpufreq_driver->getavg)
                ret = cpufreq_driver->getavg(policy, cpu);

        cpufreq_cpu_put(policy);
        return ret;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);

/*
 * __cpufreq_governor - forward a governor event (e.g. CPUFREQ_GOV_START,
 * CPUFREQ_GOV_STOP or CPUFREQ_GOV_LIMITS) to the policy's governor.
 */

static int __cpufreq_governor(struct cpufreq_policy *policy,
                                        unsigned int event)
{
        int ret;

        /* A fallback governor only needs to be defined here when the
           default governor is known to have latency restrictions, like
           e.g. conservative or ondemand. That this is the case is
           already ensured in Kconfig.
        */
#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
        struct cpufreq_governor *gov = &cpufreq_gov_performance;
#else
        struct cpufreq_governor *gov = NULL;
#endif

        if (policy->governor->max_transition_latency &&
            policy->cpuinfo.transition_latency >
            policy->governor->max_transition_latency) {
                if (!gov)
                        return -EINVAL;
                else {
                        printk(KERN_WARNING "%s governor failed, too long"
                               " transition latency of HW, fallback"
                               " to %s governor\n",
                               policy->governor->name,
                               gov->name);
                        policy->governor = gov;
                }
        }

        if (!try_module_get(policy->governor->owner))
                return -EINVAL;

        pr_debug("__cpufreq_governor for CPU %u, event %u\n",
                                                policy->cpu, event);
        ret = policy->governor->governor(policy, event);

        /* we keep one module reference alive for
                        each CPU governed by this governor */
        if ((event != CPUFREQ_GOV_START) || ret)
                module_put(policy->governor->owner);
        if ((event == CPUFREQ_GOV_STOP) && !ret)
                module_put(policy->governor->owner);

        return ret;
}


int cpufreq_register_governor(struct cpufreq_governor *governor)
{
        int err;

        if (!governor)
                return -EINVAL;

        if (cpufreq_disabled())
                return -ENODEV;

        mutex_lock(&cpufreq_governor_mutex);

        err = -EBUSY;
        if (__find_governor(governor->name) == NULL) {
                err = 0;
                list_add(&governor->governor_list, &cpufreq_governor_list);
        }

        mutex_unlock(&cpufreq_governor_mutex);
        return err;
}
EXPORT_SYMBOL_GPL(cpufreq_register_governor);
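
/*
 * Example (illustrative sketch, hypothetical names): a minimal governor
 * supplies a ->governor() callback reacting to the events dispatched by
 * __cpufreq_governor() above, and registers itself at module init time:
 *
 *	static int sample_governor_cb(struct cpufreq_policy *policy,
 *				      unsigned int event)
 *	{
 *		switch (event) {
 *		case CPUFREQ_GOV_START:
 *		case CPUFREQ_GOV_LIMITS:
 *			return __cpufreq_driver_target(policy, policy->max,
 *						       CPUFREQ_RELATION_H);
 *		case CPUFREQ_GOV_STOP:
 *			break;
 *		}
 *		return 0;
 *	}
 *
 *	static struct cpufreq_governor cpufreq_gov_sample = {
 *		.name		= "sample",
 *		.governor	= sample_governor_cb,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init sample_gov_init(void)
 *	{
 *		return cpufreq_register_governor(&cpufreq_gov_sample);
 *	}
 */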


void cpufreq_unregister_governor(struct cpufreq_governor *governor)
{
#ifdef CONFIG_HOTPLUG_CPU
        int cpu;
#endif

        if (!governor)
                return;

        if (cpufreq_disabled())
                return;

#ifdef CONFIG_HOTPLUG_CPU
        for_each_present_cpu(cpu) {
                if (cpu_online(cpu))
                        continue;
                if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
                        strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
        }
#endif

        mutex_lock(&cpufreq_governor_mutex);
        list_del(&governor->governor_list);
        mutex_unlock(&cpufreq_governor_mutex);
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);


/*********************************************************************
 *                          POLICY INTERFACE                         *
 *********************************************************************/

/**
 * cpufreq_get_policy - get the current cpufreq_policy
 * @policy: struct cpufreq_policy into which the current cpufreq_policy
 *      is written
 * @cpu: CPU whose policy shall be read
 *
 * Reads the current cpufreq policy of @cpu into @policy.
 */
int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
{
        struct cpufreq_policy *cpu_policy;

        if (!policy)
                return -EINVAL;

        cpu_policy = cpufreq_cpu_get(cpu);
        if (!cpu_policy)
                return -EINVAL;

        memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));

        cpufreq_cpu_put(cpu_policy);
        return 0;
}
EXPORT_SYMBOL(cpufreq_get_policy);
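
/*
 * Example (illustrative sketch): since the policy is copied into a
 * caller-provided buffer, cpufreq_get_policy() can be used without holding
 * any cpufreq locks, e.g. to inspect the current limits of CPU 0:
 *
 *	struct cpufreq_policy pol;
 *
 *	if (!cpufreq_get_policy(&pol, 0))
 *		pr_info("cpu0 limits: %u..%u kHz\n", pol.min, pol.max);
 */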


/*
 * __cpufreq_set_policy - apply a new policy
 * data   : current policy
 * policy : policy to be set
 */
static int __cpufreq_set_policy(struct cpufreq_policy *data,
                                struct cpufreq_policy *policy)
{
        int ret = 0;

        pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
                policy->min, policy->max);

        memcpy(&policy->cpuinfo, &data->cpuinfo,
                                sizeof(struct cpufreq_cpuinfo));

        if (policy->min > data->max || policy->max < data->min) {
                ret = -EINVAL;
                goto error_out;
        }

        /* verify that the cpu speed can be set within these limits */
        ret = cpufreq_driver->verify(policy);
        if (ret)
                goto error_out;

        /* adjust if necessary - all reasons */
        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
                        CPUFREQ_ADJUST, policy);

        /* adjust if necessary - hardware incompatibility */
        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
                        CPUFREQ_INCOMPATIBLE, policy);

        /* verify that the cpu speed can be set within these limits,
           which might be different from the first ones */
        ret = cpufreq_driver->verify(policy);
        if (ret)
                goto error_out;

        /* notification of the new policy */
        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
                        CPUFREQ_NOTIFY, policy);

        data->min = policy->min;
        data->max = policy->max;

        pr_debug("new min and max freqs are %u - %u kHz\n",
                                        data->min, data->max);

        if (cpufreq_driver->setpolicy) {
                data->policy = policy->policy;
                pr_debug("setting range\n");
                ret = cpufreq_driver->setpolicy(policy);
        } else {
                if (policy->governor != data->governor) {
                        /* save old, working values */
                        struct cpufreq_governor *old_gov = data->governor;

                        pr_debug("governor switch\n");

                        /* end old governor */
                        if (data->governor)
                                __cpufreq_governor(data, CPUFREQ_GOV_STOP);

                        /* start new governor */
                        data->governor = policy->governor;
                        if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
                                /* new governor failed, so re-start old one */
                                pr_debug("starting governor %s failed\n",
                                                        data->governor->name);
                                if (old_gov) {
                                        data->governor = old_gov;
                                        __cpufreq_governor(data,
                                                           CPUFREQ_GOV_START);
                                }
                                ret = -EINVAL;
                                goto error_out;
                        }
                        /* might be a policy change, too, so fall through */
                }
                pr_debug("governor: change or update limits\n");
                __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
        }

error_out:
        return ret;
}
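
/*
 * Example (illustrative sketch, hypothetical names): the CPUFREQ_ADJUST
 * call above is the hook a policy notifier (e.g. a thermal driver) uses
 * to tighten the limits before they are validated and announced; the
 * 1000000 kHz cap is made up.
 *
 *	static int sample_policy_cb(struct notifier_block *nb,
 *				    unsigned long val, void *data)
 *	{
 *		struct cpufreq_policy *policy = data;
 *
 *		if (val == CPUFREQ_ADJUST)
 *			cpufreq_verify_within_limits(policy, 0, 1000000);
 *		return NOTIFY_OK;
 *	}
 *
 * registered via cpufreq_register_notifier(..., CPUFREQ_POLICY_NOTIFIER).
 */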

/**
 *      cpufreq_update_policy - re-evaluate an existing cpufreq policy
 *      @cpu: CPU which shall be re-evaluated
 *
 *      Useful for policy notifiers whose requirements change over time.
 */
int cpufreq_update_policy(unsigned int cpu)
{
        struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
        struct cpufreq_policy policy;
        int ret;

        if (!data) {
                ret = -ENODEV;
                goto no_policy;
        }

        if (unlikely(lock_policy_rwsem_write(cpu))) {
                ret = -EINVAL;
                goto fail;
        }

        pr_debug("updating policy for CPU %u\n", cpu);
        memcpy(&policy, data, sizeof(struct cpufreq_policy));
        policy.min = data->user_policy.min;
        policy.max = data->user_policy.max;
        policy.policy = data->user_policy.policy;
        policy.governor = data->user_policy.governor;

        /*
         * The BIOS might change the frequency behind our back, so ask
         * the driver for the current freq and notify the governors
         * about a change.
         */
        if (cpufreq_driver->get) {
                policy.cur = cpufreq_driver->get(cpu);
                if (!data->cur) {
                        pr_debug("Driver did not initialize current freq\n");
                        data->cur = policy.cur;
                } else {
                        if (data->cur != policy.cur)
                                cpufreq_out_of_sync(cpu, data->cur,
                                                                policy.cur);
                }
        }

        ret = __cpufreq_set_policy(data, &policy);

        unlock_policy_rwsem_write(cpu);

fail:
        cpufreq_cpu_put(data);
no_policy:
        return ret;
}
EXPORT_SYMBOL(cpufreq_update_policy);
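
/*
 * Example (illustrative sketch): a driver that has just changed the
 * constraints it enforces through a policy notifier (as in the sketch
 * above) would then nudge the core to re-evaluate each affected CPU:
 *
 *	unsigned int cpu;
 *
 *	for_each_online_cpu(cpu)
 *		cpufreq_update_policy(cpu);
 */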

static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
                                        unsigned long action, void *hcpu)
{
        unsigned int cpu = (unsigned long)hcpu;
        struct device *dev;

        dev = get_cpu_device(cpu);
        if (dev) {
                switch (action) {
                case CPU_ONLINE:
                case CPU_ONLINE_FROZEN:
                        cpufreq_add_dev(dev, NULL);
                        break;
                case CPU_DOWN_PREPARE:
                case CPU_DOWN_PREPARE_FROZEN:
                        if (unlikely(lock_policy_rwsem_write(cpu)))
                                BUG();

                        __cpufreq_remove_dev(dev, NULL);
                        break;
                case CPU_DOWN_FAILED:
                case CPU_DOWN_FAILED_FROZEN:
                        cpufreq_add_dev(dev, NULL);
                        break;
                }
        }
        return NOTIFY_OK;
}

static struct notifier_block __refdata cpufreq_cpu_notifier = {
        .notifier_call = cpufreq_cpu_callback,
};

/*********************************************************************
 *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
 *********************************************************************/

/**
 * cpufreq_register_driver - register a CPU Frequency driver
 * @driver_data: A struct cpufreq_driver containing the values
 * submitted by the CPU Frequency driver.
 *
 * Registers a CPU Frequency driver with this core code. Returns zero
 * on success, -EBUSY when another driver got here first (and isn't
 * unregistered in the meantime).
 *
 */
int cpufreq_register_driver(struct cpufreq_driver *driver_data)
{
        unsigned long flags;
        int ret;

        if (cpufreq_disabled())
                return -ENODEV;

        if (!driver_data || !driver_data->verify || !driver_data->init ||
            ((!driver_data->setpolicy) && (!driver_data->target)))
                return -EINVAL;

        pr_debug("trying to register driver %s\n", driver_data->name);

        if (driver_data->setpolicy)
                driver_data->flags |= CPUFREQ_CONST_LOOPS;

        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        if (cpufreq_driver) {
                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
                return -EBUSY;
        }
        cpufreq_driver = driver_data;
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        ret = subsys_interface_register(&cpufreq_interface);
        if (ret)
                goto err_null_driver;

        if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
                int i;
                ret = -ENODEV;

                /* check for at least one working CPU */
                for (i = 0; i < nr_cpu_ids; i++)
                        if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
                                ret = 0;
                                break;
                        }

                /* if all ->init() calls failed, unregister */
                if (ret) {
                        pr_debug("no CPU initialized for driver %s\n",
                                                        driver_data->name);
                        goto err_if_unreg;
                }
        }

        register_hotcpu_notifier(&cpufreq_cpu_notifier);
        pr_debug("driver %s up and running\n", driver_data->name);

        return 0;
err_if_unreg:
        subsys_interface_unregister(&cpufreq_interface);
err_null_driver:
        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        cpufreq_driver = NULL;
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
        return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_register_driver);


/**
 * cpufreq_unregister_driver - unregister the current CPUFreq driver
 *
 * Unregister the current CPUFreq driver. Only call this if you have
 * the right to do so, i.e. if you have succeeded in registering before!
 * Returns zero if successful, and -EINVAL if the cpufreq_driver is
 * currently not initialised.
 */
int cpufreq_unregister_driver(struct cpufreq_driver *driver)
{
        unsigned long flags;

        if (!cpufreq_driver || (driver != cpufreq_driver))
                return -EINVAL;

        pr_debug("unregistering driver %s\n", driver->name);

        subsys_interface_unregister(&cpufreq_interface);
        unregister_hotcpu_notifier(&cpufreq_cpu_notifier);

        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        cpufreq_driver = NULL;
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
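
/*
 * Example (illustrative sketch, hypothetical names): a ->target style
 * driver fills in at least .name, .init, .verify and .target, and pairs
 * registration and unregistration in its module init/exit hooks:
 *
 *	static struct cpufreq_driver sample_cpufreq_driver = {
 *		.name	= "sample",
 *		.init	= sample_cpu_init,
 *		.verify	= sample_verify_policy,
 *		.target	= sample_target,
 *	};
 *
 *	static int __init sample_cpufreq_init(void)
 *	{
 *		return cpufreq_register_driver(&sample_cpufreq_driver);
 *	}
 *
 *	static void __exit sample_cpufreq_exit(void)
 *	{
 *		cpufreq_unregister_driver(&sample_cpufreq_driver);
 *	}
 *	module_init(sample_cpufreq_init);
 *	module_exit(sample_cpufreq_exit);
 */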

static int __init cpufreq_core_init(void)
{
        int cpu;

        if (cpufreq_disabled())
                return -ENODEV;

        for_each_possible_cpu(cpu) {
                per_cpu(cpufreq_policy_cpu, cpu) = -1;
                init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
        }

        cpufreq_global_kobject = kobject_create_and_add("cpufreq",
                                                &cpu_subsys.dev_root->kobj);
        BUG_ON(!cpufreq_global_kobject);
        register_syscore_ops(&cpufreq_syscore_ops);

        return 0;
}
core_initcall(cpufreq_core_init);