/*
 *  linux/drivers/cpufreq/cpufreq.c
 *
 *  Copyright (C) 2001 Russell King
 *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
 *
 *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
 *      Added handling for CPU hotplug
 *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
 *      Fix handling for CPU hotplug -- affected CPUs
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/completion.h>
#include <linux/mutex.h>
#include <linux/syscore_ops.h>

#include <trace/events/power.h>

/**
 * The "cpufreq driver" - the arch- or hardware-dependent low
 * level driver of CPUFreq support, and its spinlock. This lock
 * also protects the cpufreq_cpu_data array.
 */
static struct cpufreq_driver *cpufreq_driver;
static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
#ifdef CONFIG_HOTPLUG_CPU
/* This one keeps track of the previously set governor of a removed CPU */
static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
#endif
static DEFINE_SPINLOCK(cpufreq_driver_lock);

/*
 * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
 * all cpufreq/hotplug/workqueue/etc related lock issues.
 *
 * The rules for this semaphore:
 * - Any routine that wants to read from the policy structure will
 *   do a down_read on this semaphore.
 * - Any routine that will write to the policy structure and/or may take away
 *   the policy altogether (eg. CPU hotplug), will hold this lock in write
 *   mode before doing so.
 *
 * Additional rules:
 * - All holders of the lock should check to make sure that the CPU they
 *   are concerned with is online after they get the lock.
 * - Governor routines that can be called in the cpufreq hotplug path should
 *   not take this semaphore, as the top-level hotplug notifier handler
 *   already takes it.
 * - The lock should not be held across
 *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
 */
static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);

#define lock_policy_rwsem(mode, cpu)                                    \
static int lock_policy_rwsem_##mode                                     \
(int cpu)                                                               \
{                                                                       \
        int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);              \
        BUG_ON(policy_cpu == -1);                                       \
        down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));            \
        if (unlikely(!cpu_online(cpu))) {                               \
                up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));      \
                return -1;                                              \
        }                                                               \
                                                                        \
        return 0;                                                       \
}

lock_policy_rwsem(read, cpu);

lock_policy_rwsem(write, cpu);

static void unlock_policy_rwsem_read(int cpu)
{
        int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
        BUG_ON(policy_cpu == -1);
        up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
}

static void unlock_policy_rwsem_write(int cpu)
{
        int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
        BUG_ON(policy_cpu == -1);
        up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
}
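
/*
 * Illustrative sketch (not in the original file): the canonical read-side
 * use of the helpers above, mirroring the sysfs show() path further down.
 * "use_policy" is a hypothetical consumer.
 *
 *    if (lock_policy_rwsem_read(cpu) < 0)
 *            return -EINVAL;
 *    use_policy(per_cpu(cpufreq_cpu_data, cpu));
 *    unlock_policy_rwsem_read(cpu);
 *
 * The lock helpers return -1 when the CPU went offline between the
 * caller's check and the down_read()/down_write().
 */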


/* internal prototypes */
static int __cpufreq_governor(struct cpufreq_policy *policy,
                unsigned int event);
static unsigned int __cpufreq_get(unsigned int cpu);
static void handle_update(struct work_struct *work);

/**
 * Two notifier lists: the "policy" list is involved in the
 * validation process for a new CPU frequency policy; the
 * "transition" list is for kernel code that needs to handle
 * changes to devices when the CPU clock speed changes.
 * Each list is protected by its own notifier head's internal locking.
 */
static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
static struct srcu_notifier_head cpufreq_transition_notifier_list;

static bool init_cpufreq_transition_notifier_list_called;
static int __init init_cpufreq_transition_notifier_list(void)
{
        srcu_init_notifier_head(&cpufreq_transition_notifier_list);
        init_cpufreq_transition_notifier_list_called = true;
        return 0;
}
pure_initcall(init_cpufreq_transition_notifier_list);

static LIST_HEAD(cpufreq_governor_list);
static DEFINE_MUTEX(cpufreq_governor_mutex);

struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
{
        struct cpufreq_policy *data;
        unsigned long flags;

        if (cpu >= nr_cpu_ids)
                goto err_out;

        /* get the cpufreq driver */
        spin_lock_irqsave(&cpufreq_driver_lock, flags);

        if (!cpufreq_driver)
                goto err_out_unlock;

        if (!try_module_get(cpufreq_driver->owner))
                goto err_out_unlock;


        /* get the CPU */
        data = per_cpu(cpufreq_cpu_data, cpu);

        if (!data)
                goto err_out_put_module;

        if (!kobject_get(&data->kobj))
                goto err_out_put_module;

        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
        return data;

err_out_put_module:
        module_put(cpufreq_driver->owner);
err_out_unlock:
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
err_out:
        return NULL;
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_get);


void cpufreq_cpu_put(struct cpufreq_policy *data)
{
        kobject_put(&data->kobj);
        module_put(cpufreq_driver->owner);
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
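
/*
 * Usage sketch (illustrative, not from the original file): every
 * successful cpufreq_cpu_get() must be balanced by cpufreq_cpu_put(),
 * since the pair holds both a kobject reference on the policy and a
 * module reference on the low-level driver:
 *
 *    struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 *
 *    if (policy) {
 *            pr_debug("cpu%u last set to %u kHz\n", cpu, policy->cur);
 *            cpufreq_cpu_put(policy);
 *    }
 */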


/*********************************************************************
 *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
 *********************************************************************/

/**
 * adjust_jiffies - adjust the system "loops_per_jiffy"
 *
 * This function alters the system "loops_per_jiffy" for the clock
 * speed change. Note that loops_per_jiffy cannot be updated on SMP
 * systems as each CPU might be scaled differently. So, use the arch
 * per-CPU loops_per_jiffy value wherever possible.
 */
#ifndef CONFIG_SMP
static unsigned long l_p_j_ref;
static unsigned int  l_p_j_ref_freq;

static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
        if (ci->flags & CPUFREQ_CONST_LOOPS)
                return;

        if (!l_p_j_ref_freq) {
                l_p_j_ref = loops_per_jiffy;
                l_p_j_ref_freq = ci->old;
                pr_debug("saving %lu as reference value for loops_per_jiffy; "
                        "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
        }
        if ((val == CPUFREQ_PRECHANGE  && ci->old < ci->new) ||
            (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) ||
            (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
                loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
                                                                ci->new);
                pr_debug("scaling loops_per_jiffy to %lu "
                        "for frequency %u kHz\n", loops_per_jiffy, ci->new);
        }
}
#else
static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
        return;
}
#endif
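
/*
 * Worked example (illustrative, made-up numbers): cpufreq_scale()
 * rescales the reference loops_per_jiffy linearly with frequency,
 * lpj_new = lpj_ref * freq_new / freq_ref.  With l_p_j_ref = 4997120
 * captured at l_p_j_ref_freq = 1000000 kHz, a transition to 500000 kHz
 * gives
 *
 *    loops_per_jiffy = cpufreq_scale(4997120, 1000000, 500000)
 *                    = 4997120 * 500000 / 1000000 = 2498560
 *
 * i.e. half the delay-loop iterations at half the clock.
 */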


/**
 * cpufreq_notify_transition - call notifier chain and adjust_jiffies
 * on frequency transition.
 *
 * This function calls the transition notifiers and the "adjust_jiffies"
 * function. It is called twice on all CPU frequency changes that have
 * external effects.
 */
void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
{
        struct cpufreq_policy *policy;

        BUG_ON(irqs_disabled());

        freqs->flags = cpufreq_driver->flags;
        pr_debug("notification %u of frequency transition to %u kHz\n",
                state, freqs->new);

        policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
        switch (state) {

        case CPUFREQ_PRECHANGE:
                /* detect if the driver reported a value as "old frequency"
                 * which is not equal to what the cpufreq core thinks is
                 * "old frequency".
                 */
                if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
                        if ((policy) && (policy->cpu == freqs->cpu) &&
                            (policy->cur) && (policy->cur != freqs->old)) {
                                pr_debug("Warning: CPU frequency is"
                                        " %u, cpufreq assumed %u kHz.\n",
                                        freqs->old, policy->cur);
                                freqs->old = policy->cur;
                        }
                }
                srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                CPUFREQ_PRECHANGE, freqs);
                adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
                break;

        case CPUFREQ_POSTCHANGE:
                adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
                pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
                        (unsigned long)freqs->cpu);
                trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
                trace_cpu_frequency(freqs->new, freqs->cpu);
                srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                CPUFREQ_POSTCHANGE, freqs);
                if (likely(policy) && likely(policy->cpu == freqs->cpu))
                        policy->cur = freqs->new;
                break;
        }
}
EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
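
/*
 * Driver-side sketch (illustrative; "set_hw_frequency" is hypothetical):
 * a scaling driver brackets the actual hardware reprogramming with the
 * two notifications, so that the PRECHANGE resync logic and the
 * POSTCHANGE policy->cur update above both run:
 *
 *    struct cpufreq_freqs freqs = {
 *            .cpu = policy->cpu,
 *            .old = policy->cur,
 *            .new = target,
 *    };
 *
 *    cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 *    set_hw_frequency(target);
 *    cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 */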



/*********************************************************************
 *                          SYSFS INTERFACE                          *
 *********************************************************************/

static struct cpufreq_governor *__find_governor(const char *str_governor)
{
        struct cpufreq_governor *t;

        list_for_each_entry(t, &cpufreq_governor_list, governor_list)
                if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
                        return t;

        return NULL;
}

/**
 * cpufreq_parse_governor - parse a governor string
 */
static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
                                struct cpufreq_governor **governor)
{
        int err = -EINVAL;

        if (!cpufreq_driver)
                goto out;

        if (cpufreq_driver->setpolicy) {
                if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
                        *policy = CPUFREQ_POLICY_PERFORMANCE;
                        err = 0;
                } else if (!strnicmp(str_governor, "powersave",
                                                CPUFREQ_NAME_LEN)) {
                        *policy = CPUFREQ_POLICY_POWERSAVE;
                        err = 0;
                }
        } else if (cpufreq_driver->target) {
                struct cpufreq_governor *t;

                mutex_lock(&cpufreq_governor_mutex);

                t = __find_governor(str_governor);

                if (t == NULL) {
                        int ret;

                        mutex_unlock(&cpufreq_governor_mutex);
                        ret = request_module("cpufreq_%s", str_governor);
                        mutex_lock(&cpufreq_governor_mutex);

                        if (ret == 0)
                                t = __find_governor(str_governor);
                }

                if (t != NULL) {
                        *governor = t;
                        err = 0;
                }

                mutex_unlock(&cpufreq_governor_mutex);
        }
out:
        return err;
}
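
/*
 * Example behaviour (illustrative): with a driver that implements
 * ->target, cpufreq_parse_governor("ondemand", &pol, &gov) looks
 * "ondemand" up in cpufreq_governor_list (request_module()ing
 * "cpufreq_ondemand" first if it is not yet registered) and returns it
 * through @gov.  With a ->setpolicy driver, parsing "performance"
 * instead sets @pol to CPUFREQ_POLICY_PERFORMANCE.
 */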


/**
 * cpufreq_per_cpu_attr_read() / show_##file_name() -
 * print out cpufreq information
 *
 * Write out information from cpufreq_driver->policy[cpu]; object must be
 * "unsigned int".
 */

#define show_one(file_name, object)                     \
static ssize_t show_##file_name                         \
(struct cpufreq_policy *policy, char *buf)              \
{                                                       \
        return sprintf(buf, "%u\n", policy->object);    \
}

show_one(cpuinfo_min_freq, cpuinfo.min_freq);
show_one(cpuinfo_max_freq, cpuinfo.max_freq);
show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
show_one(scaling_min_freq, min);
show_one(scaling_max_freq, max);
show_one(scaling_cur_freq, cur);

static int __cpufreq_set_policy(struct cpufreq_policy *data,
                                struct cpufreq_policy *policy);

/**
 * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
 */
#define store_one(file_name, object)                    \
static ssize_t store_##file_name                                        \
(struct cpufreq_policy *policy, const char *buf, size_t count)          \
{                                                                       \
        int ret = -EINVAL;                                              \
        struct cpufreq_policy new_policy;                               \
                                                                        \
        ret = cpufreq_get_policy(&new_policy, policy->cpu);             \
        if (ret)                                                        \
                return -EINVAL;                                         \
                                                                        \
        ret = sscanf(buf, "%u", &new_policy.object);                    \
        if (ret != 1)                                                   \
                return -EINVAL;                                         \
                                                                        \
        ret = __cpufreq_set_policy(policy, &new_policy);                \
        policy->user_policy.object = policy->object;                    \
                                                                        \
        return ret ? ret : count;                                       \
}

store_one(scaling_min_freq, min);
store_one(scaling_max_freq, max);
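
/*
 * For reference, show_one(scaling_cur_freq, cur) above expands
 * (modulo whitespace) to:
 *
 *    static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy,
 *                                         char *buf)
 *    {
 *            return sprintf(buf, "%u\n", policy->cur);
 *    }
 *
 * store_one() likewise generates the matching store_* writers used by
 * the scaling_min_freq and scaling_max_freq attributes.  Note that ret
 * is signed so a negative errno from __cpufreq_set_policy() propagates
 * through the ssize_t return value.
 */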

/**
 * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
 */
static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
                                        char *buf)
{
        unsigned int cur_freq = __cpufreq_get(policy->cpu);
        if (!cur_freq)
                return sprintf(buf, "<unknown>");
        return sprintf(buf, "%u\n", cur_freq);
}


/**
 * show_scaling_governor - show the current policy for the specified CPU
 */
static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
{
        if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
                return sprintf(buf, "powersave\n");
        else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
                return sprintf(buf, "performance\n");
        else if (policy->governor)
                return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n",
                                policy->governor->name);
        return -EINVAL;
}


/**
 * store_scaling_governor - store policy for the specified CPU
 */
static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
                                        const char *buf, size_t count)
{
        int ret = -EINVAL;
        char    str_governor[16];
        struct cpufreq_policy new_policy;

        ret = cpufreq_get_policy(&new_policy, policy->cpu);
        if (ret)
                return ret;

        ret = sscanf(buf, "%15s", str_governor);
        if (ret != 1)
                return -EINVAL;

        if (cpufreq_parse_governor(str_governor, &new_policy.policy,
                                                &new_policy.governor))
                return -EINVAL;

        /* Do not use cpufreq_set_policy here or the user_policy.max
           will be wrongly overridden */
        ret = __cpufreq_set_policy(policy, &new_policy);

        policy->user_policy.policy = policy->policy;
        policy->user_policy.governor = policy->governor;

        if (ret)
                return ret;
        else
                return count;
}

/**
 * show_scaling_driver - show the cpufreq driver currently loaded
 */
static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
{
        return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
}

/**
 * show_scaling_available_governors - show the available CPUfreq governors
 */
static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
                                                char *buf)
{
        ssize_t i = 0;
        struct cpufreq_governor *t;

        if (!cpufreq_driver->target) {
                i += sprintf(buf, "performance powersave");
                goto out;
        }

        list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
                if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
                    - (CPUFREQ_NAME_LEN + 2)))
                        goto out;
                i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
        }
out:
        i += sprintf(&buf[i], "\n");
        return i;
}

static ssize_t show_cpus(const struct cpumask *mask, char *buf)
{
        ssize_t i = 0;
        unsigned int cpu;

        for_each_cpu(cpu, mask) {
                if (i)
                        i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
                i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
                if (i >= (PAGE_SIZE - 5))
                        break;
        }
        i += sprintf(&buf[i], "\n");
        return i;
}

/**
 * show_related_cpus - show the CPUs affected by each transition even if
 * hw coordination is in use
 */
static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
{
        if (cpumask_empty(policy->related_cpus))
                return show_cpus(policy->cpus, buf);
        return show_cpus(policy->related_cpus, buf);
}

/**
 * show_affected_cpus - show the CPUs affected by each transition
 */
static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
{
        return show_cpus(policy->cpus, buf);
}

static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
                                        const char *buf, size_t count)
{
        unsigned int freq = 0;
        unsigned int ret;

        if (!policy->governor || !policy->governor->store_setspeed)
                return -EINVAL;

        ret = sscanf(buf, "%u", &freq);
        if (ret != 1)
                return -EINVAL;

        policy->governor->store_setspeed(policy, freq);

        return count;
}

static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
{
        if (!policy->governor || !policy->governor->show_setspeed)
                return sprintf(buf, "<unsupported>\n");

        return policy->governor->show_setspeed(policy, buf);
}

/**
 * show_bios_limit - show the current cpufreq HW/BIOS limitation
 */
static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
{
        unsigned int limit;
        int ret;
        if (cpufreq_driver->bios_limit) {
                ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
                if (!ret)
                        return sprintf(buf, "%u\n", limit);
        }
        return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
}

cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
cpufreq_freq_attr_ro(cpuinfo_min_freq);
cpufreq_freq_attr_ro(cpuinfo_max_freq);
cpufreq_freq_attr_ro(cpuinfo_transition_latency);
cpufreq_freq_attr_ro(scaling_available_governors);
cpufreq_freq_attr_ro(scaling_driver);
cpufreq_freq_attr_ro(scaling_cur_freq);
cpufreq_freq_attr_ro(bios_limit);
cpufreq_freq_attr_ro(related_cpus);
cpufreq_freq_attr_ro(affected_cpus);
cpufreq_freq_attr_rw(scaling_min_freq);
cpufreq_freq_attr_rw(scaling_max_freq);
cpufreq_freq_attr_rw(scaling_governor);
cpufreq_freq_attr_rw(scaling_setspeed);

static struct attribute *default_attrs[] = {
        &cpuinfo_min_freq.attr,
        &cpuinfo_max_freq.attr,
        &cpuinfo_transition_latency.attr,
        &scaling_min_freq.attr,
        &scaling_max_freq.attr,
        &affected_cpus.attr,
        &related_cpus.attr,
        &scaling_governor.attr,
        &scaling_driver.attr,
        &scaling_available_governors.attr,
        &scaling_setspeed.attr,
        NULL
};

struct kobject *cpufreq_global_kobject;
EXPORT_SYMBOL(cpufreq_global_kobject);

#define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
#define to_attr(a) container_of(a, struct freq_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
        struct cpufreq_policy *policy = to_policy(kobj);
        struct freq_attr *fattr = to_attr(attr);
        ssize_t ret = -EINVAL;
        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
                goto no_policy;

        if (lock_policy_rwsem_read(policy->cpu) < 0)
                goto fail;

        if (fattr->show)
                ret = fattr->show(policy, buf);
        else
                ret = -EIO;

        unlock_policy_rwsem_read(policy->cpu);
fail:
        cpufreq_cpu_put(policy);
no_policy:
        return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
                     const char *buf, size_t count)
{
        struct cpufreq_policy *policy = to_policy(kobj);
        struct freq_attr *fattr = to_attr(attr);
        ssize_t ret = -EINVAL;
        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
                goto no_policy;

        if (lock_policy_rwsem_write(policy->cpu) < 0)
                goto fail;

        if (fattr->store)
                ret = fattr->store(policy, buf, count);
        else
                ret = -EIO;

        unlock_policy_rwsem_write(policy->cpu);
fail:
        cpufreq_cpu_put(policy);
no_policy:
        return ret;
}

static void cpufreq_sysfs_release(struct kobject *kobj)
{
        struct cpufreq_policy *policy = to_policy(kobj);
        pr_debug("last reference is dropped\n");
        complete(&policy->kobj_unregister);
}

static const struct sysfs_ops sysfs_ops = {
        .show   = show,
        .store  = store,
};

static struct kobj_type ktype_cpufreq = {
        .sysfs_ops      = &sysfs_ops,
        .default_attrs  = default_attrs,
        .release        = cpufreq_sysfs_release,
};

/*
 * Returns:
 *   Negative: Failure
 *   0:        Success
 *   Positive: When we have a managed CPU and the sysfs got symlinked
 */
static int cpufreq_add_dev_policy(unsigned int cpu,
                                  struct cpufreq_policy *policy,
                                  struct sys_device *sys_dev)
{
        int ret = 0;
#ifdef CONFIG_SMP
        unsigned long flags;
        unsigned int j;
#ifdef CONFIG_HOTPLUG_CPU
        struct cpufreq_governor *gov;

        gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
        if (gov) {
                policy->governor = gov;
                pr_debug("Restoring governor %s for cpu %d\n",
                       policy->governor->name, cpu);
        }
#endif

        for_each_cpu(j, policy->cpus) {
                struct cpufreq_policy *managed_policy;

                if (cpu == j)
                        continue;

                /* Check for existing affected CPUs.
                 * They may not be aware of it due to CPU Hotplug.
                 * cpufreq_cpu_put is called when the device is removed
                 * in __cpufreq_remove_dev()
                 */
                managed_policy = cpufreq_cpu_get(j);
                if (unlikely(managed_policy)) {

                        /* Set proper policy_cpu */
                        unlock_policy_rwsem_write(cpu);
                        per_cpu(cpufreq_policy_cpu, cpu) = managed_policy->cpu;

                        if (lock_policy_rwsem_write(cpu) < 0) {
                                /* Should not go through policy unlock path */
                                if (cpufreq_driver->exit)
                                        cpufreq_driver->exit(policy);
                                cpufreq_cpu_put(managed_policy);
                                return -EBUSY;
                        }

                        spin_lock_irqsave(&cpufreq_driver_lock, flags);
                        cpumask_copy(managed_policy->cpus, policy->cpus);
                        per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
                        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

                        pr_debug("CPU already managed, adding link\n");
                        ret = sysfs_create_link(&sys_dev->kobj,
                                                &managed_policy->kobj,
                                                "cpufreq");
                        if (ret)
                                cpufreq_cpu_put(managed_policy);
                        /*
                         * Success. We only needed to be added to the mask.
                         * Call driver->exit() because only the cpu parent of
                         * the kobj needed to call init().
                         */
                        if (cpufreq_driver->exit)
                                cpufreq_driver->exit(policy);

                        if (!ret)
                                return 1;
                        else
                                return ret;
                }
        }
#endif
        return ret;
}


/* symlink affected CPUs */
static int cpufreq_add_dev_symlink(unsigned int cpu,
                                   struct cpufreq_policy *policy)
{
        unsigned int j;
        int ret = 0;

        for_each_cpu(j, policy->cpus) {
                struct cpufreq_policy *managed_policy;
                struct sys_device *cpu_sys_dev;

                if (j == cpu)
                        continue;
                if (!cpu_online(j))
                        continue;

                pr_debug("CPU %u already managed, adding link\n", j);
                managed_policy = cpufreq_cpu_get(cpu);
                cpu_sys_dev = get_cpu_sysdev(j);
                ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
                                        "cpufreq");
                if (ret) {
                        cpufreq_cpu_put(managed_policy);
                        return ret;
                }
        }
        return ret;
}

static int cpufreq_add_dev_interface(unsigned int cpu,
                                     struct cpufreq_policy *policy,
                                     struct sys_device *sys_dev)
{
        struct cpufreq_policy new_policy;
        struct freq_attr **drv_attr;
        unsigned long flags;
        int ret = 0;
        unsigned int j;

        /* prepare interface data */
        ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
                                   &sys_dev->kobj, "cpufreq");
        if (ret)
                return ret;

        /* set up files for this cpu device */
        drv_attr = cpufreq_driver->attr;
        while ((drv_attr) && (*drv_attr)) {
                ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
                if (ret)
                        goto err_out_kobj_put;
                drv_attr++;
        }
        if (cpufreq_driver->get) {
                ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
                if (ret)
                        goto err_out_kobj_put;
        }
        if (cpufreq_driver->target) {
                ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
                if (ret)
                        goto err_out_kobj_put;
        }
        if (cpufreq_driver->bios_limit) {
                ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
                if (ret)
                        goto err_out_kobj_put;
        }

        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        for_each_cpu(j, policy->cpus) {
                if (!cpu_online(j))
                        continue;
                per_cpu(cpufreq_cpu_data, j) = policy;
                per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
        }
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        ret = cpufreq_add_dev_symlink(cpu, policy);
        if (ret)
                goto err_out_kobj_put;

        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
        /* assure that the starting sequence is run in __cpufreq_set_policy */
        policy->governor = NULL;

        /* set default policy */
        ret = __cpufreq_set_policy(policy, &new_policy);
        policy->user_policy.policy = policy->policy;
        policy->user_policy.governor = policy->governor;

        if (ret) {
                pr_debug("setting policy failed\n");
                if (cpufreq_driver->exit)
                        cpufreq_driver->exit(policy);
        }
        return ret;

err_out_kobj_put:
        kobject_put(&policy->kobj);
        wait_for_completion(&policy->kobj_unregister);
        return ret;
}


/**
 * cpufreq_add_dev - add a CPU device
 *
 * Adds the cpufreq interface for a CPU device.
 *
 * The Oracle says: try running cpufreq registration/unregistration concurrently
 * with cpu hotplugging and all hell will break loose. Tried to clean this
 * mess up, but more thorough testing is needed. - Mathieu
 */
static int cpufreq_add_dev(struct sys_device *sys_dev)
{
        unsigned int cpu = sys_dev->id;
        int ret = 0, found = 0;
        struct cpufreq_policy *policy;
        unsigned long flags;
        unsigned int j;
#ifdef CONFIG_HOTPLUG_CPU
        int sibling;
#endif

        if (cpu_is_offline(cpu))
                return 0;

        pr_debug("adding CPU %u\n", cpu);

#ifdef CONFIG_SMP
        /* check whether a different CPU already registered this
         * CPU because it is in the same boat. */
        policy = cpufreq_cpu_get(cpu);
        if (unlikely(policy)) {
                cpufreq_cpu_put(policy);
                return 0;
        }
#endif

        if (!try_module_get(cpufreq_driver->owner)) {
                ret = -EINVAL;
                goto module_out;
        }

        ret = -ENOMEM;
        policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
        if (!policy)
                goto nomem_out;

        if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
                goto err_free_policy;

        if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
                goto err_free_cpumask;

        policy->cpu = cpu;
        cpumask_copy(policy->cpus, cpumask_of(cpu));

        /* Initially set CPU itself as the policy_cpu */
        per_cpu(cpufreq_policy_cpu, cpu) = cpu;
        ret = (lock_policy_rwsem_write(cpu) < 0);
        WARN_ON(ret);

        init_completion(&policy->kobj_unregister);
        INIT_WORK(&policy->update, handle_update);

        /* Set governor before ->init, so that driver could check it */
#ifdef CONFIG_HOTPLUG_CPU
        for_each_online_cpu(sibling) {
                struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
                if (cp && cp->governor &&
                    (cpumask_test_cpu(cpu, cp->related_cpus))) {
                        policy->governor = cp->governor;
                        found = 1;
                        break;
                }
        }
#endif
        if (!found)
                policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
        /* call driver. From then on the cpufreq must be able
         * to accept all calls to ->verify and ->setpolicy for this CPU
         */
        ret = cpufreq_driver->init(policy);
        if (ret) {
                pr_debug("initialization failed\n");
                goto err_unlock_policy;
        }
        policy->user_policy.min = policy->min;
        policy->user_policy.max = policy->max;

        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
                                     CPUFREQ_START, policy);

        ret = cpufreq_add_dev_policy(cpu, policy, sys_dev);
        if (ret) {
                if (ret > 0)
                        /* This is a managed cpu, symlink created,
                           exit with 0 */
                        ret = 0;
                goto err_unlock_policy;
        }

        ret = cpufreq_add_dev_interface(cpu, policy, sys_dev);
        if (ret)
                goto err_out_unregister;

        unlock_policy_rwsem_write(cpu);

        kobject_uevent(&policy->kobj, KOBJ_ADD);
        module_put(cpufreq_driver->owner);
        pr_debug("initialization complete\n");

        return 0;


err_out_unregister:
        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        for_each_cpu(j, policy->cpus)
                per_cpu(cpufreq_cpu_data, j) = NULL;
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        kobject_put(&policy->kobj);
        wait_for_completion(&policy->kobj_unregister);

err_unlock_policy:
        unlock_policy_rwsem_write(cpu);
        free_cpumask_var(policy->related_cpus);
err_free_cpumask:
        free_cpumask_var(policy->cpus);
err_free_policy:
        kfree(policy);
nomem_out:
        module_put(cpufreq_driver->owner);
module_out:
        return ret;
}


/**
 * __cpufreq_remove_dev - remove a CPU device
 *
 * Removes the cpufreq interface for a CPU device.
 * Caller should already have policy_rwsem in write mode for this CPU.
 * This routine releases the rwsem before returning.
 */
static int __cpufreq_remove_dev(struct sys_device *sys_dev)
{
        unsigned int cpu = sys_dev->id;
        unsigned long flags;
        struct cpufreq_policy *data;
        struct kobject *kobj;
        struct completion *cmp;
#ifdef CONFIG_SMP
        struct sys_device *cpu_sys_dev;
        unsigned int j;
#endif

        pr_debug("unregistering CPU %u\n", cpu);

        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        data = per_cpu(cpufreq_cpu_data, cpu);

        if (!data) {
                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
                unlock_policy_rwsem_write(cpu);
                return -EINVAL;
        }
        per_cpu(cpufreq_cpu_data, cpu) = NULL;


#ifdef CONFIG_SMP
        /* if this isn't the CPU which is the parent of the kobj, we
         * only need to unlink, put and exit
         */
        if (unlikely(cpu != data->cpu)) {
                pr_debug("removing link\n");
                cpumask_clear_cpu(cpu, data->cpus);
                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
                kobj = &sys_dev->kobj;
                cpufreq_cpu_put(data);
                unlock_policy_rwsem_write(cpu);
                sysfs_remove_link(kobj, "cpufreq");
                return 0;
        }
#endif

#ifdef CONFIG_SMP

#ifdef CONFIG_HOTPLUG_CPU
        strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
                        CPUFREQ_NAME_LEN);
#endif

        /* if we have other CPUs still registered, we need to unlink them,
         * or else wait_for_completion below will lock up. Clean the
         * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
         * the sysfs links afterwards.
         */
        if (unlikely(cpumask_weight(data->cpus) > 1)) {
                for_each_cpu(j, data->cpus) {
                        if (j == cpu)
                                continue;
                        per_cpu(cpufreq_cpu_data, j) = NULL;
                }
        }

        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        if (unlikely(cpumask_weight(data->cpus) > 1)) {
                for_each_cpu(j, data->cpus) {
                        if (j == cpu)
                                continue;
                        pr_debug("removing link for cpu %u\n", j);
#ifdef CONFIG_HOTPLUG_CPU
                        strncpy(per_cpu(cpufreq_cpu_governor, j),
                                data->governor->name, CPUFREQ_NAME_LEN);
#endif
                        cpu_sys_dev = get_cpu_sysdev(j);
                        kobj = &cpu_sys_dev->kobj;
                        unlock_policy_rwsem_write(cpu);
                        sysfs_remove_link(kobj, "cpufreq");
                        lock_policy_rwsem_write(cpu);
                        cpufreq_cpu_put(data);
                }
        }
#else
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
#endif

        if (cpufreq_driver->target)
                __cpufreq_governor(data, CPUFREQ_GOV_STOP);

        kobj = &data->kobj;
        cmp = &data->kobj_unregister;
        unlock_policy_rwsem_write(cpu);
        kobject_put(kobj);

        /* we need to make sure that the underlying kobj is actually
         * not referenced anymore by anybody before we proceed with
         * unloading.
         */
        pr_debug("waiting for dropping of refcount\n");
        wait_for_completion(cmp);
        pr_debug("wait complete\n");

        lock_policy_rwsem_write(cpu);
        if (cpufreq_driver->exit)
                cpufreq_driver->exit(data);
        unlock_policy_rwsem_write(cpu);

#ifdef CONFIG_HOTPLUG_CPU
        /* when the CPU which is the parent of the kobj is hotplugged
         * offline, check for siblings, and create cpufreq sysfs interface
         * and symlinks
         */
        if (unlikely(cpumask_weight(data->cpus) > 1)) {
                /* first sibling now owns the new sysfs dir */
                cpumask_clear_cpu(cpu, data->cpus);
                cpufreq_add_dev(get_cpu_sysdev(cpumask_first(data->cpus)));

                /* finally remove our own symlink */
                lock_policy_rwsem_write(cpu);
                __cpufreq_remove_dev(sys_dev);
        }
#endif

        free_cpumask_var(data->related_cpus);
        free_cpumask_var(data->cpus);
        kfree(data);

        return 0;
}


static int cpufreq_remove_dev(struct sys_device *sys_dev)
{
        unsigned int cpu = sys_dev->id;
        int retval;

        if (cpu_is_offline(cpu))
                return 0;

        if (unlikely(lock_policy_rwsem_write(cpu)))
                BUG();

        retval = __cpufreq_remove_dev(sys_dev);
        return retval;
}


static void handle_update(struct work_struct *work)
{
        struct cpufreq_policy *policy =
                container_of(work, struct cpufreq_policy, update);
        unsigned int cpu = policy->cpu;
        pr_debug("handle_update for cpu %u called\n", cpu);
        cpufreq_update_policy(cpu);
}

/**
 *      cpufreq_out_of_sync - If actual and saved CPU frequency differ, we're in deep trouble.
 *      @cpu: cpu number
 *      @old_freq: CPU frequency the kernel thinks the CPU runs at
 *      @new_freq: CPU frequency the CPU actually runs at
 *
 *      We adjust to the current frequency first, and need to clean up later.
 *      So either call cpufreq_update_policy() or schedule handle_update().
 */
static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
                                unsigned int new_freq)
{
        struct cpufreq_freqs freqs;

        pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
               "core think it is %u kHz, but it is %u kHz.\n",
               old_freq, new_freq);

        freqs.cpu = cpu;
        freqs.old = old_freq;
        freqs.new = new_freq;
        cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
        cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}


/**
 * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
 * @cpu: CPU number
 *
 * This is the last known freq, without actually getting it from the driver.
 * Return value will be same as what is shown in scaling_cur_freq in sysfs.
 */
unsigned int cpufreq_quick_get(unsigned int cpu)
{
        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
        unsigned int ret_freq = 0;

        if (policy) {
                ret_freq = policy->cur;
                cpufreq_cpu_put(policy);
        }

        return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get);
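
/*
 * Usage sketch (illustrative): callers that only need the last known
 * frequency, e.g. for reporting, can avoid a driver round-trip:
 *
 *    unsigned int khz = cpufreq_quick_get(cpu);
 *
 *    if (khz)
 *            pr_info("cpu%u last set to %u kHz\n", cpu, khz);
 *
 * A return value of 0 means there is no policy (or no known frequency)
 * for that CPU.
 */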


static unsigned int __cpufreq_get(unsigned int cpu)
{
        struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
        unsigned int ret_freq = 0;

        if (!cpufreq_driver->get)
                return ret_freq;

        ret_freq = cpufreq_driver->get(cpu);

        if (ret_freq && policy->cur &&
                !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
                /* verify no discrepancy between actual and
                                        saved value exists */
                if (unlikely(ret_freq != policy->cur)) {
                        cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
                        schedule_work(&policy->update);
                }
        }

        return ret_freq;
}

/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Get the current CPU frequency as reported by the driver.
 */
unsigned int cpufreq_get(unsigned int cpu)
{
        unsigned int ret_freq = 0;
        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);

        if (!policy)
                goto out;

        if (unlikely(lock_policy_rwsem_read(cpu)))
                goto out_policy;

        ret_freq = __cpufreq_get(cpu);

        unlock_policy_rwsem_read(cpu);

out_policy:
        cpufreq_cpu_put(policy);
out:
        return ret_freq;
}
EXPORT_SYMBOL(cpufreq_get);

static struct sysdev_driver cpufreq_sysdev_driver = {
        .add            = cpufreq_add_dev,
        .remove         = cpufreq_remove_dev,
};


/**
 * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
 *
 * This function is only executed for the boot processor.  The other CPUs
 * have been put offline by means of CPU hotplug.
 */
static int cpufreq_bp_suspend(void)
{
        int ret = 0;

        int cpu = smp_processor_id();
        struct cpufreq_policy *cpu_policy;

        pr_debug("suspending cpu %u\n", cpu);

        /* If there's no policy for the boot CPU, we have nothing to do. */
        cpu_policy = cpufreq_cpu_get(cpu);
        if (!cpu_policy)
                return 0;

        if (cpufreq_driver->suspend) {
                ret = cpufreq_driver->suspend(cpu_policy);
                if (ret)
                        printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
                                        "step on CPU %u\n", cpu_policy->cpu);
        }

        cpufreq_cpu_put(cpu_policy);
        return ret;
}

/**
 * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
 *
 *      1.) resume CPUfreq hardware support (cpufreq_driver->resume())
 *      2.) schedule a call to cpufreq_update_policy() ASAP as interrupts are
 *          restored. It will verify that the current freq is in sync with
 *          what we believe it to be. This is a bit later than when it
 *          should be, but nonetheless it's better than calling
 *          cpufreq_driver->get() here which might re-enable interrupts...
 *
 * This function is only executed for the boot CPU.  The other CPUs have not
 * been turned on yet.
 */
static void cpufreq_bp_resume(void)
{
        int ret = 0;

        int cpu = smp_processor_id();
        struct cpufreq_policy *cpu_policy;

        pr_debug("resuming cpu %u\n", cpu);

        /* If there's no policy for the boot CPU, we have nothing to do. */
        cpu_policy = cpufreq_cpu_get(cpu);
        if (!cpu_policy)
                return;

        if (cpufreq_driver->resume) {
                ret = cpufreq_driver->resume(cpu_policy);
                if (ret) {
                        printk(KERN_ERR "cpufreq: resume failed in ->resume "
                                        "step on CPU %u\n", cpu_policy->cpu);
                        goto fail;
                }
        }

        schedule_work(&cpu_policy->update);

fail:
        cpufreq_cpu_put(cpu_policy);
}

static struct syscore_ops cpufreq_syscore_ops = {
        .suspend        = cpufreq_bp_suspend,
        .resume         = cpufreq_bp_resume,
};


/*********************************************************************
 *                     NOTIFIER LISTS INTERFACE                      *
 *********************************************************************/

/**
 *      cpufreq_register_notifier - register a driver with cpufreq
 *      @nb: notifier function to register
 *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *      Add a driver to one of two lists: either a list of drivers that
 *      are notified about clock rate changes (once before and once after
 *      the transition), or a list of drivers that are notified about
 *      changes in cpufreq policy.
 *
 *      This function may sleep, and has the same return conditions as
 *      blocking_notifier_chain_register.
 */
int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
{
        int ret;

        WARN_ON(!init_cpufreq_transition_notifier_list_called);

        switch (list) {
        case CPUFREQ_TRANSITION_NOTIFIER:
                ret = srcu_notifier_chain_register(
                                &cpufreq_transition_notifier_list, nb);
                break;
        case CPUFREQ_POLICY_NOTIFIER:
                ret = blocking_notifier_chain_register(
                                &cpufreq_policy_notifier_list, nb);
                break;
        default:
                ret = -EINVAL;
        }

        return ret;
}
EXPORT_SYMBOL(cpufreq_register_notifier);
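
/*
 * Registration sketch (illustrative; "my_cb" and the notifier_block name
 * are hypothetical).  Transition notifiers receive a struct cpufreq_freqs
 * via the void *data argument, once with CPUFREQ_PRECHANGE and once with
 * CPUFREQ_POSTCHANGE per transition:
 *
 *    static int my_cb(struct notifier_block *nb, unsigned long val,
 *                     void *data)
 *    {
 *            struct cpufreq_freqs *freqs = data;
 *
 *            if (val == CPUFREQ_POSTCHANGE)
 *                    pr_debug("cpu%u now at %u kHz\n",
 *                             freqs->cpu, freqs->new);
 *            return NOTIFY_OK;
 *    }
 *
 *    static struct notifier_block my_nb = { .notifier_call = my_cb, };
 *
 *    cpufreq_register_notifier(&my_nb, CPUFREQ_TRANSITION_NOTIFIER);
 */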


/**
 *      cpufreq_unregister_notifier - unregister a driver with cpufreq
 *      @nb: notifier block to be unregistered
 *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *      Remove a driver from the CPU frequency notifier list.
 *
 *      This function may sleep, and has the same return conditions as
 *      blocking_notifier_chain_unregister.
 */
int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
{
        int ret;

        switch (list) {
        case CPUFREQ_TRANSITION_NOTIFIER:
                ret = srcu_notifier_chain_unregister(
                                &cpufreq_transition_notifier_list, nb);
                break;
        case CPUFREQ_POLICY_NOTIFIER:
                ret = blocking_notifier_chain_unregister(
                                &cpufreq_policy_notifier_list, nb);
                break;
        default:
                ret = -EINVAL;
        }

        return ret;
}
EXPORT_SYMBOL(cpufreq_unregister_notifier);


/*********************************************************************
 *                              GOVERNORS                            *
 *********************************************************************/


int __cpufreq_driver_target(struct cpufreq_policy *policy,
                            unsigned int target_freq,
                            unsigned int relation)
{
        int retval = -EINVAL;

        pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
                target_freq, relation);
        if (cpu_online(policy->cpu) && cpufreq_driver->target)
                retval = cpufreq_driver->target(policy, target_freq, relation);

        return retval;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_target);

int cpufreq_driver_target(struct cpufreq_policy *policy,
                          unsigned int target_freq,
                          unsigned int relation)
{
        int ret = -EINVAL;

        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
                goto no_policy;

        if (unlikely(lock_policy_rwsem_write(policy->cpu)))
                goto fail;

        ret = __cpufreq_driver_target(policy, target_freq, relation);

        unlock_policy_rwsem_write(policy->cpu);

fail:
        cpufreq_cpu_put(policy);
no_policy:
        return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_driver_target);
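
/*
 * Usage sketch (illustrative): a governor asking for at least 800 MHz
 * would call
 *
 *    cpufreq_driver_target(policy, 800000, CPUFREQ_RELATION_L);
 *
 * where CPUFREQ_RELATION_L selects the lowest frequency at or above the
 * target and CPUFREQ_RELATION_H the highest one at or below it.  The
 * locked variant above handles the policy rwsem and refcounting itself;
 * __cpufreq_driver_target() is for callers that already hold the lock.
 */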

int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
{
        int ret = 0;

        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
                return -EINVAL;

        if (cpu_online(cpu) && cpufreq_driver->getavg)
                ret = cpufreq_driver->getavg(policy, cpu);

        cpufreq_cpu_put(policy);
        return ret;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);

/*
 * __cpufreq_governor - invoke the governor with one of the
 * CPUFREQ_GOV_* events (START, STOP or LIMITS).
 */

static int __cpufreq_governor(struct cpufreq_policy *policy,
                                        unsigned int event)
{
        int ret;

        /* gov only needs to be defined when the default governor is known
           to have latency restrictions, like e.g. conservative or ondemand.
           That this is the case is already ensured in Kconfig.
        */
#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
        struct cpufreq_governor *gov = &cpufreq_gov_performance;
#else
        struct cpufreq_governor *gov = NULL;
#endif

        if (policy->governor->max_transition_latency &&
            policy->cpuinfo.transition_latency >
            policy->governor->max_transition_latency) {
                if (!gov)
                        return -EINVAL;
                else {
                        printk(KERN_WARNING "%s governor failed, too long"
                               " transition latency of HW, fallback"
                               " to %s governor\n",
                               policy->governor->name,
                               gov->name);
                        policy->governor = gov;
                }
        }

        if (!try_module_get(policy->governor->owner))
                return -EINVAL;

        pr_debug("__cpufreq_governor for CPU %u, event %u\n",
                                                policy->cpu, event);
        ret = policy->governor->governor(policy, event);

        /* we keep one module reference alive for
                        each CPU governed by this CPU */
        if ((event != CPUFREQ_GOV_START) || ret)
                module_put(policy->governor->owner);
        if ((event == CPUFREQ_GOV_STOP) && !ret)
                module_put(policy->governor->owner);

        return ret;
}

int cpufreq_register_governor(struct cpufreq_governor *governor)
{
        int err;

        if (!governor)
                return -EINVAL;

        mutex_lock(&cpufreq_governor_mutex);

        err = -EBUSY;
        if (__find_governor(governor->name) == NULL) {
                err = 0;
                list_add(&governor->governor_list, &cpufreq_governor_list);
        }

        mutex_unlock(&cpufreq_governor_mutex);
        return err;
}
EXPORT_SYMBOL_GPL(cpufreq_register_governor);
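
/*
 * Example (sketch, not compiled in): a minimal governor module would
 * register itself like this. The names my_governor_fn and my_gov are
 * hypothetical; the fields follow struct cpufreq_governor as used
 * throughout this file.
 *
 *	static int my_governor_fn(struct cpufreq_policy *policy,
 *				  unsigned int event)
 *	{
 *		switch (event) {
 *		case CPUFREQ_GOV_START:		(begin managing policy->cpu)
 *		case CPUFREQ_GOV_STOP:		(stop managing it)
 *		case CPUFREQ_GOV_LIMITS:	(policy->min/max changed)
 *			break;
 *		}
 *		return 0;
 *	}
 *
 *	static struct cpufreq_governor my_gov = {
 *		.name		= "my_gov",
 *		.governor	= my_governor_fn,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	ret = cpufreq_register_governor(&my_gov);
 */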

void cpufreq_unregister_governor(struct cpufreq_governor *governor)
{
#ifdef CONFIG_HOTPLUG_CPU
        int cpu;
#endif

        if (!governor)
                return;

#ifdef CONFIG_HOTPLUG_CPU
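        /*
         * Clear the governor name saved for any offline CPU so that a
         * later online of that CPU does not try to restore a governor
         * that has just been unregistered.
         */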
        for_each_present_cpu(cpu) {
                if (cpu_online(cpu))
                        continue;
                if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
                        per_cpu(cpufreq_cpu_governor, cpu)[0] = '\0';
        }
#endif

        mutex_lock(&cpufreq_governor_mutex);
        list_del(&governor->governor_list);
        mutex_unlock(&cpufreq_governor_mutex);
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);


/*********************************************************************
 *                          POLICY INTERFACE                         *
 *********************************************************************/

/**
 * cpufreq_get_policy - get the current cpufreq_policy
 * @policy: struct cpufreq_policy into which the current cpufreq_policy
 *      is written
 * @cpu: CPU whose policy is requested
 *
 * Reads the current cpufreq policy of @cpu.
 */
int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
{
        struct cpufreq_policy *cpu_policy;

        if (!policy)
                return -EINVAL;

        cpu_policy = cpufreq_cpu_get(cpu);
        if (!cpu_policy)
                return -EINVAL;

        memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));

        cpufreq_cpu_put(cpu_policy);
        return 0;
}
EXPORT_SYMBOL(cpufreq_get_policy);
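
/*
 * Example (sketch, not compiled in): a caller interested only in the
 * current limits of CPU 0 could do:
 *
 *	struct cpufreq_policy pol;
 *
 *	if (!cpufreq_get_policy(&pol, 0))
 *		pr_info("cpu0: %u - %u kHz\n", pol.min, pol.max);
 */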

/*
 * __cpufreq_set_policy - apply a new policy to a CPU
 * @data:   the CPU's current policy
 * @policy: the new policy to be set
 */
static int __cpufreq_set_policy(struct cpufreq_policy *data,
                                struct cpufreq_policy *policy)
{
        int ret = 0;

        pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
                policy->min, policy->max);

        memcpy(&policy->cpuinfo, &data->cpuinfo,
                                sizeof(struct cpufreq_cpuinfo));

        if (policy->min > data->max || policy->max < data->min) {
                ret = -EINVAL;
                goto error_out;
        }

        /* verify that the cpu speed can be set within these limits */
        ret = cpufreq_driver->verify(policy);
        if (ret)
                goto error_out;

        /* adjust if necessary - all reasons */
        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
                        CPUFREQ_ADJUST, policy);

        /* adjust if necessary - hardware incompatibility */
        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
                        CPUFREQ_INCOMPATIBLE, policy);

        /* verify the limits again; the notifiers above may have changed
           them, so the outcome might differ from the first check */
        ret = cpufreq_driver->verify(policy);
        if (ret)
                goto error_out;

        /* notification of the new policy */
        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
                        CPUFREQ_NOTIFY, policy);

        data->min = policy->min;
        data->max = policy->max;

        pr_debug("new min and max freqs are %u - %u kHz\n",
                                        data->min, data->max);

        if (cpufreq_driver->setpolicy) {
                data->policy = policy->policy;
                pr_debug("setting range\n");
                ret = cpufreq_driver->setpolicy(policy);
        } else {
                if (policy->governor != data->governor) {
                        /* save the old, working governor */
                        struct cpufreq_governor *old_gov = data->governor;

                        pr_debug("governor switch\n");

                        /* end old governor */
                        if (data->governor)
                                __cpufreq_governor(data, CPUFREQ_GOV_STOP);

                        /* start new governor */
                        data->governor = policy->governor;
                        if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
                                /* new governor failed, so re-start old one */
                                pr_debug("starting governor %s failed\n",
                                                        data->governor->name);
                                if (old_gov) {
                                        data->governor = old_gov;
                                        __cpufreq_governor(data,
                                                           CPUFREQ_GOV_START);
                                }
                                ret = -EINVAL;
                                goto error_out;
                        }
                        /* might be a policy change, too, so fall through */
                }
                pr_debug("governor: change or update limits\n");
                __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
        }

error_out:
        return ret;
}
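
/*
 * Example (sketch, not compiled in): the CPUFREQ_ADJUST step above is
 * where a policy notifier may tighten the limits. A thermal driver
 * capping the maximum frequency might look like this; my_notify, my_nb
 * and MY_CAP_KHZ are hypothetical names.
 *
 *	static int my_notify(struct notifier_block *nb, unsigned long val,
 *			     void *data)
 *	{
 *		struct cpufreq_policy *policy = data;
 *
 *		if (val == CPUFREQ_ADJUST)
 *			cpufreq_verify_within_limits(policy, policy->min,
 *						     MY_CAP_KHZ);
 *		return 0;
 *	}
 *
 *	static struct notifier_block my_nb = { .notifier_call = my_notify };
 *
 *	cpufreq_register_notifier(&my_nb, CPUFREQ_POLICY_NOTIFIER);
 */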

/**
 *      cpufreq_update_policy - re-evaluate an existing cpufreq policy
 *      @cpu: CPU which shall be re-evaluated
 *
 *      Useful for policy notifiers and for platform code whose
 *      constraints on the policy change at runtime.
 */
int cpufreq_update_policy(unsigned int cpu)
{
        struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
        struct cpufreq_policy policy;
        int ret;

        if (!data) {
                ret = -ENODEV;
                goto no_policy;
        }

        if (unlikely(lock_policy_rwsem_write(cpu))) {
                ret = -EINVAL;
                goto fail;
        }

        pr_debug("updating policy for CPU %u\n", cpu);
        memcpy(&policy, data, sizeof(struct cpufreq_policy));
        policy.min = data->user_policy.min;
        policy.max = data->user_policy.max;
        policy.policy = data->user_policy.policy;
        policy.governor = data->user_policy.governor;

        /* the BIOS might have changed the frequency behind our back:
           ask the driver for the current frequency and notify the
           governors about any change */
        if (cpufreq_driver->get) {
                policy.cur = cpufreq_driver->get(cpu);
                if (!data->cur) {
                        pr_debug("Driver did not initialize current freq\n");
                        data->cur = policy.cur;
                } else {
                        if (data->cur != policy.cur)
                                cpufreq_out_of_sync(cpu, data->cur,
                                                                policy.cur);
                }
        }

        ret = __cpufreq_set_policy(data, &policy);

        unlock_policy_rwsem_write(cpu);

fail:
        cpufreq_cpu_put(data);
no_policy:
        return ret;
}
EXPORT_SYMBOL(cpufreq_update_policy);
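
/*
 * Example (sketch, not compiled in): platform code that learns of a
 * firmware-imposed limit change would simply re-evaluate the affected
 * CPUs; "affected_mask" is a hypothetical cpumask.
 *
 *	for_each_cpu(cpu, affected_mask)
 *		cpufreq_update_policy(cpu);
 */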

static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
                                        unsigned long action, void *hcpu)
{
        unsigned int cpu = (unsigned long)hcpu;
        struct sys_device *sys_dev;

        sys_dev = get_cpu_sysdev(cpu);
        if (sys_dev) {
                switch (action) {
                case CPU_ONLINE:
                case CPU_ONLINE_FROZEN:
                        cpufreq_add_dev(sys_dev);
                        break;
                case CPU_DOWN_PREPARE:
                case CPU_DOWN_PREPARE_FROZEN:
                        if (unlikely(lock_policy_rwsem_write(cpu)))
                                BUG();

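                        /*
                         * Note: no matching unlock here; the write lock
                         * taken above is expected to be released inside
                         * __cpufreq_remove_dev().
                         */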
                        __cpufreq_remove_dev(sys_dev);
                        break;
                case CPU_DOWN_FAILED:
                case CPU_DOWN_FAILED_FROZEN:
                        cpufreq_add_dev(sys_dev);
                        break;
                }
        }
        return NOTIFY_OK;
}

static struct notifier_block __refdata cpufreq_cpu_notifier = {
        .notifier_call = cpufreq_cpu_callback,
};

/*********************************************************************
 *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
 *********************************************************************/

/**
 * cpufreq_register_driver - register a CPU Frequency driver
 * @driver_data: A struct cpufreq_driver containing the values
 * submitted by the CPU Frequency driver.
 *
 * Registers a CPU Frequency driver to this core code. Returns zero on
 * success, -EBUSY when another driver got here first (and is not
 * unregistered in the meantime).
 *
 */
int cpufreq_register_driver(struct cpufreq_driver *driver_data)
{
        unsigned long flags;
        int ret;

        if (!driver_data || !driver_data->verify || !driver_data->init ||
            ((!driver_data->setpolicy) && (!driver_data->target)))
                return -EINVAL;

        pr_debug("trying to register driver %s\n", driver_data->name);

        if (driver_data->setpolicy)
                driver_data->flags |= CPUFREQ_CONST_LOOPS;

        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        if (cpufreq_driver) {
                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
                return -EBUSY;
        }
        cpufreq_driver = driver_data;
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        ret = sysdev_driver_register(&cpu_sysdev_class,
                                        &cpufreq_sysdev_driver);
        if (ret)
                goto err_null_driver;

        if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
                int i;
                ret = -ENODEV;

                /* check for at least one working CPU */
                for (i = 0; i < nr_cpu_ids; i++)
                        if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
                                ret = 0;
                                break;
                        }

                /* if all ->init() calls failed, unregister */
                if (ret) {
                        pr_debug("no CPU initialized for driver %s\n",
                                                        driver_data->name);
                        goto err_sysdev_unreg;
                }
        }

        register_hotcpu_notifier(&cpufreq_cpu_notifier);
        pr_debug("driver %s up and running\n", driver_data->name);

        return 0;
err_sysdev_unreg:
        sysdev_driver_unregister(&cpu_sysdev_class,
                        &cpufreq_sysdev_driver);
err_null_driver:
        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        cpufreq_driver = NULL;
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
        return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_register_driver);
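
/*
 * Example (sketch, not compiled in): a hardware driver registers itself
 * during platform initialization. The my_* names are hypothetical;
 * ->verify, ->init and one of ->setpolicy or ->target are mandatory, as
 * checked at the top of cpufreq_register_driver().
 *
 *	static struct cpufreq_driver my_cpufreq_driver = {
 *		.name	= "mydrv",
 *		.verify	= my_verify,
 *		.init	= my_cpu_init,
 *		.target	= my_target,
 *		.owner	= THIS_MODULE,
 *	};
 *
 *	static int __init my_driver_init(void)
 *	{
 *		return cpufreq_register_driver(&my_cpufreq_driver);
 *	}
 */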

/**
 * cpufreq_unregister_driver - unregister the current CPUFreq driver
 *
 * Unregister the current CPUFreq driver. Only call this if you have
 * the right to do so, i.e. if you successfully registered this driver
 * before. Returns zero if successful, and -EINVAL if the cpufreq_driver
 * is currently not initialised.
 */
int cpufreq_unregister_driver(struct cpufreq_driver *driver)
{
        unsigned long flags;

        if (!cpufreq_driver || (driver != cpufreq_driver))
                return -EINVAL;

        pr_debug("unregistering driver %s\n", driver->name);

        sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver);
        unregister_hotcpu_notifier(&cpufreq_cpu_notifier);

        spin_lock_irqsave(&cpufreq_driver_lock, flags);
        cpufreq_driver = NULL;
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

        return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);

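/*
 * Set up core state at boot: initialize the per-CPU policy bookkeeping
 * and rwsems, create the global "cpufreq" kobject under the cpu sysdev
 * class, and register the syscore operations used during suspend/resume.
 */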
static int __init cpufreq_core_init(void)
{
        int cpu;

        for_each_possible_cpu(cpu) {
                per_cpu(cpufreq_policy_cpu, cpu) = -1;
                init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
        }

        cpufreq_global_kobject = kobject_create_and_add("cpufreq",
                                                &cpu_sysdev_class.kset.kobj);
        BUG_ON(!cpufreq_global_kobject);
        register_syscore_ops(&cpufreq_syscore_ops);

        return 0;
}
core_initcall(cpufreq_core_init);