/*
 *  drivers/cpufreq/cpufreq_ondemand.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/mutex.h>

/*
 * dbs is used in this file as a short form for demand-based switching.
 * It helps to keep variable names shorter and simpler.
 */

#define DEF_FREQUENCY_UP_THRESHOLD              (80)
#define MIN_FREQUENCY_UP_THRESHOLD              (11)
#define MAX_FREQUENCY_UP_THRESHOLD              (100)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default polling interval is 1000 times the transition
 * latency of the processor. The governor works on any processor with a
 * transition latency <= 10 ms, using an appropriate sampling rate.
 * For CPUs with a transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL) this governor will not work.
 * All times here are in us.
 */
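/*
 * Example: a CPU reporting a 10 us transition latency gets a default
 * sampling rate of 10 * DEF_SAMPLING_RATE_LATENCY_MULTIPLIER = 10,000 us
 * (10 ms); anything smaller than MIN_STAT_SAMPLING_RATE is raised to that
 * minimum when the governor starts.
 */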
static unsigned int def_sampling_rate;
#define MIN_SAMPLING_RATE_RATIO                 (2)
/* for correct statistics, we need at least 10 ticks between each measure */
#define MIN_STAT_SAMPLING_RATE                  (MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10))
#define MIN_SAMPLING_RATE                       (def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
#define MAX_SAMPLING_RATE                       (500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER    (1000)
#define TRANSITION_LATENCY_LIMIT                (10 * 1000)

static void do_dbs_timer(void *data);

struct cpu_dbs_info_s {
        cputime64_t prev_cpu_idle;
        cputime64_t prev_cpu_wall;
        struct cpufreq_policy *cur_policy;
        struct work_struct work;
        unsigned int enable;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable; /* number of CPUs using this policy */

/*
 * DEADLOCK ALERT! There is an ordering requirement between the cpu_hotplug
 * lock and dbs_mutex. The cpu_hotplug lock should always be held before
 * dbs_mutex. If any function that can potentially take the cpu_hotplug lock
 * (like __cpufreq_driver_target()) is called with dbs_mutex held, then the
 * cpu_hotplug lock should be taken before that. Note that the cpu_hotplug
 * lock is recursive for the same process. -Venki
 */
static DEFINE_MUTEX(dbs_mutex);

static struct workqueue_struct *kondemand_wq;

struct dbs_tuners {
        unsigned int sampling_rate;
        unsigned int up_threshold;
        unsigned int ignore_nice;
};

static struct dbs_tuners dbs_tuners_ins = {
        .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
        .ignore_nice = 0,
};

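/*
 * Sum of a CPU's idle and iowait ticks; nice time is counted as idle
 * as well when the ignore_nice_load tunable is set.
 */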
static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
{
        cputime64_t retval;

        retval = cputime64_add(kstat_cpu(cpu).cpustat.idle,
                        kstat_cpu(cpu).cpustat.iowait);

        if (dbs_tuners_ins.ignore_nice)
                retval = cputime64_add(retval, kstat_cpu(cpu).cpustat.nice);

        return retval;
}

/************************** sysfs interface ************************/
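/*
 * The tunables below appear under each policy's cpufreq directory,
 * e.g. /sys/devices/system/cpu/cpu0/cpufreq/ondemand/sampling_rate
 * (assuming the usual sysfs mount point).
 */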
static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
{
        return sprintf(buf, "%u\n", MAX_SAMPLING_RATE);
}

static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf)
{
        return sprintf(buf, "%u\n", MIN_SAMPLING_RATE);
}

#define define_one_ro(_name)            \
static struct freq_attr _name =         \
__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(sampling_rate_max);
define_one_ro(sampling_rate_min);

/* cpufreq_ondemand Governor Tunables */
#define show_one(file_name, object)                                     \
static ssize_t show_##file_name                                         \
(struct cpufreq_policy *unused, char *buf)                              \
{                                                                       \
        return sprintf(buf, "%u\n", dbs_tuners_ins.object);             \
}
show_one(sampling_rate, sampling_rate);
show_one(up_threshold, up_threshold);
show_one(ignore_nice_load, ignore_nice);

static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
                const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        mutex_lock(&dbs_mutex);
        if (ret != 1 || input > MAX_SAMPLING_RATE || input < MIN_SAMPLING_RATE) {
                mutex_unlock(&dbs_mutex);
                return -EINVAL;
        }

        dbs_tuners_ins.sampling_rate = input;
        mutex_unlock(&dbs_mutex);

        return count;
}

static ssize_t store_up_threshold(struct cpufreq_policy *unused,
                const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        mutex_lock(&dbs_mutex);
        if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
                        input < MIN_FREQUENCY_UP_THRESHOLD) {
                mutex_unlock(&dbs_mutex);
                return -EINVAL;
        }

        dbs_tuners_ins.up_threshold = input;
        mutex_unlock(&dbs_mutex);

        return count;
}

static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
                const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        unsigned int j;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        if (input > 1)
                input = 1;

        mutex_lock(&dbs_mutex);
        if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
                mutex_unlock(&dbs_mutex);
                return count;
        }
        dbs_tuners_ins.ignore_nice = input;

        /* we need to re-evaluate prev_cpu_idle */
        for_each_online_cpu(j) {
                struct cpu_dbs_info_s *dbs_info;
                dbs_info = &per_cpu(cpu_dbs_info, j);
                dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
                dbs_info->prev_cpu_wall = get_jiffies_64();
        }
        mutex_unlock(&dbs_mutex);

        return count;
}

#define define_one_rw(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(up_threshold);
define_one_rw(ignore_nice_load);

static struct attribute *dbs_attributes[] = {
        &sampling_rate_max.attr,
        &sampling_rate_min.attr,
        &sampling_rate.attr,
        &up_threshold.attr,
        &ignore_nice_load.attr,
        NULL
};

static struct attribute_group dbs_attr_group = {
        .attrs = dbs_attributes,
        .name = "ondemand",
};

/************************** sysfs end ************************/

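/*
 * Main sampling routine: compute the load since the last sample (the
 * least-idle CPU in the policy determines it), jump straight to the
 * maximum frequency when the load exceeds up_threshold, and otherwise
 * drop to the lowest frequency estimated to keep the load safely below
 * the threshold.
 */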
static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
        unsigned int idle_ticks, total_ticks;
        unsigned int load;
        cputime64_t cur_jiffies;

        struct cpufreq_policy *policy;
        unsigned int j;

        if (!this_dbs_info->enable)
                return;

        policy = this_dbs_info->cur_policy;
        cur_jiffies = jiffies64_to_cputime64(get_jiffies_64());
        total_ticks = (unsigned int) cputime64_sub(cur_jiffies,
                        this_dbs_info->prev_cpu_wall);
        this_dbs_info->prev_cpu_wall = cur_jiffies;
        if (!total_ticks)
                return;
        /*
         * Every sampling_rate we check whether the current idle time is
         * less than 20% (default); if it is, we try to increase the
         * frequency. We also look for the lowest frequency which can
         * sustain the load while keeping idle time over 30%. If such a
         * frequency exists, we try to decrease to it.
         *
         * Any frequency increase takes it to the maximum frequency.
         * A frequency decrease attempts to pick the lowest frequency that
         * can sustain the current load.
         */

        /* Get Idle Time */
        idle_ticks = UINT_MAX;
        for_each_cpu_mask(j, policy->cpus) {
                cputime64_t total_idle_ticks;
                unsigned int tmp_idle_ticks;
                struct cpu_dbs_info_s *j_dbs_info;

                j_dbs_info = &per_cpu(cpu_dbs_info, j);
                total_idle_ticks = get_cpu_idle_time(j);
                tmp_idle_ticks = (unsigned int) cputime64_sub(total_idle_ticks,
                                j_dbs_info->prev_cpu_idle);
                j_dbs_info->prev_cpu_idle = total_idle_ticks;

                if (tmp_idle_ticks < idle_ticks)
                        idle_ticks = tmp_idle_ticks;
        }
        load = (100 * (total_ticks - idle_ticks)) / total_ticks;

        /* Check for frequency increase */
        if (load > dbs_tuners_ins.up_threshold) {
                /* if we are already at full speed then break out early */
                if (policy->cur == policy->max)
                        return;

                __cpufreq_driver_target(policy, policy->max,
                        CPUFREQ_RELATION_H);
                return;
        }

        /* Check for frequency decrease */
        /* if we cannot reduce the frequency anymore, break out early */
        if (policy->cur == policy->min)
                return;

        /*
         * The optimal frequency is the lowest frequency that can support
         * the current CPU usage without triggering the up policy. To be
         * safe, we target 10 percentage points under the threshold.
         */
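        /*
         * Example with hypothetical numbers: up_threshold = 80 and
         * load = 35 at policy->cur = 2000000 (kHz) gives
         * freq_next = 2000000 * 35 / 70 = 1000000; CPUFREQ_RELATION_L
         * then picks the lowest table frequency at or above that value.
         */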
        if (load < (dbs_tuners_ins.up_threshold - 10)) {
                unsigned int freq_next;
                freq_next = (policy->cur * load) /
                        (dbs_tuners_ins.up_threshold - 10);

                __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
        }
}

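/*
 * Per-CPU work function: sample under the cpu_hotplug lock, then re-queue
 * itself on the same CPU after sampling_rate microseconds.
 */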
static void do_dbs_timer(void *data)
{
        unsigned int cpu = smp_processor_id();
        struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);

        if (!dbs_info->enable)
                return;

        lock_cpu_hotplug();
        dbs_check_cpu(dbs_info);
        unlock_cpu_hotplug();
        queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work,
                        usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
}

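/*
 * Queue the first sampling work item for a CPU on the kondemand workqueue.
 */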
static inline void dbs_timer_init(unsigned int cpu)
{
        struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);

        INIT_WORK(&dbs_info->work, do_dbs_timer, 0);
        queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work,
                        usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
        return;
}

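/*
 * Stop sampling: clear the enable flag so a racing work item bails out,
 * cancel any pending work and flush the workqueue.
 */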
static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
        dbs_info->enable = 0;
        cancel_delayed_work(&dbs_info->work);
        flush_workqueue(kondemand_wq);
}

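/*
 * Governor callback: GOV_START sets up per-CPU state, the sysfs group and,
 * for the first user, the kondemand workqueue and default sampling rate;
 * GOV_STOP tears them down again; GOV_LIMITS clamps the current frequency
 * into the new policy range.
 */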
static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                                   unsigned int event)
{
        unsigned int cpu = policy->cpu;
        struct cpu_dbs_info_s *this_dbs_info;
        unsigned int j;

        this_dbs_info = &per_cpu(cpu_dbs_info, cpu);

        switch (event) {
        case CPUFREQ_GOV_START:
                if ((!cpu_online(cpu)) || (!policy->cur))
                        return -EINVAL;

                if (policy->cpuinfo.transition_latency >
                                (TRANSITION_LATENCY_LIMIT * 1000)) {
                        printk(KERN_WARNING "ondemand governor failed to load "
                               "due to too long transition latency\n");
                        return -EINVAL;
                }
                if (this_dbs_info->enable) /* Already enabled */
                        break;

                mutex_lock(&dbs_mutex);
                dbs_enable++;
                if (dbs_enable == 1) {
                        kondemand_wq = create_workqueue("kondemand");
                        if (!kondemand_wq) {
                                printk(KERN_ERR "Creation of kondemand failed\n");
                                dbs_enable--;
                                mutex_unlock(&dbs_mutex);
                                return -ENOSPC;
                        }
                }
                for_each_cpu_mask(j, policy->cpus) {
                        struct cpu_dbs_info_s *j_dbs_info;
                        j_dbs_info = &per_cpu(cpu_dbs_info, j);
                        j_dbs_info->cur_policy = policy;

                        j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
                        j_dbs_info->prev_cpu_wall = get_jiffies_64();
                }
                this_dbs_info->enable = 1;
                sysfs_create_group(&policy->kobj, &dbs_attr_group);
                /*
                 * Start the sampling work when this governor is used
                 * for the first time.
                 */
                if (dbs_enable == 1) {
                        unsigned int latency;
                        /* policy latency is in ns. Convert it to us first */
                        latency = policy->cpuinfo.transition_latency / 1000;
                        if (latency == 0)
                                latency = 1;

                        def_sampling_rate = latency *
                                        DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;

                        if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
                                def_sampling_rate = MIN_STAT_SAMPLING_RATE;

                        dbs_tuners_ins.sampling_rate = def_sampling_rate;
                }
                dbs_timer_init(policy->cpu);

                mutex_unlock(&dbs_mutex);
                break;

        case CPUFREQ_GOV_STOP:
                mutex_lock(&dbs_mutex);
                dbs_timer_exit(this_dbs_info);
                sysfs_remove_group(&policy->kobj, &dbs_attr_group);
                dbs_enable--;
                if (dbs_enable == 0)
                        destroy_workqueue(kondemand_wq);

                mutex_unlock(&dbs_mutex);

                break;

        case CPUFREQ_GOV_LIMITS:
                mutex_lock(&dbs_mutex);
                if (policy->max < this_dbs_info->cur_policy->cur)
                        __cpufreq_driver_target(this_dbs_info->cur_policy,
                                                policy->max,
                                                CPUFREQ_RELATION_H);
                else if (policy->min > this_dbs_info->cur_policy->cur)
                        __cpufreq_driver_target(this_dbs_info->cur_policy,
                                                policy->min,
                                                CPUFREQ_RELATION_L);
                mutex_unlock(&dbs_mutex);
                break;
        }
        return 0;
}

static struct cpufreq_governor cpufreq_gov_dbs = {
        .name = "ondemand",
        .governor = cpufreq_governor_dbs,
        .owner = THIS_MODULE,
};

static int __init cpufreq_gov_dbs_init(void)
{
        return cpufreq_register_governor(&cpufreq_gov_dbs);
}

static void __exit cpufreq_gov_dbs_exit(void)
{
        cpufreq_unregister_governor(&cpufreq_gov_dbs);
}

MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
                   "Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");

module_init(cpufreq_gov_dbs_init);
module_exit(cpufreq_gov_dbs_exit);