Merge branch 'for-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetoot...
drivers/cpufreq/cpufreq_governor.c
index e1c6433..1b44496 100644
@@ -36,14 +36,29 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
        struct od_dbs_tuners *od_tuners = dbs_data->tuners;
        struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
        struct cpufreq_policy *policy;
+       unsigned int sampling_rate;
        unsigned int max_load = 0;
        unsigned int ignore_nice;
        unsigned int j;
 
-       if (dbs_data->cdata->governor == GOV_ONDEMAND)
+       if (dbs_data->cdata->governor == GOV_ONDEMAND) {
+               struct od_cpu_dbs_info_s *od_dbs_info =
+                               dbs_data->cdata->get_cpu_dbs_info_s(cpu);
+
+               /*
+                * The ondemand governor sometimes applies an additional
+                * multiplier to space its samples further apart. Apply the
+                * same multiplier to 'sampling_rate' here, so that the
+                * wake-up-from-idle detection stays suitably conservative.
+                */
+               sampling_rate = od_tuners->sampling_rate;
+               sampling_rate *= od_dbs_info->rate_mult;
+
                ignore_nice = od_tuners->ignore_nice_load;
-       else
+       } else {
+               sampling_rate = cs_tuners->sampling_rate;
                ignore_nice = cs_tuners->ignore_nice_load;
+       }
 
        policy = cdbs->cur_policy;
 
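
For context: ondemand raises its 'rate_mult' whenever it decides to sample
less often, so consecutive samples can legitimately be several base periods
apart. A minimal standalone sketch of the effective-period arithmetic used
above (the values are made up for illustration; this is not part of the
patch):

#include <stdio.h>

int main(void)
{
        /* hypothetical example values, not taken from the patch */
        unsigned int sampling_rate = 10000;     /* base period, in usecs */
        unsigned int rate_mult = 4;             /* ondemand delay multiplier */

        /* the hunk above scales the rate the same way */
        unsigned int effective_rate = sampling_rate * rate_mult;

        /* the later idle-wakeup check compares wall_time against 2x this */
        printf("wall_time threshold: %u usecs\n", 2 * effective_rate);
        return 0;
}
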
@@ -96,7 +111,46 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
                if (unlikely(!wall_time || wall_time < idle_time))
                        continue;
 
-               load = 100 * (wall_time - idle_time) / wall_time;
+               /*
+                * If the CPU had gone completely idle, and a task just woke up
+                * on this CPU now, it would be unfair to calculate 'load' the
+                * usual way for this elapsed time-window, because it will show
+                * near-zero load, irrespective of how CPU intensive that task
+                * actually is. This is undesirable for latency-sensitive bursty
+                * workloads.
+                *
+                * To avoid this, we reuse the 'load' from the previous
+                * time-window and give this task a chance to start with a
+                * reasonably high CPU frequency. (However, we shouldn't overdo
+                * this copy, lest we get stuck at a high load (high frequency)
+                * for too long, even when the current system load has actually
+                * dropped down. So we perform the copy only once, upon the
+                * first wake-up from idle.)
+                *
+                * Detecting this situation is easy: the governor's deferrable
+                * timer would not have fired during CPU-idle periods. Hence
+                * an unusually large 'wall_time' (as compared to the sampling
+                * rate) indicates this scenario.
+                *
+                * prev_load can be zero in two cases, and we must recalculate
+                * it in both:
+                * - after a long idle interval (the stored load was ~0)
+                * - after the destructive copy below explicitly cleared it
+                */
+               if (unlikely(wall_time > (2 * sampling_rate) &&
+                            j_cdbs->prev_load)) {
+                       load = j_cdbs->prev_load;
+
+                       /*
+                        * Perform a destructive copy, to ensure that we copy
+                        * the previous load only once, upon the first wake-up
+                        * from idle.
+                        */
+                       j_cdbs->prev_load = 0;
+               } else {
+                       load = 100 * (wall_time - idle_time) / wall_time;
+                       j_cdbs->prev_load = load;
+               }
 
                if (load > max_load)
                        max_load = load;
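
To make the heuristic in this hunk easy to test in isolation, here is a
standalone sketch (plain userspace C, illustrative only; 'struct cpu_sample'
and 'compute_load' are hypothetical names, not kernel code):

#include <stdio.h>

struct cpu_sample {
        unsigned int prev_load; /* load seen in the last sampled window */
};

static unsigned int compute_load(struct cpu_sample *s,
                                 unsigned int wall_time,
                                 unsigned int idle_time,
                                 unsigned int sampling_rate)
{
        unsigned int load;

        if (wall_time > 2 * sampling_rate && s->prev_load) {
                /* first sample after a long idle: reuse once, then clear */
                load = s->prev_load;
                s->prev_load = 0;
        } else {
                load = 100 * (wall_time - idle_time) / wall_time;
                s->prev_load = load;
        }
        return load;
}

int main(void)
{
        struct cpu_sample s = { .prev_load = 0 };

        /* busy window: 10ms wall, 2ms idle -> 80, remembered */
        printf("%u\n", compute_load(&s, 10000, 2000, 10000));
        /* long idle window: 90ms wall -> the stored 80 is reused once */
        printf("%u\n", compute_load(&s, 90000, 89000, 10000));
        /* prev_load was cleared, so this window computes normally (~1) */
        printf("%u\n", compute_load(&s, 90000, 89000, 10000));
        return 0;
}

The destructive clear is what bounds the boost to a single sample: without
it, a CPU that went idle for good would stay pinned at its pre-idle load
(and hence frequency) indefinitely.
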
@@ -318,11 +372,18 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                for_each_cpu(j, policy->cpus) {
                        struct cpu_dbs_common_info *j_cdbs =
                                dbs_data->cdata->get_cpu_cdbs(j);
+                       unsigned int prev_load;
 
                        j_cdbs->cpu = j;
                        j_cdbs->cur_policy = policy;
                        j_cdbs->prev_cpu_idle = get_cpu_idle_time(j,
                                               &j_cdbs->prev_cpu_wall, io_busy);
+
+                       prev_load = (unsigned int)
+                               (j_cdbs->prev_cpu_wall - j_cdbs->prev_cpu_idle);
+                       j_cdbs->prev_load = 100 * prev_load /
+                                       (unsigned int) j_cdbs->prev_cpu_wall;
+
                        if (ignore_nice)
                                j_cdbs->prev_cpu_nice =
                                        kcpustat_cpu(j).cpustat[CPUTIME_NICE];
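
The initialization in this last hunk seeds prev_load from the cumulative
wall and idle counters, because no sampling window exists yet when the
governor starts. A standalone sketch of that arithmetic, with made-up
counter values:

#include <stdio.h>

int main(void)
{
        /* hypothetical cumulative counters, in usecs since boot */
        unsigned long long prev_cpu_wall = 50000000ULL;
        unsigned long long prev_cpu_idle = 42000000ULL;

        /* busy time so far, truncated to 32 bits as in the patch */
        unsigned int busy = (unsigned int)(prev_cpu_wall - prev_cpu_idle);
        unsigned int prev_load = 100 * busy /
                                 (unsigned int)prev_cpu_wall;

        printf("initial prev_load: %u%%\n", prev_load); /* prints 16% */
        return 0;
}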