Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc

[pandora-kernel.git] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index 5757e03..6cc1fd5 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -231,13 +231,20 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
  
         spin_lock(&rt_b->rt_runtime_lock);
         for (;;) {
+               unsigned long delta;
+               ktime_t soft, hard;
+
                 if (hrtimer_active(&rt_b->rt_period_timer))
                         break;
  
                 now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
                 hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
-               hrtimer_start_expires(&rt_b->rt_period_timer,
-                               HRTIMER_MODE_ABS);
+
+               soft = hrtimer_get_softexpires(&rt_b->rt_period_timer);
+               hard = hrtimer_get_expires(&rt_b->rt_period_timer);
+               delta = ktime_to_ns(ktime_sub(hard, soft));
+               __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
+                               HRTIMER_MODE_ABS, 0);
         }
         spin_unlock(&rt_b->rt_runtime_lock);
  }
@@ -1110,7 +1117,7 @@ static void hrtick_start(struct rq *rq, u64 delay)
         if (rq == this_rq()) {
                 hrtimer_restart(timer);
         } else if (!rq->hrtick_csd_pending) {
-               __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd);
+               __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0);
                 rq->hrtick_csd_pending = 1;
         }
  }
@@ -1146,7 +1153,8 @@ static __init void init_hrtick(void)
   */
  static void hrtick_start(struct rq *rq, u64 delay)
  {
-       hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
+       __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
+                       HRTIMER_MODE_REL, 0);
  }
  
  static inline void init_hrtick(void)
@@ -3818,19 +3826,23 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
   */
  #define MAX_PINNED_INTERVAL    512
  
+/* Working cpumask for load_balance and load_balance_newidle. */
+static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
+
  /*
   * Check this_cpu to ensure it is balanced within domain. Attempt to move
   * tasks if there is an imbalance.
   */
  static int load_balance(int this_cpu, struct rq *this_rq,
                         struct sched_domain *sd, enum cpu_idle_type idle,
-                       int *balance, struct cpumask *cpus)
+                       int *balance)
  {
         int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
         struct sched_group *group;
         unsigned long imbalance;
         struct rq *busiest;
         unsigned long flags;
+       struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
  
         cpumask_setall(cpus);
  
@@ -3985,8 +3997,7 @@ out:
   * this_rq is locked.
   */
  static int
-load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
-                       struct cpumask *cpus)
+load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
  {
         struct sched_group *group;
         struct rq *busiest = NULL;
@@ -3994,6 +4005,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
         int ld_moved = 0;
         int sd_idle = 0;
         int all_pinned = 0;
+       struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
  
         cpumask_setall(cpus);
  
@@ -4134,10 +4146,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
         struct sched_domain *sd;
         int pulled_task = 0;
         unsigned long next_balance = jiffies + HZ;
-       cpumask_var_t tmpmask;
-
-       if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
-               return;
  
         for_each_domain(this_cpu, sd) {
                 unsigned long interval;
@@ -4148,7 +4156,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
                 if (sd->flags & SD_BALANCE_NEWIDLE)
                         /* If we've pulled tasks over stop searching: */
                         pulled_task = load_balance_newidle(this_cpu, this_rq,
-                                                          sd, tmpmask);
+                                                          sd);
  
                 interval = msecs_to_jiffies(sd->balance_interval);
                 if (time_after(next_balance, sd->last_balance + interval))
@@ -4163,7 +4171,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
                  */
                 this_rq->next_balance = next_balance;
         }
-       free_cpumask_var(tmpmask);
  }
  
  /*
@@ -4313,11 +4320,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
         unsigned long next_balance = jiffies + 60*HZ;
         int update_next_balance = 0;
         int need_serialize;
-       cpumask_var_t tmp;
-
-       /* Fails alloc?  Rebalancing probably not a priority right now. */
-       if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
-               return;
  
         for_each_domain(cpu, sd) {
                 if (!(sd->flags & SD_LOAD_BALANCE))
@@ -4342,7 +4344,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
                 }
  
                 if (time_after_eq(jiffies, sd->last_balance + interval)) {
-                       if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
+                       if (load_balance(cpu, rq, sd, idle, &balance)) {
                                 /*
                                  * We've pulled tasks over so either we're no
                                  * longer idle, or one of our SMT siblings is
@@ -4376,8 +4378,6 @@ out:
          */
         if (likely(update_next_balance))
                 rq->next_balance = next_balance;
-
-       free_cpumask_var(tmp);
  }
  
  /*
@@ -4781,10 +4781,7 @@ void scheduler_tick(void)
  #endif
  }
  
-#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
-                               defined(CONFIG_PREEMPT_TRACER))
-
-static inline unsigned long get_parent_ip(unsigned long addr)
+unsigned long get_parent_ip(unsigned long addr)
  {
         if (in_lock_functions(addr)) {
                 addr = CALLER_ADDR2;
@@ -4794,6 +4791,9 @@ static inline unsigned long get_parent_ip(unsigned long addr)
         return addr;
  }
  
+#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
+                               defined(CONFIG_PREEMPT_TRACER))
+
  void __kprobes add_preempt_count(int val)
  {
  #ifdef CONFIG_DEBUG_PREEMPT
@@ -4942,15 +4942,13 @@ pick_next_task(struct rq *rq)
  /*
   * schedule() is the main scheduler function.
   */
-asmlinkage void __sched schedule(void)
+asmlinkage void __sched __schedule(void)
  {
         struct task_struct *prev, *next;
         unsigned long *switch_count;
         struct rq *rq;
         int cpu;
  
-need_resched:
-       preempt_disable();
         cpu = smp_processor_id();
         rq = cpu_rq(cpu);
         rcu_qsctr_inc(cpu);
@@ -5007,13 +5005,80 @@ need_resched_nonpreemptible:
  
         if (unlikely(reacquire_kernel_lock(current) < 0))
                 goto need_resched_nonpreemptible;
+}
  
+asmlinkage void __sched schedule(void)
+{
+need_resched:
+       preempt_disable();
+       __schedule();
         preempt_enable_no_resched();
         if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
                 goto need_resched;
  }
  EXPORT_SYMBOL(schedule);
  
+#ifdef CONFIG_SMP
+/*
+ * Look out! "owner" is an entirely speculative pointer
+ * access and not reliable.
+ */
+int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
+{
+       unsigned int cpu;
+       struct rq *rq;
+
+       if (!sched_feat(OWNER_SPIN))
+               return 0;
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+       /*
+        * Need to access the cpu field knowing that
+        * DEBUG_PAGEALLOC could have unmapped it if
+        * the mutex owner just released it and exited.
+        */
+       if (probe_kernel_address(&owner->cpu, cpu))
+               goto out;
+#else
+       cpu = owner->cpu;
+#endif
+
+       /*
+        * Even if the access succeeded (likely case),
+        * the cpu field may no longer be valid.
+        */
+       if (cpu >= nr_cpumask_bits)
+               goto out;
+
+       /*
+        * We need to validate that we can do a
+        * get_cpu() and that we have the percpu area.
+        */
+       if (!cpu_online(cpu))
+               goto out;
+
+       rq = cpu_rq(cpu);
+
+       for (;;) {
+               /*
+                * Owner changed, break to re-assess state.
+                */
+               if (lock->owner != owner)
+                       break;
+
+               /*
+                * Is that owner really running on that cpu?
+                */
+               if (task_thread_info(rq->curr) != owner || need_resched())
+                       return 0;
+
+               cpu_relax();
+       }
+out:
+       return 1;
+}
+#endif
+
  #ifdef CONFIG_PREEMPT
  /*
   * this is the entry point to schedule() from in-kernel preemption
@@ -5131,11 +5196,17 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
         __wake_up_common(q, mode, 1, 0, NULL);
  }
  
+void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
+{
+       __wake_up_common(q, mode, 1, 0, key);
+}
+
  /**
- * __wake_up_sync - wake up threads blocked on a waitqueue.
+ * __wake_up_sync_key - wake up threads blocked on a waitqueue.
   * @q: the waitqueue
   * @mode: which threads
   * @nr_exclusive: how many wake-one or wake-many threads to wake up
+ * @key: opaque value to be passed to wakeup targets
   *
   * The sync wakeup differs that the waker knows that it will schedule
   * away soon, so while the target thread will be woken up, it will not
@@ -5144,8 +5215,8 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
   *
   * On UP it can prevent extra preemption.
   */
-void
-__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
+                       int nr_exclusive, void *key)
  {
         unsigned long flags;
         int sync = 1;
@@ -5157,9 +5228,18 @@ __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
                 sync = 0;
  
         spin_lock_irqsave(&q->lock, flags);
-       __wake_up_common(q, mode, nr_exclusive, sync, NULL);
+       __wake_up_common(q, mode, nr_exclusive, sync, key);
         spin_unlock_irqrestore(&q->lock, flags);
  }
+EXPORT_SYMBOL_GPL(__wake_up_sync_key);
+
+/*
+ * __wake_up_sync - see __wake_up_sync_key()
+ */
+void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+{
+       __wake_up_sync_key(q, mode, nr_exclusive, NULL);
+}
  EXPORT_SYMBOL_GPL(__wake_up_sync);     /* For internal use only */
  
  /**
@@ -7648,7 +7728,7 @@ cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
  {
         int group;
  
-       cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+       cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
         group = cpumask_first(mask);
         if (sg)
                 *sg = &per_cpu(sched_group_core, group).sg;
@@ -7677,7 +7757,7 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
         cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
         group = cpumask_first(mask);
  #elif defined(CONFIG_SCHED_SMT)
-       cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+       cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
         group = cpumask_first(mask);
  #else
         group = cpu;
@@ -8020,7 +8100,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                 SD_INIT(sd, SIBLING);
                 set_domain_attribute(sd, attr);
                 cpumask_and(sched_domain_span(sd),
-                           &per_cpu(cpu_sibling_map, i), cpu_map);
+                           topology_thread_cpumask(i), cpu_map);
                 sd->parent = p;
                 p->child = sd;
                 cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
@@ -8031,7 +8111,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
         /* Set up CPU (sibling) groups */
         for_each_cpu(i, cpu_map) {
                 cpumask_and(this_sibling_map,
-                           &per_cpu(cpu_sibling_map, i), cpu_map);
+                           topology_thread_cpumask(i), cpu_map);
                 if (i != cpumask_first(this_sibling_map))
                         continue;
  
@@ -8706,6 +8786,9 @@ void __init sched_init(void)
  #endif
  #ifdef CONFIG_USER_SCHED
         alloc_size *= 2;
+#endif
+#ifdef CONFIG_CPUMASK_OFFSTACK
+       alloc_size += num_possible_cpus() * cpumask_size();
  #endif
         /*
          * As sched_init() is called before page_alloc is setup,
@@ -8744,6 +8827,12 @@ void __init sched_init(void)
                 ptr += nr_cpu_ids * sizeof(void **);
  #endif /* CONFIG_USER_SCHED */
  #endif /* CONFIG_RT_GROUP_SCHED */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+               for_each_possible_cpu(i) {
+                       per_cpu(load_balance_tmpmask, i) = (void *)ptr;
+                       ptr += cpumask_size();
+               }
+#endif /* CONFIG_CPUMASK_OFFSTACK */
         }
  
  #ifdef CONFIG_SMP