alpha: move include/asm-alpha to arch/alpha/include/asm
diff --git a/kernel/sched.c b/kernel/sched.c
index 0047bd9..95e6ad3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -600,7 +600,6 @@ struct rq {
        /* BKL stats */
        unsigned int bkl_count;
 #endif
-       struct lock_class_key rq_lock_key;
 };
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -834,7 +833,7 @@ static inline u64 global_rt_period(void)
 
 static inline u64 global_rt_runtime(void)
 {
-       if (sysctl_sched_rt_period < 0)
+       if (sysctl_sched_rt_runtime < 0)
                return RUNTIME_INF;
 
        return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
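Annotation (not part of the diff): the old test checked the period sysctl, which never goes negative in practice, so setting the runtime to -1 could not disable throttling as intended. For context, a rough sketch of how the two sysctls combine into a bandwidth cap; the helper and the default values are assumptions for illustration, not taken from this tree (div_u64() is from <linux/math64.h>).

        /* Sketch only: illustrative helper, not a function in sched.c. */
        static u64 rt_bandwidth_percent(void)
        {
                if (sysctl_sched_rt_runtime < 0)        /* -1: throttling off */
                        return 100;
                return div_u64((u64)sysctl_sched_rt_runtime * 100,
                               sysctl_sched_rt_period);
        }
        /* usual defaults: runtime 950000 us, period 1000000 us -> 95% cap */
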
@@ -1867,16 +1866,24 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
 /*
  * wait_task_inactive - wait for a thread to unschedule.
  *
+ * If @match_state is nonzero, it's the @p->state value just checked and
+ * not expected to change.  If it changes, i.e. @p might have woken up,
+ * then return zero.  When we succeed in waiting for @p to be off its CPU,
+ * we return a positive number (its total switch count).  If a second call
+ * a short while later returns the same number, the caller can be sure that
+ * @p has remained unscheduled the whole time.
+ *
  * The caller must ensure that the task *will* unschedule sometime soon,
  * else this function might spin for a *long* time. This function can't
  * be called with interrupts off, or it may introduce deadlock with
  * smp_call_function() if an IPI is sent by the same process we are
  * waiting to become inactive.
  */
-void wait_task_inactive(struct task_struct *p)
+unsigned long wait_task_inactive(struct task_struct *p, long match_state)
 {
        unsigned long flags;
        int running, on_rq;
+       unsigned long ncsw;
        struct rq *rq;
 
        for (;;) {
@@ -1899,8 +1906,11 @@ void wait_task_inactive(struct task_struct *p)
                 * return false if the runqueue has changed and p
                 * is actually now running somewhere else!
                 */
-               while (task_running(rq, p))
+               while (task_running(rq, p)) {
+                       if (match_state && unlikely(p->state != match_state))
+                               return 0;
                        cpu_relax();
+               }
 
                /*
                 * Ok, time to look more closely! We need the rq
@@ -1910,8 +1920,20 @@ void wait_task_inactive(struct task_struct *p)
                rq = task_rq_lock(p, &flags);
                running = task_running(rq, p);
                on_rq = p->se.on_rq;
+               ncsw = 0;
+               if (!match_state || p->state == match_state) {
+                       ncsw = p->nivcsw + p->nvcsw;
+                       if (unlikely(!ncsw))
+                               ncsw = 1;
+               }
                task_rq_unlock(rq, &flags);
 
+               /*
+                * If it changed from the expected state, bail out now.
+                */
+               if (unlikely(!ncsw))
+                       break;
+
                /*
                 * Was it really running after all now that we
                 * checked with the proper locks actually held?
@@ -1944,6 +1966,8 @@ void wait_task_inactive(struct task_struct *p)
                 */
                break;
        }
+
+       return ncsw;
 }
 
 /***
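Annotation (not part of the diff): the comment above describes the new contract; the sketch below shows the intended calling pattern. The function name, the TASK_TRACED state and the error handling are illustrative only.

        /* Sketch: detect whether a traced child ran between two inspections. */
        static int inspect_quiescent_child(struct task_struct *child)
        {
                unsigned long ncsw;

                ncsw = wait_task_inactive(child, TASK_TRACED);
                if (!ncsw)
                        return -ESRCH;  /* state changed: the child may have woken */

                /* ... examine the child while it is off its CPU ... */

                if (wait_task_inactive(child, TASK_TRACED) != ncsw)
                        return -ESRCH;  /* it ran in between; what we saw is stale */
                return 0;
        }
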
@@ -2734,10 +2758,10 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
        } else {
                if (rq1 < rq2) {
                        spin_lock(&rq1->lock);
-                       spin_lock(&rq2->lock);
+                       spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
                } else {
                        spin_lock(&rq2->lock);
-                       spin_lock(&rq1->lock);
+                       spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
                }
        }
        update_rq_clock(rq1);
@@ -2780,14 +2804,21 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
                if (busiest < this_rq) {
                        spin_unlock(&this_rq->lock);
                        spin_lock(&busiest->lock);
-                       spin_lock(&this_rq->lock);
+                       spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
                        ret = 1;
                } else
-                       spin_lock(&busiest->lock);
+                       spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
        }
        return ret;
 }
 
+static void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
+       __releases(busiest->lock)
+{
+       spin_unlock(&busiest->lock);
+       lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
+}
+
 /*
  * If dest_cpu is allowed for this process, migrate the task to it.
  * This is accomplished by forcing the cpu_allowed mask to only
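Annotation (not part of the diff): together with the removal of rq_lock_key and of lockdep_set_class() further down, these hunks put all runqueue locks into a single lockdep class, so the second lock of an ordered pair must be taken with spin_lock_nested(), and double_lock_balance() now has to be paired with double_unlock_balance() so the subclass annotation on this_rq->lock is reset once busiest->lock is dropped. A minimal sketch of the expected pairing, error paths omitted:

        /* Sketch only: mirrors how the balancing callers below use the pair. */
        spin_lock(&this_rq->lock);
        if (double_lock_balance(this_rq, busiest)) {
                /* this_rq->lock was dropped and re-acquired; recheck state here */
        }
        /* ... move tasks from busiest to this_rq ... */
        double_unlock_balance(this_rq, busiest);  /* drops busiest, resets subclass */
        spin_unlock(&this_rq->lock);
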
@@ -3612,7 +3643,7 @@ redo:
                ld_moved = move_tasks(this_rq, this_cpu, busiest,
                                        imbalance, sd, CPU_NEWLY_IDLE,
                                        &all_pinned);
-               spin_unlock(&busiest->lock);
+               double_unlock_balance(this_rq, busiest);
 
                if (unlikely(all_pinned)) {
                        cpu_clear(cpu_of(busiest), *cpus);
@@ -3727,7 +3758,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
                else
                        schedstat_inc(sd, alb_failed);
        }
-       spin_unlock(&target_rq->lock);
+       double_unlock_balance(busiest_rq, target_rq);
 }
 
 #ifdef CONFIG_NO_HZ
@@ -4638,6 +4669,52 @@ int __sched wait_for_completion_killable(struct completion *x)
 }
 EXPORT_SYMBOL(wait_for_completion_killable);
 
+/**
+ *     try_wait_for_completion - try to decrement a completion without blocking
+ *     @x:     completion structure
+ *
+ *     Returns: 0 if a decrement cannot be done without blocking
+ *              1 if a decrement succeeded.
+ *
+ *     If a completion is being used as a counting completion,
+ *     attempt to decrement the counter without blocking. This
+ *     enables us to avoid waiting if the resource the completion
+ *     is protecting is not available.
+ */
+bool try_wait_for_completion(struct completion *x)
+{
+       int ret = 1;
+
+       spin_lock_irq(&x->wait.lock);
+       if (!x->done)
+               ret = 0;
+       else
+               x->done--;
+       spin_unlock_irq(&x->wait.lock);
+       return ret;
+}
+EXPORT_SYMBOL(try_wait_for_completion);
+
+/**
+ *     completion_done - Test to see if a completion has any waiters
+ *     @x:     completion structure
+ *
+ *     Returns: 0 if there are waiters (wait_for_completion() in progress)
+ *              1 if there are no waiters.
+ *
+ */
+bool completion_done(struct completion *x)
+{
+       int ret = 1;
+
+       spin_lock_irq(&x->wait.lock);
+       if (!x->done)
+               ret = 0;
+       spin_unlock_irq(&x->wait.lock);
+       return ret;
+}
+EXPORT_SYMBOL(completion_done);
+
 static long __sched
 sleep_on_common(wait_queue_head_t *q, int state, long timeout)
 {
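Annotation (not part of the diff): both new helpers take the completion's wait-queue lock with spin_lock_irq(), so as written they are intended for process context with interrupts enabled. A minimal sketch of the counting-completion use case the kerneldoc describes; the slot-pool names are made up for illustration.

        /* Sketch: a completion used as a counter guarding a fixed slot pool. */
        static struct completion slot_pool;

        static void slot_pool_init(unsigned int nr_slots)
        {
                init_completion(&slot_pool);
                while (nr_slots--)
                        complete(&slot_pool);           /* seed the counter */
        }

        static bool slot_try_get(void)
        {
                return try_wait_for_completion(&slot_pool);  /* true: got a slot */
        }

        static void slot_put(void)
        {
                complete(&slot_pool);                   /* return a slot */
        }
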
@@ -4979,19 +5056,21 @@ recheck:
                        return -EPERM;
        }
 
+       if (user) {
 #ifdef CONFIG_RT_GROUP_SCHED
-       /*
-        * Do not allow realtime tasks into groups that have no runtime
-        * assigned.
-        */
-       if (user
-           && rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
-               return -EPERM;
+               /*
+                * Do not allow realtime tasks into groups that have no runtime
+                * assigned.
+                */
+               if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
+                       return -EPERM;
 #endif
 
-       retval = security_task_setscheduler(p, policy, param);
-       if (retval)
-               return retval;
+               retval = security_task_setscheduler(p, policy, param);
+               if (retval)
+                       return retval;
+       }
+
        /*
         * make sure no PI-waiters arrive (or leave) while we are
         * changing the priority of the task:
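Annotation (not part of the diff): gathering the checks under "if (user)" means requests that originate inside the kernel (user == false) skip both the RT-group runtime test and the security_task_setscheduler() hook, while user-visible entry points keep them. Assuming this tree has the usual wrapper pair around the user flag (an assumption; only the flag itself is visible in this hunk), the split looks roughly like:

        /* Sketch; wrapper names are assumed, not shown in this hunk. */
        struct sched_param fifo1 = { .sched_priority = 1 };

        /* user-originated request: group and LSM checks still apply */
        sched_setscheduler(p, SCHED_FIFO, &fifo1);

        /* in-kernel request (user == false): both checks are skipped */
        sched_setscheduler_nocheck(kthread_task, SCHED_FIFO, &fifo1);
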
@@ -6389,7 +6468,7 @@ static struct notifier_block __cpuinitdata migration_notifier = {
        .priority = 10
 };
 
-void __init migration_init(void)
+static int __init migration_init(void)
 {
        void *cpu = (void *)(long)smp_processor_id();
        int err;
@@ -6399,7 +6478,10 @@ void __init migration_init(void)
        BUG_ON(err == NOTIFY_BAD);
        migration_call(&migration_notifier, CPU_ONLINE, cpu);
        register_cpu_notifier(&migration_notifier);
+
+       return err;
 }
+early_initcall(migration_init);
 #endif
 
 #ifdef CONFIG_SMP
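Annotation (not part of the diff): migration_init() is no longer called explicitly from init code; it now follows the early_initcall() convention, which requires a static function returning int and runs it during early boot, before secondary CPUs are brought up. A generic sketch of the pattern with an illustrative function name:

        /* Sketch of the early_initcall() pattern; the function is illustrative. */
        static int __init my_early_setup(void)
        {
                /* runs once on the boot CPU, ahead of the ordinary initcall levels */
                return 0;       /* 0 means success to the initcall machinery */
        }
        early_initcall(my_early_setup);
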
@@ -7643,34 +7725,34 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
 }
 
 #ifdef CONFIG_SCHED_MC
-static ssize_t sched_mc_power_savings_show(struct sys_device *dev,
-                               struct sysdev_attribute *attr, char *page)
+static ssize_t sched_mc_power_savings_show(struct sysdev_class *class,
+                                          char *page)
 {
        return sprintf(page, "%u\n", sched_mc_power_savings);
 }
-static ssize_t sched_mc_power_savings_store(struct sys_device *dev,
-                                           struct sysdev_attribute *attr,
+static ssize_t sched_mc_power_savings_store(struct sysdev_class *class,
                                            const char *buf, size_t count)
 {
        return sched_power_savings_store(buf, count, 0);
 }
-static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show,
-                  sched_mc_power_savings_store);
+static SYSDEV_CLASS_ATTR(sched_mc_power_savings, 0644,
+                        sched_mc_power_savings_show,
+                        sched_mc_power_savings_store);
 #endif
 
 #ifdef CONFIG_SCHED_SMT
-static ssize_t sched_smt_power_savings_show(struct sys_device *dev,
-                               struct sysdev_attribute *attr, char *page)
+static ssize_t sched_smt_power_savings_show(struct sysdev_class *dev,
+                                           char *page)
 {
        return sprintf(page, "%u\n", sched_smt_power_savings);
 }
-static ssize_t sched_smt_power_savings_store(struct sys_device *dev,
-                                            struct sysdev_attribute *attr,
+static ssize_t sched_smt_power_savings_store(struct sysdev_class *dev,
                                             const char *buf, size_t count)
 {
        return sched_power_savings_store(buf, count, 1);
 }
-static SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show,
+static SYSDEV_CLASS_ATTR(sched_smt_power_savings, 0644,
+                  sched_smt_power_savings_show,
                   sched_smt_power_savings_store);
 #endif
 
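Annotation (not part of the diff): SYSDEV_CLASS_ATTR(name, ...) declares a struct sysdev_class_attribute named attr_<name>, attached to a sysdev class rather than to one device, which is why the show/store prototypes lose the per-device arguments. Registration against the CPU class would look roughly like the sketch below; using sysdev_class_create_file() as the registration helper is an assumption, not something shown in this diff.

        /* Sketch: hanging the class attribute off the cpu sysdev class. */
        if (sysdev_class_create_file(&cpu_sysdev_class,
                                     &attr_sched_mc_power_savings))
                printk(KERN_ERR "sched: power-savings attribute not created\n");
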
@@ -7970,7 +8052,6 @@ void __init sched_init(void)
 
                rq = cpu_rq(i);
                spin_lock_init(&rq->lock);
-               lockdep_set_class(&rq->lock, &rq->rq_lock_key);
                rq->nr_running = 0;
                init_cfs_rq(&rq->cfs, rq);
                init_rt_rq(&rq->rt, rq);