Merge branch 'linus' into sched/devel
[pandora-kernel.git] / kernel/sched.c
index 21f7da9..29e2ec0 100644
@@ -600,7 +600,6 @@ struct rq {
        /* BKL stats */
        unsigned int bkl_count;
 #endif
-       struct lock_class_key rq_lock_key;
 };
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -809,9 +808,9 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
 
 /*
  * ratelimit for updating the group shares.
- * default: 0.5ms
+ * default: 0.25ms
  */
-const_debug unsigned int sysctl_sched_shares_ratelimit = 500000;
+unsigned int sysctl_sched_shares_ratelimit = 250000;
 
 /*
  * period over which we measure -rt task cpu usage in us.
@@ -834,7 +833,7 @@ static inline u64 global_rt_period(void)
 
 static inline u64 global_rt_runtime(void)
 {
-       if (sysctl_sched_rt_period < 0)
+       if (sysctl_sched_rt_runtime < 0)
                return RUNTIME_INF;
 
        return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
@@ -1922,11 +1921,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
                running = task_running(rq, p);
                on_rq = p->se.on_rq;
                ncsw = 0;
-               if (!match_state || p->state == match_state) {
-                       ncsw = p->nivcsw + p->nvcsw;
-                       if (unlikely(!ncsw))
-                               ncsw = 1;
-               }
+               if (!match_state || p->state == match_state)
+                       ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
                task_rq_unlock(rq, &flags);
 
                /*
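The ncsw change above packs two pieces of information into one value: the low bits carry p->nvcsw (the voluntary context-switch count) and the sign bit records that the state actually matched, so a valid sample can never collide with the 0 that means "no match". A small standalone sketch of the encoding, with a hypothetical encode_ncsw() helper (only the nvcsw | LONG_MIN expression comes from the patch):

    #include <limits.h>
    #include <stdio.h>

    /* Illustrative only: set the MSB so that a sampled count is never 0,
     * even when the task has not context-switched yet.
     */
    static unsigned long encode_ncsw(unsigned long nvcsw)
    {
            return nvcsw | LONG_MIN;        /* sets MSB, as in wait_task_inactive() */
    }

    int main(void)
    {
            unsigned long before = encode_ncsw(0);  /* non-zero despite nvcsw == 0 */
            unsigned long after  = encode_ncsw(1);  /* count advanced by one switch */

            printf("before=%#lx after=%#lx changed=%d\n",
                   before, after, before != after);
            return 0;
    }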
@@ -2759,10 +2755,10 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
        } else {
                if (rq1 < rq2) {
                        spin_lock(&rq1->lock);
-                       spin_lock(&rq2->lock);
+                       spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
                } else {
                        spin_lock(&rq2->lock);
-                       spin_lock(&rq1->lock);
+                       spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
                }
        }
        update_rq_clock(rq1);
@@ -2805,14 +2801,21 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
                if (busiest < this_rq) {
                        spin_unlock(&this_rq->lock);
                        spin_lock(&busiest->lock);
-                       spin_lock(&this_rq->lock);
+                       spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
                        ret = 1;
                } else
-                       spin_lock(&busiest->lock);
+                       spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
        }
        return ret;
 }
 
+static void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
+       __releases(busiest->lock)
+{
+       spin_unlock(&busiest->lock);
+       lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
+}
+
 /*
  * If dest_cpu is allowed for this process, migrate the task to it.
  * This is accomplished by forcing the cpu_allowed mask to only
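With the per-runqueue lock class gone (rq_lock_key above, and the lockdep_set_class() call removed in sched_init() below), all rq->lock instances share one lockdep class, so taking two runqueue locks at once needs an explicit annotation. double_rq_lock() and double_lock_balance() therefore order the two locks by ascending address and mark the second acquisition with SINGLE_DEPTH_NESTING, while the new double_unlock_balance() drops the busiest lock and resets the subclass on the lock that stays held. A minimal sketch of the same pattern on a made-up structure (struct box and its helpers are illustrative, not kernel code):

    #include <linux/spinlock.h>

    /* Two objects whose spinlocks share a single lockdep class. */
    struct box {
            spinlock_t lock;
    };

    /* Always take the lower address first, and tell lockdep that the second
     * acquisition is an intentional same-class nesting, not a deadlock.
     */
    static void box_double_lock(struct box *a, struct box *b)
    {
            if (a < b) {
                    spin_lock(&a->lock);
                    spin_lock_nested(&b->lock, SINGLE_DEPTH_NESTING);
            } else {
                    spin_lock(&b->lock);
                    spin_lock_nested(&a->lock, SINGLE_DEPTH_NESTING);
            }
    }

    static void box_double_unlock(struct box *a, struct box *b)
    {
            spin_unlock(&b->lock);
            spin_unlock(&a->lock);
    }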
@@ -3637,7 +3640,7 @@ redo:
                ld_moved = move_tasks(this_rq, this_cpu, busiest,
                                        imbalance, sd, CPU_NEWLY_IDLE,
                                        &all_pinned);
-               spin_unlock(&busiest->lock);
+               double_unlock_balance(this_rq, busiest);
 
                if (unlikely(all_pinned)) {
                        cpu_clear(cpu_of(busiest), *cpus);
@@ -3752,7 +3755,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
                else
                        schedstat_inc(sd, alb_failed);
        }
-       spin_unlock(&target_rq->lock);
+       double_unlock_balance(busiest_rq, target_rq);
 }
 
 #ifdef CONFIG_NO_HZ
@@ -4593,10 +4596,7 @@ do_wait_for_common(struct completion *x, long timeout, int state)
                wait.flags |= WQ_FLAG_EXCLUSIVE;
                __add_wait_queue_tail(&x->wait, &wait);
                do {
-                       if ((state == TASK_INTERRUPTIBLE &&
-                            signal_pending(current)) ||
-                           (state == TASK_KILLABLE &&
-                            fatal_signal_pending(current))) {
+                       if (signal_pending_state(state, current)) {
                                timeout = -ERESTARTSYS;
                                break;
                        }
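The two open-coded state checks collapse into signal_pending_state(), which encodes the same rule: an interruptible sleeper reacts to any pending signal, a killable sleeper only to a fatal one, and other sleep states ignore signals. The helper lives in include/linux/sched.h; its approximate shape around this kernel version is the hedged sketch below (check the header for the authoritative version):

    static inline int signal_pending_state(long state, struct task_struct *p)
    {
            if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
                    return 0;               /* uninterruptible: signals ignored */
            if (!signal_pending(p))
                    return 0;               /* nothing pending at all */

            /* TASK_INTERRUPTIBLE wakes on any signal, TASK_KILLABLE
             * (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) only on a fatal one.
             */
            return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
    }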
@@ -4663,6 +4663,52 @@ int __sched wait_for_completion_killable(struct completion *x)
 }
 EXPORT_SYMBOL(wait_for_completion_killable);
 
+/**
+ *     try_wait_for_completion - try to decrement a completion without blocking
+ *     @x:     completion structure
+ *
+ *     Returns: 0 if a decrement cannot be done without blocking
+ *              1 if a decrement succeeded.
+ *
+ *     If a completion is being used as a counting completion,
+ *     attempt to decrement the counter without blocking. This
+ *     enables us to avoid waiting if the resource the completion
+ *     is protecting is not available.
+ */
+bool try_wait_for_completion(struct completion *x)
+{
+       int ret = 1;
+
+       spin_lock_irq(&x->wait.lock);
+       if (!x->done)
+               ret = 0;
+       else
+               x->done--;
+       spin_unlock_irq(&x->wait.lock);
+       return ret;
+}
+EXPORT_SYMBOL(try_wait_for_completion);
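A typical caller uses the new helper as an opportunistic fast path when a counting completion guards a pool of resources, falling back to a blocking wait only when allowed. A hedged sketch with made-up names (grab_slot() and the slots completion are illustrative):

    #include <linux/completion.h>
    #include <linux/errno.h>

    /* `slots` is assumed to receive one complete() per freed slot. */
    static int grab_slot(struct completion *slots, bool may_sleep)
    {
            if (try_wait_for_completion(slots))
                    return 0;               /* got a slot without blocking */

            if (!may_sleep)
                    return -EAGAIN;         /* fast path only */

            wait_for_completion(slots);     /* slow path: block for a free slot */
            return 0;
    }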
+
+/**
+ *     completion_done - Test to see if a completion has any waiters
+ *     @x:     completion structure
+ *
+ *     Returns: 0 if there are waiters (wait_for_completion() in progress)
+ *              1 if there are no waiters.
+ *
+ */
+bool completion_done(struct completion *x)
+{
+       int ret = 1;
+
+       spin_lock_irq(&x->wait.lock);
+       if (!x->done)
+               ret = 0;
+       spin_unlock_irq(&x->wait.lock);
+       return ret;
+}
+EXPORT_SYMBOL(completion_done);
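completion_done() is the non-consuming counterpart: it only reports whether a complete() is already pending. One illustrative use (names made up, and inherently racy without extra locking) is to keep a producer from stacking up redundant completions:

    #include <linux/completion.h>

    /* Post at most one pending completion on the hypothetical `ready`. */
    static void signal_ready_once(struct completion *ready)
    {
            if (!completion_done(ready))
                    complete(ready);        /* nothing pending yet, post one */
    }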
+
 static long __sched
 sleep_on_common(wait_queue_head_t *q, int state, long timeout)
 {
@@ -5004,19 +5050,21 @@ recheck:
                        return -EPERM;
        }
 
+       if (user) {
 #ifdef CONFIG_RT_GROUP_SCHED
-       /*
-        * Do not allow realtime tasks into groups that have no runtime
-        * assigned.
-        */
-       if (user
-           && rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
-               return -EPERM;
+               /*
+                * Do not allow realtime tasks into groups that have no runtime
+                * assigned.
+                */
+               if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
+                       return -EPERM;
 #endif
 
-       retval = security_task_setscheduler(p, policy, param);
-       if (retval)
-               return retval;
+               retval = security_task_setscheduler(p, policy, param);
+               if (retval)
+                       return retval;
+       }
+
        /*
         * make sure no PI-waiters arrive (or leave) while we are
         * changing the priority of the task:
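Hoisting the RT-group and security checks under `if (user)` is what lets an in-kernel caller bypass them: the common body takes a user flag, and the same development cycle added a sched_setscheduler_nocheck() entry point for kernel threads. In rough, paraphrased outline (not copied verbatim from the patch):

    /* Shared body: `user` selects whether permission checks are applied. */
    static int __sched_setscheduler(struct task_struct *p, int policy,
                                    struct sched_param *param, bool user);

    int sched_setscheduler(struct task_struct *p, int policy,
                           struct sched_param *param)
    {
            return __sched_setscheduler(p, policy, param, true);
    }

    int sched_setscheduler_nocheck(struct task_struct *p, int policy,
                                   struct sched_param *param)
    {
            /* kernel-internal callers: skip capability and security hooks */
            return __sched_setscheduler(p, policy, param, false);
    }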
@@ -5732,6 +5780,8 @@ static inline void sched_init_granularity(void)
                sysctl_sched_latency = limit;
 
        sysctl_sched_wakeup_granularity *= factor;
+
+       sysctl_sched_shares_ratelimit *= factor;
 }
 
 #ifdef CONFIG_SMP
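Scaling sysctl_sched_shares_ratelimit by the same CPU-count factor as the other granularity knobs keeps the share-update rate proportional on larger machines. Assuming the usual factor of 1 + ilog2(num_online_cpus()) computed earlier in sched_init_granularity(), the new 0.25 ms base works out as in this standalone arithmetic sketch (values are illustrative):

    #include <stdio.h>

    /* Mirror of the "factor = 1 + ilog2(ncpus)" scaling, in plain C. */
    static unsigned int ilog2_uint(unsigned int v)
    {
            unsigned int r = 0;

            while (v >>= 1)
                    r++;
            return r;
    }

    int main(void)
    {
            const unsigned int base = 250000;       /* 0.25 ms in ns */
            const unsigned int cpus[] = { 1, 2, 8, 64 };

            for (unsigned int i = 0; i < sizeof(cpus) / sizeof(cpus[0]); i++) {
                    unsigned int factor = 1 + ilog2_uint(cpus[i]);

                    printf("%3u cpus -> ratelimit %7u ns (%.2f ms)\n",
                           cpus[i], base * factor, base * factor / 1e6);
            }
            return 0;
    }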
@@ -7998,7 +8048,6 @@ void __init sched_init(void)
 
                rq = cpu_rq(i);
                spin_lock_init(&rq->lock);
-               lockdep_set_class(&rq->lock, &rq->rq_lock_key);
                rq->nr_running = 0;
                init_cfs_rq(&rq->cfs, rq);
                init_rt_rq(&rq->rt, rq);
@@ -8455,8 +8504,8 @@ struct task_group *sched_create_group(struct task_group *parent)
        WARN_ON(!parent); /* root should already exist */
 
        tg->parent = parent;
-       list_add_rcu(&tg->siblings, &parent->children);
        INIT_LIST_HEAD(&tg->children);
+       list_add_rcu(&tg->siblings, &parent->children);
        spin_unlock_irqrestore(&task_group_lock, flags);
 
        return tg;
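Moving INIT_LIST_HEAD() ahead of list_add_rcu() makes the group fully initialised before it becomes reachable through the parent's RCU-protected siblings list, so a lockless walker can never see an uninitialised children list. The same publish-after-init rule applies to any RCU-visible object; a small hedged sketch with made-up names (writer-side locking omitted):

    #include <linux/rculist.h>
    #include <linux/slab.h>

    struct node {
            struct list_head siblings;
            struct list_head children;
    };

    /* Initialise everything first, publish to RCU readers last. */
    static struct node *node_create(struct list_head *parent_list)
    {
            struct node *n = kzalloc(sizeof(*n), GFP_KERNEL);

            if (!n)
                    return NULL;

            INIT_LIST_HEAD(&n->children);            /* before publication */
            list_add_rcu(&n->siblings, parent_list); /* publication point */
            return n;
    }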