rtmutex: Detect changes in the pi lock chain
[pandora-kernel.git] / kernel / rtmutex.c
1 /*
2  * RT-Mutexes: simple blocking mutual exclusion locks with PI support
3  *
4  * started by Ingo Molnar and Thomas Gleixner.
5  *
6  *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7  *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
8  *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
9  *  Copyright (C) 2006 Esben Nielsen
10  *
11  *  See Documentation/rt-mutex-design.txt for details.
12  */
13 #include <linux/spinlock.h>
14 #include <linux/export.h>
15 #include <linux/sched.h>
16 #include <linux/timer.h>
17
18 #include "rtmutex_common.h"
19
20 /*
21  * lock->owner state tracking:
22  *
23  * lock->owner holds the task_struct pointer of the owner. Bit 0
24  * is used to keep track of the "lock has waiters" state.
25  *
26  * owner        bit0
27  * NULL         0       lock is free (fast acquire possible)
28  * NULL         1       lock is free and has waiters and the top waiter
29  *                              is going to take the lock*
30  * taskpointer  0       lock is held (fast release possible)
31  * taskpointer  1       lock is held and has waiters**
32  *
33  * The fast atomic compare exchange based acquire and release is only
34  * possible when bit 0 of lock->owner is 0.
35  *
36  * (*) It can also be a transitional state when grabbing the lock
37  * with ->wait_lock held. To prevent any fast path cmpxchg on the lock,
38  * we need to set bit 0 before looking at the lock, and the owner may be
39  * NULL during this small window, hence this can be a transitional state.
40  *
41  * (**) There is a small window when bit 0 is set but there are no
42  * waiters. This can happen when grabbing the lock in the slow path.
43  * To prevent a cmpxchg of the owner releasing the lock, we need to
44  * set this bit before looking at the lock.
45  */
46
47 static void
48 rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
49 {
50         unsigned long val = (unsigned long)owner;
51
52         if (rt_mutex_has_waiters(lock))
53                 val |= RT_MUTEX_HAS_WAITERS;
54
55         lock->owner = (struct task_struct *)val;
56 }
57
58 static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
59 {
60         lock->owner = (struct task_struct *)
61                         ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
62 }
63
64 static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
65 {
66         if (!rt_mutex_has_waiters(lock))
67                 clear_rt_mutex_waiters(lock);
68 }
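
/*
 * A minimal decoding sketch for the owner field described above. The real
 * read-side helpers (rt_mutex_owner(), rt_mutex_has_waiters()) live in
 * rtmutex_common.h; the helper below is a made-up name for illustration and
 * assumes only what the state tracking comment above guarantees: bit 0 is
 * the "has waiters" flag and the remaining bits are the task pointer.
 */
static inline struct task_struct *
rt_mutex_owner_sketch(struct rt_mutex *lock)
{
	/* Mask off the RT_MUTEX_HAS_WAITERS bit to recover the task pointer. */
	return (struct task_struct *)
		((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}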
69
70 /*
71  * We can speed up the acquire/release if the architecture
72  * supports cmpxchg and there is no debugging state to be set up
73  */
74 #if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES)
75 # define rt_mutex_cmpxchg(l,c,n)        (cmpxchg(&l->owner, c, n) == c)
76 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
77 {
78         unsigned long owner, *p = (unsigned long *) &lock->owner;
79
80         do {
81                 owner = *p;
82         } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
83 }
84 #else
85 # define rt_mutex_cmpxchg(l,c,n)        (0)
86 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
87 {
88         lock->owner = (struct task_struct *)
89                         ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
90 }
91 #endif
92
93 /*
94  * Calculate task priority from the waiter list priority
95  *
96  * Return task->normal_prio when the waiter list is empty or when
97  * the waiter is not allowed to do priority boosting
98  */
99 int rt_mutex_getprio(struct task_struct *task)
100 {
101         if (likely(!task_has_pi_waiters(task)))
102                 return task->normal_prio;
103
104         return min(task_top_pi_waiter(task)->pi_list_entry.prio,
105                    task->normal_prio);
106 }
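
/*
 * Worked example (illustrative): kernel priorities are inverted numerically,
 * so a lower number means a higher priority and min() therefore selects the
 * boosted priority. An owner with normal_prio 120 (SCHED_NORMAL) whose top
 * pi waiter has prio 10 (SCHED_FIFO) gets rt_mutex_getprio() == min(10, 120)
 * == 10, i.e. it runs at the waiter's priority until the boost is undone.
 */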
107
108 /*
109  * Adjust the priority of a task after its pi_waiters list has been modified.
110  *
111  * This can be both boosting and unboosting. task->pi_lock must be held.
112  */
113 static void __rt_mutex_adjust_prio(struct task_struct *task)
114 {
115         int prio = rt_mutex_getprio(task);
116
117         if (task->prio != prio)
118                 rt_mutex_setprio(task, prio);
119 }
120
121 /*
122  * Adjust task priority (undo boosting). Called from the exit path of
123  * rt_mutex_slowunlock() and rt_mutex_slowlock().
124  *
125  * (Note: We do this outside of the protection of lock->wait_lock to
126  * allow the lock to be taken while or before we readjust the priority
127  * of task. We do not use the spin_xx_mutex() variants here as we are
128  * outside of the debug path.)
129  */
130 static void rt_mutex_adjust_prio(struct task_struct *task)
131 {
132         unsigned long flags;
133
134         raw_spin_lock_irqsave(&task->pi_lock, flags);
135         __rt_mutex_adjust_prio(task);
136         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
137 }
138
139 /*
140  * Max number of times we'll walk the boosting chain:
141  */
142 int max_lock_depth = 1024;
143
144 static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
145 {
146         return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
147 }
148
149 /*
150  * Adjust the priority chain. Also used for deadlock detection.
151  * Drops the usage count of @task by one and may thus free the task.
152  *
153  * @task:       the task owning the mutex (owner) for which a chain walk is
154  *              probably needed
155  * @deadlock_detect: do we have to carry out deadlock detection?
156  * @orig_lock:  the mutex (can be NULL if we are walking the chain to recheck
157  *              things for a task that has just got its priority adjusted, and
158  *              is waiting on a mutex)
159  * @next_lock:  the mutex on which the owner of @orig_lock was blocked before
160  *              we dropped its pi_lock. Is never dereferenced, only used for
161  *              comparison to detect lock chain changes.
162  * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
163  *              its priority to the mutex owner (can be NULL in the case
164  *              depicted above or if the top waiter has gone away and we are
165  *              actually deboosting the owner)
166  * @top_task:   the current top waiter
167  *
168  * Returns 0 or -EDEADLK.
169  */
170 static int rt_mutex_adjust_prio_chain(struct task_struct *task,
171                                       int deadlock_detect,
172                                       struct rt_mutex *orig_lock,
173                                       struct rt_mutex *next_lock,
174                                       struct rt_mutex_waiter *orig_waiter,
175                                       struct task_struct *top_task)
176 {
177         struct rt_mutex *lock;
178         struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
179         int detect_deadlock, ret = 0, depth = 0;
180         unsigned long flags;
181
182         detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter,
183                                                          deadlock_detect);
184
185         /*
186          * The (de)boosting is a step-by-step approach with a lot of
187          * pitfalls. We want this to be preemptible and we want to hold a
188          * maximum of two locks per step. So we have to check
189          * carefully whether things have changed under us.
190          */
191  again:
192         if (++depth > max_lock_depth) {
193                 static int prev_max;
194
195                 /*
196                  * Print this only once. If the admin changes the limit,
197                  * print a new message when reaching the limit again.
198                  */
199                 if (prev_max != max_lock_depth) {
200                         prev_max = max_lock_depth;
201                         printk(KERN_WARNING "Maximum lock depth %d reached "
202                                "task: %s (%d)\n", max_lock_depth,
203                                top_task->comm, task_pid_nr(top_task));
204                 }
205                 put_task_struct(task);
206
207                 return -EDEADLK;
208         }
209  retry:
210         /*
211          * The task cannot go away, as we did a get_task_struct() on it before!
212          */
213         raw_spin_lock_irqsave(&task->pi_lock, flags);
214
215         waiter = task->pi_blocked_on;
216         /*
217          * Check whether the end of the boosting chain has been
218          * reached or the state of the chain has changed while we
219          * dropped the locks.
220          */
221         if (!waiter)
222                 goto out_unlock_pi;
223
224         /*
225          * Check the orig_waiter state. After we dropped the locks,
226          * the previous owner of the lock might have released the lock.
227          */
228         if (orig_waiter && !rt_mutex_owner(orig_lock))
229                 goto out_unlock_pi;
230
231         /*
232          * We dropped all locks after taking a refcount on @task, so
233          * the task might have moved on in the lock chain or even left
234          * the chain completely and now blocks on an unrelated lock or
235          * on @orig_lock.
236          *
237          * We stored the lock on which @task was blocked in @next_lock,
238          * so we can detect the chain change.
239          */
240         if (next_lock != waiter->lock)
241                 goto out_unlock_pi;
242
243         /*
244          * Drop out when the task has no waiters. Note that
245          * top_waiter can be NULL when we are in deboosting
246          * mode!
247          */
248         if (top_waiter) {
249                 if (!task_has_pi_waiters(task))
250                         goto out_unlock_pi;
251                 /*
252                  * If deadlock detection is off, we stop here if we
253                  * are not the top pi waiter of the task.
254                  */
255                 if (!detect_deadlock && top_waiter != task_top_pi_waiter(task))
256                         goto out_unlock_pi;
257         }
258
259         /*
260          * When deadlock detection is off, we check whether further
261          * priority adjustment is necessary.
262          */
263         if (!detect_deadlock && waiter->list_entry.prio == task->prio)
264                 goto out_unlock_pi;
265
266         lock = waiter->lock;
267         if (!raw_spin_trylock(&lock->wait_lock)) {
268                 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
269                 cpu_relax();
270                 goto retry;
271         }
272
273         /*
274          * Deadlock detection. If the lock is the same as the original
275          * lock which caused us to walk the lock chain or if the
276          * current lock is owned by the task which initiated the chain
277          * walk, we have detected a deadlock.
278          */
279         if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
280                 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
281                 raw_spin_unlock(&lock->wait_lock);
282                 ret = -EDEADLK;
283                 goto out_unlock_pi;
284         }
285
286         top_waiter = rt_mutex_top_waiter(lock);
287
288         /* Requeue the waiter */
289         plist_del(&waiter->list_entry, &lock->wait_list);
290         waiter->list_entry.prio = task->prio;
291         plist_add(&waiter->list_entry, &lock->wait_list);
292
293         /* Release the task */
294         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
295         if (!rt_mutex_owner(lock)) {
296                 /*
297                  * If the requeue above changed the top waiter, then we need
298                  * to wake the new top waiter up to try to get the lock.
299                  */
300
301                 if (top_waiter != rt_mutex_top_waiter(lock))
302                         wake_up_process(rt_mutex_top_waiter(lock)->task);
303                 raw_spin_unlock(&lock->wait_lock);
304                 goto out_put_task;
305         }
306         put_task_struct(task);
307
308         /* Grab the next task */
309         task = rt_mutex_owner(lock);
310         get_task_struct(task);
311         raw_spin_lock_irqsave(&task->pi_lock, flags);
312
313         if (waiter == rt_mutex_top_waiter(lock)) {
314                 /* Boost the owner */
315                 plist_del(&top_waiter->pi_list_entry, &task->pi_waiters);
316                 waiter->pi_list_entry.prio = waiter->list_entry.prio;
317                 plist_add(&waiter->pi_list_entry, &task->pi_waiters);
318                 __rt_mutex_adjust_prio(task);
319
320         } else if (top_waiter == waiter) {
321                 /* Deboost the owner */
322                 plist_del(&waiter->pi_list_entry, &task->pi_waiters);
323                 waiter = rt_mutex_top_waiter(lock);
324                 waiter->pi_list_entry.prio = waiter->list_entry.prio;
325                 plist_add(&waiter->pi_list_entry, &task->pi_waiters);
326                 __rt_mutex_adjust_prio(task);
327         }
328
329         /*
330          * Check whether the task which owns the current lock is pi
331          * blocked itself. If so, we store a pointer to the lock for
332          * the lock chain change detection above. After we drop
333          * task->pi_lock, next_lock cannot be dereferenced anymore.
334          */
335         next_lock = task_blocked_on_lock(task);
336
337         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
338
339         top_waiter = rt_mutex_top_waiter(lock);
340         raw_spin_unlock(&lock->wait_lock);
341
342         /*
343          * We reached the end of the lock chain. Stop right here. No
344          * point in going back just to figure that out.
345          */
346         if (!next_lock)
347                 goto out_put_task;
348
349         if (!detect_deadlock && waiter != top_waiter)
350                 goto out_put_task;
351
352         goto again;
353
354  out_unlock_pi:
355         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
356  out_put_task:
357         put_task_struct(task);
358
359         return ret;
360 }
361
362 /*
363  * Try to take an rt-mutex
364  *
365  * Must be called with lock->wait_lock held.
366  *
367  * @lock:   the lock to be acquired.
368  * @task:   the task which wants to acquire the lock
369  * @waiter: the waiter that is queued to the lock's wait list. (could be NULL)
370  */
371 static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
372                 struct rt_mutex_waiter *waiter)
373 {
374         /*
375          * We have to be careful here if the atomic speedups are
376          * enabled: when
377          *  - no other waiter is on the lock, and
378          *  - the lock has been released since we did the cmpxchg,
379          * the lock can be released or taken while we are doing the
380          * checks and marking the lock with RT_MUTEX_HAS_WAITERS.
381          *
382          * The atomic acquire/release aware variant of
383          * mark_rt_mutex_waiters uses a cmpxchg loop. After setting
384          * the WAITERS bit, the atomic release / acquire cannot
385          * happen anymore and lock->wait_lock protects us from the
386          * non-atomic case.
387          *
388          * Note that this might set lock->owner =
389          * RT_MUTEX_HAS_WAITERS in the case the lock is not contended
390          * any more. This is fixed up when we take ownership.
391          * This is the transitional state explained at the top of this file.
392          */
393         mark_rt_mutex_waiters(lock);
394
395         if (rt_mutex_owner(lock))
396                 return 0;
397
398         /*
399          * The task will get the lock when one of these conditions holds:
400          * 1) there is no other waiter on the lock
401          * 2) the task has a higher priority than all waiters
402          * 3) the task's queued waiter is the top waiter on the lock
403          */
404         if (rt_mutex_has_waiters(lock)) {
405                 if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) {
406                         if (!waiter || waiter != rt_mutex_top_waiter(lock))
407                                 return 0;
408                 }
409         }
410
411         if (waiter || rt_mutex_has_waiters(lock)) {
412                 unsigned long flags;
413                 struct rt_mutex_waiter *top;
414
415                 raw_spin_lock_irqsave(&task->pi_lock, flags);
416
417                 /* remove the queued waiter. */
418                 if (waiter) {
419                         plist_del(&waiter->list_entry, &lock->wait_list);
420                         task->pi_blocked_on = NULL;
421                 }
422
423                 /*
424                  * We have to enqueue the top waiter (if it exists) into
425                  * task->pi_waiters list.
426                  */
427                 if (rt_mutex_has_waiters(lock)) {
428                         top = rt_mutex_top_waiter(lock);
429                         top->pi_list_entry.prio = top->list_entry.prio;
430                         plist_add(&top->pi_list_entry, &task->pi_waiters);
431                 }
432                 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
433         }
434
435         /* We got the lock. */
436         debug_rt_mutex_lock(lock);
437
438         rt_mutex_set_owner(lock, task);
439
440         rt_mutex_deadlock_account_lock(lock, task);
441
442         return 1;
443 }
444
445 /*
446  * Task blocks on lock.
447  *
448  * Prepare waiter and propagate pi chain
449  *
450  * This must be called with lock->wait_lock held.
451  */
452 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
453                                    struct rt_mutex_waiter *waiter,
454                                    struct task_struct *task,
455                                    int detect_deadlock)
456 {
457         struct task_struct *owner = rt_mutex_owner(lock);
458         struct rt_mutex_waiter *top_waiter = waiter;
459         struct rt_mutex *next_lock;
460         int chain_walk = 0, res;
461         unsigned long flags;
462
463         /*
464          * Early deadlock detection. We really don't want the task to
465          * enqueue on itself just to untangle the mess later. It's not
466          * only an optimization. We drop the locks, so another waiter
467          * can come in before the chain walk detects the deadlock. That
468          * other waiter would then detect the deadlock and return -EDEADLK,
469          * which is wrong, as it is not the waiter that is actually in a
470          * deadlock situation.
471          */
472         if (owner == task)
473                 return -EDEADLK;
474
475         raw_spin_lock_irqsave(&task->pi_lock, flags);
476         __rt_mutex_adjust_prio(task);
477         waiter->task = task;
478         waiter->lock = lock;
479         plist_node_init(&waiter->list_entry, task->prio);
480         plist_node_init(&waiter->pi_list_entry, task->prio);
481
482         /* Get the top priority waiter on the lock */
483         if (rt_mutex_has_waiters(lock))
484                 top_waiter = rt_mutex_top_waiter(lock);
485         plist_add(&waiter->list_entry, &lock->wait_list);
486
487         task->pi_blocked_on = waiter;
488
489         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
490
491         if (!owner)
492                 return 0;
493
494         raw_spin_lock_irqsave(&owner->pi_lock, flags);
495         if (waiter == rt_mutex_top_waiter(lock)) {
496                 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
497                 plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
498
499                 __rt_mutex_adjust_prio(owner);
500                 if (owner->pi_blocked_on)
501                         chain_walk = 1;
502         } else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) {
503                 chain_walk = 1;
504         }
505
506         /* Store the lock on which owner is blocked or NULL */
507         next_lock = task_blocked_on_lock(owner);
508
509         raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
510         /*
511          * Even if full deadlock detection is on, if the owner is not
512          * blocked itself, we can avoid finding this out in the chain
513          * walk.
514          */
515         if (!chain_walk || !next_lock)
516                 return 0;
517
518         /*
519          * The owner can't disappear while holding a lock,
520          * so the owner struct is protected by wait_lock.
521          * The reference gets dropped in rt_mutex_adjust_prio_chain()!
522          */
523         get_task_struct(owner);
524
525         raw_spin_unlock(&lock->wait_lock);
526
527         res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock,
528                                          next_lock, waiter, task);
529
530         raw_spin_lock(&lock->wait_lock);
531
532         return res;
533 }
534
535 /*
536  * Wake up the next waiter on the lock.
537  *
538  * Remove the top waiter from the current task's pi waiter list and wake it up.
539  *
540  * Called with lock->wait_lock held.
541  */
542 static void wakeup_next_waiter(struct rt_mutex *lock)
543 {
544         struct rt_mutex_waiter *waiter;
545         unsigned long flags;
546
547         raw_spin_lock_irqsave(&current->pi_lock, flags);
548
549         waiter = rt_mutex_top_waiter(lock);
550
551         /*
552          * Remove it from current->pi_waiters. We do not adjust a
553          * possible priority boost right now. We execute wakeup in the
554          * boosted mode and go back to normal after releasing
555          * lock->wait_lock.
556          */
557         plist_del(&waiter->pi_list_entry, &current->pi_waiters);
558
559         rt_mutex_set_owner(lock, NULL);
560
561         raw_spin_unlock_irqrestore(&current->pi_lock, flags);
562
563         wake_up_process(waiter->task);
564 }
565
566 /*
567  * Remove a waiter from a lock and give up
568  *
569  * Must be called with lock->wait_lock held, and only after
570  * try_to_take_rt_mutex() has just failed.
571  */
572 static void remove_waiter(struct rt_mutex *lock,
573                           struct rt_mutex_waiter *waiter)
574 {
575         int first = (waiter == rt_mutex_top_waiter(lock));
576         struct task_struct *owner = rt_mutex_owner(lock);
577         struct rt_mutex *next_lock = NULL;
578         unsigned long flags;
579
580         raw_spin_lock_irqsave(&current->pi_lock, flags);
581         plist_del(&waiter->list_entry, &lock->wait_list);
582         current->pi_blocked_on = NULL;
583         raw_spin_unlock_irqrestore(&current->pi_lock, flags);
584
585         if (!owner)
586                 return;
587
588         if (first) {
589
590                 raw_spin_lock_irqsave(&owner->pi_lock, flags);
591
592                 plist_del(&waiter->pi_list_entry, &owner->pi_waiters);
593
594                 if (rt_mutex_has_waiters(lock)) {
595                         struct rt_mutex_waiter *next;
596
597                         next = rt_mutex_top_waiter(lock);
598                         plist_add(&next->pi_list_entry, &owner->pi_waiters);
599                 }
600                 __rt_mutex_adjust_prio(owner);
601
602                 /* Store the lock on which owner is blocked or NULL */
603                 next_lock = task_blocked_on_lock(owner);
604
605                 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
606         }
607
608         WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
609
610         if (!next_lock)
611                 return;
612
613         /* gets dropped in rt_mutex_adjust_prio_chain()! */
614         get_task_struct(owner);
615
616         raw_spin_unlock(&lock->wait_lock);
617
618         rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current);
619
620         raw_spin_lock(&lock->wait_lock);
621 }
622
623 /*
624  * Recheck the pi chain, in case the task got a new priority setting
625  *
626  * Called from sched_setscheduler()
627  */
628 void rt_mutex_adjust_pi(struct task_struct *task)
629 {
630         struct rt_mutex_waiter *waiter;
631         struct rt_mutex *next_lock;
632         unsigned long flags;
633
634         raw_spin_lock_irqsave(&task->pi_lock, flags);
635
636         waiter = task->pi_blocked_on;
637         if (!waiter || waiter->list_entry.prio == task->prio) {
638                 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
639                 return;
640         }
641         next_lock = waiter->lock;
642         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
643
644         /* gets dropped in rt_mutex_adjust_prio_chain()! */
645         get_task_struct(task);
646
647         rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task);
648 }
649
650 /**
651  * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
652  * @lock:                the rt_mutex to take
653  * @state:               the state the task should block in (TASK_INTERRUPTIBLE
654  *                       or TASK_UNINTERRUPTIBLE)
655  * @timeout:             the pre-initialized and started timer, or NULL for none
656  * @waiter:              the pre-initialized rt_mutex_waiter
657  *
658  * lock->wait_lock must be held by the caller.
659  */
660 static int __sched
661 __rt_mutex_slowlock(struct rt_mutex *lock, int state,
662                     struct hrtimer_sleeper *timeout,
663                     struct rt_mutex_waiter *waiter)
664 {
665         int ret = 0;
666         int was_disabled;
667
668         for (;;) {
669                 /* Try to acquire the lock: */
670                 if (try_to_take_rt_mutex(lock, current, waiter))
671                         break;
672
673                 /*
674                  * In TASK_INTERRUPTIBLE state, check for signals and
675                  * timeout. Both are ignored otherwise.
676                  */
677                 if (unlikely(state == TASK_INTERRUPTIBLE)) {
678                         /* Signal pending? */
679                         if (signal_pending(current))
680                                 ret = -EINTR;
681                         if (timeout && !timeout->task)
682                                 ret = -ETIMEDOUT;
683                         if (ret)
684                                 break;
685                 }
686
687                 raw_spin_unlock(&lock->wait_lock);
688
689                 was_disabled = irqs_disabled();
690                 if (was_disabled)
691                         local_irq_enable();
692
693                 debug_rt_mutex_print_deadlock(waiter);
694
695                 schedule_rt_mutex(lock);
696
697                 if (was_disabled)
698                         local_irq_disable();
699
700                 raw_spin_lock(&lock->wait_lock);
701                 set_current_state(state);
702         }
703
704         return ret;
705 }
706
707 static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
708                                      struct rt_mutex_waiter *w)
709 {
710         /*
711          * If the result is not -EDEADLOCK or the caller requested
712          * deadlock detection, nothing to do here.
713          */
714         if (res != -EDEADLOCK || detect_deadlock)
715                 return;
716
717         /*
718          * Yell loudly and stop the task right here.
719          */
720         rt_mutex_print_deadlock(w);
721         while (1) {
722                 set_current_state(TASK_INTERRUPTIBLE);
723                 schedule();
724         }
725 }
726
727 /*
728  * Slow path lock function:
729  */
730 static int __sched
731 rt_mutex_slowlock(struct rt_mutex *lock, int state,
732                   struct hrtimer_sleeper *timeout,
733                   int detect_deadlock)
734 {
735         struct rt_mutex_waiter waiter;
736         int ret = 0;
737
738         debug_rt_mutex_init_waiter(&waiter);
739
740         raw_spin_lock(&lock->wait_lock);
741
742         /* Try to acquire the lock again: */
743         if (try_to_take_rt_mutex(lock, current, NULL)) {
744                 raw_spin_unlock(&lock->wait_lock);
745                 return 0;
746         }
747
748         set_current_state(state);
749
750         /* Set up the timer when timeout != NULL */
751         if (unlikely(timeout)) {
752                 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
753                 if (!hrtimer_active(&timeout->timer))
754                         timeout->task = NULL;
755         }
756
757         ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock);
758
759         if (likely(!ret))
760                 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
761
762         set_current_state(TASK_RUNNING);
763
764         if (unlikely(ret)) {
765                 remove_waiter(lock, &waiter);
766                 rt_mutex_handle_deadlock(ret, detect_deadlock, &waiter);
767         }
768
769         /*
770          * try_to_take_rt_mutex() sets the waiter bit
771          * unconditionally. We might have to fix that up.
772          */
773         fixup_rt_mutex_waiters(lock);
774
775         raw_spin_unlock(&lock->wait_lock);
776
777         /* Remove pending timer: */
778         if (unlikely(timeout))
779                 hrtimer_cancel(&timeout->timer);
780
781         debug_rt_mutex_free_waiter(&waiter);
782
783         return ret;
784 }
785
786 /*
787  * Slow path try-lock function:
788  */
789 static inline int
790 rt_mutex_slowtrylock(struct rt_mutex *lock)
791 {
792         int ret = 0;
793
794         raw_spin_lock(&lock->wait_lock);
795
796         if (likely(rt_mutex_owner(lock) != current)) {
797
798                 ret = try_to_take_rt_mutex(lock, current, NULL);
799                 /*
800                  * try_to_take_rt_mutex() sets the lock waiters
801                  * bit unconditionally. Clean this up.
802                  */
803                 fixup_rt_mutex_waiters(lock);
804         }
805
806         raw_spin_unlock(&lock->wait_lock);
807
808         return ret;
809 }
810
811 /*
812  * Slow path to release a rt-mutex:
813  */
814 static void __sched
815 rt_mutex_slowunlock(struct rt_mutex *lock)
816 {
817         raw_spin_lock(&lock->wait_lock);
818
819         debug_rt_mutex_unlock(lock);
820
821         rt_mutex_deadlock_account_unlock(current);
822
823         if (!rt_mutex_has_waiters(lock)) {
824                 lock->owner = NULL;
825                 raw_spin_unlock(&lock->wait_lock);
826                 return;
827         }
828
829         wakeup_next_waiter(lock);
830
831         raw_spin_unlock(&lock->wait_lock);
832
833         /* Undo pi boosting if necessary: */
834         rt_mutex_adjust_prio(current);
835 }
836
837 /*
838  * debug aware fast / slowpath lock, trylock and unlock
839  *
840  * The atomic acquire/release ops are compiled away when either the
841  * architecture does not support cmpxchg or debugging is enabled.
842  */
843 static inline int
844 rt_mutex_fastlock(struct rt_mutex *lock, int state,
845                   int detect_deadlock,
846                   int (*slowfn)(struct rt_mutex *lock, int state,
847                                 struct hrtimer_sleeper *timeout,
848                                 int detect_deadlock))
849 {
850         if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
851                 rt_mutex_deadlock_account_lock(lock, current);
852                 return 0;
853         } else
854                 return slowfn(lock, state, NULL, detect_deadlock);
855 }
856
857 static inline int
858 rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
859                         struct hrtimer_sleeper *timeout, int detect_deadlock,
860                         int (*slowfn)(struct rt_mutex *lock, int state,
861                                       struct hrtimer_sleeper *timeout,
862                                       int detect_deadlock))
863 {
864         if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
865                 rt_mutex_deadlock_account_lock(lock, current);
866                 return 0;
867         } else
868                 return slowfn(lock, state, timeout, detect_deadlock);
869 }
870
871 static inline int
872 rt_mutex_fasttrylock(struct rt_mutex *lock,
873                      int (*slowfn)(struct rt_mutex *lock))
874 {
875         if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
876                 rt_mutex_deadlock_account_lock(lock, current);
877                 return 1;
878         }
879         return slowfn(lock);
880 }
881
882 static inline void
883 rt_mutex_fastunlock(struct rt_mutex *lock,
884                     void (*slowfn)(struct rt_mutex *lock))
885 {
886         if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
887                 rt_mutex_deadlock_account_unlock(current);
888         else
889                 slowfn(lock);
890 }
891
892 /**
893  * rt_mutex_lock - lock a rt_mutex
894  *
895  * @lock: the rt_mutex to be locked
896  */
897 void __sched rt_mutex_lock(struct rt_mutex *lock)
898 {
899         might_sleep();
900
901         rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock);
902 }
903 EXPORT_SYMBOL_GPL(rt_mutex_lock);
904
905 /**
906  * rt_mutex_lock_interruptible - lock a rt_mutex interruptibly
907  *
908  * @lock:               the rt_mutex to be locked
909  * @detect_deadlock:    deadlock detection on/off
910  *
911  * Returns:
912  *  0           on success
913  * -EINTR       when interrupted by a signal
914  * -EDEADLK     when the lock would deadlock (when deadlock detection is on)
915  */
916 int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
917                                                  int detect_deadlock)
918 {
919         might_sleep();
920
921         return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE,
922                                  detect_deadlock, rt_mutex_slowlock);
923 }
924 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
925
926 /**
927  * rt_mutex_timed_lock - lock a rt_mutex interruptibly, with a timeout;
928  *                      the timeout structure is provided
929  *                      by the caller
930  *
931  * @lock:               the rt_mutex to be locked
932  * @timeout:            timeout structure or NULL (no timeout)
933  * @detect_deadlock:    deadlock detection on/off
934  *
935  * Returns:
936  *  0           on success
937  * -EINTR       when interrupted by a signal
938  * -ETIMEDOUT   when the timeout expired
939  * -EDEADLK     when the lock would deadlock (when deadlock detection is on)
940  */
941 int
942 rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout,
943                     int detect_deadlock)
944 {
945         might_sleep();
946
947         return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
948                                        detect_deadlock, rt_mutex_slowlock);
949 }
950 EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
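
/*
 * A minimal usage sketch for the timed variant. The function and variable
 * names below are made up for illustration; the only assumptions are the
 * hrtimer_sleeper helpers from <linux/hrtimer.h> and an absolute
 * CLOCK_MONOTONIC expiry. rt_mutex_slowlock() starts the timer itself, the
 * caller only has to prepare the sleeper:
 */
static int __maybe_unused example_lock_with_timeout(struct rt_mutex *lock,
						    ktime_t expires)
{
	struct hrtimer_sleeper timeout;
	int ret;

	hrtimer_init_on_stack(&timeout.timer, CLOCK_MONOTONIC,
			      HRTIMER_MODE_ABS);
	hrtimer_init_sleeper(&timeout, current);
	hrtimer_set_expires(&timeout.timer, expires);

	/* 0 on success, -EINTR, -ETIMEDOUT or -EDEADLK otherwise */
	ret = rt_mutex_timed_lock(lock, &timeout, 0);

	destroy_hrtimer_on_stack(&timeout.timer);
	return ret;
}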
951
952 /**
953  * rt_mutex_trylock - try to lock a rt_mutex
954  *
955  * @lock:       the rt_mutex to be locked
956  *
957  * Returns 1 on success and 0 on contention
958  */
959 int __sched rt_mutex_trylock(struct rt_mutex *lock)
960 {
961         return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
962 }
963 EXPORT_SYMBOL_GPL(rt_mutex_trylock);
964
965 /**
966  * rt_mutex_unlock - unlock a rt_mutex
967  *
968  * @lock: the rt_mutex to be unlocked
969  */
970 void __sched rt_mutex_unlock(struct rt_mutex *lock)
971 {
972         rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
973 }
974 EXPORT_SYMBOL_GPL(rt_mutex_unlock);
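
/*
 * A minimal usage sketch of the basic API. "example_lock", "example_data"
 * and the function name are made up for illustration; DEFINE_RT_MUTEX()
 * comes from <linux/rtmutex.h>. An rt_mutex is a sleeping lock, so it must
 * only be taken from contexts that are allowed to sleep:
 */
static DEFINE_RT_MUTEX(example_lock);
static int example_data;

static void __maybe_unused example_critical_section(int val)
{
	rt_mutex_lock(&example_lock);	/* may sleep; boosts the owner if we block */
	example_data = val;
	rt_mutex_unlock(&example_lock);	/* drops any pi boost received while holding it */
}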
975
976 /**
977  * rt_mutex_destroy - mark a mutex unusable
978  * @lock: the mutex to be destroyed
979  *
980  * This function marks the mutex uninitialized, and any subsequent
981  * use of the mutex is forbidden. The mutex must not be locked when
982  * this function is called.
983  */
984 void rt_mutex_destroy(struct rt_mutex *lock)
985 {
986         WARN_ON(rt_mutex_is_locked(lock));
987 #ifdef CONFIG_DEBUG_RT_MUTEXES
988         lock->magic = NULL;
989 #endif
990 }
991
992 EXPORT_SYMBOL_GPL(rt_mutex_destroy);
993
994 /**
995  * __rt_mutex_init - initialize the rt lock
996  *
997  * @lock: the rt lock to be initialized
998  *
999  * Initialize the rt lock to unlocked state.
1000  *
1001  * Initializing a locked rt lock is not allowed
1002  */
1003 void __rt_mutex_init(struct rt_mutex *lock, const char *name)
1004 {
1005         lock->owner = NULL;
1006         raw_spin_lock_init(&lock->wait_lock);
1007         plist_head_init(&lock->wait_list);
1008
1009         debug_rt_mutex_init(lock, name);
1010 }
1011 EXPORT_SYMBOL_GPL(__rt_mutex_init);
1012
1013 /**
1014  * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
1015  *                              proxy owner
1016  *
1017  * @lock:       the rt_mutex to be locked
1018  * @proxy_owner:        the task to set as owner
1019  *
1020  * No locking. The caller has to do the serializing itself.
1021  * Special API call for PI-futex support
1022  */
1023 void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
1024                                 struct task_struct *proxy_owner)
1025 {
1026         __rt_mutex_init(lock, NULL);
1027         debug_rt_mutex_proxy_lock(lock, proxy_owner);
1028         rt_mutex_set_owner(lock, proxy_owner);
1029         rt_mutex_deadlock_account_lock(lock, proxy_owner);
1030 }
1031
1032 /**
1033  * rt_mutex_proxy_unlock - release a lock on behalf of owner
1034  *
1035  * @lock:       the rt_mutex to be unlocked
1036  *
1037  * No locking. The caller has to do the serializing itself.
1038  * Special API call for PI-futex support
1039  */
1040 void rt_mutex_proxy_unlock(struct rt_mutex *lock,
1041                            struct task_struct *proxy_owner)
1042 {
1043         debug_rt_mutex_proxy_unlock(lock);
1044         rt_mutex_set_owner(lock, NULL);
1045         rt_mutex_deadlock_account_unlock(proxy_owner);
1046 }
1047
1048 /**
1049  * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
1050  * @lock:               the rt_mutex to take
1051  * @waiter:             the pre-initialized rt_mutex_waiter
1052  * @task:               the task to prepare
1053  * @detect_deadlock:    perform deadlock detection (1) or not (0)
1054  *
1055  * Returns:
1056  *  0 - task blocked on lock
1057  *  1 - acquired the lock for task, caller should wake it up
1058  * <0 - error
1059  *
1060  * Special API call for FUTEX_REQUEUE_PI support.
1061  */
1062 int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1063                               struct rt_mutex_waiter *waiter,
1064                               struct task_struct *task, int detect_deadlock)
1065 {
1066         int ret;
1067
1068         raw_spin_lock(&lock->wait_lock);
1069
1070         if (try_to_take_rt_mutex(lock, task, NULL)) {
1071                 raw_spin_unlock(&lock->wait_lock);
1072                 return 1;
1073         }
1074
1075         /* We enforce deadlock detection for futexes */
1076         ret = task_blocks_on_rt_mutex(lock, waiter, task, 1);
1077
1078         if (ret && !rt_mutex_owner(lock)) {
1079                 /*
1080                  * Reset the return value. We might have
1081                  * returned with -EDEADLK and the owner
1082                  * released the lock while we were walking the
1083                  * pi chain.  Let the waiter sort it out.
1084                  */
1085                 ret = 0;
1086         }
1087
1088         if (unlikely(ret))
1089                 remove_waiter(lock, waiter);
1090
1091         raw_spin_unlock(&lock->wait_lock);
1092
1093         debug_rt_mutex_print_deadlock(waiter);
1094
1095         return ret;
1096 }
1097
1098 /**
1099  * rt_mutex_next_owner - return the next owner of the lock
1100  *
1101  * @lock: the rt lock to query
1102  *
1103  * Returns the next owner of the lock or NULL
1104  *
1105  * Caller has to serialize against other accessors to the lock
1106  * itself.
1107  *
1108  * Special API call for PI-futex support
1109  */
1110 struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
1111 {
1112         if (!rt_mutex_has_waiters(lock))
1113                 return NULL;
1114
1115         return rt_mutex_top_waiter(lock)->task;
1116 }
1117
1118 /**
1119  * rt_mutex_finish_proxy_lock() - Complete lock acquisition
1120  * @lock:               the rt_mutex we were woken on
1121  * @to:                 the timeout, NULL if none. The hrtimer should already have
1122  *                      been started.
1123  * @waiter:             the pre-initialized rt_mutex_waiter
1124  * @detect_deadlock:    perform deadlock detection (1) or not (0)
1125  *
1126  * Complete the lock acquisition started on our behalf by another thread.
1127  *
1128  * Returns:
1129  *  0 - success
1130  * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK
1131  *
1132  * Special API call for PI-futex requeue support
1133  */
1134 int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1135                                struct hrtimer_sleeper *to,
1136                                struct rt_mutex_waiter *waiter,
1137                                int detect_deadlock)
1138 {
1139         int ret;
1140
1141         raw_spin_lock(&lock->wait_lock);
1142
1143         set_current_state(TASK_INTERRUPTIBLE);
1144
1145         ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
1146
1147         set_current_state(TASK_RUNNING);
1148
1149         if (unlikely(ret))
1150                 remove_waiter(lock, waiter);
1151
1152         /*
1153          * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
1154          * have to fix that up.
1155          */
1156         fixup_rt_mutex_waiters(lock);
1157
1158         raw_spin_unlock(&lock->wait_lock);
1159
1160         return ret;
1161 }