+ up_read(&curr->mm->mmap_sem);
+ return ret;
+}
+
+/*
+ * Userspace tried a 0 -> TID atomic transition of the futex value
+ * and failed. The kernel side here does the whole locking operation:
+ * if there are waiters then it will block, it does PI, etc. (Due to
+ * races the kernel might see a 0 value of the futex too.)
+ */
+static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
+ struct hrtimer_sleeper *to)
+{
+ struct task_struct *curr = current;
+ struct futex_hash_bucket *hb;
+ u32 uval, newval, curval;
+ struct futex_q q;
+ int ret, attempt = 0;
+
+ if (refill_pi_state_cache())
+ return -ENOMEM;
+
+ q.pi_state = NULL;
+ retry:
+ down_read(&curr->mm->mmap_sem);
+
+ ret = get_futex_key(uaddr, &q.key);
+ if (unlikely(ret != 0))
+ goto out_release_sem;
+
+ hb = queue_lock(&q, -1, NULL);
+
+ retry_locked:
+ /*
+ * To avoid races, we attempt to take the lock here again
+ * (by doing a 0 -> TID atomic cmpxchg), while holding all
+ * the locks. It will most likely not succeed.
+ */
+ newval = current->pid;
+
+ inc_preempt_count();
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
+ dec_preempt_count();
+
+ if (unlikely(curval == -EFAULT))
+ goto uaddr_faulted;
+
+ /* We own the lock already */
+ if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) {
+ if (!detect && 0)
+ force_sig(SIGKILL, current);
+ ret = -EDEADLK;
+ goto out_unlock_release_sem;
+ }
+
+ /*
+ * Surprise - we got the lock. Just return
+ * to userspace:
+ */
+ if (unlikely(!curval))
+ goto out_unlock_release_sem;
+
+ uval = curval;
+ newval = uval | FUTEX_WAITERS;
+
+ inc_preempt_count();
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+ dec_preempt_count();
+
+ if (unlikely(curval == -EFAULT))
+ goto uaddr_faulted;
+ if (unlikely(curval != uval))
+ goto retry_locked;
+
+ /*
+ * We dont have the lock. Look up the PI state (or create it if
+ * we are the first waiter):
+ */
+ ret = lookup_pi_state(uval, hb, &q);
+
+ if (unlikely(ret)) {
+ /*
+ * There were no waiters and the owner task lookup
+ * failed. When the OWNER_DIED bit is set, then we
+ * know that this is a robust futex and we actually
+ * take the lock. This is safe as we are protected by
+ * the hash bucket lock. We also set the waiters bit
+ * unconditionally here, to simplify glibc handling of
+ * multiple tasks racing to acquire the lock and
+ * cleanup the problems which were left by the dead
+ * owner.
+ */
+ if (curval & FUTEX_OWNER_DIED) {
+ uval = newval;
+ newval = current->pid |
+ FUTEX_OWNER_DIED | FUTEX_WAITERS;
+
+ inc_preempt_count();
+ curval = futex_atomic_cmpxchg_inatomic(uaddr,
+ uval, newval);
+ dec_preempt_count();
+
+ if (unlikely(curval == -EFAULT))
+ goto uaddr_faulted;
+ if (unlikely(curval != uval))
+ goto retry_locked;
+ ret = 0;
+ }
+ goto out_unlock_release_sem;
+ }
+
+ /*
+ * Only actually queue now that the atomic ops are done:
+ */
+ __queue_me(&q, hb);
+
+ /*
+ * Now the futex is queued and we have checked the data, we
+ * don't want to hold mmap_sem while we sleep.
+ */
+ up_read(&curr->mm->mmap_sem);
+
+ WARN_ON(!q.pi_state);
+ /*
+ * Block on the PI mutex:
+ */
+ if (!trylock)
+ ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
+ else {
+ ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
+ /* Fixup the trylock return value: */
+ ret = ret ? 0 : -EWOULDBLOCK;
+ }
+
+ down_read(&curr->mm->mmap_sem);
+ spin_lock(q.lock_ptr);
+
+ /*
+ * Got the lock. We might not be the anticipated owner if we
+ * did a lock-steal - fix up the PI-state in that case.
+ */
+ if (!ret && q.pi_state->owner != curr) {
+ u32 newtid = current->pid | FUTEX_WAITERS;
+
+ /* Owner died? */
+ if (q.pi_state->owner != NULL) {
+ spin_lock_irq(&q.pi_state->owner->pi_lock);
+ list_del_init(&q.pi_state->list);
+ spin_unlock_irq(&q.pi_state->owner->pi_lock);
+ } else
+ newtid |= FUTEX_OWNER_DIED;
+
+ q.pi_state->owner = current;
+
+ spin_lock_irq(¤t->pi_lock);
+ list_add(&q.pi_state->list, ¤t->pi_state_list);
+ spin_unlock_irq(¤t->pi_lock);
+
+ /* Unqueue and drop the lock */
+ unqueue_me_pi(&q, hb);
+ up_read(&curr->mm->mmap_sem);
+ /*
+ * We own it, so we have to replace the pending owner
+ * TID. This must be atomic as we have preserve the
+ * owner died bit here.
+ */
+ ret = get_user(uval, uaddr);
+ while (!ret) {
+ newval = (uval & FUTEX_OWNER_DIED) | newtid;
+ curval = futex_atomic_cmpxchg_inatomic(uaddr,
+ uval, newval);
+ if (curval == -EFAULT)
+ ret = -EFAULT;
+ if (curval == uval)
+ break;
+ uval = curval;
+ }
+ } else {
+ /*
+ * Catch the rare case, where the lock was released
+ * when we were on the way back before we locked
+ * the hash bucket.
+ */
+ if (ret && q.pi_state->owner == curr) {
+ if (rt_mutex_trylock(&q.pi_state->pi_mutex))
+ ret = 0;
+ }
+ /* Unqueue and drop the lock */
+ unqueue_me_pi(&q, hb);
+ up_read(&curr->mm->mmap_sem);
+ }
+
+ if (!detect && ret == -EDEADLK && 0)
+ force_sig(SIGKILL, current);
+
+ return ret;
+
+ out_unlock_release_sem:
+ queue_unlock(&q, hb);
+
+ out_release_sem:
+ up_read(&curr->mm->mmap_sem);
+ return ret;
+
+ uaddr_faulted:
+ /*
+ * We have to r/w *(int __user *)uaddr, but we can't modify it
+ * non-atomically. Therefore, if get_user below is not
+ * enough, we need to handle the fault ourselves, while
+ * still holding the mmap_sem.
+ */
+ if (attempt++) {
+ if (futex_handle_fault((unsigned long)uaddr, attempt))
+ goto out_unlock_release_sem;
+
+ goto retry_locked;
+ }
+
+ queue_unlock(&q, hb);
+ up_read(&curr->mm->mmap_sem);
+
+ ret = get_user(uval, uaddr);
+ if (!ret && (uval != -EFAULT))
+ goto retry;
+
+ return ret;
+}
+
+/*
+ * Restart handler
+ */
+static long futex_lock_pi_restart(struct restart_block *restart)
+{
+ struct hrtimer_sleeper timeout, *to = NULL;
+ int ret;
+
+ restart->fn = do_no_restart_syscall;
+
+ if (restart->arg2 || restart->arg3) {
+ to = &timeout;
+ hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_ABS);
+ hrtimer_init_sleeper(to, current);
+ to->timer.expires.tv64 = ((u64)restart->arg1 << 32) |
+ (u64) restart->arg0;
+ }
+
+ pr_debug("lock_pi restart: %p, %d (%d)\n",
+ (u32 __user *)restart->arg0, current->pid);
+
+ ret = do_futex_lock_pi((u32 __user *)restart->arg0, restart->arg1,
+ 0, to);
+
+ if (ret != -EINTR)
+ return ret;
+
+ restart->fn = futex_lock_pi_restart;
+
+ /* The other values are filled in */
+ return -ERESTART_RESTARTBLOCK;
+}
+
+/*
+ * Called from the syscall entry below.
+ */
+static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
+ long nsec, int trylock)
+{
+ struct hrtimer_sleeper timeout, *to = NULL;
+ struct restart_block *restart;
+ int ret;
+
+ if (sec != MAX_SCHEDULE_TIMEOUT) {
+ to = &timeout;
+ hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_ABS);
+ hrtimer_init_sleeper(to, current);
+ to->timer.expires = ktime_set(sec, nsec);
+ }
+
+ ret = do_futex_lock_pi(uaddr, detect, trylock, to);
+
+ if (ret != -EINTR)
+ return ret;
+
+ pr_debug("lock_pi interrupted: %p, %d (%d)\n", uaddr, current->pid);
+
+ restart = ¤t_thread_info()->restart_block;
+ restart->fn = futex_lock_pi_restart;
+ restart->arg0 = (unsigned long) uaddr;
+ restart->arg1 = detect;
+ if (to) {
+ restart->arg2 = to->timer.expires.tv64 & 0xFFFFFFFF;
+ restart->arg3 = to->timer.expires.tv64 >> 32;
+ } else
+ restart->arg2 = restart->arg3 = 0;
+
+ return -ERESTART_RESTARTBLOCK;
+}
+
+/*
+ * Userspace attempted a TID -> 0 atomic transition, and failed.
+ * This is the in-kernel slowpath: we look up the PI state (if any),
+ * and do the rt-mutex unlock.
+ */
+static int futex_unlock_pi(u32 __user *uaddr)
+{
+ struct futex_hash_bucket *hb;
+ struct futex_q *this, *next;
+ u32 uval;
+ struct list_head *head;
+ union futex_key key;
+ int ret, attempt = 0;
+
+retry:
+ if (get_user(uval, uaddr))
+ return -EFAULT;
+ /*
+ * We release only a lock we actually own:
+ */
+ if ((uval & FUTEX_TID_MASK) != current->pid)
+ return -EPERM;
+ /*
+ * First take all the futex related locks:
+ */
+ down_read(¤t->mm->mmap_sem);
+
+ ret = get_futex_key(uaddr, &key);
+ if (unlikely(ret != 0))
+ goto out;
+
+ hb = hash_futex(&key);
+ spin_lock(&hb->lock);
+
+retry_locked:
+ /*
+ * To avoid races, try to do the TID -> 0 atomic transition
+ * again. If it succeeds then we can return without waking
+ * anyone else up:
+ */
+ inc_preempt_count();
+ uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
+ dec_preempt_count();
+
+ if (unlikely(uval == -EFAULT))
+ goto pi_faulted;
+ /*
+ * Rare case: we managed to release the lock atomically,
+ * no need to wake anyone else up:
+ */
+ if (unlikely(uval == current->pid))
+ goto out_unlock;
+
+ /*
+ * Ok, other tasks may need to be woken up - check waiters
+ * and do the wakeup if necessary:
+ */
+ head = &hb->chain;
+
+ list_for_each_entry_safe(this, next, head, list) {
+ if (!match_futex (&this->key, &key))
+ continue;
+ ret = wake_futex_pi(uaddr, uval, this);
+ /*
+ * The atomic access to the futex value
+ * generated a pagefault, so retry the
+ * user-access and the wakeup:
+ */
+ if (ret == -EFAULT)
+ goto pi_faulted;
+ goto out_unlock;
+ }
+ /*
+ * No waiters - kernel unlocks the futex:
+ */
+ ret = unlock_futex_pi(uaddr, uval);
+ if (ret == -EFAULT)
+ goto pi_faulted;
+
+out_unlock:
+ spin_unlock(&hb->lock);
+out: