Merge branch 'stable-3.2' into pandora-3.2
[pandora-kernel.git] / kernel / futex.c
index ea87f4d..1d0538e 100644 (file)
@@ -59,6 +59,8 @@
 #include <linux/magic.h>
 #include <linux/pid.h>
 #include <linux/nsproxy.h>
+#include <linux/ptrace.h>
+#include <linux/hugetlb.h>
 
 #include <asm/futex.h>
 
@@ -314,17 +316,29 @@ again:
 #endif
 
        lock_page(page_head);
+
+       /*
+        * If page_head->mapping is NULL, then it cannot be a PageAnon
+        * page; but it might be the ZERO_PAGE or in the gate area or
+        * in a special mapping (all cases which we are happy to fail);
+        * or it may have been a good file page when get_user_pages_fast
+        * found it, but truncated or holepunched or subjected to
+        * invalidate_complete_page2 before we got the page lock (also
+        * cases which we are happy to fail).  And we hold a reference,
+        * so refcount care in invalidate_complete_page's remove_mapping
+        * prevents drop_caches from setting mapping to NULL beneath us.
+        *
+        * The case we do have to guard against is when memory pressure made
+        * shmem_writepage move it from filecache to swapcache beneath us:
+        * an unlikely race, but we do need to retry for page_head->mapping.
+        */
        if (!page_head->mapping) {
+               int shmem_swizzled = PageSwapCache(page_head);
                unlock_page(page_head);
                put_page(page_head);
-               /*
-               * ZERO_PAGE pages don't have a mapping. Avoid a busy loop
-               * trying to find one. RW mapping would have COW'd (and thus
-               * have a mapping) so this page is RO and won't ever change.
-               */
-               if ((page_head == ZERO_PAGE(address)))
-                       return -EFAULT;
-               goto again;
+               if (shmem_swizzled)
+                       goto again;
+               return -EFAULT;
        }
 
        /*
@@ -350,7 +364,7 @@ again:
        } else {
                key->both.offset |= FUT_OFF_INODE; /* inode-based key */
                key->shared.inode = page_head->mapping->host;
-               key->shared.pgoff = page_head->index;
+               key->shared.pgoff = basepage_index(page);
        }
 
        get_futex_key_refs(key);
@@ -703,7 +717,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
                                struct futex_pi_state **ps,
                                struct task_struct *task, int set_waiters)
 {
-       int lock_taken, ret, ownerdied = 0;
+       int lock_taken, ret, force_take = 0;
        u32 uval, newval, curval, vpid = task_pid_vnr(task);
 
 retry:
@@ -742,17 +756,15 @@ retry:
        newval = curval | FUTEX_WAITERS;
 
        /*
-        * There are two cases, where a futex might have no owner (the
-        * owner TID is 0): OWNER_DIED. We take over the futex in this
-        * case. We also do an unconditional take over, when the owner
-        * of the futex died.
-        *
-        * This is safe as we are protected by the hash bucket lock !
+        * Should we force take the futex? See below.
         */
-       if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
-               /* Keep the OWNER_DIED bit */
+       if (unlikely(force_take)) {
+               /*
+                * Keep the OWNER_DIED and the WAITERS bit and set the
+                * new TID value.
+                */
                newval = (curval & ~FUTEX_TID_MASK) | vpid;
-               ownerdied = 0;
+               force_take = 0;
                lock_taken = 1;
        }
 
@@ -762,7 +774,7 @@ retry:
                goto retry;
 
        /*
-        * We took the lock due to owner died take over.
+        * We took the lock due to forced take over.
         */
        if (unlikely(lock_taken))
                return 1;
@@ -777,20 +789,25 @@ retry:
                switch (ret) {
                case -ESRCH:
                        /*
-                        * No owner found for this futex. Check if the
-                        * OWNER_DIED bit is set to figure out whether
-                        * this is a robust futex or not.
+                        * We failed to find an owner for this
+                        * futex. So we have no pi_state to block
+                        * on. This can happen in two cases:
+                        *
+                        * 1) The owner died
+                        * 2) A stale FUTEX_WAITERS bit
+                        *
+                        * Re-read the futex value.
                         */
                        if (get_futex_value_locked(&curval, uaddr))
                                return -EFAULT;
 
                        /*
-                        * We simply start over in case of a robust
-                        * futex. The code above will take the futex
-                        * and return happy.
+                        * If the owner died or we have a stale
+                        * WAITERS bit the owner TID in the user space
+                        * futex is 0.
                         */
-                       if (curval & FUTEX_OWNER_DIED) {
-                               ownerdied = 1;
+                       if (!(curval & FUTEX_TID_MASK)) {
+                               force_take = 1;
                                goto retry;
                        }
                default:
@@ -827,6 +844,9 @@ static void wake_futex(struct futex_q *q)
 {
        struct task_struct *p = q->task;
 
+       if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
+               return;
+
        /*
         * We set q->lock_ptr = NULL _before_ we wake up the task. If
         * a non-futex wake up happens on another CPU then the task
@@ -1062,6 +1082,10 @@ retry_private:
 
        plist_for_each_entry_safe(this, next, head, list) {
                if (match_futex (&this->key, &key1)) {
+                       if (this->pi_state || this->rt_waiter) {
+                               ret = -EINVAL;
+                               goto out_unlock;
+                       }
                        wake_futex(this);
                        if (++ret >= nr_wake)
                                break;
@@ -1074,6 +1098,10 @@ retry_private:
                op_ret = 0;
                plist_for_each_entry_safe(this, next, head, list) {
                        if (match_futex (&this->key, &key2)) {
+                               if (this->pi_state || this->rt_waiter) {
+                                       ret = -EINVAL;
+                                       goto out_unlock;
+                               }
                                wake_futex(this);
                                if (++op_ret >= nr_wake2)
                                        break;
@@ -1082,6 +1110,7 @@ retry_private:
                ret += op_ret;
        }
 
+out_unlock:
        double_unlock_hb(hb1, hb2);
 out_put_keys:
        put_futex_key(&key2);
@@ -1371,9 +1400,13 @@ retry_private:
                /*
                 * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
                 * be paired with each other and no other futex ops.
+                *
+                * We should never be requeueing a futex_q with a pi_state,
+                * which is awaiting a futex_unlock_pi().
                 */
                if ((requeue_pi && !this->rt_waiter) ||
-                   (!requeue_pi && this->rt_waiter)) {
+                   (!requeue_pi && this->rt_waiter) ||
+                   this->pi_state) {
                        ret = -EINVAL;
                        break;
                }
@@ -2218,11 +2251,11 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
  * @uaddr2:    the pi futex we will take prior to returning to user-space
  *
  * The caller will wait on uaddr and will be requeued by futex_requeue() to
- * uaddr2 which must be PI aware.  Normal wakeup will wake on uaddr2 and
- * complete the acquisition of the rt_mutex prior to returning to userspace.
- * This ensures the rt_mutex maintains an owner when it has waiters; without
- * one, the pi logic wouldn't know which task to boost/deboost, if there was a
- * need to.
+ * uaddr2 which must be PI aware and unique from uaddr.  Normal wakeup will wake
+ * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to
+ * userspace.  This ensures the rt_mutex maintains an owner when it has waiters;
+ * without one, the pi logic would not know which task to boost/deboost, if
+ * there was a need to.
  *
  * We call schedule in futex_wait_queue_me() when we enqueue and return there
  * via the following:
@@ -2259,6 +2292,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
        struct futex_q q = futex_q_init;
        int res, ret;
 
+       if (uaddr == uaddr2)
+               return -EINVAL;
+
        if (!bitset)
                return -EINVAL;
 
@@ -2330,7 +2366,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                 * signal.  futex_unlock_pi() will not destroy the lock_ptr nor
                 * the pi_state.
                 */
-               WARN_ON(!&q.pi_state);
+               WARN_ON(!q.pi_state);
                pi_mutex = &q.pi_state->pi_mutex;
                ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
                debug_rt_mutex_free_waiter(&rt_waiter);
@@ -2357,7 +2393,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
         * fault, unlock the rt_mutex and return the fault to userspace.
         */
        if (ret == -EFAULT) {
-               if (rt_mutex_owner(pi_mutex) == current)
+               if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
                        rt_mutex_unlock(pi_mutex);
        } else if (ret == -EINTR) {
                /*
@@ -2431,40 +2467,29 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
 {
        struct robust_list_head __user *head;
        unsigned long ret;
-       const struct cred *cred = current_cred(), *pcred;
+       struct task_struct *p;
 
        if (!futex_cmpxchg_enabled)
                return -ENOSYS;
 
+       rcu_read_lock();
+
+       ret = -ESRCH;
        if (!pid)
-               head = current->robust_list;
+               p = current;
        else {
-               struct task_struct *p;
-
-               ret = -ESRCH;
-               rcu_read_lock();
                p = find_task_by_vpid(pid);
                if (!p)
                        goto err_unlock;
-               ret = -EPERM;
-               pcred = __task_cred(p);
-               /* If victim is in different user_ns, then uids are not
-                  comparable, so we must have CAP_SYS_PTRACE */
-               if (cred->user->user_ns != pcred->user->user_ns) {
-                       if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
-                               goto err_unlock;
-                       goto ok;
-               }
-               /* If victim is in same user_ns, then uids are comparable */
-               if (cred->euid != pcred->euid &&
-                   cred->euid != pcred->uid &&
-                   !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
-                       goto err_unlock;
-ok:
-               head = p->robust_list;
-               rcu_read_unlock();
        }
 
+       ret = -EPERM;
+       if (!ptrace_may_access(p, PTRACE_MODE_READ))
+               goto err_unlock;
+
+       head = p->robust_list;
+       rcu_read_unlock();
+
        if (put_user(sizeof(*head), len_ptr))
                return -EFAULT;
        return put_user(head, head_ptr);
@@ -2628,6 +2653,16 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
                        return -ENOSYS;
        }
 
+       switch (cmd) {
+       case FUTEX_LOCK_PI:
+       case FUTEX_UNLOCK_PI:
+       case FUTEX_TRYLOCK_PI:
+       case FUTEX_WAIT_REQUEUE_PI:
+       case FUTEX_CMP_REQUEUE_PI:
+               if (!futex_cmpxchg_enabled)
+                       return -ENOSYS;
+       }
+
        switch (cmd) {
        case FUTEX_WAIT:
                val3 = FUTEX_BITSET_MATCH_ANY;
@@ -2649,16 +2684,13 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
                ret = futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
                break;
        case FUTEX_LOCK_PI:
-               if (futex_cmpxchg_enabled)
-                       ret = futex_lock_pi(uaddr, flags, val, timeout, 0);
+               ret = futex_lock_pi(uaddr, flags, val, timeout, 0);
                break;
        case FUTEX_UNLOCK_PI:
-               if (futex_cmpxchg_enabled)
-                       ret = futex_unlock_pi(uaddr, flags);
+               ret = futex_unlock_pi(uaddr, flags);
                break;
        case FUTEX_TRYLOCK_PI:
-               if (futex_cmpxchg_enabled)
-                       ret = futex_lock_pi(uaddr, flags, 0, timeout, 1);
+               ret = futex_lock_pi(uaddr, flags, 0, timeout, 1);
                break;
        case FUTEX_WAIT_REQUEUE_PI:
                val3 = FUTEX_BITSET_MATCH_ANY;