X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=kernel%2Ffutex.c;h=8888815c578bfff3dd57ac0521e0190757bab35e;hb=509ec6b9b497aaac9cbf6a7c7ecdf08116807e8f;hp=ea87f4d2f455c8c99164cb555287ad1c9fb165e3;hpb=b4949b84567f3ae1227d076fc95bbd8efea06506;p=pandora-kernel.git diff --git a/kernel/futex.c b/kernel/futex.c index ea87f4d2f455..8888815c578b 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -59,6 +59,8 @@ #include #include #include +#include +#include #include @@ -283,7 +285,7 @@ again: put_page(page); /* serialize against __split_huge_page_splitting() */ local_irq_disable(); - if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) { + if (likely(__get_user_pages_fast(address, 1, !ro, &page) == 1)) { page_head = compound_head(page); /* * page_head is valid pointer but we must pin @@ -314,17 +316,29 @@ again: #endif lock_page(page_head); + + /* + * If page_head->mapping is NULL, then it cannot be a PageAnon + * page; but it might be the ZERO_PAGE or in the gate area or + * in a special mapping (all cases which we are happy to fail); + * or it may have been a good file page when get_user_pages_fast + * found it, but truncated or holepunched or subjected to + * invalidate_complete_page2 before we got the page lock (also + * cases which we are happy to fail). And we hold a reference, + * so refcount care in invalidate_complete_page's remove_mapping + * prevents drop_caches from setting mapping to NULL beneath us. + * + * The case we do have to guard against is when memory pressure made + * shmem_writepage move it from filecache to swapcache beneath us: + * an unlikely race, but we do need to retry for page_head->mapping. + */ if (!page_head->mapping) { + int shmem_swizzled = PageSwapCache(page_head); unlock_page(page_head); put_page(page_head); - /* - * ZERO_PAGE pages don't have a mapping. Avoid a busy loop - * trying to find one. RW mapping would have COW'd (and thus - * have a mapping) so this page is RO and won't ever change. - */ - if ((page_head == ZERO_PAGE(address))) - return -EFAULT; - goto again; + if (shmem_swizzled) + goto again; + return -EFAULT; } /* @@ -350,7 +364,7 @@ again: } else { key->both.offset |= FUT_OFF_INODE; /* inode-based key */ key->shared.inode = page_head->mapping->host; - key->shared.pgoff = page_head->index; + key->shared.pgoff = basepage_index(page); } get_futex_key_refs(key); @@ -703,7 +717,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, struct futex_pi_state **ps, struct task_struct *task, int set_waiters) { - int lock_taken, ret, ownerdied = 0; + int lock_taken, ret, force_take = 0; u32 uval, newval, curval, vpid = task_pid_vnr(task); retry: @@ -742,17 +756,15 @@ retry: newval = curval | FUTEX_WAITERS; /* - * There are two cases, where a futex might have no owner (the - * owner TID is 0): OWNER_DIED. We take over the futex in this - * case. We also do an unconditional take over, when the owner - * of the futex died. - * - * This is safe as we are protected by the hash bucket lock ! + * Should we force take the futex? See below. */ - if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { - /* Keep the OWNER_DIED bit */ + if (unlikely(force_take)) { + /* + * Keep the OWNER_DIED and the WAITERS bit and set the + * new TID value. + */ newval = (curval & ~FUTEX_TID_MASK) | vpid; - ownerdied = 0; + force_take = 0; lock_taken = 1; } @@ -762,7 +774,7 @@ retry: goto retry; /* - * We took the lock due to owner died take over. + * We took the lock due to forced take over. */ if (unlikely(lock_taken)) return 1; @@ -777,20 +789,25 @@ retry: switch (ret) { case -ESRCH: /* - * No owner found for this futex. Check if the - * OWNER_DIED bit is set to figure out whether - * this is a robust futex or not. + * We failed to find an owner for this + * futex. So we have no pi_state to block + * on. This can happen in two cases: + * + * 1) The owner died + * 2) A stale FUTEX_WAITERS bit + * + * Re-read the futex value. */ if (get_futex_value_locked(&curval, uaddr)) return -EFAULT; /* - * We simply start over in case of a robust - * futex. The code above will take the futex - * and return happy. + * If the owner died or we have a stale + * WAITERS bit the owner TID in the user space + * futex is 0. */ - if (curval & FUTEX_OWNER_DIED) { - ownerdied = 1; + if (!(curval & FUTEX_TID_MASK)) { + force_take = 1; goto retry; } default: @@ -827,6 +844,9 @@ static void wake_futex(struct futex_q *q) { struct task_struct *p = q->task; + if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n")) + return; + /* * We set q->lock_ptr = NULL _before_ we wake up the task. If * a non-futex wake up happens on another CPU then the task @@ -1062,6 +1082,10 @@ retry_private: plist_for_each_entry_safe(this, next, head, list) { if (match_futex (&this->key, &key1)) { + if (this->pi_state || this->rt_waiter) { + ret = -EINVAL; + goto out_unlock; + } wake_futex(this); if (++ret >= nr_wake) break; @@ -1074,6 +1098,10 @@ retry_private: op_ret = 0; plist_for_each_entry_safe(this, next, head, list) { if (match_futex (&this->key, &key2)) { + if (this->pi_state || this->rt_waiter) { + ret = -EINVAL; + goto out_unlock; + } wake_futex(this); if (++op_ret >= nr_wake2) break; @@ -1082,6 +1110,7 @@ retry_private: ret += op_ret; } +out_unlock: double_unlock_hb(hb1, hb2); out_put_keys: put_futex_key(&key2); @@ -1371,9 +1400,13 @@ retry_private: /* * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always * be paired with each other and no other futex ops. + * + * We should never be requeueing a futex_q with a pi_state, + * which is awaiting a futex_unlock_pi(). */ if ((requeue_pi && !this->rt_waiter) || - (!requeue_pi && this->rt_waiter)) { + (!requeue_pi && this->rt_waiter) || + this->pi_state) { ret = -EINVAL; break; } @@ -2218,11 +2251,11 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, * @uaddr2: the pi futex we will take prior to returning to user-space * * The caller will wait on uaddr and will be requeued by futex_requeue() to - * uaddr2 which must be PI aware. Normal wakeup will wake on uaddr2 and - * complete the acquisition of the rt_mutex prior to returning to userspace. - * This ensures the rt_mutex maintains an owner when it has waiters; without - * one, the pi logic wouldn't know which task to boost/deboost, if there was a - * need to. + * uaddr2 which must be PI aware and unique from uaddr. Normal wakeup will wake + * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to + * userspace. This ensures the rt_mutex maintains an owner when it has waiters; + * without one, the pi logic would not know which task to boost/deboost, if + * there was a need to. * * We call schedule in futex_wait_queue_me() when we enqueue and return there * via the following: @@ -2259,6 +2292,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, struct futex_q q = futex_q_init; int res, ret; + if (uaddr == uaddr2) + return -EINVAL; + if (!bitset) return -EINVAL; @@ -2330,7 +2366,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, * signal. futex_unlock_pi() will not destroy the lock_ptr nor * the pi_state. */ - WARN_ON(!&q.pi_state); + WARN_ON(!q.pi_state); pi_mutex = &q.pi_state->pi_mutex; ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1); debug_rt_mutex_free_waiter(&rt_waiter); @@ -2357,7 +2393,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, * fault, unlock the rt_mutex and return the fault to userspace. */ if (ret == -EFAULT) { - if (rt_mutex_owner(pi_mutex) == current) + if (pi_mutex && rt_mutex_owner(pi_mutex) == current) rt_mutex_unlock(pi_mutex); } else if (ret == -EINTR) { /* @@ -2431,40 +2467,29 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, { struct robust_list_head __user *head; unsigned long ret; - const struct cred *cred = current_cred(), *pcred; + struct task_struct *p; if (!futex_cmpxchg_enabled) return -ENOSYS; + rcu_read_lock(); + + ret = -ESRCH; if (!pid) - head = current->robust_list; + p = current; else { - struct task_struct *p; - - ret = -ESRCH; - rcu_read_lock(); p = find_task_by_vpid(pid); if (!p) goto err_unlock; - ret = -EPERM; - pcred = __task_cred(p); - /* If victim is in different user_ns, then uids are not - comparable, so we must have CAP_SYS_PTRACE */ - if (cred->user->user_ns != pcred->user->user_ns) { - if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) - goto err_unlock; - goto ok; - } - /* If victim is in same user_ns, then uids are comparable */ - if (cred->euid != pcred->euid && - cred->euid != pcred->uid && - !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) - goto err_unlock; -ok: - head = p->robust_list; - rcu_read_unlock(); } + ret = -EPERM; + if (!ptrace_may_access(p, PTRACE_MODE_READ)) + goto err_unlock; + + head = p->robust_list; + rcu_read_unlock(); + if (put_user(sizeof(*head), len_ptr)) return -EFAULT; return put_user(head, head_ptr); @@ -2628,6 +2653,16 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, return -ENOSYS; } + switch (cmd) { + case FUTEX_LOCK_PI: + case FUTEX_UNLOCK_PI: + case FUTEX_TRYLOCK_PI: + case FUTEX_WAIT_REQUEUE_PI: + case FUTEX_CMP_REQUEUE_PI: + if (!futex_cmpxchg_enabled) + return -ENOSYS; + } + switch (cmd) { case FUTEX_WAIT: val3 = FUTEX_BITSET_MATCH_ANY; @@ -2649,16 +2684,13 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, ret = futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); break; case FUTEX_LOCK_PI: - if (futex_cmpxchg_enabled) - ret = futex_lock_pi(uaddr, flags, val, timeout, 0); + ret = futex_lock_pi(uaddr, flags, val, timeout, 0); break; case FUTEX_UNLOCK_PI: - if (futex_cmpxchg_enabled) - ret = futex_unlock_pi(uaddr, flags); + ret = futex_unlock_pi(uaddr, flags); break; case FUTEX_TRYLOCK_PI: - if (futex_cmpxchg_enabled) - ret = futex_lock_pi(uaddr, flags, 0, timeout, 1); + ret = futex_lock_pi(uaddr, flags, 0, timeout, 1); break; case FUTEX_WAIT_REQUEUE_PI: val3 = FUTEX_BITSET_MATCH_ANY;