epoll: fix race between ep_poll_callback(POLLFREE) and ep_free()/ep_remove()
authorOleg Nesterov <oleg@redhat.com>
Fri, 1 Sep 2017 16:55:33 +0000 (18:55 +0200)
committerBen Hutchings <ben@decadent.org.uk>
Sat, 11 Nov 2017 13:34:37 +0000 (13:34 +0000)
commit 138e4ad67afd5c6c318b056b4d17c17f2c0ca5c0 upstream.

The race was introduced by me in commit 971316f0503a ("epoll:
ep_unregister_pollwait() can use the freed pwq->whead").  I did not
realize that nothing can protect eventpoll after ep_poll_callback() sets
->whead = NULL, only whead->lock can save us from the race with
ep_free() or ep_remove().

Move ->whead = NULL to the end of ep_poll_callback() and add the
necessary barriers.

TODO: cleanup the ewake/EPOLLEXCLUSIVE logic, it was confusing even
before this patch.

Hopefully this explains use-after-free reported by syzcaller:

BUG: KASAN: use-after-free in debug_spin_lock_before
...
 _raw_spin_lock_irqsave+0x4a/0x60 kernel/locking/spinlock.c:159
 ep_poll_callback+0x29f/0xff0 fs/eventpoll.c:1148

this is spin_lock(eventpoll->lock),

...
Freed by task 17774:
...
 kfree+0xe8/0x2c0 mm/slub.c:3883
 ep_free+0x22c/0x2a0 fs/eventpoll.c:865

Fixes: 971316f0503a ("epoll: ep_unregister_pollwait() can use the freed pwq->whead")
Reported-by: 范龙飞 <long7573@126.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[bwh: Backported to 3.2:
 - Use smp_mb() and ACCESS_ONCE() instead of smp_{load_acquire,store_release}()
 - EPOLLEXCLUSIVE is not supported]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
fs/eventpoll.c

index 451b9b8..7903ddb 100644 (file)
@@ -477,8 +477,14 @@ static void ep_remove_wait_queue(struct eppoll_entry *pwq)
        wait_queue_head_t *whead;
 
        rcu_read_lock();
-       /* If it is cleared by POLLFREE, it should be rcu-safe */
-       whead = rcu_dereference(pwq->whead);
+       /*
+        * If it is cleared by POLLFREE, it should be rcu-safe.
+        * If we read NULL we need a barrier paired with
+        * smp_store_release() in ep_poll_callback(), otherwise
+        * we rely on whead->lock.
+        */
+       whead = ACCESS_ONCE(pwq->whead);
+       smp_mb();
        if (whead)
                remove_wait_queue(whead, &pwq->wait);
        rcu_read_unlock();
@@ -859,17 +865,6 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
        struct epitem *epi = ep_item_from_wait(wait);
        struct eventpoll *ep = epi->ep;
 
-       if ((unsigned long)key & POLLFREE) {
-               ep_pwq_from_wait(wait)->whead = NULL;
-               /*
-                * whead = NULL above can race with ep_remove_wait_queue()
-                * which can do another remove_wait_queue() after us, so we
-                * can't use __remove_wait_queue(). whead->lock is held by
-                * the caller.
-                */
-               list_del_init(&wait->task_list);
-       }
-
        spin_lock_irqsave(&ep->lock, flags);
 
        /*
@@ -924,6 +919,23 @@ out_unlock:
        if (pwake)
                ep_poll_safewake(&ep->poll_wait);
 
+       if ((unsigned long)key & POLLFREE) {
+               /*
+                * If we race with ep_remove_wait_queue() it can miss
+                * ->whead = NULL and do another remove_wait_queue() after
+                * us, so we can't use __remove_wait_queue().
+                */
+               list_del_init(&wait->task_list);
+               /*
+                * ->whead != NULL protects us from the race with ep_free()
+                * or ep_remove(), ep_remove_wait_queue() takes whead->lock
+                * held by the caller. Once we nullify it, nothing protects
+                * ep/epi or even wait.
+                */
+               smp_mb();
+               ACCESS_ONCE(ep_pwq_from_wait(wait)->whead) = NULL;
+       }
+
        return 1;
 }