s390/qeth: free netdevice when removing a card
[pandora-kernel.git] / mm / swap.c
index 3a442f1..a4b9016 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -21,7 +21,7 @@
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
 #include <linux/init.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/mm_inline.h>
 #include <linux/buffer_head.h> /* for try_to_release_page() */
 #include <linux/percpu_counter.h>
@@ -31,6 +31,7 @@
 #include <linux/backing-dev.h>
 #include <linux/memcontrol.h>
 #include <linux/gfp.h>
+#include <linux/hugetlb.h>
 
 #include "internal.h"
 
@@ -69,7 +70,8 @@ static void __put_compound_page(struct page *page)
 {
        compound_page_dtor *dtor;
 
-       __page_cache_release(page);
+       if (!PageHuge(page))
+               __page_cache_release(page);
        dtor = get_compound_page_dtor(page);
        (*dtor)(page);
 }
@@ -78,42 +80,74 @@ static void put_compound_page(struct page *page)
 {
        if (unlikely(PageTail(page))) {
                /* __split_huge_page_refcount can run under us */
-               struct page *page_head = page->first_page;
-               smp_rmb();
-               /*
-                * If PageTail is still set after smp_rmb() we can be sure
-                * that the page->first_page we read wasn't a dangling pointer.
-                * See __split_huge_page_refcount() smp_wmb().
-                */
-               if (likely(PageTail(page) && get_page_unless_zero(page_head))) {
+               struct page *page_head = compound_trans_head(page);
+
+               if (likely(page != page_head &&
+                          get_page_unless_zero(page_head))) {
                        unsigned long flags;
-                       /*
-                        * Verify that our page_head wasn't converted
-                        * to a a regular page before we got a
-                        * reference on it.
-                        */
-                       if (unlikely(!PageHead(page_head))) {
-                               /* PageHead is cleared after PageTail */
-                               smp_rmb();
-                               VM_BUG_ON(PageTail(page));
-                               goto out_put_head;
+
+                        if (PageHeadHuge(page_head)) {
+                               if (likely(PageTail(page))) {
+                                       /*
+                                        * __split_huge_page_refcount
+                                        * cannot race here.
+                                        */
+                                       VM_BUG_ON(!PageHead(page_head));
+                                       atomic_dec(&page->_mapcount);
+                                       if (put_page_testzero(page_head))
+                                               VM_BUG_ON(1);
+                                       if (put_page_testzero(page_head))
+                                               __put_compound_page(page_head);
+                                       return;
+                               } else {
+                                       /*
+                                        * __split_huge_page_refcount
+                                        * run before us, "page" was a
+                                        * THP tail. The split
+                                        * page_head has been freed
+                                        * and reallocated as slab or
+                                        * hugetlbfs page of smaller
+                                        * order (only possible if
+                                        * reallocated as slab on
+                                        * x86).
+                                        */
+                                       goto skip_lock;
+                               }
                        }
                        /*
-                        * Only run compound_lock on a valid PageHead,
-                        * after having it pinned with
-                        * get_page_unless_zero() above.
+                        * page_head wasn't a dangling pointer but it
+                        * may not be a head page anymore by the time
+                        * we obtain the lock. That is ok as long as it
+                        * can't be freed from under us.
                         */
-                       smp_mb();
-                       /* page_head wasn't a dangling pointer */
                        flags = compound_lock_irqsave(page_head);
                        if (unlikely(!PageTail(page))) {
                                /* __split_huge_page_refcount run before us */
                                compound_unlock_irqrestore(page_head, flags);
                                VM_BUG_ON(PageHead(page_head));
-                       out_put_head:
-                               if (put_page_testzero(page_head))
-                                       __put_single_page(page_head);
-                       out_put_single:
+skip_lock:
+                               if (put_page_testzero(page_head)) {
+                                       /*
+                                        * The head page may have been
+                                        * freed and reallocated as a
+                                        * compound page of smaller
+                                        * order and then freed again.
+                                        * All we know is that it
+                                        * cannot have become: a THP
+                                        * page, a compound page of
+                                        * higher order, a tail page.
+                                        * That is because we still
+                                        * hold the refcount of the
+                                        * split THP tail and
+                                        * page_head was the THP head
+                                        * before the split.
+                                        */
+                                       if (PageHead(page_head))
+                                               __put_compound_page(page_head);
+                                       else
+                                               __put_single_page(page_head);
+                               }
+out_put_single:
                                if (put_page_testzero(page))
                                        __put_single_page(page);
                                return;
@@ -121,16 +155,17 @@ static void put_compound_page(struct page *page)
                        VM_BUG_ON(page_head != page->first_page);
                        /*
                         * We can release the refcount taken by
-                        * get_page_unless_zero now that
-                        * split_huge_page_refcount is blocked on the
-                        * compound_lock.
+                        * get_page_unless_zero() now that
+                        * __split_huge_page_refcount() is blocked on
+                        * the compound_lock.
                         */
                        if (put_page_testzero(page_head))
                                VM_BUG_ON(1);
                        /* __split_huge_page_refcount will wait now */
-                       VM_BUG_ON(atomic_read(&page->_count) <= 0);
-                       atomic_dec(&page->_count);
+                       VM_BUG_ON(page_mapcount(page) <= 0);
+                       atomic_dec(&page->_mapcount);
                        VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
+                       VM_BUG_ON(atomic_read(&page->_count) != 0);
                        compound_unlock_irqrestore(page_head, flags);
                        if (put_page_testzero(page_head)) {
                                if (PageHead(page_head))
@@ -160,6 +195,70 @@ void put_page(struct page *page)
 }
 EXPORT_SYMBOL(put_page);
 
+/*
+ * This function is exported but must not be called by anything other
+ * than get_page(). It implements the slow path of get_page().
+ */
+bool __get_page_tail(struct page *page)
+{
+       /*
+        * This takes care of get_page() if run on a tail page
+        * returned by one of the get_user_pages/follow_page variants.
+        * get_user_pages/follow_page itself doesn't need the compound
+        * lock because it runs __get_page_tail_foll() under the
+        * proper PT lock that already serializes against
+        * split_huge_page().
+        */
+       unsigned long flags;
+       bool got = false;
+       struct page *page_head = compound_trans_head(page);
+
+       if (likely(page != page_head && get_page_unless_zero(page_head))) {
+               /* Ref to put_compound_page() comment. */
+               if (PageHeadHuge(page_head)) {
+                       if (likely(PageTail(page))) {
+                               /*
+                                * This is a hugetlbfs
+                                * page. __split_huge_page_refcount
+                                * cannot race here.
+                                */
+                               VM_BUG_ON(!PageHead(page_head));
+                               __get_page_tail_foll(page, false);
+                               return true;
+                       } else {
+                               /*
+                                * __split_huge_page_refcount run
+                                * before us, "page" was a THP
+                                * tail. The split page_head has been
+                                * freed and reallocated as slab or
+                                * hugetlbfs page of smaller order
+                                * (only possible if reallocated as
+                                * slab on x86).
+                                */
+                               put_page(page_head);
+                               return false;
+                       }
+               }
+               /*
+                * page_head wasn't a dangling pointer but it
+                * may not be a head page anymore by the time
+                * we obtain the lock. That is ok as long as it
+                * can't be freed from under us.
+                */
+               flags = compound_lock_irqsave(page_head);
+               /* here __split_huge_page_refcount won't run anymore */
+               if (likely(PageTail(page))) {
+                       __get_page_tail_foll(page, false);
+                       got = true;
+               }
+               compound_unlock_irqrestore(page_head, flags);
+               if (unlikely(!got))
+                       put_page(page_head);
+       }
+       return got;
+}
+EXPORT_SYMBOL(__get_page_tail);
+
 /**
  * put_pages_list() - release a list of pages
  * @pages: list of pages threaded on page->lru
@@ -644,7 +743,7 @@ void lru_add_page_tail(struct zone* zone,
        VM_BUG_ON(!PageHead(page));
        VM_BUG_ON(PageCompound(page_tail));
        VM_BUG_ON(PageLRU(page_tail));
-       VM_BUG_ON(!spin_is_locked(&zone->lru_lock));
+       VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&zone->lru_lock));
 
        SetPageLRU(page_tail);