Revert "mm: batch activate_page() to reduce lock contention"
[pandora-kernel.git] / mm / swap.c
index 3f48542..ab498ea 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -56,17 +56,97 @@ static void __page_cache_release(struct page *page)
                del_page_from_lru(zone, page);
                spin_unlock_irqrestore(&zone->lru_lock, flags);
        }
+}
+
+static void __put_single_page(struct page *page)
+{
+       __page_cache_release(page);
        free_hot_cold_page(page, 0);
 }
 
-static void put_compound_page(struct page *page)
+static void __put_compound_page(struct page *page)
 {
-       page = compound_head(page);
-       if (put_page_testzero(page)) {
-               compound_page_dtor *dtor;
+       compound_page_dtor *dtor;
+
+       __page_cache_release(page);
+       dtor = get_compound_page_dtor(page);
+       (*dtor)(page);
+}
 
-               dtor = get_compound_page_dtor(page);
-               (*dtor)(page);
+static void put_compound_page(struct page *page)
+{
+       if (unlikely(PageTail(page))) {
+               /* __split_huge_page_refcount can run under us */
+               struct page *page_head = page->first_page;
+               smp_rmb();
+               /*
+                * If PageTail is still set after smp_rmb() we can be sure
+                * that the page->first_page we read wasn't a dangling pointer.
+                * See __split_huge_page_refcount() smp_wmb().
+                */
+               if (likely(PageTail(page) && get_page_unless_zero(page_head))) {
+                       unsigned long flags;
+                       /*
+                        * Verify that our page_head wasn't converted
+                        * to a a regular page before we got a
+                        * reference on it.
+                        */
+                       if (unlikely(!PageHead(page_head))) {
+                               /* PageHead is cleared after PageTail */
+                               smp_rmb();
+                               VM_BUG_ON(PageTail(page));
+                               goto out_put_head;
+                       }
+                       /*
+                        * Only run compound_lock on a valid PageHead,
+                        * after having it pinned with
+                        * get_page_unless_zero() above.
+                        */
+                       smp_mb();
+                       /* page_head wasn't a dangling pointer */
+                       flags = compound_lock_irqsave(page_head);
+                       if (unlikely(!PageTail(page))) {
+                               /* __split_huge_page_refcount run before us */
+                               compound_unlock_irqrestore(page_head, flags);
+                               VM_BUG_ON(PageHead(page_head));
+                       out_put_head:
+                               if (put_page_testzero(page_head))
+                                       __put_single_page(page_head);
+                       out_put_single:
+                               if (put_page_testzero(page))
+                                       __put_single_page(page);
+                               return;
+                       }
+                       VM_BUG_ON(page_head != page->first_page);
+                       /*
+                        * We can release the refcount taken by
+                        * get_page_unless_zero now that
+                        * split_huge_page_refcount is blocked on the
+                        * compound_lock.
+                        */
+                       if (put_page_testzero(page_head))
+                               VM_BUG_ON(1);
+                       /* __split_huge_page_refcount will wait now */
+                       VM_BUG_ON(atomic_read(&page->_count) <= 0);
+                       atomic_dec(&page->_count);
+                       VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
+                       compound_unlock_irqrestore(page_head, flags);
+                       if (put_page_testzero(page_head)) {
+                               if (PageHead(page_head))
+                                       __put_compound_page(page_head);
+                               else
+                                       __put_single_page(page_head);
+                       }
+               } else {
+                       /* page_head is a dangling pointer */
+                       VM_BUG_ON(PageTail(page));
+                       goto out_put_single;
+               }
+       } else if (put_page_testzero(page)) {
+               if (PageHead(page))
+                       __put_compound_page(page);
+               else
+                       __put_single_page(page);
        }
 }
 
@@ -75,7 +155,7 @@ void put_page(struct page *page)
        if (unlikely(PageCompound(page)))
                put_compound_page(page);
        else if (put_page_testzero(page))
-               __page_cache_release(page);
+               __put_single_page(page);
 }
 EXPORT_SYMBOL(put_page);
 
@@ -98,15 +178,13 @@ void put_pages_list(struct list_head *pages)
 }
 EXPORT_SYMBOL(put_pages_list);
 
-/*
- * pagevec_move_tail() must be called with IRQ disabled.
- * Otherwise this may cause nasty races.
- */
-static void pagevec_move_tail(struct pagevec *pvec)
+static void pagevec_lru_move_fn(struct pagevec *pvec,
+                               void (*move_fn)(struct page *page, void *arg),
+                               void *arg)
 {
        int i;
-       int pgmoved = 0;
        struct zone *zone = NULL;
+       unsigned long flags = 0;
 
        for (i = 0; i < pagevec_count(pvec); i++) {
                struct page *page = pvec->pages[i];
@@ -114,29 +192,49 @@ static void pagevec_move_tail(struct pagevec *pvec)
 
                if (pagezone != zone) {
                        if (zone)
-                               spin_unlock(&zone->lru_lock);
+                               spin_unlock_irqrestore(&zone->lru_lock, flags);
                        zone = pagezone;
-                       spin_lock(&zone->lru_lock);
-               }
-               if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
-                       int lru = page_lru_base_type(page);
-                       list_move_tail(&page->lru, &zone->lru[lru].list);
-                       pgmoved++;
+                       spin_lock_irqsave(&zone->lru_lock, flags);
                }
+
+               (*move_fn)(page, arg);
        }
        if (zone)
-               spin_unlock(&zone->lru_lock);
-       __count_vm_events(PGROTATED, pgmoved);
-       release_pages(pvec->pages, pvec->nr, pvec->cold);
+               spin_unlock_irqrestore(&zone->lru_lock, flags);
+       release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
        pagevec_reinit(pvec);
 }
 
+static void pagevec_move_tail_fn(struct page *page, void *arg)
+{
+       int *pgmoved = arg;
+       struct zone *zone = page_zone(page);
+
+       if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+               int lru = page_lru_base_type(page);
+               list_move_tail(&page->lru, &zone->lru[lru].list);
+               (*pgmoved)++;
+       }
+}
+
+/*
+ * pagevec_move_tail() must be called with IRQ disabled.
+ * Otherwise this may cause nasty races.
+ */
+static void pagevec_move_tail(struct pagevec *pvec)
+{
+       int pgmoved = 0;
+
+       pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &pgmoved);
+       __count_vm_events(PGROTATED, pgmoved);
+}
+
 /*
  * Writeback is about to end against a page which has been marked for immediate
  * reclaim.  If it still appears to be reclaimable, move it to the tail of the
  * inactive list.
  */
-void  rotate_reclaimable_page(struct page *page)
+void rotate_reclaimable_page(struct page *page)
 {
        if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
            !PageUnevictable(page) && PageLRU(page)) {
@@ -399,44 +497,70 @@ void __pagevec_release(struct pagevec *pvec)
 
 EXPORT_SYMBOL(__pagevec_release);
 
+/* used by __split_huge_page_refcount() */
+void lru_add_page_tail(struct zone* zone,
+                      struct page *page, struct page *page_tail)
+{
+       int active;
+       enum lru_list lru;
+       const int file = 0;
+       struct list_head *head;
+
+       VM_BUG_ON(!PageHead(page));
+       VM_BUG_ON(PageCompound(page_tail));
+       VM_BUG_ON(PageLRU(page_tail));
+       VM_BUG_ON(!spin_is_locked(&zone->lru_lock));
+
+       SetPageLRU(page_tail);
+
+       if (page_evictable(page_tail, NULL)) {
+               if (PageActive(page)) {
+                       SetPageActive(page_tail);
+                       active = 1;
+                       lru = LRU_ACTIVE_ANON;
+               } else {
+                       active = 0;
+                       lru = LRU_INACTIVE_ANON;
+               }
+               update_page_reclaim_stat(zone, page_tail, file, active);
+               if (likely(PageLRU(page)))
+                       head = page->lru.prev;
+               else
+                       head = &zone->lru[lru].list;
+               __add_page_to_lru_list(zone, page_tail, lru, head);
+       } else {
+               SetPageUnevictable(page_tail);
+               add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE);
+       }
+}
+
+static void ____pagevec_lru_add_fn(struct page *page, void *arg)
+{
+       enum lru_list lru = (enum lru_list)arg;
+       struct zone *zone = page_zone(page);
+       int file = is_file_lru(lru);
+       int active = is_active_lru(lru);
+
+       VM_BUG_ON(PageActive(page));
+       VM_BUG_ON(PageUnevictable(page));
+       VM_BUG_ON(PageLRU(page));
+
+       SetPageLRU(page);
+       if (active)
+               SetPageActive(page);
+       update_page_reclaim_stat(zone, page, file, active);
+       add_page_to_lru_list(zone, page, lru);
+}
+
 /*
  * Add the passed pages to the LRU, then drop the caller's refcount
  * on them.  Reinitialises the caller's pagevec.
  */
 void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
 {
-       int i;
-       struct zone *zone = NULL;
-
        VM_BUG_ON(is_unevictable_lru(lru));
 
-       for (i = 0; i < pagevec_count(pvec); i++) {
-               struct page *page = pvec->pages[i];
-               struct zone *pagezone = page_zone(page);
-               int file;
-               int active;
-
-               if (pagezone != zone) {
-                       if (zone)
-                               spin_unlock_irq(&zone->lru_lock);
-                       zone = pagezone;
-                       spin_lock_irq(&zone->lru_lock);
-               }
-               VM_BUG_ON(PageActive(page));
-               VM_BUG_ON(PageUnevictable(page));
-               VM_BUG_ON(PageLRU(page));
-               SetPageLRU(page);
-               active = is_active_lru(lru);
-               file = is_file_lru(lru);
-               if (active)
-                       SetPageActive(page);
-               update_page_reclaim_stat(zone, page, file, active);
-               add_page_to_lru_list(zone, page, lru);
-       }
-       if (zone)
-               spin_unlock_irq(&zone->lru_lock);
-       release_pages(pvec->pages, pvec->nr, pvec->cold);
-       pagevec_reinit(pvec);
+       pagevec_lru_move_fn(pvec, ____pagevec_lru_add_fn, (void *)lru);
 }
 
 EXPORT_SYMBOL(____pagevec_lru_add);