[PATCH] mm: migration page refcounting fix

author Nick Piggin <npiggin@suse.de>

Thu, 19 Jan 2006 01:42:27 +0000 (17:42 -0800)

committer Linus Torvalds <torvalds@g5.osdl.org>

Thu, 19 Jan 2006 03:20:17 +0000 (19:20 -0800)
author Nick Piggin <npiggin@suse.de>
Thu, 19 Jan 2006 01:42:27 +0000 (17:42 -0800)
committer Linus Torvalds <torvalds@g5.osdl.org>
Thu, 19 Jan 2006 03:20:17 +0000 (19:20 -0800)
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h

index 49cc68a..8ac854f 100644 (file)
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -39,24 +39,3 @@ del_page_from_lru(struct zone *zone, struct page *page)
         }
  }
  
-/*
- * Isolate one page from the LRU lists.
- *
- * - zone->lru_lock must be held
- */
-static inline int __isolate_lru_page(struct page *page)
-{
-       if (unlikely(!TestClearPageLRU(page)))
-               return 0;
-
-       if (get_page_testone(page)) {
-               /*
-                * It is being freed elsewhere
-                */
-               __put_page(page);
-               SetPageLRU(page);
-               return -ENOENT;
-       }
-
-       return 1;
-}
diff --git a/include/linux/swap.h b/include/linux/swap.h

index e92054d..d01f7ef 100644 (file)
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -167,6 +167,7 @@ extern void FASTCALL(lru_cache_add_active(struct page *));
  extern void FASTCALL(activate_page(struct page *));
  extern void FASTCALL(mark_page_accessed(struct page *));
  extern void lru_add_drain(void);
+extern int lru_add_drain_all(void);
  extern int rotate_reclaimable_page(struct page *page);
  extern void swap_setup(void);
  
diff --git a/mm/filemap.c b/mm/filemap.c

index a965b6b..44da3d4 100644 (file)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -94,6 +94,7 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
   *    ->private_lock           (try_to_unmap_one)
   *    ->tree_lock              (try_to_unmap_one)
   *    ->zone.lru_lock          (follow_page->mark_page_accessed)
+ *    ->zone.lru_lock          (check_pte_range->isolate_lru_page)
   *    ->private_lock           (page_remove_rmap->set_page_dirty)
   *    ->tree_lock              (page_remove_rmap->set_page_dirty)
   *    ->inode_lock             (page_remove_rmap->set_page_dirty)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c

index 3171f88..551cde4 100644 (file)
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -208,6 +208,17 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                 page = vm_normal_page(vma, addr, *pte);
                 if (!page)
                         continue;
+               /*
+                * The check for PageReserved here is important to avoid
+                * handling zero pages and other pages that may have been
+                * marked special by the system.
+                *
+                * If the PageReserved would not be checked here then f.e.
+                * the location of the zero page could have an influence
+                * on MPOL_MF_STRICT, zero pages would be counted for
+                * the per node stats, and there would be useless attempts
+                * to put zero pages on the migration list.
+                */
                 if (PageReserved(page))
                         continue;
                 nid = page_to_nid(page);
@@ -216,11 +227,8 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
  
                 if (flags & MPOL_MF_STATS)
                         gather_stats(page, private);
-               else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
-                       spin_unlock(ptl);
+               else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
                         migrate_page_add(vma, page, private, flags);
-                       spin_lock(ptl);
-               }
                 else
                         break;
         } while (pte++, addr += PAGE_SIZE, addr != end);
@@ -309,6 +317,10 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
         int err;
         struct vm_area_struct *first, *vma, *prev;
  
+       /* Clear the LRU lists so pages can be isolated */
+       if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+               lru_add_drain_all();
+
         first = find_vma(mm, start);
         if (!first)
                 return ERR_PTR(-EFAULT);
@@ -555,15 +567,8 @@ static void migrate_page_add(struct vm_area_struct *vma,
         if ((flags & MPOL_MF_MOVE_ALL) || !page->mapping || PageAnon(page) ||
             mapping_writably_mapped(page->mapping) ||
             single_mm_mapping(vma->vm_mm, page->mapping)) {
-               int rc = isolate_lru_page(page);
-
-               if (rc == 1)
+               if (isolate_lru_page(page))
                         list_add(&page->lru, pagelist);
-               /*
-                * If the isolate attempt was not successful then we just
-                * encountered an unswappable page. Something must be wrong.
-                */
-               WARN_ON(rc == 0);
         }
  }
  
diff --git a/mm/rmap.c b/mm/rmap.c

index dfbb89f..d85a99d 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -33,7 +33,7 @@
   *     mapping->i_mmap_lock
   *       anon_vma->lock
   *         mm->page_table_lock or pte_lock
- *           zone->lru_lock (in mark_page_accessed)
+ *           zone->lru_lock (in mark_page_accessed, isolate_lru_page)
   *           swap_lock (in swap_duplicate, swap_info_get)
   *             mmlist_lock (in mmput, drain_mmlist and others)
   *             mapping->private_lock (in __set_page_dirty_buffers)
diff --git a/mm/swap.c b/mm/swap.c

index cbb48e7..bc2442a 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -174,6 +174,32 @@ void lru_add_drain(void)
         put_cpu();
  }
  
+#ifdef CONFIG_NUMA
+static void lru_add_drain_per_cpu(void *dummy)
+{
+       lru_add_drain();
+}
+
+/*
+ * Returns 0 for success
+ */
+int lru_add_drain_all(void)
+{
+       return schedule_on_each_cpu(lru_add_drain_per_cpu, NULL);
+}
+
+#else
+
+/*
+ * Returns 0 for success
+ */
+int lru_add_drain_all(void)
+{
+       lru_add_drain();
+       return 0;
+}
+#endif
+
  /*
   * This path almost never happens for VM activity - pages are normally
   * freed via pagevecs.  But it gets used by networking.
diff --git a/mm/vmscan.c b/mm/vmscan.c

index bf903b2..827bf67 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -586,7 +586,7 @@ static inline void move_to_lru(struct page *page)
  }
  
  /*
- * Add isolated pages on the list back to the LRU
+ * Add isolated pages on the list back to the LRU.
   *
   * returns the number of pages put back.
   */
@@ -760,46 +760,33 @@ next:
         return nr_failed + retry;
  }
  
-static void lru_add_drain_per_cpu(void *dummy)
-{
-       lru_add_drain();
-}
-
  /*
   * Isolate one page from the LRU lists and put it on the
- * indicated list. Do necessary cache draining if the
- * page is not on the LRU lists yet.
+ * indicated list with elevated refcount.
   *
   * Result:
   *  0 = page not on LRU list
   *  1 = page removed from LRU list and added to the specified list.
- * -ENOENT = page is being freed elsewhere.
   */
  int isolate_lru_page(struct page *page)
  {
-       int rc = 0;
-       struct zone *zone = page_zone(page);
+       int ret = 0;
  
-redo:
-       spin_lock_irq(&zone->lru_lock);
-       rc = __isolate_lru_page(page);
-       if (rc == 1) {
-               if (PageActive(page))
-                       del_page_from_active_list(zone, page);
-               else
-                       del_page_from_inactive_list(zone, page);
-       }
-       spin_unlock_irq(&zone->lru_lock);
-       if (rc == 0) {
-               /*
-                * Maybe this page is still waiting for a cpu to drain it
-                * from one of the lru lists?
-                */
-               rc = schedule_on_each_cpu(lru_add_drain_per_cpu, NULL);
-               if (rc == 0 && PageLRU(page))
-                       goto redo;
+       if (PageLRU(page)) {
+               struct zone *zone = page_zone(page);
+               spin_lock_irq(&zone->lru_lock);
+               if (TestClearPageLRU(page)) {
+                       ret = 1;
+                       get_page(page);
+                       if (PageActive(page))
+                               del_page_from_active_list(zone, page);
+                       else
+                               del_page_from_inactive_list(zone, page);
+               }
+               spin_unlock_irq(&zone->lru_lock);
         }
-       return rc;
+
+       return ret;
  }
  #endif
  
@@ -831,18 +818,20 @@ static int isolate_lru_pages(int nr_to_scan, struct list_head *src,
                 page = lru_to_page(src);
                 prefetchw_prev_lru_page(page, src, flags);
  
-               switch (__isolate_lru_page(page)) {
-               case 1:
-                       /* Succeeded to isolate page */
-                       list_move(&page->lru, dst);
-                       nr_taken++;
-                       break;
-               case -ENOENT:
-                       /* Not possible to isolate */
-                       list_move(&page->lru, src);
-                       break;
-               default:
+               if (!TestClearPageLRU(page))
                         BUG();
+               list_del(&page->lru);
+               if (get_page_testone(page)) {
+                       /*
+                        * It is being freed elsewhere
+                        */
+                       __put_page(page);
+                       SetPageLRU(page);
+                       list_add(&page->lru, src);
+                       continue;
+               } else {
+                       list_add(&page->lru, dst);
+                       nr_taken++;
                 }
         }
author	Nick Piggin <npiggin@suse.de>
	Thu, 19 Jan 2006 01:42:27 +0000 (17:42 -0800)
committer	Linus Torvalds <torvalds@g5.osdl.org>
	Thu, 19 Jan 2006 03:20:17 +0000 (19:20 -0800)
include/linux/mm_inline.h		patch \| blob \| history
include/linux/swap.h		patch \| blob \| history
mm/filemap.c		patch \| blob \| history
mm/mempolicy.c		patch \| blob \| history
mm/rmap.c		patch \| blob \| history
mm/swap.c		patch \| blob \| history
mm/vmscan.c		patch \| blob \| history