mm/rmap: use rmap_walk() in try_to_unmap()
[pandora-kernel.git] / mm / rmap.c
index 068522d..b3263cb 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1179,15 +1179,18 @@ out:
 /*
  * Subfunctions of try_to_unmap: try_to_unmap_one called
  * repeatedly from try_to_unmap_ksm, try_to_unmap_anon or try_to_unmap_file.
+ *
+ * @arg: the caller's enum ttu_flags value, cast to void *
  */
 int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
-                    unsigned long address, enum ttu_flags flags)
+                    unsigned long address, void *arg)
 {
        struct mm_struct *mm = vma->vm_mm;
        pte_t *pte;
        pte_t pteval;
        spinlock_t *ptl;
        int ret = SWAP_AGAIN;
+       enum ttu_flags flags = (enum ttu_flags)arg;
 
        pte = page_check_address(page, mm, address, &ptl, 0);
        if (!pte)
@@ -1426,6 +1429,79 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
        return ret;
 }
 
+static int try_to_unmap_nonlinear(struct page *page,
+               struct address_space *mapping, struct vm_area_struct *vma)
+{
+       int ret = SWAP_AGAIN;   /* becomes SWAP_MLOCK if any cluster reports it */
+       unsigned long cursor;
+       unsigned long max_nl_cursor = 0;        /* largest saved per-vma cursor */
+       unsigned long max_nl_size = 0;  /* largest vm_end - vm_start seen */
+       unsigned int mapcount;  /* effort budget; the walk stops once <= 0 */
+
+       list_for_each_entry(vma,        /* incoming @vma value is overwritten */
+               &mapping->i_mmap_nonlinear, shared.nonlinear) {
+
+               cursor = (unsigned long) vma->vm_private_data;
+               if (cursor > max_nl_cursor)
+                       max_nl_cursor = cursor;
+               cursor = vma->vm_end - vma->vm_start;
+               if (cursor > max_nl_size)
+                       max_nl_size = cursor;
+       }
+
+       if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */
+               return SWAP_FAIL;
+       }
+
+       /*
+        * We don't try to search for this page in the nonlinear vmas,
+        * and page_referenced wouldn't have found it anyway.  Instead
+        * just walk the nonlinear vmas trying to age and unmap some.
+        * The mapcount of the page we came in with is irrelevant,
+        * but even so use it as a guide to how hard we should try?
+        */
+       mapcount = page_mapcount(page);
+       if (!mapcount)
+               return ret;     /* still SWAP_AGAIN here */
+
+       cond_resched();
+
+       max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
+       if (max_nl_cursor == 0)
+               max_nl_cursor = CLUSTER_SIZE;
+
+       do {
+               list_for_each_entry(vma,
+                       &mapping->i_mmap_nonlinear, shared.nonlinear) {
+
+                       cursor = (unsigned long) vma->vm_private_data;
+                       while (cursor < max_nl_cursor &&
+                               cursor < vma->vm_end - vma->vm_start) {
+                               if (try_to_unmap_cluster(cursor, &mapcount,
+                                               vma, page) == SWAP_MLOCK)
+                                       ret = SWAP_MLOCK;
+                               cursor += CLUSTER_SIZE;
+                               vma->vm_private_data = (void *) cursor;
+                               if ((int)mapcount <= 0)
+                                       return ret;     /* budget spent */
+                       }
+                       vma->vm_private_data = (void *) max_nl_cursor;
+               }
+               cond_resched();
+               max_nl_cursor += CLUSTER_SIZE;
+       } while (max_nl_cursor <= max_nl_size);
+
+       /*
+        * Don't loop forever (perhaps all the remaining pages are
+        * in locked vmas).  Reset cursor on all unreserved nonlinear
+        * vmas, now forgetting on which ones it had fallen behind.
+        */
+       list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
+               vma->vm_private_data = NULL;
+
+       return ret;
+}
+
 bool is_vma_temporary_stack(struct vm_area_struct *vma)
 {
        int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
@@ -1440,6 +1516,11 @@ bool is_vma_temporary_stack(struct vm_area_struct *vma)
        return false;
 }
 
+static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
+{
+       return is_vma_temporary_stack(vma);     /* @arg is not consulted */
+}
+
 /**
  * try_to_unmap_anon - unmap or unlock anonymous page using the object-based
  * rmap method
@@ -1485,7 +1566,7 @@ static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
                        continue;
 
                address = vma_address(page, vma);
-               ret = try_to_unmap_one(page, vma, address, flags);
+               ret = try_to_unmap_one(page, vma, address, (void *)flags);
                if (ret != SWAP_AGAIN || !page_mapped(page))
                        break;
        }
@@ -1512,21 +1593,14 @@ static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
 static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 {
        struct address_space *mapping = page->mapping;
-       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+       pgoff_t pgoff = page->index << compound_order(page);
        struct vm_area_struct *vma;
        int ret = SWAP_AGAIN;
-       unsigned long cursor;
-       unsigned long max_nl_cursor = 0;
-       unsigned long max_nl_size = 0;
-       unsigned int mapcount;
-
-       if (PageHuge(page))
-               pgoff = page->index << compound_order(page);
 
        mutex_lock(&mapping->i_mmap_mutex);
        vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
                unsigned long address = vma_address(page, vma);
-               ret = try_to_unmap_one(page, vma, address, flags);
+               ret = try_to_unmap_one(page, vma, address, (void *)flags);
                if (ret != SWAP_AGAIN || !page_mapped(page))
                        goto out;
        }
@@ -1542,69 +1616,17 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
        if (TTU_ACTION(flags) == TTU_MUNLOCK)
                goto out;
 
-       list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
-                                                       shared.nonlinear) {
-               cursor = (unsigned long) vma->vm_private_data;
-               if (cursor > max_nl_cursor)
-                       max_nl_cursor = cursor;
-               cursor = vma->vm_end - vma->vm_start;
-               if (cursor > max_nl_size)
-                       max_nl_size = cursor;
-       }
-
-       if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */
-               ret = SWAP_FAIL;
-               goto out;
-       }
-
-       /*
-        * We don't try to search for this page in the nonlinear vmas,
-        * and page_referenced wouldn't have found it anyway.  Instead
-        * just walk the nonlinear vmas trying to age and unmap some.
-        * The mapcount of the page we came in with is irrelevant,
-        * but even so use it as a guide to how hard we should try?
-        */
-       mapcount = page_mapcount(page);
-       if (!mapcount)
-               goto out;
-       cond_resched();
-
-       max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
-       if (max_nl_cursor == 0)
-               max_nl_cursor = CLUSTER_SIZE;
-
-       do {
-               list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
-                                                       shared.nonlinear) {
-                       cursor = (unsigned long) vma->vm_private_data;
-                       while ( cursor < max_nl_cursor &&
-                               cursor < vma->vm_end - vma->vm_start) {
-                               if (try_to_unmap_cluster(cursor, &mapcount,
-                                               vma, page) == SWAP_MLOCK)
-                                       ret = SWAP_MLOCK;
-                               cursor += CLUSTER_SIZE;
-                               vma->vm_private_data = (void *) cursor;
-                               if ((int)mapcount <= 0)
-                                       goto out;
-                       }
-                       vma->vm_private_data = (void *) max_nl_cursor;
-               }
-               cond_resched();
-               max_nl_cursor += CLUSTER_SIZE;
-       } while (max_nl_cursor <= max_nl_size);
-
-       /*
-        * Don't loop forever (perhaps all the remaining pages are
-        * in locked vmas).  Reset cursor on all unreserved nonlinear
-        * vmas, now forgetting on which ones it had fallen behind.
-        */
-       list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
-               vma->vm_private_data = NULL;
+       ret = try_to_unmap_nonlinear(page, mapping, vma);
 out:
        mutex_unlock(&mapping->i_mmap_mutex);
        return ret;
 }
 
+static int page_not_mapped(struct page *page)
+{
+       return !page_mapped(page);      /* 1 iff page_mapped(@page) is false */
+}
+
 /**
  * try_to_unmap - try to remove all page table mappings to a page
  * @page: the page to get unmapped
@@ -1622,16 +1644,29 @@ out:
 int try_to_unmap(struct page *page, enum ttu_flags flags)
 {
        int ret;
+       struct rmap_walk_control rwc = {
+               .rmap_one = try_to_unmap_one,
+               .arg = (void *)flags,
+               .done = page_not_mapped,
+               .file_nonlinear = try_to_unmap_nonlinear,
+               .anon_lock = page_lock_anon_vma_read,
+       };
 
-       BUG_ON(!PageLocked(page));
        VM_BUG_ON(!PageHuge(page) && PageTransHuge(page));
 
-       if (unlikely(PageKsm(page)))
-               ret = try_to_unmap_ksm(page, flags);
-       else if (PageAnon(page))
-               ret = try_to_unmap_anon(page, flags);
-       else
-               ret = try_to_unmap_file(page, flags);
+       /*
+        * During exec, a temporary VMA is setup and later moved.
+        * The VMA is moved under the anon_vma lock but not the
+        * page tables leading to a race where migration cannot
+        * find the migration ptes. Rather than increasing the
+        * locking requirements of exec(), migration skips
+        * temporary VMAs until after exec() completes.
+        */
+       if (flags & TTU_MIGRATION && !PageKsm(page) && PageAnon(page))
+               rwc.invalid_vma = invalid_migration_vma;
+
+       ret = rmap_walk(page, &rwc);
+
        if (ret != SWAP_MLOCK && !page_mapped(page))
                ret = SWAP_SUCCESS;
        return ret;
@@ -1674,18 +1709,13 @@ void __put_anon_vma(struct anon_vma *anon_vma)
        anon_vma_free(anon_vma);
 }
 
-#ifdef CONFIG_MIGRATION
-/*
- * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
- * Called by migrate.c to remove migration ptes, but might be used more later.
- */
-static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
-               struct vm_area_struct *, unsigned long, void *), void *arg)
+static struct anon_vma *rmap_walk_anon_lock(struct page *page,
+                                       struct rmap_walk_control *rwc)
 {
        struct anon_vma *anon_vma;
-       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-       struct anon_vma_chain *avc;
-       int ret = SWAP_AGAIN;
+
+       if (rwc->anon_lock)
+               return rwc->anon_lock(page);
 
        /*
         * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
@@ -1695,24 +1725,48 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
         */
        anon_vma = page_anon_vma(page);
        if (!anon_vma)
-               return ret;
+               return NULL;
+
        anon_vma_lock_read(anon_vma);
+       return anon_vma;
+}
+
+/*
+ * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
+ * Called by migrate.c to remove migration ptes, and by try_to_unmap().
+ */
+static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc)
+{
+       struct anon_vma *anon_vma;
+       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+       struct anon_vma_chain *avc;
+       int ret = SWAP_AGAIN;   /* returned as-is if no vma is visited */
+
+       anon_vma = rmap_walk_anon_lock(page, rwc);
+       if (!anon_vma)
+               return ret;     /* nothing to walk */
+
        anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
                struct vm_area_struct *vma = avc->vma;
                unsigned long address = vma_address(page, vma);
-               ret = rmap_one(page, vma, address, arg);
+
+               if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
+                       continue;       /* optional filter rejected this vma */
+
+               ret = rwc->rmap_one(page, vma, address, rwc->arg);
                if (ret != SWAP_AGAIN)
                        break;
+               if (rwc->done && rwc->done(page))
+                       break;  /* optional early-exit hook fired */
        }
        anon_vma_unlock_read(anon_vma);
        return ret;
 }
 
-static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
-               struct vm_area_struct *, unsigned long, void *), void *arg)
+static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
 {
        struct address_space *mapping = page->mapping;
-       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+       pgoff_t pgoff = page->index << compound_order(page);
        struct vm_area_struct *vma;
        int ret = SWAP_AGAIN;
 
@@ -1721,32 +1775,41 @@ static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
        mutex_lock(&mapping->i_mmap_mutex);
        vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
                unsigned long address = vma_address(page, vma);
-               ret = rmap_one(page, vma, address, arg);
+
+               if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
+                       continue;
+
+               ret = rwc->rmap_one(page, vma, address, rwc->arg);
                if (ret != SWAP_AGAIN)
-                       break;
+                       goto done;
+               if (rwc->done && rwc->done(page))
+                       goto done;
        }
-       /*
-        * No nonlinear handling: being always shared, nonlinear vmas
-        * never contain migration ptes.  Decide what to do about this
-        * limitation to linear when we need rmap_walk() on nonlinear.
-        */
+
+       if (!rwc->file_nonlinear)
+               goto done;
+
+       if (list_empty(&mapping->i_mmap_nonlinear))
+               goto done;
+
+       ret = rwc->file_nonlinear(page, mapping, vma);
+
+done:
        mutex_unlock(&mapping->i_mmap_mutex);
        return ret;
 }
 
-int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
-               struct vm_area_struct *, unsigned long, void *), void *arg)
+int rmap_walk(struct page *page, struct rmap_walk_control *rwc)
 {
        VM_BUG_ON(!PageLocked(page));
 
        if (unlikely(PageKsm(page)))
-               return rmap_walk_ksm(page, rmap_one, arg);
+               return rmap_walk_ksm(page, rwc);        /* KSM page */
        else if (PageAnon(page))
-               return rmap_walk_anon(page, rmap_one, arg);
+               return rmap_walk_anon(page, rwc);       /* anonymous page */
        else
-               return rmap_walk_file(page, rmap_one, arg);
+               return rmap_walk_file(page, rwc);       /* all other pages */
 }
-#endif /* CONFIG_MIGRATION */
 
 #ifdef CONFIG_HUGETLB_PAGE
 /*