Merge branch 'for-2.6.39/core' of git://git.kernel.dk/linux-2.6-block
diff --git a/mm/filemap.c b/mm/filemap.c
index f9a29c8..04d1992 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
  */
 
 /*
- * Remove a page from the page cache and free it. Caller has to make
+ * Delete a page from the page cache and free it. Caller has to make
  * sure the page is locked and that nobody else uses it - or that usage
  * is safe.  The caller must hold the mapping's tree_lock.
  */
-void __remove_from_page_cache(struct page *page)
+void __delete_from_page_cache(struct page *page)
 {
        struct address_space *mapping = page->mapping;
 
@@ -137,7 +137,15 @@ void __remove_from_page_cache(struct page *page)
        }
 }
 
-void remove_from_page_cache(struct page *page)
+/**
+ * delete_from_page_cache - delete page from page cache
+ * @page: the page which the kernel is trying to remove from page cache
+ *
+ * This must be called only on pages that have been verified to be in the page
+ * cache and locked.  It will never put the page into the free list; the
+ * caller holds a reference on the page.
+ */
+void delete_from_page_cache(struct page *page)
 {
        struct address_space *mapping = page->mapping;
        void (*freepage)(struct page *);
@@ -146,14 +154,15 @@ void remove_from_page_cache(struct page *page)
 
        freepage = mapping->a_ops->freepage;
        spin_lock_irq(&mapping->tree_lock);
-       __remove_from_page_cache(page);
+       __delete_from_page_cache(page);
        spin_unlock_irq(&mapping->tree_lock);
        mem_cgroup_uncharge_cache_page(page);
 
        if (freepage)
                freepage(page);
+       page_cache_release(page);
 }
-EXPORT_SYMBOL(remove_from_page_cache);
+EXPORT_SYMBOL(delete_from_page_cache);
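
A minimal caller sketch of the new contract (hypothetical helper drop_one_page(); find_lock_page() supplies both the page lock and the caller reference the comment above requires). Note that delete_from_page_cache() now drops the pagecache's own reference via the page_cache_release() added above, so the caller releases only the reference it took itself:

#include <linux/pagemap.h>

static void drop_one_page(struct address_space *mapping, pgoff_t index)
{
        struct page *page = find_lock_page(mapping, index);

        if (!page)
                return;
        /* Page is locked, in the cache, and we hold a reference. */
        delete_from_page_cache(page);
        unlock_page(page);
        page_cache_release(page);       /* drop find_lock_page()'s ref */
}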
 
 static int sleep_on_page(void *word)
 {
@@ -356,6 +365,76 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL(filemap_write_and_wait_range);
 
+/**
+ * replace_page_cache_page - replace a pagecache page with a new one
+ * @old:       page to be replaced
+ * @new:       page to replace with
+ * @gfp_mask:  allocation mode
+ *
+ * This function replaces a page in the pagecache with a new one.  On
+ * success it acquires the pagecache reference for the new page and
+ * drops it for the old page.  Both the old and new pages must be
+ * locked.  This function does not add the new page to the LRU, the
+ * caller must do that.
+ *
+ * The remove + add is atomic.  The only way this function can fail is
+ * through memory allocation failure.
+ */
+int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
+{
+       int error;
+       struct mem_cgroup *memcg = NULL;
+
+       VM_BUG_ON(!PageLocked(old));
+       VM_BUG_ON(!PageLocked(new));
+       VM_BUG_ON(new->mapping);
+
+       /*
+        * This is not page migration, but prepare_migration and
+        * end_migration do enough work for charge replacement.
+        *
+        * In the longer term we probably want a specialized function
+        * for moving the charge from old to new in a more efficient
+        * manner.
+        */
+       error = mem_cgroup_prepare_migration(old, new, &memcg, gfp_mask);
+       if (error)
+               return error;
+
+       error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
+       if (!error) {
+               struct address_space *mapping = old->mapping;
+               void (*freepage)(struct page *);
+
+               pgoff_t offset = old->index;
+               freepage = mapping->a_ops->freepage;
+
+               page_cache_get(new);
+               new->mapping = mapping;
+               new->index = offset;
+
+               spin_lock_irq(&mapping->tree_lock);
+               __delete_from_page_cache(old);
+               error = radix_tree_insert(&mapping->page_tree, offset, new);
+               BUG_ON(error);
+               mapping->nrpages++;
+               __inc_zone_page_state(new, NR_FILE_PAGES);
+               if (PageSwapBacked(new))
+                       __inc_zone_page_state(new, NR_SHMEM);
+               spin_unlock_irq(&mapping->tree_lock);
+               radix_tree_preload_end();
+               if (freepage)
+                       freepage(old);
+               page_cache_release(old);
+               mem_cgroup_end_migration(memcg, old, new, true);
+       } else {
+               mem_cgroup_end_migration(memcg, old, new, false);
+       }
+
+       return error;
+}
+EXPORT_SYMBOL_GPL(replace_page_cache_page);
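
A hedged usage sketch (hypothetical steal_into_cache(), modeled loosely on the kind of splice page-stealing this helper enables). Both pages are locked per the contract above, and since replace_page_cache_page() deliberately skips the LRU, the new page is added there afterwards, here with lru_cache_add_file() on the assumption it is a file page:

#include <linux/pagemap.h>
#include <linux/swap.h>         /* lru_cache_add_file() */

static int steal_into_cache(struct page *old, struct page *new)
{
        int err;

        /* Contract: both pages locked, @new not yet in any mapping. */
        lock_page(old);
        lock_page(new);
        err = replace_page_cache_page(old, new, GFP_KERNEL);
        if (!err) {
                /* @new now holds the pagecache ref; put it on the LRU. */
                lru_cache_add_file(new);
        }
        unlock_page(new);
        unlock_page(old);
        return err;             /* 0, or an allocation error */
}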
+
 /**
  * add_to_page_cache_locked - add a locked page to the pagecache
  * @page:      page to add
@@ -566,8 +645,10 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
                __lock_page(page);
                return 1;
        } else {
-               up_read(&mm->mmap_sem);
-               wait_on_page_locked(page);
+               if (!(flags & FAULT_FLAG_RETRY_NOWAIT)) {
+                       up_read(&mm->mmap_sem);
+                       wait_on_page_locked(page);
+               }
                return 0;
        }
 }
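
The new branch makes a 0 return ambiguous about mmap_sem: without FAULT_FLAG_RETRY_NOWAIT it has been dropped and we slept on the page lock; with it, the semaphore is still held and no sleep happened. A minimal sketch of how a fault path consumes this through the lock_page_or_retry() wrapper, in the style of filemap_fault() (hypothetical fault_lock_page()):

#include <linux/mm.h>
#include <linux/pagemap.h>

static int fault_lock_page(struct page *page, struct vm_area_struct *vma,
                           struct vm_fault *vmf)
{
        if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
                /*
                 * Retry the fault.  Unless FAULT_FLAG_RETRY_NOWAIT was
                 * set, mmap_sem has already been released for us.
                 */
                page_cache_release(page);
                return VM_FAULT_RETRY;
        }
        /* Page locked, mmap_sem still held: handle the fault. */
        return 0;
}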
@@ -727,9 +808,13 @@ repeat:
                page = radix_tree_deref_slot((void **)pages[i]);
                if (unlikely(!page))
                        continue;
+
+               /*
+                * This can only trigger when the entry at index 0 moves out
+                * of or back to the root: none yet gotten, safe to restart.
+                */
                if (radix_tree_deref_retry(page)) {
-                       if (ret)
-                               start = pages[ret-1]->index;
+                       WARN_ON(start | i);
                        goto restart;
                }
 
@@ -745,6 +830,13 @@ repeat:
                pages[ret] = page;
                ret++;
        }
+
+       /*
+        * If all entries were removed before we could secure them,
+        * try again, because callers stop trying once 0 is returned.
+        */
+       if (unlikely(!ret && nr_found))
+               goto restart;
        rcu_read_unlock();
        return ret;
 }
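
These gang lookups all follow one lockless RCU pattern: dereference the slot, restart if radix_tree_deref_retry() reports the tree root being reshaped, take a speculative reference, then recheck the slot to make sure the page did not move in between. A single-page sketch of the pattern, essentially this file's find_get_page() of the same era, for reference:

#include <linux/pagemap.h>
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>

static struct page *lookup_one(struct address_space *mapping, pgoff_t index)
{
        struct page *page;
        void **slot;

        rcu_read_lock();
repeat:
        page = NULL;
        slot = radix_tree_lookup_slot(&mapping->page_tree, index);
        if (slot) {
                page = radix_tree_deref_slot(slot);
                if (unlikely(!page))
                        goto out;
                if (radix_tree_deref_retry(page))
                        goto repeat;            /* root reshaped under us */
                if (!page_cache_get_speculative(page))
                        goto repeat;            /* raced with removal */
                /* Recheck: has the page moved since we dereferenced it? */
                if (unlikely(page != *slot)) {
                        page_cache_release(page);
                        goto repeat;
                }
        }
out:
        rcu_read_unlock();
        return page;
}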
@@ -779,6 +871,11 @@ repeat:
                page = radix_tree_deref_slot((void **)pages[i]);
                if (unlikely(!page))
                        continue;
+
+               /*
+                * This can only trigger when the entry at index 0 moves out
+                * of or back to the root: none yet gotten, safe to restart.
+                */
                if (radix_tree_deref_retry(page))
                        goto restart;
 
@@ -839,6 +936,11 @@ repeat:
                page = radix_tree_deref_slot((void **)pages[i]);
                if (unlikely(!page))
                        continue;
+
+               /*
+                * This can only trigger when the entry at index 0 moves out
+                * of or back to the root: none yet gotten, safe to restart.
+                */
                if (radix_tree_deref_retry(page))
                        goto restart;
 
@@ -854,6 +956,13 @@ repeat:
                pages[ret] = page;
                ret++;
        }
+
+       /*
+        * If all entries were removed before we could secure them,
+        * try again, because callers stop trying once 0 is returned.
+        */
+       if (unlikely(!ret && nr_found))
+               goto restart;
        rcu_read_unlock();
 
        if (ret)