Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...

[pandora-kernel.git] / mm / shmem.c
diff --git a/mm/shmem.c b/mm/shmem.c

index 8fa27e4..dfc7069 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -852,7 +852,7 @@ static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_
  
  static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
  {
-       struct inode *inode;
+       struct address_space *mapping;
         unsigned long idx;
         unsigned long size;
         unsigned long limit;
@@ -875,8 +875,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
         if (size > SHMEM_NR_DIRECT)
                 size = SHMEM_NR_DIRECT;
         offset = shmem_find_swp(entry, ptr, ptr+size);
-       if (offset >= 0)
+       if (offset >= 0) {
+               shmem_swp_balance_unmap();
                 goto found;
+       }
         if (!info->i_indirect)
                 goto lost2;
  
@@ -914,11 +916,11 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
                         if (size > ENTRIES_PER_PAGE)
                                 size = ENTRIES_PER_PAGE;
                         offset = shmem_find_swp(entry, ptr, ptr+size);
-                       shmem_swp_unmap(ptr);
                         if (offset >= 0) {
                                 shmem_dir_unmap(dir);
                                 goto found;
                         }
+                       shmem_swp_unmap(ptr);
                 }
         }
  lost1:
@@ -928,8 +930,7 @@ lost2:
         return 0;
  found:
         idx += offset;
-       inode = igrab(&info->vfs_inode);
-       spin_unlock(&info->lock);
+       ptr += offset;
  
         /*
          * Move _head_ to start search for next from here.
@@ -940,37 +941,18 @@ found:
          */
         if (shmem_swaplist.next != &info->swaplist)
                 list_move_tail(&shmem_swaplist, &info->swaplist);
-       mutex_unlock(&shmem_swaplist_mutex);
  
-       error = 1;
-       if (!inode)
-               goto out;
         /*
-        * Charge page using GFP_KERNEL while we can wait.
-        * Charged back to the user(not to caller) when swap account is used.
-        * add_to_page_cache() will be called with GFP_NOWAIT.
+        * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
+        * but also to hold up shmem_evict_inode(): so inode cannot be freed
+        * beneath us (pagelock doesn't help until the page is in pagecache).
          */
-       error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
-       if (error)
-               goto out;
-       error = radix_tree_preload(GFP_KERNEL);
-       if (error) {
-               mem_cgroup_uncharge_cache_page(page);
-               goto out;
-       }
-       error = 1;
-
-       spin_lock(&info->lock);
-       ptr = shmem_swp_entry(info, idx, NULL);
-       if (ptr && ptr->val == entry.val) {
-               error = add_to_page_cache_locked(page, inode->i_mapping,
-                                               idx, GFP_NOWAIT);
-               /* does mem_cgroup_uncharge_cache_page on error */
-       } else  /* we must compensate for our precharge above */
-               mem_cgroup_uncharge_cache_page(page);
+       mapping = info->vfs_inode.i_mapping;
+       error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT);
+       /* which does mem_cgroup_uncharge_cache_page on error */
  
         if (error == -EEXIST) {
-               struct page *filepage = find_get_page(inode->i_mapping, idx);
+               struct page *filepage = find_get_page(mapping, idx);
                 error = 1;
                 if (filepage) {
                         /*
@@ -990,14 +972,8 @@ found:
                 swap_free(entry);
                 error = 1;      /* not an error, but entry was found */
         }
-       if (ptr)
-               shmem_swp_unmap(ptr);
+       shmem_swp_unmap(ptr);
         spin_unlock(&info->lock);
-       radix_tree_preload_end();
-out:
-       unlock_page(page);
-       page_cache_release(page);
-       iput(inode);            /* allows for NULL */
         return error;
  }
  
@@ -1009,6 +985,26 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
         struct list_head *p, *next;
         struct shmem_inode_info *info;
         int found = 0;
+       int error;
+
+       /*
+        * Charge page using GFP_KERNEL while we can wait, before taking
+        * the shmem_swaplist_mutex which might hold up shmem_writepage().
+        * Charged back to the user (not to the caller) when swap accounting is
+        * used.  add_to_page_cache() will be called with GFP_NOWAIT.
+        */
+       error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
+       if (error)
+               goto out;
+       /*
+        * Try to preload while we can wait, to not make a habit of
+        * draining atomic reserves; but don't latch on to this cpu,
+        * it's okay if sometimes we get rescheduled after this.
+        */
+       error = radix_tree_preload(GFP_KERNEL);
+       if (error)
+               goto uncharge;
+       radix_tree_preload_end();
  
         mutex_lock(&shmem_swaplist_mutex);
         list_for_each_safe(p, next, &shmem_swaplist) {
@@ -1016,17 +1012,19 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
                 found = shmem_unuse_inode(info, entry, page);
                 cond_resched();
                 if (found)
-                       goto out;
+                       break;
         }
         mutex_unlock(&shmem_swaplist_mutex);
-       /*
-        * Can some race bring us here?  We've been holding page lock,
-        * so I think not; but would rather try again later than BUG()
-        */
+
+uncharge:
+       if (!found)
+               mem_cgroup_uncharge_cache_page(page);
+       if (found < 0)
+               error = found;
+out:
         unlock_page(page);
         page_cache_release(page);
-out:
-       return (found < 0) ? found : 0;
+       return error;
  }
  
  /*
@@ -1064,7 +1062,25 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
         else
                 swap.val = 0;
  
+       /*
+        * Add inode to shmem_unuse()'s list of swapped-out inodes,
+        * if it's not already there.  Do it now because we cannot take
+        * mutex while holding spinlock, and must do so before the page
+        * is moved to swap cache, when its pagelock no longer protects
+        * the inode from eviction.  But don't unlock the mutex until
+        * we've taken the spinlock, because shmem_unuse_inode() will
+        * prune a !swapped inode from the swaplist under both locks.
+        */
+       if (swap.val) {
+               mutex_lock(&shmem_swaplist_mutex);
+               if (list_empty(&info->swaplist))
+                       list_add_tail(&info->swaplist, &shmem_swaplist);
+       }
+
         spin_lock(&info->lock);
+       if (swap.val)
+               mutex_unlock(&shmem_swaplist_mutex);
+
         if (index >= info->next_index) {
                 BUG_ON(!(info->flags & SHMEM_TRUNCATE));
                 goto unlock;
@@ -1084,21 +1100,10 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
                 delete_from_page_cache(page);
                 shmem_swp_set(info, entry, swap.val);
                 shmem_swp_unmap(entry);
-               if (list_empty(&info->swaplist))
-                       inode = igrab(inode);
-               else
-                       inode = NULL;
                 spin_unlock(&info->lock);
                 swap_shmem_alloc(swap);
                 BUG_ON(page_mapped(page));
                 swap_writepage(page, wbc);
-               if (inode) {
-                       mutex_lock(&shmem_swaplist_mutex);
-                       /* move instead of add in case we're racing */
-                       list_move_tail(&info->swaplist, &shmem_swaplist);
-                       mutex_unlock(&shmem_swaplist_mutex);
-                       iput(inode);
-               }
                 return 0;
         }
  
@@ -1400,20 +1405,14 @@ repeat:
                 if (sbinfo->max_blocks) {
                         if (percpu_counter_compare(&sbinfo->used_blocks,
                                                 sbinfo->max_blocks) >= 0 ||
-                           shmem_acct_block(info->flags)) {
-                               spin_unlock(&info->lock);
-                               error = -ENOSPC;
-                               goto failed;
-                       }
+                           shmem_acct_block(info->flags))
+                               goto nospace;
                         percpu_counter_inc(&sbinfo->used_blocks);
                         spin_lock(&inode->i_lock);
                         inode->i_blocks += BLOCKS_PER_PAGE;
                         spin_unlock(&inode->i_lock);
-               } else if (shmem_acct_block(info->flags)) {
-                       spin_unlock(&info->lock);
-                       error = -ENOSPC;
-                       goto failed;
-               }
+               } else if (shmem_acct_block(info->flags))
+                       goto nospace;
  
                 if (!filepage) {
                         int ret;
@@ -1493,6 +1492,24 @@ done:
         error = 0;
         goto out;
  
+nospace:
+       /*
+        * Perhaps the page was brought in from swap between find_lock_page
+        * and taking info->lock?  We allow for that at add_to_page_cache_lru,
+        * but must also avoid reporting a spurious ENOSPC while working on a
+        * full tmpfs.  (When filepage has been passed in to shmem_getpage, it
+        * is already in page cache, which prevents this race from occurring.)
+        */
+       if (!filepage) {
+               struct page *page = find_get_page(mapping, idx);
+               if (page) {
+                       spin_unlock(&info->lock);
+                       page_cache_release(page);
+                       goto repeat;
+               }
+       }
+       spin_unlock(&info->lock);
+       error = -ENOSPC;
  failed:
         if (*pagep != filepage) {
                 unlock_page(filepage);