* 2000-2001 Christoph Rohland
* 2000-2001 SAP AG
* 2002 Red Hat Inc.
- * Copyright (C) 2002-2005 Hugh Dickins.
+ * Copyright (C) 2002-2011 Hugh Dickins.
+ * Copyright (C) 2011 Google Inc.
* Copyright (C) 2002-2005 VERITAS Software Corporation.
* Copyright (C) 2004 Andi Kleen, SuSE Labs
*
#include <linux/shmem_fs.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
+#include <linux/pagevec.h>
#include <linux/percpu_counter.h>
#include <linux/splice.h>
#include <linux/security.h>
/* Pretend that each entry is of this size in directory's i_size */
#define BOGO_DIRENT_SIZE 20
+/* Symlink up to this size is kmalloc'ed instead of using a swappable page */
+#define SHORT_SYMLINK_LEN 128
+
struct shmem_xattr {
struct list_head list; /* anchored by shmem_inode_info->xattr_list */
char *name; /* xattr name */
static LIST_HEAD(shmem_swaplist);
static DEFINE_MUTEX(shmem_swaplist_mutex);
-static void shmem_free_blocks(struct inode *inode, long pages)
-{
- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
- if (sbinfo->max_blocks) {
- percpu_counter_add(&sbinfo->used_blocks, -pages);
- inode->i_blocks -= pages*BLOCKS_PER_PAGE;
- }
-}
-
static int shmem_reserve_inode(struct super_block *sb)
{
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
if (freed > 0) {
+ struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ if (sbinfo->max_blocks)
+ percpu_counter_add(&sbinfo->used_blocks, -freed);
info->alloced -= freed;
+ inode->i_blocks -= freed * BLOCKS_PER_PAGE;
shmem_unacct_blocks(info->flags, freed);
- shmem_free_blocks(inode, freed);
}
}
-static void shmem_put_swap(struct shmem_inode_info *info, pgoff_t index,
- swp_entry_t swap)
+/*
+ * Replace item expected in radix tree by a new item, while holding tree lock.
+ *
+ * Looks up the slot for @index in @mapping->page_tree and compares the item
+ * it currently holds with @expected.  If there is no slot, or the slot holds
+ * anything else, the tree is left untouched and -ENOENT is returned.
+ * Otherwise the slot is overwritten with @replacement, or the entry is
+ * deleted from the tree when @replacement is NULL, and 0 is returned.
+ *
+ * @expected must not be NULL (VM_BUG_ON): item starts out NULL and stays
+ * NULL when no slot is found, so a NULL @expected could not be told apart
+ * from an absent entry.
+ *
+ * No locking is done here: the slot is dereferenced with
+ * radix_tree_deref_slot_protected() against mapping->tree_lock, which the
+ * caller is expected to hold already.
+ */
+static int shmem_radix_tree_replace(struct address_space *mapping,
+ pgoff_t index, void *expected, void *replacement)
+{
+ void **pslot;
+ void *item = NULL;
+
+ VM_BUG_ON(!expected);
+ pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
+ if (pslot)
+ item = radix_tree_deref_slot_protected(pslot,
+ &mapping->tree_lock);
+ if (item != expected)
+ return -ENOENT;
+ if (replacement)
+ radix_tree_replace_slot(pslot, replacement);
+ else
+ radix_tree_delete(&mapping->page_tree, index);
+ return 0;
+}
+
+/*
+ * Like add_to_page_cache_locked, but error if expected item has gone.
+ *
+ * Puts @page into @mapping at @index.  @page must already be locked and
+ * marked swap-backed: both are checked with VM_BUG_ON.
+ *
+ * When @expected is NULL a new radix tree entry is inserted, after a
+ * radix_tree_preload() using @gfp masked by GFP_RECLAIM_MASK.  When
+ * @expected is non-NULL no preload is done: the entry already present at
+ * @index is replaced by @page through shmem_radix_tree_replace(), which
+ * fails with -ENOENT if that entry is no longer @expected.
+ *
+ * On success a reference has been taken on @page, page->mapping and
+ * page->index are set, and mapping->nrpages plus the NR_FILE_PAGES and
+ * NR_SHMEM zone counters have been incremented under tree_lock; 0 is
+ * returned.  If the insertion or replacement fails, page->mapping is reset
+ * to NULL and the reference is dropped again.  On any failure, preload
+ * failure included, mem_cgroup_uncharge_cache_page() is called on @page
+ * before the error is returned, undoing the charge that the callers in this
+ * file make before calling here.
+ */
+static int shmem_add_to_page_cache(struct page *page,
+ struct address_space *mapping,
+ pgoff_t index, gfp_t gfp, void *expected)
+{
+ int error = 0;
+
+ VM_BUG_ON(!PageLocked(page));
+ VM_BUG_ON(!PageSwapBacked(page));
+
+ if (!expected)
+ error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
+ if (!error) {
+ page_cache_get(page);
+ page->mapping = mapping;
+ page->index = index;
+
+ spin_lock_irq(&mapping->tree_lock);
+ if (!expected)
+ error = radix_tree_insert(&mapping->page_tree,
+ index, page);
+ else
+ error = shmem_radix_tree_replace(mapping, index,
+ expected, page);
+ if (!error) {
+ mapping->nrpages++;
+ __inc_zone_page_state(page, NR_FILE_PAGES);
+ __inc_zone_page_state(page, NR_SHMEM);
+ spin_unlock_irq(&mapping->tree_lock);
+ } else {
+ page->mapping = NULL;
+ spin_unlock_irq(&mapping->tree_lock);
+ page_cache_release(page);
+ }
+ if (!expected)
+ radix_tree_preload_end();
+ }
+ if (error)
+ mem_cgroup_uncharge_cache_page(page);
+ return error;
+}
+
+/*
+ * Like delete_from_page_cache, but substitutes swap for page.
+ *
+ * Under mapping->tree_lock, replaces the entry for @page at page->index in
+ * its mapping's radix tree with @radswap, clears page->mapping, and
+ * decrements mapping->nrpages and the NR_FILE_PAGES and NR_SHMEM zone
+ * counters.  Once the lock is dropped, a reference on @page is released
+ * with page_cache_release().
+ *
+ * @page must still be the item stored at that index: the result of the
+ * replacement is only checked by BUG_ON() at the very end, after the
+ * accounting has already been updated.  The caller visible in this file
+ * passes swp_to_radix_entry(swap) as @radswap; a NULL @radswap would make
+ * shmem_radix_tree_replace() delete the entry instead.
+ */
+static void shmem_delete_from_page_cache(struct page *page, void *radswap)
+{
+ struct address_space *mapping = page->mapping;
+ int error;
+
+ spin_lock_irq(&mapping->tree_lock);
+ error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
+ page->mapping = NULL;
+ mapping->nrpages--;
+ __dec_zone_page_state(page, NR_FILE_PAGES);
+ __dec_zone_page_state(page, NR_SHMEM);
+ spin_unlock_irq(&mapping->tree_lock);
+ page_cache_release(page);
+ BUG_ON(error);
+}
+
+/*
+ * Like find_get_pages, but collecting swap entries as well as pages.
+ *
+ * Looks up at most @nr_pages slots of @mapping->page_tree, starting from
+ * index @start, under rcu_read_lock().  @pages does double duty: it first
+ * receives the slot pointers from radix_tree_gang_lookup_slot(), and is
+ * then overwritten in place with the items read from those slots, while
+ * @indices receives the tree index of each item.  Slots found empty when
+ * dereferenced are skipped, so the results are compacted to the front of
+ * both arrays.
+ *
+ * Real pages are returned with a reference raised by
+ * page_cache_get_speculative(); if that fails, or if the slot no longer
+ * holds the same page afterwards, the slot is read again.  Exceptional
+ * entries are returned as they are, with no reference taken, so the caller
+ * must tell them apart before releasing anything.  A deref-retry entry
+ * restarts the whole lookup, as does finding slots yet ending up with
+ * nothing to return.
+ *
+ * Returns the number of entries stored in @pages and @indices.
+ */
+static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping,
+ pgoff_t start, unsigned int nr_pages,
+ struct page **pages, pgoff_t *indices)
+{
+ unsigned int i;
+ unsigned int ret;
+ unsigned int nr_found;
+
+ rcu_read_lock();
+restart:
+ nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+ (void ***)pages, indices, start, nr_pages);
+ ret = 0;
+ for (i = 0; i < nr_found; i++) {
+ struct page *page;
+repeat:
+ page = radix_tree_deref_slot((void **)pages[i]);
+ if (unlikely(!page))
+ continue;
+ if (radix_tree_exception(page)) {
+ if (radix_tree_deref_retry(page))
+ goto restart;
+ /*
+ * Otherwise, we must be storing a swap entry
+ * here as an exceptional entry: so return it
+ * without attempting to raise page count.
+ */
+ goto export;
+ }
+ if (!page_cache_get_speculative(page))
+ goto repeat;
+
+ /* Has the page moved? */
+ if (unlikely(page != *((void **)pages[i]))) {
+ page_cache_release(page);
+ goto repeat;
+ }
+export:
+ indices[ret] = indices[i];
+ pages[ret] = page;
+ ret++;
+ }
+ if (unlikely(!ret && nr_found))
+ goto restart;
+ rcu_read_unlock();
+ return ret;
+}
+
+/*
+ * Remove swap entry from radix tree, free the swap and its page cache.
+ *
+ * Takes mapping->tree_lock and deletes the entry at @index, but only if it
+ * still holds @radswap.  On success, once the lock has been dropped, the
+ * swap entry decoded from @radswap is passed to free_swap_and_cache() and
+ * 0 is returned.  If the entry has changed in the meantime nothing is
+ * freed, and the error from shmem_radix_tree_replace() is returned.
+ *
+ * info->swapped is not adjusted here: the truncation loops in this file
+ * count the successful calls and subtract the total themselves.
+ */
+static int shmem_free_swap(struct address_space *mapping,
+ pgoff_t index, void *radswap)
{
- if (index < SHMEM_NR_DIRECT)
- info->i_direct[index] = swap;
+ int error;
+
+ spin_lock_irq(&mapping->tree_lock);
+ error = shmem_radix_tree_replace(mapping, index, radswap, NULL);
+ spin_unlock_irq(&mapping->tree_lock);
+ if (!error)
+ free_swap_and_cache(radix_to_swp_entry(radswap));
+ return error;
}
-static swp_entry_t shmem_get_swap(struct shmem_inode_info *info, pgoff_t index)
+/*
+ * Pagevec may contain swap entries, so shuffle up pages before releasing.
+ *
+ * Entries for which radix_tree_exceptional_entry() is true are dropped
+ * from @pvec: the remaining page pointers are moved down to close the
+ * gaps, keeping their order, and pvec->nr is set to their count.  Only
+ * then is @pvec handed to pagevec_release().
+ */
+static void shmem_pagevec_release(struct pagevec *pvec)
{
- return (index < SHMEM_NR_DIRECT) ?
- info->i_direct[index] : (swp_entry_t){0};
+ int i, j;
+
+ for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
+ struct page *page = pvec->pages[i];
+ if (!radix_tree_exceptional_entry(page))
+ pvec->pages[j++] = page;
+ }
+ pvec->nr = j;
+ pagevec_release(pvec);
}
+/*
+ * Remove range of pages and swap entries from radix tree, and free them.
+ */
void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
{
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode);
pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
pgoff_t end = (lend >> PAGE_CACHE_SHIFT);
+ struct pagevec pvec;
+ pgoff_t indices[PAGEVEC_SIZE];
+ long nr_swaps_freed = 0;
pgoff_t index;
- swp_entry_t swap;
+ int i;
- truncate_inode_pages_range(mapping, lstart, lend);
+ BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
- if (end > SHMEM_NR_DIRECT)
- end = SHMEM_NR_DIRECT;
+ pagevec_init(&pvec, 0);
+ index = start;
+ while (index <= end) {
+ pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+ pvec.pages, indices);
+ if (!pvec.nr)
+ break;
+ mem_cgroup_uncharge_start();
+ for (i = 0; i < pagevec_count(&pvec); i++) {
+ struct page *page = pvec.pages[i];
- spin_lock(&info->lock);
- for (index = start; index < end; index++) {
- swap = shmem_get_swap(info, index);
- if (swap.val) {
- free_swap_and_cache(swap);
- shmem_put_swap(info, index, (swp_entry_t){0});
- info->swapped--;
+ index = indices[i];
+ if (index > end)
+ break;
+
+ if (radix_tree_exceptional_entry(page)) {
+ nr_swaps_freed += !shmem_free_swap(mapping,
+ index, page);
+ continue;
+ }
+
+ if (!trylock_page(page))
+ continue;
+ if (page->mapping == mapping) {
+ VM_BUG_ON(PageWriteback(page));
+ truncate_inode_page(mapping, page);
+ }
+ unlock_page(page);
}
+ shmem_pagevec_release(&pvec);
+ mem_cgroup_uncharge_end();
+ cond_resched();
+ index++;
}
- if (mapping->nrpages) {
- spin_unlock(&info->lock);
- /*
- * A page may have meanwhile sneaked in from swap.
- */
- truncate_inode_pages_range(mapping, lstart, lend);
- spin_lock(&info->lock);
+ if (partial) {
+ struct page *page = NULL;
+ shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
+ if (page) {
+ zero_user_segment(page, partial, PAGE_CACHE_SIZE);
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+ }
+ }
+
+ index = start;
+ for ( ; ; ) {
+ cond_resched();
+ pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+ pvec.pages, indices);
+ if (!pvec.nr) {
+ if (index == start)
+ break;
+ index = start;
+ continue;
+ }
+ if (index == start && indices[0] > end) {
+ shmem_pagevec_release(&pvec);
+ break;
+ }
+ mem_cgroup_uncharge_start();
+ for (i = 0; i < pagevec_count(&pvec); i++) {
+ struct page *page = pvec.pages[i];
+
+ index = indices[i];
+ if (index > end)
+ break;
+
+ if (radix_tree_exceptional_entry(page)) {
+ nr_swaps_freed += !shmem_free_swap(mapping,
+ index, page);
+ continue;
+ }
+
+ lock_page(page);
+ if (page->mapping == mapping) {
+ VM_BUG_ON(PageWriteback(page));
+ truncate_inode_page(mapping, page);
+ }
+ unlock_page(page);
+ }
+ shmem_pagevec_release(&pvec);
+ mem_cgroup_uncharge_end();
+ index++;
}
+ spin_lock(&info->lock);
+ info->swapped -= nr_swaps_freed;
shmem_recalc_inode(inode);
spin_unlock(&info->lock);
if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
loff_t oldsize = inode->i_size;
loff_t newsize = attr->ia_size;
- struct page *page = NULL;
- if (newsize < oldsize) {
- /*
- * If truncating down to a partial page, then
- * if that page is already allocated, hold it
- * in memory until the truncation is over, so
- * truncate_partial_page cannot miss it were
- * it assigned to swap.
- */
- if (newsize & (PAGE_CACHE_SIZE-1)) {
- (void) shmem_getpage(inode,
- newsize >> PAGE_CACHE_SHIFT,
- &page, SGP_READ, NULL);
- if (page)
- unlock_page(page);
- }
- }
if (newsize != oldsize) {
i_size_write(inode, newsize);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
/* unmap again to remove racily COWed private pages */
unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
}
- if (page)
- page_cache_release(page);
}
setattr_copy(inode, attr);
list_del_init(&info->swaplist);
mutex_unlock(&shmem_swaplist_mutex);
}
- }
+ } else
+ kfree(info->symlink);
list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) {
kfree(xattr->name);
end_writeback(inode);
}
+/*
+ * If swap found in inode, free it and move page from swapcache to filecache.
+ */
static int shmem_unuse_inode(struct shmem_inode_info *info,
swp_entry_t swap, struct page *page)
{
struct address_space *mapping = info->vfs_inode.i_mapping;
+ void *radswap;
pgoff_t index;
int error;
- for (index = 0; index < SHMEM_NR_DIRECT; index++)
- if (shmem_get_swap(info, index).val == swap.val)
- goto found;
- return 0;
-found:
- spin_lock(&info->lock);
- if (shmem_get_swap(info, index).val != swap.val) {
- spin_unlock(&info->lock);
+ radswap = swp_to_radix_entry(swap);
+ index = radix_tree_locate_item(&mapping->page_tree, radswap);
+ if (index == -1)
return 0;
- }
/*
* Move _head_ to start search for next from here.
* but also to hold up shmem_evict_inode(): so inode cannot be freed
* beneath us (pagelock doesn't help until the page is in pagecache).
*/
- error = add_to_page_cache_locked(page, mapping, index, GFP_NOWAIT);
+ error = shmem_add_to_page_cache(page, mapping, index,
+ GFP_NOWAIT, radswap);
/* which does mem_cgroup_uncharge_cache_page on error */
if (error != -ENOMEM) {
+ /*
+ * Truncation and eviction use free_swap_and_cache(), which
+ * only does trylock page: if we raced, best clean up here.
+ */
delete_from_swap_cache(page);
set_page_dirty(page);
- shmem_put_swap(info, index, (swp_entry_t){0});
- info->swapped--;
- swap_free(swap);
+ if (!error) {
+ spin_lock(&info->lock);
+ info->swapped--;
+ spin_unlock(&info->lock);
+ swap_free(swap);
+ }
error = 1; /* not an error, but entry was found */
}
- spin_unlock(&info->lock);
return error;
}
/*
- * shmem_unuse() search for an eventually swapped out shmem page.
+ * Search through swapped inodes to find and replace swap by page.
*/
int shmem_unuse(swp_entry_t swap, struct page *page)
{
* Charge page using GFP_KERNEL while we can wait, before taking
* the shmem_swaplist_mutex which might hold up shmem_writepage().
* Charged back to the user (not to caller) when swap account is used.
- * add_to_page_cache() will be called with GFP_NOWAIT.
*/
error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
if (error)
goto out;
- /*
- * Try to preload while we can wait, to not make a habit of
- * draining atomic reserves; but don't latch on to this cpu,
- * it's okay if sometimes we get rescheduled after this.
- */
- error = radix_tree_preload(GFP_KERNEL);
- if (error)
- goto uncharge;
- radix_tree_preload_end();
+ /* No radix_tree_preload: swap entry keeps a place for page in tree */
mutex_lock(&shmem_swaplist_mutex);
list_for_each_safe(this, next, &shmem_swaplist) {
info = list_entry(this, struct shmem_inode_info, swaplist);
- if (!info->swapped) {
- spin_lock(&info->lock);
- if (!info->swapped)
- list_del_init(&info->swaplist);
- spin_unlock(&info->lock);
- }
if (info->swapped)
found = shmem_unuse_inode(info, swap, page);
+ else
+ list_del_init(&info->swaplist);
cond_resched();
if (found)
break;
}
mutex_unlock(&shmem_swaplist_mutex);
-uncharge:
if (!found)
mem_cgroup_uncharge_cache_page(page);
if (found < 0)
static int shmem_writepage(struct page *page, struct writeback_control *wbc)
{
struct shmem_inode_info *info;
- swp_entry_t swap, oswap;
struct address_space *mapping;
- pgoff_t index;
struct inode *inode;
+ swp_entry_t swap;
+ pgoff_t index;
BUG_ON(!PageLocked(page));
mapping = page->mapping;
WARN_ON_ONCE(1); /* Still happens? Tell us about it! */
goto redirty;
}
-
- /*
- * Just for this patch, we have a toy implementation,
- * which can swap out only the first SHMEM_NR_DIRECT pages:
- * for simple demonstration of where we need to think about swap.
- */
- if (index >= SHMEM_NR_DIRECT)
- goto redirty;
-
swap = get_swap_page();
if (!swap.val)
goto redirty;
/*
* Add inode to shmem_unuse()'s list of swapped-out inodes,
- * if it's not already there. Do it now because we cannot take
- * mutex while holding spinlock, and must do so before the page
- * is moved to swap cache, when its pagelock no longer protects
+ * if it's not already there. Do it now before the page is
+ * moved to swap cache, when its pagelock no longer protects
* the inode from eviction. But don't unlock the mutex until
- * we've taken the spinlock, because shmem_unuse_inode() will
- * prune a !swapped inode from the swaplist under both locks.
+ * we've incremented swapped, because shmem_unuse_inode() will
+ * prune a !swapped inode from the swaplist under this mutex.
*/
mutex_lock(&shmem_swaplist_mutex);
if (list_empty(&info->swaplist))
list_add_tail(&info->swaplist, &shmem_swaplist);
- spin_lock(&info->lock);
- mutex_unlock(&shmem_swaplist_mutex);
-
- oswap = shmem_get_swap(info, index);
- if (oswap.val) {
- WARN_ON_ONCE(1); /* Still happens? Tell us about it! */
- free_swap_and_cache(oswap);
- shmem_put_swap(info, index, (swp_entry_t){0});
- info->swapped--;
- }
- shmem_recalc_inode(inode);
-
if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
- delete_from_page_cache(page);
- shmem_put_swap(info, index, swap);
- info->swapped++;
swap_shmem_alloc(swap);
+ shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
+
+ spin_lock(&info->lock);
+ info->swapped++;
+ shmem_recalc_inode(inode);
spin_unlock(&info->lock);
+
+ mutex_unlock(&shmem_swaplist_mutex);
BUG_ON(page_mapped(page));
swap_writepage(page, wbc);
return 0;
}
- spin_unlock(&info->lock);
+ mutex_unlock(&shmem_swaplist_mutex);
swapcache_free(swap, NULL);
redirty:
set_page_dirty(page);
struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type)
{
struct address_space *mapping = inode->i_mapping;
- struct shmem_inode_info *info = SHMEM_I(inode);
+ struct shmem_inode_info *info;
struct shmem_sb_info *sbinfo;
struct page *page;
- struct page *prealloc_page = NULL;
swp_entry_t swap;
int error;
+ int once = 0;
if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
return -EFBIG;
repeat:
+ swap.val = 0;
page = find_lock_page(mapping, index);
- if (page) {
+ if (radix_tree_exceptional_entry(page)) {
+ swap = radix_to_swp_entry(page);
+ page = NULL;
+ }
+
+ if (sgp != SGP_WRITE &&
+ ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
+ error = -EINVAL;
+ goto failed;
+ }
+
+ if (page || (sgp == SGP_READ && !swap.val)) {
/*
* Once we can get the page lock, it must be uptodate:
* if there were an error in reading back from swap,
* the page would not be inserted into the filecache.
*/
- BUG_ON(!PageUptodate(page));
- goto done;
+ BUG_ON(page && !PageUptodate(page));
+ *pagep = page;
+ return 0;
}
/*
- * Try to preload while we can wait, to not make a habit of
- * draining atomic reserves; but don't latch on to this cpu.
+ * Fast cache lookup did not find it:
+ * bring it back from swap or allocate.
*/
- error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
- if (error)
- goto out;
- radix_tree_preload_end();
-
- if (sgp != SGP_READ && !prealloc_page) {
- prealloc_page = shmem_alloc_page(gfp, info, index);
- if (prealloc_page) {
- SetPageSwapBacked(prealloc_page);
- if (mem_cgroup_cache_charge(prealloc_page,
- current->mm, GFP_KERNEL)) {
- page_cache_release(prealloc_page);
- prealloc_page = NULL;
- }
- }
- }
+ info = SHMEM_I(inode);
+ sbinfo = SHMEM_SB(inode->i_sb);
- spin_lock(&info->lock);
- shmem_recalc_inode(inode);
- swap = shmem_get_swap(info, index);
if (swap.val) {
/* Look it up and read it in.. */
page = lookup_swap_cache(swap);
if (!page) {
- spin_unlock(&info->lock);
/* here we actually do the io */
if (fault_type)
*fault_type |= VM_FAULT_MAJOR;
page = shmem_swapin(swap, gfp, info, index);
if (!page) {
- swp_entry_t nswap = shmem_get_swap(info, index);
- if (nswap.val == swap.val) {
- error = -ENOMEM;
- goto out;
- }
- goto repeat;
+ error = -ENOMEM;
+ goto failed;
}
- wait_on_page_locked(page);
- page_cache_release(page);
- goto repeat;
}
/* We have to do this with page locked to prevent races */
- if (!trylock_page(page)) {
- spin_unlock(&info->lock);
- wait_on_page_locked(page);
- page_cache_release(page);
- goto repeat;
- }
- if (PageWriteback(page)) {
- spin_unlock(&info->lock);
- wait_on_page_writeback(page);
- unlock_page(page);
- page_cache_release(page);
- goto repeat;
- }
+ lock_page(page);
if (!PageUptodate(page)) {
- spin_unlock(&info->lock);
- unlock_page(page);
- page_cache_release(page);
error = -EIO;
- goto out;
+ goto failed;
}
-
- error = add_to_page_cache_locked(page, mapping,
- index, GFP_NOWAIT);
- if (error) {
- spin_unlock(&info->lock);
- if (error == -ENOMEM) {
- /*
- * reclaim from proper memory cgroup and
- * call memcg's OOM if needed.
- */
- error = mem_cgroup_shmem_charge_fallback(
- page, current->mm, gfp);
- if (error) {
- unlock_page(page);
- page_cache_release(page);
- goto out;
- }
- }
- unlock_page(page);
- page_cache_release(page);
- goto repeat;
+ wait_on_page_writeback(page);
+
+ /* Someone may have already done it for us */
+ if (page->mapping) {
+ if (page->mapping == mapping &&
+ page->index == index)
+ goto done;
+ error = -EEXIST;
+ goto failed;
}
- delete_from_swap_cache(page);
- shmem_put_swap(info, index, (swp_entry_t){0});
+ error = mem_cgroup_cache_charge(page, current->mm,
+ gfp & GFP_RECLAIM_MASK);
+ if (!error)
+ error = shmem_add_to_page_cache(page, mapping, index,
+ gfp, swp_to_radix_entry(swap));
+ if (error)
+ goto failed;
+
+ spin_lock(&info->lock);
info->swapped--;
+ shmem_recalc_inode(inode);
spin_unlock(&info->lock);
+
+ delete_from_swap_cache(page);
set_page_dirty(page);
swap_free(swap);
- } else if (sgp == SGP_READ) {
- page = find_get_page(mapping, index);
- if (page && !trylock_page(page)) {
- spin_unlock(&info->lock);
- wait_on_page_locked(page);
- page_cache_release(page);
- goto repeat;
+ } else {
+ if (shmem_acct_block(info->flags)) {
+ error = -ENOSPC;
+ goto failed;
}
- spin_unlock(&info->lock);
-
- } else if (prealloc_page) {
- sbinfo = SHMEM_SB(inode->i_sb);
if (sbinfo->max_blocks) {
if (percpu_counter_compare(&sbinfo->used_blocks,
- sbinfo->max_blocks) >= 0 ||
- shmem_acct_block(info->flags))
- goto nospace;
+ sbinfo->max_blocks) >= 0) {
+ error = -ENOSPC;
+ goto unacct;
+ }
percpu_counter_inc(&sbinfo->used_blocks);
- inode->i_blocks += BLOCKS_PER_PAGE;
- } else if (shmem_acct_block(info->flags))
- goto nospace;
-
- page = prealloc_page;
- prealloc_page = NULL;
+ }
- swap = shmem_get_swap(info, index);
- if (swap.val)
- mem_cgroup_uncharge_cache_page(page);
- else
- error = add_to_page_cache_lru(page, mapping,
- index, GFP_NOWAIT);
- /*
- * At add_to_page_cache_lru() failure,
- * uncharge will be done automatically.
- */
- if (swap.val || error) {
- shmem_unacct_blocks(info->flags, 1);
- shmem_free_blocks(inode, 1);
- spin_unlock(&info->lock);
- page_cache_release(page);
- goto repeat;
+ page = shmem_alloc_page(gfp, info, index);
+ if (!page) {
+ error = -ENOMEM;
+ goto decused;
}
+ SetPageSwapBacked(page);
+ __set_page_locked(page);
+ error = mem_cgroup_cache_charge(page, current->mm,
+ gfp & GFP_RECLAIM_MASK);
+ if (!error)
+ error = shmem_add_to_page_cache(page, mapping, index,
+ gfp, NULL);
+ if (error)
+ goto decused;
+ lru_cache_add_anon(page);
+
+ spin_lock(&info->lock);
info->alloced++;
+ inode->i_blocks += BLOCKS_PER_PAGE;
+ shmem_recalc_inode(inode);
spin_unlock(&info->lock);
+
clear_highpage(page);
flush_dcache_page(page);
SetPageUptodate(page);
if (sgp == SGP_DIRTY)
set_page_dirty(page);
-
- } else {
- spin_unlock(&info->lock);
- error = -ENOMEM;
- goto out;
}
done:
- *pagep = page;
- error = 0;
-out:
- if (prealloc_page) {
- mem_cgroup_uncharge_cache_page(prealloc_page);
- page_cache_release(prealloc_page);
+ /* Perhaps the file has been truncated since we checked */
+ if (sgp != SGP_WRITE &&
+ ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
+ error = -EINVAL;
+ goto trunc;
}
- return error;
+ *pagep = page;
+ return 0;
-nospace:
/*
- * Perhaps the page was brought in from swap between find_lock_page
- * and taking info->lock? We allow for that at add_to_page_cache_lru,
- * but must also avoid reporting a spurious ENOSPC while working on a
- * full tmpfs.
+ * Error recovery.
*/
- page = find_get_page(mapping, index);
+trunc:
+ ClearPageDirty(page);
+ delete_from_page_cache(page);
+ spin_lock(&info->lock);
+ info->alloced--;
+ inode->i_blocks -= BLOCKS_PER_PAGE;
spin_unlock(&info->lock);
+decused:
+ if (sbinfo->max_blocks)
+ percpu_counter_add(&sbinfo->used_blocks, -1);
+unacct:
+ shmem_unacct_blocks(info->flags, 1);
+failed:
+ if (swap.val && error != -EINVAL) {
+ struct page *test = find_get_page(mapping, index);
+ if (test && !radix_tree_exceptional_entry(test))
+ page_cache_release(test);
+ /* Have another try if the entry has changed */
+ if (test != swp_to_radix_entry(swap))
+ error = -EEXIST;
+ }
if (page) {
+ unlock_page(page);
page_cache_release(page);
+ }
+ if (error == -ENOSPC && !once++) {
+ info = SHMEM_I(inode);
+ spin_lock(&info->lock);
+ shmem_recalc_inode(inode);
+ spin_unlock(&info->lock);
goto repeat;
}
- error = -ENOSPC;
- goto out;
+ if (error == -EEXIST)
+ goto repeat;
+ return error;
}
static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
int error;
int ret = VM_FAULT_LOCKED;
- if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
- return VM_FAULT_SIGBUS;
-
error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
if (error)
return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
#ifdef CONFIG_TMPFS
static const struct inode_operations shmem_symlink_inode_operations;
-static const struct inode_operations shmem_symlink_inline_operations;
+static const struct inode_operations shmem_short_symlink_operations;
static int
shmem_write_begin(struct file *file, struct address_space *mapping,
info = SHMEM_I(inode);
inode->i_size = len-1;
- if (len <= SHMEM_SYMLINK_INLINE_LEN) {
- /* do it inline */
- memcpy(info->inline_symlink, symname, len);
- inode->i_op = &shmem_symlink_inline_operations;
+ if (len <= SHORT_SYMLINK_LEN) {
+ info->symlink = kmemdup(symname, len, GFP_KERNEL);
+ if (!info->symlink) {
+ iput(inode);
+ return -ENOMEM;
+ }
+ inode->i_op = &shmem_short_symlink_operations;
} else {
error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
if (error) {
return 0;
}
-static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
+static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd)
{
- nd_set_link(nd, SHMEM_I(dentry->d_inode)->inline_symlink);
+ nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink);
return NULL;
}
}
#endif /* CONFIG_TMPFS_XATTR */
-static const struct inode_operations shmem_symlink_inline_operations = {
+static const struct inode_operations shmem_short_symlink_operations = {
.readlink = generic_readlink,
- .follow_link = shmem_follow_link_inline,
+ .follow_link = shmem_follow_short_symlink,
#ifdef CONFIG_TMPFS_XATTR
.setxattr = shmem_setxattr,
.getxattr = shmem_getxattr,
if (config.max_inodes < inodes)
goto out;
/*
- * Those tests also disallow limited->unlimited while any are in
- * use, so i_blocks will always be zero when max_blocks is zero;
+ * Those tests disallow limited->unlimited while any are in use;
* but we must separately disallow unlimited->limited, because
* in that case we have no record of how much is already in use.
*/
static void shmem_destroy_inode(struct inode *inode)
{
- if ((inode->i_mode & S_IFMT) == S_IFREG) {
- /* only struct inode is valid if it's an inline symlink */
+ if ((inode->i_mode & S_IFMT) == S_IFREG)
mpol_free_shared_policy(&SHMEM_I(inode)->policy);
- }
call_rcu(&inode->i_rcu, shmem_destroy_callback);
}
return error;
}
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-/**
- * mem_cgroup_get_shmem_target - find page or swap assigned to the shmem file
- * @inode: the inode to be searched
- * @index: the page offset to be searched
- * @pagep: the pointer for the found page to be stored
- * @swapp: the pointer for the found swap entry to be stored
- *
- * If a page is found, refcount of it is incremented. Callers should handle
- * these refcount.
- */
-void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t index,
- struct page **pagep, swp_entry_t *swapp)
-{
- struct shmem_inode_info *info = SHMEM_I(inode);
- struct page *page = NULL;
- swp_entry_t swap = {0};
-
- if ((index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
- goto out;
-
- spin_lock(&info->lock);
-#ifdef CONFIG_SWAP
- swap = shmem_get_swap(info, index);
- if (swap.val)
- page = find_get_page(&swapper_space, swap.val);
- else
-#endif
- page = find_get_page(inode->i_mapping, index);
- spin_unlock(&info->lock);
-out:
- *pagep = page;
- *swapp = swap;
-}
-#endif
-
#else /* !CONFIG_SHMEM */
/*
}
EXPORT_SYMBOL_GPL(shmem_truncate_range);
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-/**
- * mem_cgroup_get_shmem_target - find page or swap assigned to the shmem file
- * @inode: the inode to be searched
- * @index: the page offset to be searched
- * @pagep: the pointer for the found page to be stored
- * @swapp: the pointer for the found swap entry to be stored
- *
- * If a page is found, refcount of it is incremented. Callers should handle
- * these refcount.
- */
-void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t index,
- struct page **pagep, swp_entry_t *swapp)
-{
- struct page *page = NULL;
-
- if ((index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
- goto out;
- page = find_get_page(inode->i_mapping, index);
-out:
- *pagep = page;
- *swapp = (swp_entry_t){0};
-}
-#endif
-
#define shmem_vm_ops generic_file_vm_ops
#define shmem_file_operations ramfs_file_operations
#define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)