Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc
[pandora-kernel.git] / mm / shmem.c
index f01e8de..fcd19d3 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/xattr.h>
+#include <linux/exportfs.h>
 #include <linux/generic_acl.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
@@ -82,6 +83,7 @@ enum sgp_type {
        SGP_READ,       /* don't exceed i_size, don't allocate page */
        SGP_CACHE,      /* don't exceed i_size, may allocate page */
        SGP_WRITE,      /* may exceed i_size, may allocate page */
+       SGP_FAULT,      /* same as SGP_CACHE, return with page locked */
 };
 
 static int shmem_getpage(struct inode *inode, unsigned long idx,
@@ -93,8 +95,11 @@ static inline struct page *shmem_dir_alloc(gfp_t gfp_mask)
         * The above definition of ENTRIES_PER_PAGE, and the use of
         * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
         * might be reconsidered if it ever diverges from PAGE_SIZE.
+        *
+        * __GFP_MOVABLE is masked out as swap vectors cannot move
         */
-       return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT);
+       return alloc_pages((gfp_mask & ~__GFP_MOVABLE) | __GFP_ZERO,
+                               PAGE_CACHE_SHIFT-PAGE_SHIFT);
 }
 
 static inline void shmem_dir_free(struct page *page)
@@ -372,7 +377,7 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
                }
 
                spin_unlock(&info->lock);
-               page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO);
+               page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
                if (page)
                        set_page_private(page, 0);
                spin_lock(&info->lock);
@@ -967,6 +972,8 @@ static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_
                *nodelist++ = '\0';
                if (nodelist_parse(nodelist, *policy_nodes))
                        goto out;
+               if (!nodes_subset(*policy_nodes, node_online_map))
+                       goto out;
        }
        if (!strcmp(value, "default")) {
                *policy = MPOL_DEFAULT;
@@ -1094,13 +1101,17 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
 
        if (idx >= SHMEM_MAX_INDEX)
                return -EFBIG;
+
+       if (type)
+               *type = 0;
+
        /*
         * Normally, filepage is NULL on entry, and either found
         * uptodate immediately, or allocated and zeroed, or read
         * in under swappage, which is then assigned to filepage.
-        * But shmem_prepare_write passes in a locked filepage,
-        * which may be found not uptodate by other callers too,
-        * and may need to be copied from the swappage read in.
+        * But shmem_readpage and shmem_prepare_write pass in a locked
+        * filepage, which may be found not uptodate by other callers
+        * too, and may need to be copied from the swappage read in.
         */
 repeat:
        if (!filepage)
@@ -1127,9 +1138,9 @@ repeat:
                if (!swappage) {
                        shmem_swp_unmap(entry);
                        /* here we actually do the io */
-                       if (type && *type == VM_FAULT_MINOR) {
+                       if (type && !(*type & VM_FAULT_MAJOR)) {
                                __count_vm_event(PGMAJFAULT);
-                               *type = VM_FAULT_MAJOR;
+                               *type |= VM_FAULT_MAJOR;
                        }
                        spin_unlock(&info->lock);
                        swappage = shmem_swapin(info, swap, idx);
@@ -1283,8 +1294,10 @@ repeat:
        }
 done:
        if (*pagep != filepage) {
-               unlock_page(filepage);
                *pagep = filepage;
+               if (sgp != SGP_FAULT)
+                       unlock_page(filepage);
+
        }
        return 0;
 
@@ -1296,72 +1309,21 @@ failed:
        return error;
 }
 
-static struct page *shmem_nopage(struct vm_area_struct *vma,
-                                unsigned long address, int *type)
+static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
        struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
-       struct page *page = NULL;
-       unsigned long idx;
        int error;
+       int ret;
 
-       idx = (address - vma->vm_start) >> PAGE_SHIFT;
-       idx += vma->vm_pgoff;
-       idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
-       if (((loff_t) idx << PAGE_CACHE_SHIFT) >= i_size_read(inode))
-               return NOPAGE_SIGBUS;
+       if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
+               return VM_FAULT_SIGBUS;
 
-       error = shmem_getpage(inode, idx, &page, SGP_CACHE, type);
+       error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_FAULT, &ret);
        if (error)
-               return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
+               return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
 
-       mark_page_accessed(page);
-       return page;
-}
-
-static int shmem_populate(struct vm_area_struct *vma,
-       unsigned long addr, unsigned long len,
-       pgprot_t prot, unsigned long pgoff, int nonblock)
-{
-       struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
-       struct mm_struct *mm = vma->vm_mm;
-       enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE;
-       unsigned long size;
-
-       size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size)
-               return -EINVAL;
-
-       while ((long) len > 0) {
-               struct page *page = NULL;
-               int err;
-               /*
-                * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE
-                */
-               err = shmem_getpage(inode, pgoff, &page, sgp, NULL);
-               if (err)
-                       return err;
-               /* Page may still be null, but only if nonblock was set. */
-               if (page) {
-                       mark_page_accessed(page);
-                       err = install_page(mm, vma, addr, page, prot);
-                       if (err) {
-                               page_cache_release(page);
-                               return err;
-                       }
-               } else if (vma->vm_flags & VM_NONLINEAR) {
-                       /* No page was found just because we can't read it in
-                        * now (being here implies nonblock != 0), but the page
-                        * may exist, so set the PTE to fault it in later. */
-                       err = install_file_pte(mm, vma, addr, pgoff, prot);
-                       if (err)
-                               return err;
-               }
-
-               len -= PAGE_SIZE;
-               addr += PAGE_SIZE;
-               pgoff++;
-       }
-       return 0;
+       mark_page_accessed(vmf->page);
+       return ret | VM_FAULT_LOCKED;
 }
 
 #ifdef CONFIG_NUMA
@@ -1408,6 +1370,7 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 {
        file_accessed(file);
        vma->vm_ops = &shmem_vm_ops;
+       vma->vm_flags |= VM_CAN_NONLINEAR;
        return 0;
 }
 
@@ -1483,9 +1446,18 @@ static const struct inode_operations shmem_symlink_inode_operations;
 static const struct inode_operations shmem_symlink_inline_operations;
 
 /*
- * Normally tmpfs makes no use of shmem_prepare_write, but it
- * lets a tmpfs file be used read-write below the loop driver.
+ * Normally tmpfs avoids the use of shmem_readpage and shmem_prepare_write;
+ * but providing them allows a tmpfs file to be used for splice, sendfile, and
+ * below the loop driver, in the generic fashion that many filesystems support.
  */
+static int shmem_readpage(struct file *file, struct page *page)
+{
+       struct inode *inode = page->mapping->host;
+       int error = shmem_getpage(inode, page->index, &page, SGP_CACHE, NULL);
+       unlock_page(page);
+       return error;
+}
+
 static int
 shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
 {
@@ -1709,25 +1681,6 @@ static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count
        return desc.error;
 }
 
-static ssize_t shmem_file_sendfile(struct file *in_file, loff_t *ppos,
-                        size_t count, read_actor_t actor, void *target)
-{
-       read_descriptor_t desc;
-
-       if (!count)
-               return 0;
-
-       desc.written = 0;
-       desc.count = count;
-       desc.arg.data = target;
-       desc.error = 0;
-
-       do_shmem_file_read(in_file, ppos, &desc, actor);
-       if (desc.written)
-               return desc.written;
-       return desc.error;
-}
-
 static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
@@ -2358,20 +2311,18 @@ static void init_once(void *foo, struct kmem_cache *cachep,
 {
        struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
 
-       if (flags & SLAB_CTOR_CONSTRUCTOR) {
-               inode_init_once(&p->vfs_inode);
+       inode_init_once(&p->vfs_inode);
 #ifdef CONFIG_TMPFS_POSIX_ACL
-               p->i_acl = NULL;
-               p->i_default_acl = NULL;
+       p->i_acl = NULL;
+       p->i_default_acl = NULL;
 #endif
-       }
 }
 
 static int init_inodecache(void)
 {
        shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
                                sizeof(struct shmem_inode_info),
-                               0, 0, init_once, NULL);
+                               0, 0, init_once);
        if (shmem_inode_cachep == NULL)
                return -ENOMEM;
        return 0;
@@ -2386,6 +2337,7 @@ static const struct address_space_operations shmem_aops = {
        .writepage      = shmem_writepage,
        .set_page_dirty = __set_page_dirty_no_writeback,
 #ifdef CONFIG_TMPFS
+       .readpage       = shmem_readpage,
        .prepare_write  = shmem_prepare_write,
        .commit_write   = simple_commit_write,
 #endif
@@ -2399,7 +2351,8 @@ static const struct file_operations shmem_file_operations = {
        .read           = shmem_file_read,
        .write          = shmem_file_write,
        .fsync          = simple_sync_file,
-       .sendfile       = shmem_file_sendfile,
+       .splice_read    = generic_file_splice_read,
+       .splice_write   = generic_file_splice_write,
 #endif
 };
 
@@ -2463,8 +2416,7 @@ static const struct super_operations shmem_ops = {
 };
 
 static struct vm_operations_struct shmem_vm_ops = {
-       .nopage         = shmem_nopage,
-       .populate       = shmem_populate,
+       .fault          = shmem_fault,
 #ifdef CONFIG_NUMA
        .set_policy     = shmem_set_policy,
        .get_policy     = shmem_get_policy,