Merge git://git.infradead.org/~dwmw2/random-2.6
[pandora-kernel.git] / mm / shmem.c
index f514dd3..f92fea9 100644 (file)
@@ -201,7 +201,7 @@ static struct vm_operations_struct shmem_vm_ops;
 
 static struct backing_dev_info shmem_backing_dev_info  __read_mostly = {
        .ra_pages       = 0,    /* No readahead */
-       .capabilities   = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+       .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK,
        .unplug_io_fn   = default_unplug_io_fn,
 };
 
@@ -922,20 +922,26 @@ found:
        error = 1;
        if (!inode)
                goto out;
-       /* Precharge page while we can wait, compensate afterwards */
+       /* Precharge page using GFP_KERNEL while we can wait */
        error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
        if (error)
                goto out;
        error = radix_tree_preload(GFP_KERNEL);
-       if (error)
-               goto uncharge;
+       if (error) {
+               mem_cgroup_uncharge_cache_page(page);
+               goto out;
+       }
        error = 1;
 
        spin_lock(&info->lock);
        ptr = shmem_swp_entry(info, idx, NULL);
-       if (ptr && ptr->val == entry.val)
+       if (ptr && ptr->val == entry.val) {
                error = add_to_page_cache(page, inode->i_mapping,
                                                idx, GFP_NOWAIT);
+               /* does mem_cgroup_uncharge_cache_page on error */
+       } else  /* we must compensate for our precharge above */
+               mem_cgroup_uncharge_cache_page(page);
+
        if (error == -EEXIST) {
                struct page *filepage = find_get_page(inode->i_mapping, idx);
                error = 1;
@@ -961,8 +967,6 @@ found:
                shmem_swp_unmap(ptr);
        spin_unlock(&info->lock);
        radix_tree_preload_end();
-uncharge:
-       mem_cgroup_uncharge_page(page);
 out:
        unlock_page(page);
        page_cache_release(page);
@@ -1079,104 +1083,47 @@ redirty:
 
 #ifdef CONFIG_NUMA
 #ifdef CONFIG_TMPFS
-static int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
+static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
 {
-       char *nodelist = strchr(value, ':');
-       int err = 1;
+       char buffer[64];
 
-       if (nodelist) {
-               /* NUL-terminate policy string */
-               *nodelist++ = '\0';
-               if (nodelist_parse(nodelist, *policy_nodes))
-                       goto out;
-               if (!nodes_subset(*policy_nodes, node_states[N_HIGH_MEMORY]))
-                       goto out;
-       }
-       if (!strcmp(value, "default")) {
-               *policy = MPOL_DEFAULT;
-               /* Don't allow a nodelist */
-               if (!nodelist)
-                       err = 0;
-       } else if (!strcmp(value, "prefer")) {
-               *policy = MPOL_PREFERRED;
-               /* Insist on a nodelist of one node only */
-               if (nodelist) {
-                       char *rest = nodelist;
-                       while (isdigit(*rest))
-                               rest++;
-                       if (!*rest)
-                               err = 0;
-               }
-       } else if (!strcmp(value, "bind")) {
-               *policy = MPOL_BIND;
-               /* Insist on a nodelist */
-               if (nodelist)
-                       err = 0;
-       } else if (!strcmp(value, "interleave")) {
-               *policy = MPOL_INTERLEAVE;
-               /*
-                * Default to online nodes with memory if no nodelist
-                */
-               if (!nodelist)
-                       *policy_nodes = node_states[N_HIGH_MEMORY];
-               err = 0;
-       }
-out:
-       /* Restore string for error message */
-       if (nodelist)
-               *--nodelist = ':';
-       return err;
-}
+       if (!mpol || mpol->mode == MPOL_DEFAULT)
+               return;         /* show nothing */
 
-static void shmem_show_mpol(struct seq_file *seq, int policy,
-                           const nodemask_t policy_nodes)
-{
-       char *policy_string;
+       mpol_to_str(buffer, sizeof(buffer), mpol, 1);
 
-       switch (policy) {
-       case MPOL_PREFERRED:
-               policy_string = "prefer";
-               break;
-       case MPOL_BIND:
-               policy_string = "bind";
-               break;
-       case MPOL_INTERLEAVE:
-               policy_string = "interleave";
-               break;
-       default:
-               /* MPOL_DEFAULT */
-               return;
-       }
-
-       seq_printf(seq, ",mpol=%s", policy_string);
-
-       if (policy != MPOL_INTERLEAVE ||
-           !nodes_equal(policy_nodes, node_states[N_HIGH_MEMORY])) {
-               char buffer[64];
-               int len;
+       seq_printf(seq, ",mpol=%s", buffer);
+}
 
-               len = nodelist_scnprintf(buffer, sizeof(buffer), policy_nodes);
-               if (len < sizeof(buffer))
-                       seq_printf(seq, ":%s", buffer);
-               else
-                       seq_printf(seq, ":?");
+static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
+{
+       struct mempolicy *mpol = NULL;
+       if (sbinfo->mpol) {
+               spin_lock(&sbinfo->stat_lock);  /* prevent replace/use races */
+               mpol = sbinfo->mpol;
+               mpol_get(mpol);
+               spin_unlock(&sbinfo->stat_lock);
        }
+       return mpol;
 }
 #endif /* CONFIG_TMPFS */
 
 static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
                        struct shmem_inode_info *info, unsigned long idx)
 {
+       struct mempolicy mpol, *spol;
        struct vm_area_struct pvma;
        struct page *page;
 
+       spol = mpol_cond_copy(&mpol,
+                               mpol_shared_policy_lookup(&info->policy, idx));
+
        /* Create a pseudo vma that just contains the policy */
        pvma.vm_start = 0;
        pvma.vm_pgoff = idx;
        pvma.vm_ops = NULL;
-       pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
+       pvma.vm_policy = spol;
        page = swapin_readahead(entry, gfp, &pvma, 0);
-       mpol_free(pvma.vm_policy);
        return page;
 }
 
@@ -1184,27 +1131,21 @@ static struct page *shmem_alloc_page(gfp_t gfp,
                        struct shmem_inode_info *info, unsigned long idx)
 {
        struct vm_area_struct pvma;
-       struct page *page;
 
        /* Create a pseudo vma that just contains the policy */
        pvma.vm_start = 0;
        pvma.vm_pgoff = idx;
        pvma.vm_ops = NULL;
        pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
-       page = alloc_page_vma(gfp, &pvma, 0);
-       mpol_free(pvma.vm_policy);
-       return page;
+
+       /*
+        * alloc_page_vma() will drop the shared policy reference
+        */
+       return alloc_page_vma(gfp, &pvma, 0);
 }
 #else /* !CONFIG_NUMA */
 #ifdef CONFIG_TMPFS
-static inline int shmem_parse_mpol(char *value, int *policy,
-                                               nodemask_t *policy_nodes)
-{
-       return 1;
-}
-
-static inline void shmem_show_mpol(struct seq_file *seq, int policy,
-                           const nodemask_t policy_nodes)
+static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p)
 {
 }
 #endif /* CONFIG_TMPFS */
@@ -1222,6 +1163,13 @@ static inline struct page *shmem_alloc_page(gfp_t gfp,
 }
 #endif /* CONFIG_NUMA */
 
+#if !defined(CONFIG_NUMA) || !defined(CONFIG_TMPFS)
+static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
+{
+       return NULL;
+}
+#endif
+
 /*
  * shmem_getpage - either get the page from swap or allocate a new one
  *
@@ -1367,17 +1315,14 @@ repeat:
                        shmem_swp_unmap(entry);
                        spin_unlock(&info->lock);
                        unlock_page(swappage);
+                       page_cache_release(swappage);
                        if (error == -ENOMEM) {
                                /* allow reclaim from this memory cgroup */
-                               error = mem_cgroup_cache_charge(swappage,
-                                       current->mm, gfp & ~__GFP_HIGHMEM);
-                               if (error) {
-                                       page_cache_release(swappage);
+                               error = mem_cgroup_shrink_usage(current->mm,
+                                                               gfp);
+                               if (error)
                                        goto failed;
-                               }
-                               mem_cgroup_uncharge_page(swappage);
                        }
-                       page_cache_release(swappage);
                        goto repeat;
                }
        } else if (sgp == SGP_READ && !filepage) {
@@ -1414,6 +1359,8 @@ repeat:
                }
 
                if (!filepage) {
+                       int ret;
+
                        spin_unlock(&info->lock);
                        filepage = shmem_alloc_page(gfp, info, idx);
                        if (!filepage) {
@@ -1442,10 +1389,18 @@ repeat:
                                swap = *entry;
                                shmem_swp_unmap(entry);
                        }
-                       if (error || swap.val || 0 != add_to_page_cache_lru(
-                                       filepage, mapping, idx, GFP_NOWAIT)) {
+                       ret = error || swap.val;
+                       if (ret)
+                               mem_cgroup_uncharge_cache_page(filepage);
+                       else
+                               ret = add_to_page_cache_lru(filepage, mapping,
+                                               idx, GFP_NOWAIT);
+                       /*
+                        * At add_to_page_cache_lru() failure, uncharge will
+                        * be done automatically.
+                        */
+                       if (ret) {
                                spin_unlock(&info->lock);
-                               mem_cgroup_uncharge_page(filepage);
                                page_cache_release(filepage);
                                shmem_unacct_blocks(info->flags, 1);
                                shmem_free_blocks(inode, 1);
@@ -1454,7 +1409,6 @@ repeat:
                                        goto failed;
                                goto repeat;
                        }
-                       mem_cgroup_uncharge_page(filepage);
                        info->flags |= SHMEM_PAGEIN;
                }
 
@@ -1576,8 +1530,8 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
                case S_IFREG:
                        inode->i_op = &shmem_inode_operations;
                        inode->i_fop = &shmem_file_operations;
-                       mpol_shared_policy_init(&info->policy, sbinfo->policy,
-                                                       &sbinfo->policy_nodes);
+                       mpol_shared_policy_init(&info->policy,
+                                                shmem_get_sbmpol(sbinfo));
                        break;
                case S_IFDIR:
                        inc_nlink(inode);
@@ -1591,8 +1545,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
                         * Must not load anything in the rbtree,
                         * mpol_free_shared_policy will not be called.
                         */
-                       mpol_shared_policy_init(&info->policy, MPOL_DEFAULT,
-                                               NULL);
+                       mpol_shared_policy_init(&info->policy, NULL);
                        break;
                }
        } else
@@ -1747,26 +1700,38 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
        file_accessed(filp);
 }
 
-static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
+static ssize_t shmem_file_aio_read(struct kiocb *iocb,
+               const struct iovec *iov, unsigned long nr_segs, loff_t pos)
 {
-       read_descriptor_t desc;
+       struct file *filp = iocb->ki_filp;
+       ssize_t retval;
+       unsigned long seg;
+       size_t count;
+       loff_t *ppos = &iocb->ki_pos;
 
-       if ((ssize_t) count < 0)
-               return -EINVAL;
-       if (!access_ok(VERIFY_WRITE, buf, count))
-               return -EFAULT;
-       if (!count)
-               return 0;
+       retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
+       if (retval)
+               return retval;
 
-       desc.written = 0;
-       desc.count = count;
-       desc.arg.buf = buf;
-       desc.error = 0;
+       for (seg = 0; seg < nr_segs; seg++) {
+               read_descriptor_t desc;
 
-       do_shmem_file_read(filp, ppos, &desc, file_read_actor);
-       if (desc.written)
-               return desc.written;
-       return desc.error;
+               desc.written = 0;
+               desc.arg.buf = iov[seg].iov_base;
+               desc.count = iov[seg].iov_len;
+               if (desc.count == 0)
+                       continue;
+               desc.error = 0;
+               do_shmem_file_read(filp, ppos, &desc, file_read_actor);
+               retval += desc.written;
+               if (desc.error) {
+                       retval = retval ?: desc.error;
+                       break;
+               }
+               if (desc.count > 0)
+                       break;
+       }
+       return retval;
 }
 
 static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -2207,8 +2172,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
                        if (*rest)
                                goto bad_val;
                } else if (!strcmp(this_char,"mpol")) {
-                       if (shmem_parse_mpol(value, &sbinfo->policy,
-                                            &sbinfo->policy_nodes))
+                       if (mpol_parse_str(value, &sbinfo->mpol, 1))
                                goto bad_val;
                } else {
                        printk(KERN_ERR "tmpfs: Bad mount option %s\n",
@@ -2259,8 +2223,9 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
        sbinfo->free_blocks = config.max_blocks - blocks;
        sbinfo->max_inodes  = config.max_inodes;
        sbinfo->free_inodes = config.max_inodes - inodes;
-       sbinfo->policy      = config.policy;
-       sbinfo->policy_nodes = config.policy_nodes;
+
+       mpol_put(sbinfo->mpol);
+       sbinfo->mpol        = config.mpol;      /* transfers initial ref */
 out:
        spin_unlock(&sbinfo->stat_lock);
        return error;
@@ -2281,7 +2246,7 @@ static int shmem_show_options(struct seq_file *seq, struct vfsmount *vfs)
                seq_printf(seq, ",uid=%u", sbinfo->uid);
        if (sbinfo->gid != 0)
                seq_printf(seq, ",gid=%u", sbinfo->gid);
-       shmem_show_mpol(seq, sbinfo->policy, sbinfo->policy_nodes);
+       shmem_show_mpol(seq, sbinfo->mpol);
        return 0;
 }
 #endif /* CONFIG_TMPFS */
@@ -2311,8 +2276,7 @@ static int shmem_fill_super(struct super_block *sb,
        sbinfo->mode = S_IRWXUGO | S_ISVTX;
        sbinfo->uid = current->fsuid;
        sbinfo->gid = current->fsgid;
-       sbinfo->policy = MPOL_DEFAULT;
-       sbinfo->policy_nodes = node_states[N_HIGH_MEMORY];
+       sbinfo->mpol = NULL;
        sb->s_fs_info = sbinfo;
 
 #ifdef CONFIG_TMPFS
@@ -2427,8 +2391,9 @@ static const struct file_operations shmem_file_operations = {
        .mmap           = shmem_mmap,
 #ifdef CONFIG_TMPFS
        .llseek         = generic_file_llseek,
-       .read           = shmem_file_read,
+       .read           = do_sync_read,
        .write          = do_sync_write,
+       .aio_read       = shmem_file_aio_read,
        .aio_write      = generic_file_aio_write,
        .fsync          = simple_sync_file,
        .splice_read    = generic_file_splice_read,