Merge git://git.infradead.org/~dwmw2/random-2.6
[pandora-kernel.git] / mm / shmem.c
index 3c620dc..f92fea9 100644 (file)
@@ -201,7 +201,7 @@ static struct vm_operations_struct shmem_vm_ops;
 
 static struct backing_dev_info shmem_backing_dev_info  __read_mostly = {
        .ra_pages       = 0,    /* No readahead */
-       .capabilities   = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+       .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK,
        .unplug_io_fn   = default_unplug_io_fn,
 };
 
@@ -922,20 +922,26 @@ found:
        error = 1;
        if (!inode)
                goto out;
-       /* Precharge page while we can wait, compensate afterwards */
+       /* Precharge page using GFP_KERNEL while we can wait */
        error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
        if (error)
                goto out;
        error = radix_tree_preload(GFP_KERNEL);
-       if (error)
-               goto uncharge;
+       if (error) {
+               mem_cgroup_uncharge_cache_page(page);
+               goto out;
+       }
        error = 1;
 
        spin_lock(&info->lock);
        ptr = shmem_swp_entry(info, idx, NULL);
-       if (ptr && ptr->val == entry.val)
+       if (ptr && ptr->val == entry.val) {
                error = add_to_page_cache(page, inode->i_mapping,
                                                idx, GFP_NOWAIT);
+               /* does mem_cgroup_uncharge_cache_page on error */
+       } else  /* we must compensate for our precharge above */
+               mem_cgroup_uncharge_cache_page(page);
+
        if (error == -EEXIST) {
                struct page *filepage = find_get_page(inode->i_mapping, idx);
                error = 1;
@@ -961,8 +967,6 @@ found:
                shmem_swp_unmap(ptr);
        spin_unlock(&info->lock);
        radix_tree_preload_end();
-uncharge:
-       mem_cgroup_uncharge_page(page);
 out:
        unlock_page(page);
        page_cache_release(page);
@@ -1079,23 +1083,29 @@ redirty:
 
 #ifdef CONFIG_NUMA
 #ifdef CONFIG_TMPFS
-static void shmem_show_mpol(struct seq_file *seq, unsigned short mode,
-                       unsigned short flags, const nodemask_t policy_nodes)
+static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
 {
-       struct mempolicy temp;
        char buffer[64];
 
-       if (mode == MPOL_DEFAULT)
+       if (!mpol || mpol->mode == MPOL_DEFAULT)
                return;         /* show nothing */
 
-       temp.mode = mode;
-       temp.flags = flags;
-       temp.v.nodes = policy_nodes;
-
-       mpol_to_str(buffer, sizeof(buffer), &temp);
+       mpol_to_str(buffer, sizeof(buffer), mpol, 1);
 
        seq_printf(seq, ",mpol=%s", buffer);
 }
+
+static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
+{
+       struct mempolicy *mpol = NULL;
+       if (sbinfo->mpol) {
+               spin_lock(&sbinfo->stat_lock);  /* prevent replace/use races */
+               mpol = sbinfo->mpol;
+               mpol_get(mpol);
+               spin_unlock(&sbinfo->stat_lock);
+       }
+       return mpol;
+}
 #endif /* CONFIG_TMPFS */
 
 static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
@@ -1135,8 +1145,7 @@ static struct page *shmem_alloc_page(gfp_t gfp,
 }
 #else /* !CONFIG_NUMA */
 #ifdef CONFIG_TMPFS
-static inline void shmem_show_mpol(struct seq_file *seq, unsigned short policy,
-                       unsigned short flags, const nodemask_t policy_nodes)
+static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p)
 {
 }
 #endif /* CONFIG_TMPFS */
@@ -1154,6 +1163,13 @@ static inline struct page *shmem_alloc_page(gfp_t gfp,
 }
 #endif /* CONFIG_NUMA */
 
+#if !defined(CONFIG_NUMA) || !defined(CONFIG_TMPFS)
+static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
+{
+       return NULL;
+}
+#endif
+
 /*
  * shmem_getpage - either get the page from swap or allocate a new one
  *
@@ -1299,17 +1315,14 @@ repeat:
                        shmem_swp_unmap(entry);
                        spin_unlock(&info->lock);
                        unlock_page(swappage);
+                       page_cache_release(swappage);
                        if (error == -ENOMEM) {
                                /* allow reclaim from this memory cgroup */
-                               error = mem_cgroup_cache_charge(swappage,
-                                       current->mm, gfp & ~__GFP_HIGHMEM);
-                               if (error) {
-                                       page_cache_release(swappage);
+                               error = mem_cgroup_shrink_usage(current->mm,
+                                                               gfp);
+                               if (error)
                                        goto failed;
-                               }
-                               mem_cgroup_uncharge_page(swappage);
                        }
-                       page_cache_release(swappage);
                        goto repeat;
                }
        } else if (sgp == SGP_READ && !filepage) {
@@ -1346,6 +1359,8 @@ repeat:
                }
 
                if (!filepage) {
+                       int ret;
+
                        spin_unlock(&info->lock);
                        filepage = shmem_alloc_page(gfp, info, idx);
                        if (!filepage) {
@@ -1374,10 +1389,18 @@ repeat:
                                swap = *entry;
                                shmem_swp_unmap(entry);
                        }
-                       if (error || swap.val || 0 != add_to_page_cache_lru(
-                                       filepage, mapping, idx, GFP_NOWAIT)) {
+                       ret = error || swap.val;
+                       if (ret)
+                               mem_cgroup_uncharge_cache_page(filepage);
+                       else
+                               ret = add_to_page_cache_lru(filepage, mapping,
+                                               idx, GFP_NOWAIT);
+                       /*
+                        * At add_to_page_cache_lru() failure, uncharge will
+                        * be done automatically.
+                        */
+                       if (ret) {
                                spin_unlock(&info->lock);
-                               mem_cgroup_uncharge_page(filepage);
                                page_cache_release(filepage);
                                shmem_unacct_blocks(info->flags, 1);
                                shmem_free_blocks(inode, 1);
@@ -1386,7 +1409,6 @@ repeat:
                                        goto failed;
                                goto repeat;
                        }
-                       mem_cgroup_uncharge_page(filepage);
                        info->flags |= SHMEM_PAGEIN;
                }
 
@@ -1508,8 +1530,8 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
                case S_IFREG:
                        inode->i_op = &shmem_inode_operations;
                        inode->i_fop = &shmem_file_operations;
-                       mpol_shared_policy_init(&info->policy, sbinfo->policy,
-                                       sbinfo->flags, &sbinfo->policy_nodes);
+                       mpol_shared_policy_init(&info->policy,
+                                                shmem_get_sbmpol(sbinfo));
                        break;
                case S_IFDIR:
                        inc_nlink(inode);
@@ -1523,8 +1545,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
                         * Must not load anything in the rbtree,
                         * mpol_free_shared_policy will not be called.
                         */
-                       mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, 0,
-                                               NULL);
+                       mpol_shared_policy_init(&info->policy, NULL);
                        break;
                }
        } else
@@ -1679,26 +1700,38 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
        file_accessed(filp);
 }
 
-static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
+static ssize_t shmem_file_aio_read(struct kiocb *iocb,
+               const struct iovec *iov, unsigned long nr_segs, loff_t pos)
 {
-       read_descriptor_t desc;
+       struct file *filp = iocb->ki_filp;
+       ssize_t retval;
+       unsigned long seg;
+       size_t count;
+       loff_t *ppos = &iocb->ki_pos;
 
-       if ((ssize_t) count < 0)
-               return -EINVAL;
-       if (!access_ok(VERIFY_WRITE, buf, count))
-               return -EFAULT;
-       if (!count)
-               return 0;
+       retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
+       if (retval)
+               return retval;
 
-       desc.written = 0;
-       desc.count = count;
-       desc.arg.buf = buf;
-       desc.error = 0;
+       for (seg = 0; seg < nr_segs; seg++) {
+               read_descriptor_t desc;
 
-       do_shmem_file_read(filp, ppos, &desc, file_read_actor);
-       if (desc.written)
-               return desc.written;
-       return desc.error;
+               desc.written = 0;
+               desc.arg.buf = iov[seg].iov_base;
+               desc.count = iov[seg].iov_len;
+               if (desc.count == 0)
+                       continue;
+               desc.error = 0;
+               do_shmem_file_read(filp, ppos, &desc, file_read_actor);
+               retval += desc.written;
+               if (desc.error) {
+                       retval = retval ?: desc.error;
+                       break;
+               }
+               if (desc.count > 0)
+                       break;
+       }
+       return retval;
 }
 
 static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -2139,8 +2172,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
                        if (*rest)
                                goto bad_val;
                } else if (!strcmp(this_char,"mpol")) {
-                       if (mpol_parse_str(value, &sbinfo->policy,
-                                        &sbinfo->flags, &sbinfo->policy_nodes))
+                       if (mpol_parse_str(value, &sbinfo->mpol, 1))
                                goto bad_val;
                } else {
                        printk(KERN_ERR "tmpfs: Bad mount option %s\n",
@@ -2191,9 +2223,9 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
        sbinfo->free_blocks = config.max_blocks - blocks;
        sbinfo->max_inodes  = config.max_inodes;
        sbinfo->free_inodes = config.max_inodes - inodes;
-       sbinfo->policy      = config.policy;
-       sbinfo->flags       = config.flags;
-       sbinfo->policy_nodes = config.policy_nodes;
+
+       mpol_put(sbinfo->mpol);
+       sbinfo->mpol        = config.mpol;      /* transfers initial ref */
 out:
        spin_unlock(&sbinfo->stat_lock);
        return error;
@@ -2214,8 +2246,7 @@ static int shmem_show_options(struct seq_file *seq, struct vfsmount *vfs)
                seq_printf(seq, ",uid=%u", sbinfo->uid);
        if (sbinfo->gid != 0)
                seq_printf(seq, ",gid=%u", sbinfo->gid);
-       shmem_show_mpol(seq, sbinfo->policy, sbinfo->flags,
-                       sbinfo->policy_nodes);
+       shmem_show_mpol(seq, sbinfo->mpol);
        return 0;
 }
 #endif /* CONFIG_TMPFS */
@@ -2245,9 +2276,7 @@ static int shmem_fill_super(struct super_block *sb,
        sbinfo->mode = S_IRWXUGO | S_ISVTX;
        sbinfo->uid = current->fsuid;
        sbinfo->gid = current->fsgid;
-       sbinfo->policy = MPOL_DEFAULT;
-       sbinfo->flags = 0;
-       sbinfo->policy_nodes = node_states[N_HIGH_MEMORY];
+       sbinfo->mpol = NULL;
        sb->s_fs_info = sbinfo;
 
 #ifdef CONFIG_TMPFS
@@ -2362,8 +2391,9 @@ static const struct file_operations shmem_file_operations = {
        .mmap           = shmem_mmap,
 #ifdef CONFIG_TMPFS
        .llseek         = generic_file_llseek,
-       .read           = shmem_file_read,
+       .read           = do_sync_read,
        .write          = do_sync_write,
+       .aio_read       = shmem_file_aio_read,
        .aio_write      = generic_file_aio_write,
        .fsync          = simple_sync_file,
        .splice_read    = generic_file_splice_read,