Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm...
[pandora-kernel.git] / mm / memcontrol.c
index a9559b9..a2c7bcb 100644 (file)
@@ -526,18 +526,14 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 
 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 {
-       /*
-        * The ID of the root cgroup is 0, but memcg treat 0 as an
-        * invalid ID, so we return (cgroup_id + 1).
-        */
-       return memcg->css.cgroup->id + 1;
+       return memcg->css.id;
 }
 
 static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
 {
        struct cgroup_subsys_state *css;
 
-       css = css_from_id(id - 1, &memory_cgrp_subsys);
+       css = css_from_id(id, &memory_cgrp_subsys);
        return mem_cgroup_from_css(css);
 }
 
@@ -570,7 +566,8 @@ void sock_update_memcg(struct sock *sk)
                memcg = mem_cgroup_from_task(current);
                cg_proto = sk->sk_prot->proto_cgroup(memcg);
                if (!mem_cgroup_is_root(memcg) &&
-                   memcg_proto_active(cg_proto) && css_tryget(&memcg->css)) {
+                   memcg_proto_active(cg_proto) &&
+                   css_tryget_online(&memcg->css)) {
                        sk->sk_cgrp = cg_proto;
                }
                rcu_read_unlock();
@@ -831,7 +828,7 @@ retry:
         */
        __mem_cgroup_remove_exceeded(mz, mctz);
        if (!res_counter_soft_limit_excess(&mz->memcg->res) ||
-               !css_tryget(&mz->memcg->css))
+           !css_tryget_online(&mz->memcg->css))
                goto retry;
 done:
        return mz;
@@ -1073,7 +1070,7 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
                        if (unlikely(!memcg))
                                memcg = root_mem_cgroup;
                }
-       } while (!css_tryget(&memcg->css));
+       } while (!css_tryget_online(&memcg->css));
        rcu_read_unlock();
        return memcg;
 }
@@ -1110,7 +1107,8 @@ skip_node:
         */
        if (next_css) {
                if ((next_css == &root->css) ||
-                   ((next_css->flags & CSS_ONLINE) && css_tryget(next_css)))
+                   ((next_css->flags & CSS_ONLINE) &&
+                    css_tryget_online(next_css)))
                        return mem_cgroup_from_css(next_css);
 
                prev_css = next_css;
@@ -1156,7 +1154,7 @@ mem_cgroup_iter_load(struct mem_cgroup_reclaim_iter *iter,
                 * would be returned all the time.
                 */
                if (position && position != root &&
-                               !css_tryget(&position->css))
+                   !css_tryget_online(&position->css))
                        position = NULL;
        }
        return position;
@@ -1533,7 +1531,7 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
 int mem_cgroup_swappiness(struct mem_cgroup *memcg)
 {
        /* root ? */
-       if (mem_cgroup_disabled() || !css_parent(&memcg->css))
+       if (mem_cgroup_disabled() || !memcg->css.parent)
                return vm_swappiness;
 
        return memcg->swappiness;
@@ -2769,9 +2767,9 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
 
 /*
  * A helper function to get mem_cgroup from ID. must be called under
- * rcu_read_lock().  The caller is responsible for calling css_tryget if
- * the mem_cgroup is used for charging. (dropping refcnt from swap can be
- * called against removed memcg.)
+ * rcu_read_lock().  The caller is responsible for calling
+ * css_tryget_online() if the mem_cgroup is used for charging. (dropping
+ * refcnt from swap can be called against removed memcg.)
  */
 static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
 {
@@ -2794,14 +2792,14 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
        lock_page_cgroup(pc);
        if (PageCgroupUsed(pc)) {
                memcg = pc->mem_cgroup;
-               if (memcg && !css_tryget(&memcg->css))
+               if (memcg && !css_tryget_online(&memcg->css))
                        memcg = NULL;
        } else if (PageSwapCache(page)) {
                ent.val = page_private(page);
                id = lookup_swap_cgroup_id(ent);
                rcu_read_lock();
                memcg = mem_cgroup_lookup(id);
-               if (memcg && !css_tryget(&memcg->css))
+               if (memcg && !css_tryget_online(&memcg->css))
                        memcg = NULL;
                rcu_read_unlock();
        }
@@ -3365,7 +3363,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
        }
 
        /* The corresponding put will be done in the workqueue. */
-       if (!css_tryget(&memcg->css))
+       if (!css_tryget_online(&memcg->css))
                goto out;
        rcu_read_unlock();
 
@@ -4125,8 +4123,8 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
        memcg = mem_cgroup_lookup(id);
        if (memcg) {
                /*
-                * We uncharge this because swap is freed.
-                * This memcg can be obsolete one. We avoid calling css_tryget
+                * We uncharge this because swap is freed.  This memcg can
+                * be obsolete one. We avoid calling css_tryget_online().
                 */
                if (!mem_cgroup_is_root(memcg))
                        res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
@@ -4711,18 +4709,28 @@ static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg)
        } while (usage > 0);
 }
 
+/*
+ * Test whether @memcg has children, dead or alive.  Note that this
+ * function doesn't care whether @memcg has use_hierarchy enabled and
+ * returns %true if there are child csses according to the cgroup
+ * hierarchy.  Testing use_hierarchy is the caller's responsiblity.
+ */
 static inline bool memcg_has_children(struct mem_cgroup *memcg)
 {
-       lockdep_assert_held(&memcg_create_mutex);
+       bool ret;
+
        /*
-        * The lock does not prevent addition or deletion to the list
-        * of children, but it prevents a new child from being
-        * initialized based on this parent in css_online(), so it's
-        * enough to decide whether hierarchically inherited
-        * attributes can still be changed or not.
+        * The lock does not prevent addition or deletion of children, but
+        * it prevents a new child from being initialized based on this
+        * parent in css_online(), so it's enough to decide whether
+        * hierarchically inherited attributes can still be changed or not.
         */
-       return memcg->use_hierarchy &&
-               !list_empty(&memcg->css.cgroup->children);
+       lockdep_assert_held(&memcg_create_mutex);
+
+       rcu_read_lock();
+       ret = css_next_child(NULL, &memcg->css);
+       rcu_read_unlock();
+       return ret;
 }
 
 /*
@@ -4734,11 +4742,6 @@ static inline bool memcg_has_children(struct mem_cgroup *memcg)
 static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
 {
        int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
-       struct cgroup *cgrp = memcg->css.cgroup;
-
-       /* returns EBUSY if there is a task or if we come here twice. */
-       if (cgroup_has_tasks(cgrp) || !list_empty(&cgrp->children))
-               return -EBUSY;
 
        /* we call try-to-free pages for make this cgroup empty */
        lru_add_drain_all();
@@ -4758,20 +4761,19 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
                }
 
        }
-       lru_add_drain();
-       mem_cgroup_reparent_charges(memcg);
 
        return 0;
 }
 
-static int mem_cgroup_force_empty_write(struct cgroup_subsys_state *css,
-                                       unsigned int event)
+static ssize_t mem_cgroup_force_empty_write(struct kernfs_open_file *of,
+                                           char *buf, size_t nbytes,
+                                           loff_t off)
 {
-       struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+       struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
 
        if (mem_cgroup_is_root(memcg))
                return -EINVAL;
-       return mem_cgroup_force_empty(memcg);
+       return mem_cgroup_force_empty(memcg) ?: nbytes;
 }
 
 static u64 mem_cgroup_hierarchy_read(struct cgroup_subsys_state *css,
@@ -4785,7 +4787,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css,
 {
        int retval = 0;
        struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-       struct mem_cgroup *parent_memcg = mem_cgroup_from_css(css_parent(&memcg->css));
+       struct mem_cgroup *parent_memcg = mem_cgroup_from_css(memcg->css.parent);
 
        mutex_lock(&memcg_create_mutex);
 
@@ -4802,7 +4804,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css,
         */
        if ((!parent_memcg || !parent_memcg->use_hierarchy) &&
                                (val == 1 || val == 0)) {
-               if (list_empty(&memcg->css.cgroup->children))
+               if (!memcg_has_children(memcg))
                        memcg->use_hierarchy = val;
                else
                        retval = -EBUSY;
@@ -4919,7 +4921,8 @@ static int __memcg_activate_kmem(struct mem_cgroup *memcg,
         * of course permitted.
         */
        mutex_lock(&memcg_create_mutex);
-       if (cgroup_has_tasks(memcg->css.cgroup) || memcg_has_children(memcg))
+       if (cgroup_has_tasks(memcg->css.cgroup) ||
+           (memcg->use_hierarchy && memcg_has_children(memcg)))
                err = -EBUSY;
        mutex_unlock(&memcg_create_mutex);
        if (err)
@@ -5021,17 +5024,18 @@ static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
  * The user of this function is...
  * RES_LIMIT.
  */
-static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
-                           char *buffer)
+static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
+                               char *buf, size_t nbytes, loff_t off)
 {
-       struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+       struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
        enum res_type type;
        int name;
        unsigned long long val;
        int ret;
 
-       type = MEMFILE_TYPE(cft->private);
-       name = MEMFILE_ATTR(cft->private);
+       buf = strstrip(buf);
+       type = MEMFILE_TYPE(of_cft(of)->private);
+       name = MEMFILE_ATTR(of_cft(of)->private);
 
        switch (name) {
        case RES_LIMIT:
@@ -5040,7 +5044,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
                        break;
                }
                /* This function does all necessary parse...reuse it */
-               ret = res_counter_memparse_write_strategy(buffer, &val);
+               ret = res_counter_memparse_write_strategy(buf, &val);
                if (ret)
                        break;
                if (type == _MEM)
@@ -5053,7 +5057,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
                        return -EINVAL;
                break;
        case RES_SOFT_LIMIT:
-               ret = res_counter_memparse_write_strategy(buffer, &val);
+               ret = res_counter_memparse_write_strategy(buf, &val);
                if (ret)
                        break;
                /*
@@ -5070,7 +5074,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
                ret = -EINVAL; /* should be BUG() ? */
                break;
        }
-       return ret;
+       return ret ?: nbytes;
 }
 
 static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
@@ -5083,8 +5087,8 @@ static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
        if (!memcg->use_hierarchy)
                goto out;
 
-       while (css_parent(&memcg->css)) {
-               memcg = mem_cgroup_from_css(css_parent(&memcg->css));
+       while (memcg->css.parent) {
+               memcg = mem_cgroup_from_css(memcg->css.parent);
                if (!memcg->use_hierarchy)
                        break;
                tmp = res_counter_read_u64(&memcg->res, RES_LIMIT);
@@ -5097,14 +5101,15 @@ out:
        *memsw_limit = min_memsw_limit;
 }
 
-static int mem_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
+static ssize_t mem_cgroup_reset(struct kernfs_open_file *of, char *buf,
+                               size_t nbytes, loff_t off)
 {
-       struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+       struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
        int name;
        enum res_type type;
 
-       type = MEMFILE_TYPE(event);
-       name = MEMFILE_ATTR(event);
+       type = MEMFILE_TYPE(of_cft(of)->private);
+       name = MEMFILE_ATTR(of_cft(of)->private);
 
        switch (name) {
        case RES_MAX_USAGE:
@@ -5129,7 +5134,7 @@ static int mem_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
                break;
        }
 
-       return 0;
+       return nbytes;
 }
 
 static u64 mem_cgroup_move_charge_read(struct cgroup_subsys_state *css,
@@ -5322,7 +5327,7 @@ static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css,
        if (val > 100)
                return -EINVAL;
 
-       if (css_parent(css))
+       if (css->parent)
                memcg->swappiness = val;
        else
                vm_swappiness = val;
@@ -5659,7 +5664,7 @@ static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css,
        struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
        /* cannot set to root cgroup and only 0 and 1 are allowed */
-       if (!css_parent(css) || !((val == 0) || (val == 1)))
+       if (!css->parent || !((val == 0) || (val == 1)))
                return -EINVAL;
 
        memcg->oom_kill_disable = val;
@@ -5705,10 +5710,10 @@ static void kmem_cgroup_css_offline(struct mem_cgroup *memcg)
         * which is then paired with css_put during uncharge resp. here.
         *
         * Although this might sound strange as this path is called from
-        * css_offline() when the referencemight have dropped down to 0
-        * and shouldn't be incremented anymore (css_tryget would fail)
-        * we do not have other options because of the kmem allocations
-        * lifetime.
+        * css_offline() when the referencemight have dropped down to 0 and
+        * shouldn't be incremented anymore (css_tryget_online() would
+        * fail) we do not have other options because of the kmem
+        * allocations lifetime.
         */
        css_get(&memcg->css);
 
@@ -5827,9 +5832,10 @@ static void memcg_event_ptable_queue_proc(struct file *file,
  * Input must be in format '<event_fd> <control_fd> <args>'.
  * Interpretation of args is defined by control file implementation.
  */
-static int memcg_write_event_control(struct cgroup_subsys_state *css,
-                                    struct cftype *cft, char *buffer)
+static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
+                                        char *buf, size_t nbytes, loff_t off)
 {
+       struct cgroup_subsys_state *css = of_css(of);
        struct mem_cgroup *memcg = mem_cgroup_from_css(css);
        struct mem_cgroup_event *event;
        struct cgroup_subsys_state *cfile_css;
@@ -5840,15 +5846,17 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css,
        char *endp;
        int ret;
 
-       efd = simple_strtoul(buffer, &endp, 10);
+       buf = strstrip(buf);
+
+       efd = simple_strtoul(buf, &endp, 10);
        if (*endp != ' ')
                return -EINVAL;
-       buffer = endp + 1;
+       buf = endp + 1;
 
-       cfd = simple_strtoul(buffer, &endp, 10);
+       cfd = simple_strtoul(buf, &endp, 10);
        if ((*endp != ' ') && (*endp != '\0'))
                return -EINVAL;
-       buffer = endp + 1;
+       buf = endp + 1;
 
        event = kzalloc(sizeof(*event), GFP_KERNEL);
        if (!event)
@@ -5916,8 +5924,8 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css,
         * automatically removed on cgroup destruction but the removal is
         * asynchronous, so take an extra ref on @css.
         */
-       cfile_css = css_tryget_from_dir(cfile.file->f_dentry->d_parent,
-                                       &memory_cgrp_subsys);
+       cfile_css = css_tryget_online_from_dir(cfile.file->f_dentry->d_parent,
+                                              &memory_cgrp_subsys);
        ret = -EINVAL;
        if (IS_ERR(cfile_css))
                goto out_put_cfile;
@@ -5926,7 +5934,7 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css,
                goto out_put_cfile;
        }
 
-       ret = event->register_event(memcg, event->eventfd, buffer);
+       ret = event->register_event(memcg, event->eventfd, buf);
        if (ret)
                goto out_put_css;
 
@@ -5939,7 +5947,7 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css,
        fdput(cfile);
        fdput(efile);
 
-       return 0;
+       return nbytes;
 
 out_put_css:
        css_put(css);
@@ -5964,25 +5972,25 @@ static struct cftype mem_cgroup_files[] = {
        {
                .name = "max_usage_in_bytes",
                .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE),
-               .trigger = mem_cgroup_reset,
+               .write = mem_cgroup_reset,
                .read_u64 = mem_cgroup_read_u64,
        },
        {
                .name = "limit_in_bytes",
                .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT),
-               .write_string = mem_cgroup_write,
+               .write = mem_cgroup_write,
                .read_u64 = mem_cgroup_read_u64,
        },
        {
                .name = "soft_limit_in_bytes",
                .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT),
-               .write_string = mem_cgroup_write,
+               .write = mem_cgroup_write,
                .read_u64 = mem_cgroup_read_u64,
        },
        {
                .name = "failcnt",
                .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT),
-               .trigger = mem_cgroup_reset,
+               .write = mem_cgroup_reset,
                .read_u64 = mem_cgroup_read_u64,
        },
        {
@@ -5991,7 +5999,7 @@ static struct cftype mem_cgroup_files[] = {
        },
        {
                .name = "force_empty",
-               .trigger = mem_cgroup_force_empty_write,
+               .write = mem_cgroup_force_empty_write,
        },
        {
                .name = "use_hierarchy",
@@ -6001,7 +6009,7 @@ static struct cftype mem_cgroup_files[] = {
        },
        {
                .name = "cgroup.event_control",         /* XXX: for compat */
-               .write_string = memcg_write_event_control,
+               .write = memcg_write_event_control,
                .flags = CFTYPE_NO_PREFIX,
                .mode = S_IWUGO,
        },
@@ -6034,7 +6042,7 @@ static struct cftype mem_cgroup_files[] = {
        {
                .name = "kmem.limit_in_bytes",
                .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT),
-               .write_string = mem_cgroup_write,
+               .write = mem_cgroup_write,
                .read_u64 = mem_cgroup_read_u64,
        },
        {
@@ -6045,13 +6053,13 @@ static struct cftype mem_cgroup_files[] = {
        {
                .name = "kmem.failcnt",
                .private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT),
-               .trigger = mem_cgroup_reset,
+               .write = mem_cgroup_reset,
                .read_u64 = mem_cgroup_read_u64,
        },
        {
                .name = "kmem.max_usage_in_bytes",
                .private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE),
-               .trigger = mem_cgroup_reset,
+               .write = mem_cgroup_reset,
                .read_u64 = mem_cgroup_read_u64,
        },
 #ifdef CONFIG_SLABINFO
@@ -6074,19 +6082,19 @@ static struct cftype memsw_cgroup_files[] = {
        {
                .name = "memsw.max_usage_in_bytes",
                .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE),
-               .trigger = mem_cgroup_reset,
+               .write = mem_cgroup_reset,
                .read_u64 = mem_cgroup_read_u64,
        },
        {
                .name = "memsw.limit_in_bytes",
                .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT),
-               .write_string = mem_cgroup_write,
+               .write = mem_cgroup_write,
                .read_u64 = mem_cgroup_read_u64,
        },
        {
                .name = "memsw.failcnt",
                .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT),
-               .trigger = mem_cgroup_reset,
+               .write = mem_cgroup_reset,
                .read_u64 = mem_cgroup_read_u64,
        },
        { },    /* terminate */
@@ -6264,9 +6272,9 @@ static int
 mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-       struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
+       struct mem_cgroup *parent = mem_cgroup_from_css(css->parent);
 
-       if (css->cgroup->id > MEM_CGROUP_ID_MAX)
+       if (css->id > MEM_CGROUP_ID_MAX)
                return -ENOSPC;
 
        if (!parent)
@@ -6361,7 +6369,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
        /*
         * XXX: css_offline() would be where we should reparent all
         * memory to prepare the cgroup for destruction.  However,
-        * memcg does not do css_tryget() and res_counter charging
+        * memcg does not do css_tryget_online() and res_counter charging
         * under the same RCU lock region, which means that charging
         * could race with offlining.  Offlining only happens to
         * cgroups with no tasks in them but charges can show up
@@ -6375,9 +6383,9 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
         *                           lookup_swap_cgroup_id()
         *                           rcu_read_lock()
         *                           mem_cgroup_lookup()
-        *                           css_tryget()
+        *                           css_tryget_online()
         *                           rcu_read_unlock()
-        * disable css_tryget()
+        * disable css_tryget_online()
         * call_rcu()
         *   offline_css()
         *     reparent_charges()