cgroup: make css_for_each_descendant() and friends include the origin css in the...
authorTejun Heo <tj@kernel.org>
Fri, 9 Aug 2013 00:11:27 +0000 (20:11 -0400)
committerTejun Heo <tj@kernel.org>
Fri, 9 Aug 2013 00:11:27 +0000 (20:11 -0400)
Previously, all css descendant iterators didn't include the origin
(root of subtree) css in the iteration.  The reasons were maintaining
consistency with css_for_each_child() and that at the time of
introduction more use cases needed skipping the origin anyway;
however, given that css_is_descendant() considers self to be a
descendant, omitting the origin css has become more confusing and
looking at the accumulated use cases rather clearly indicates that
including origin would result in simpler code overall.

While this is a change which can easily lead to subtle bugs, cgroup
API including the iterators has recently gone through major
restructuring and no out-of-tree changes will be applicable without
adjustments making this a relatively acceptable opportunity for this
type of change.

The conversions are mostly straight-forward.  If the iteration block
had explicit origin handling before or after, it's moved inside the
iteration.  If not, if (pos == origin) continue; is added.  Some
conversions add extra reference get/put around origin handling by
consolidating origin handling and the rest.  While the extra ref
operations aren't strictly necessary, this shouldn't cause any
noticeable difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Balbir Singh <bsingharora@gmail.com>
block/blk-cgroup.c
block/blk-cgroup.h
block/blk-throttle.c
include/linux/cgroup.h
kernel/cgroup.c
kernel/cgroup_freezer.c
kernel/cpuset.c
mm/memcontrol.c
security/device_cgroup.c

index 54ad002..e90c7c1 100644 (file)
@@ -615,12 +615,10 @@ u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
        struct blkcg_policy *pol = blkcg_policy[pd->plid];
        struct blkcg_gq *pos_blkg;
        struct cgroup_subsys_state *pos_css;
-       u64 sum;
+       u64 sum = 0;
 
        lockdep_assert_held(pd->blkg->q->queue_lock);
 
-       sum = blkg_stat_read((void *)pd + off);
-
        rcu_read_lock();
        blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
                struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
@@ -650,13 +648,11 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
        struct blkcg_policy *pol = blkcg_policy[pd->plid];
        struct blkcg_gq *pos_blkg;
        struct cgroup_subsys_state *pos_css;
-       struct blkg_rwstat sum;
+       struct blkg_rwstat sum = { };
        int i;
 
        lockdep_assert_held(pd->blkg->q->queue_lock);
 
-       sum = blkg_rwstat_read((void *)pd + off);
-
        rcu_read_lock();
        blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
                struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
index 8555386..ae6969a 100644 (file)
@@ -291,6 +291,7 @@ struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
  * read locked.  If called under either blkcg or queue lock, the iteration
  * is guaranteed to include all and only online blkgs.  The caller may
  * update @pos_css by calling css_rightmost_descendant() to skip subtree.
+ * @p_blkg is included in the iteration and the first node to be visited.
  */
 #define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)          \
        css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)   \
@@ -304,7 +305,8 @@ struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
  * @p_blkg: target blkg to walk descendants of
  *
  * Similar to blkg_for_each_descendant_pre() but performs post-order
- * traversal instead.  Synchronization rules are the same.
+ * traversal instead.  Synchronization rules are the same.  @p_blkg is
+ * included in the iteration and the last node to be visited.
  */
 #define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)         \
        css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)  \
index 8cefa7f..8331aba 100644 (file)
@@ -1379,7 +1379,6 @@ static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft,
         * restrictions in the whole hierarchy and allows them to bypass
         * blk-throttle.
         */
-       tg_update_has_rules(tg);
        blkg_for_each_descendant_pre(blkg, pos_css, ctx.blkg)
                tg_update_has_rules(blkg_to_tg(blkg));
 
@@ -1639,8 +1638,6 @@ void blk_throtl_drain(struct request_queue *q)
        blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg)
                tg_drain_bios(&blkg_to_tg(blkg)->service_queue);
 
-       tg_drain_bios(&td_root_tg(td)->service_queue);
-
        /* finally, transfer bios from top-level tg's into the td */
        tg_drain_bios(&td->service_queue);
 
index c40e508..8ec5b0f 100644 (file)
@@ -798,7 +798,8 @@ css_rightmost_descendant(struct cgroup_subsys_state *pos);
  * @pos: the css * to use as the loop cursor
  * @root: css whose descendants to walk
  *
- * Walk @root's descendants.  Must be called under rcu_read_lock().  A
+ * Walk @root's descendants.  @root is included in the iteration and the
+ * first node to be visited.  Must be called under rcu_read_lock().  A
  * descendant css which hasn't finished ->css_online() or already has
  * finished ->css_offline() may show up during traversal and it's each
  * subsystem's responsibility to verify that each @pos is alive.
@@ -820,13 +821,12 @@ css_rightmost_descendant(struct cgroup_subsys_state *pos);
  *
  * my_update_state(@css)
  * {
- *     Lock @css;
- *     Update @css's state;
- *     Unlock @css;
- *
  *     css_for_each_descendant_pre(@pos, @css) {
  *             Lock @pos;
- *             Verify @pos is alive and inherit state from @pos's parent;
+ *             if (@pos == @css)
+ *                     Update @css's state;
+ *             else
+ *                     Verify @pos is alive and inherit state from its parent;
  *             Unlock @pos;
  *     }
  * }
@@ -864,8 +864,9 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
  * @css: css whose descendants to walk
  *
  * Similar to css_for_each_descendant_pre() but performs post-order
- * traversal instead.  Note that the walk visibility guarantee described in
- * pre-order walk doesn't apply the same to post-order walks.
+ * traversal instead.  @root is included in the iteration and the last
+ * node to be visited.  Note that the walk visibility guarantee described
+ * in pre-order walk doesn't apply the same to post-order walks.
  */
 #define css_for_each_descendant_post(pos, css)                         \
        for ((pos) = css_next_descendant_post(NULL, (css)); (pos);      \
index c02a288..52f0498 100644 (file)
@@ -2868,17 +2868,6 @@ static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
 
        mutex_unlock(&cgroup_mutex);
 
-       /* @root always needs to be updated */
-       inode = root->dentry->d_inode;
-       mutex_lock(&inode->i_mutex);
-       mutex_lock(&cgroup_mutex);
-       ret = cgroup_addrm_files(root, cfts, is_add);
-       mutex_unlock(&cgroup_mutex);
-       mutex_unlock(&inode->i_mutex);
-
-       if (ret)
-               goto out_deact;
-
        /* add/rm files for all cgroups created before */
        rcu_read_lock();
        css_for_each_descendant_pre(css, cgroup_css(root, ss->subsys_id)) {
@@ -2907,7 +2896,6 @@ static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
        }
        rcu_read_unlock();
        dput(prev);
-out_deact:
        deactivate_super(sb);
        return ret;
 }
@@ -3099,7 +3087,8 @@ EXPORT_SYMBOL_GPL(css_next_child);
  * @root: css whose descendants to walk
  *
  * To be used by css_for_each_descendant_pre().  Find the next descendant
- * to visit for pre-order traversal of @root's descendants.
+ * to visit for pre-order traversal of @root's descendants.  @root is
+ * included in the iteration and the first node to be visited.
  *
  * While this function requires RCU read locking, it doesn't require the
  * whole traversal to be contained in a single RCU critical section.  This
@@ -3114,9 +3103,9 @@ css_next_descendant_pre(struct cgroup_subsys_state *pos,
 
        WARN_ON_ONCE(!rcu_read_lock_held());
 
-       /* if first iteration, pretend we just visited @root */
+       /* if first iteration, visit @root */
        if (!pos)
-               pos = root;
+               return root;
 
        /* visit the first child if exists */
        next = css_next_child(NULL, pos);
@@ -3186,7 +3175,8 @@ css_leftmost_descendant(struct cgroup_subsys_state *pos)
  * @root: css whose descendants to walk
  *
  * To be used by css_for_each_descendant_post().  Find the next descendant
- * to visit for post-order traversal of @root's descendants.
+ * to visit for post-order traversal of @root's descendants.  @root is
+ * included in the iteration and the last node to be visited.
  *
  * While this function requires RCU read locking, it doesn't require the
  * whole traversal to be contained in a single RCU critical section.  This
@@ -3207,14 +3197,17 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
                return next != root ? next : NULL;
        }
 
+       /* if we visited @root, we're done */
+       if (pos == root)
+               return NULL;
+
        /* if there's an unvisited sibling, visit its leftmost descendant */
        next = css_next_child(pos, css_parent(pos));
        if (next)
                return css_leftmost_descendant(next);
 
        /* no sibling left, visit parent */
-       next = css_parent(pos);
-       return next != root ? next : NULL;
+       return css_parent(pos);
 }
 EXPORT_SYMBOL_GPL(css_next_descendant_post);
 
index 224da9a..f0ff64d 100644 (file)
@@ -311,7 +311,6 @@ static int freezer_read(struct cgroup_subsys_state *css, struct cftype *cft,
        /* update states bottom-up */
        css_for_each_descendant_post(pos, css)
                update_if_frozen(pos);
-       update_if_frozen(css);
 
        rcu_read_unlock();
 
@@ -391,11 +390,6 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
 {
        struct cgroup_subsys_state *pos;
 
-       /* update @freezer */
-       spin_lock_irq(&freezer->lock);
-       freezer_apply_state(freezer, freeze, CGROUP_FREEZING_SELF);
-       spin_unlock_irq(&freezer->lock);
-
        /*
         * Update all its descendants in pre-order traversal.  Each
         * descendant will try to inherit its parent's FREEZING state as
@@ -406,14 +400,23 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
                struct freezer *pos_f = css_freezer(pos);
                struct freezer *parent = parent_freezer(pos_f);
 
-               /*
-                * Our update to @parent->state is already visible which is
-                * all we need.  No need to lock @parent.  For more info on
-                * synchronization, see freezer_post_create().
-                */
                spin_lock_irq(&pos_f->lock);
-               freezer_apply_state(pos_f, parent->state & CGROUP_FREEZING,
-                                   CGROUP_FREEZING_PARENT);
+
+               if (pos_f == freezer) {
+                       freezer_apply_state(pos_f, freeze,
+                                           CGROUP_FREEZING_SELF);
+               } else {
+                       /*
+                        * Our update to @parent->state is already visible
+                        * which is all we need.  No need to lock @parent.
+                        * For more info on synchronization, see
+                        * freezer_post_create().
+                        */
+                       freezer_apply_state(pos_f,
+                                           parent->state & CGROUP_FREEZING,
+                                           CGROUP_FREEZING_PARENT);
+               }
+
                spin_unlock_irq(&pos_f->lock);
        }
        rcu_read_unlock();
index bf69717..72a0383 100644 (file)
@@ -222,7 +222,8 @@ static struct cpuset top_cpuset = {
  *
  * Walk @des_cs through the online descendants of @root_cs.  Must be used
  * with RCU read locked.  The caller may modify @pos_css by calling
- * css_rightmost_descendant() to skip subtree.
+ * css_rightmost_descendant() to skip subtree.  @root_cs is included in the
+ * iteration and the first node to be visited.
  */
 #define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs)       \
        css_for_each_descendant_pre((pos_css), &(root_cs)->css)         \
@@ -506,6 +507,9 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr,
 
        rcu_read_lock();
        cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
+               if (cp == root_cs)
+                       continue;
+
                /* skip the whole subtree if @cp doesn't have any CPU */
                if (cpumask_empty(cp->cpus_allowed)) {
                        pos_css = css_rightmost_descendant(pos_css);
@@ -613,6 +617,8 @@ static int generate_sched_domains(cpumask_var_t **domains,
 
        rcu_read_lock();
        cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) {
+               if (cp == &top_cpuset)
+                       continue;
                /*
                 * Continue traversing beyond @cp iff @cp has some CPUs and
                 * isn't load balancing.  The former is obvious.  The
@@ -875,15 +881,17 @@ static void update_tasks_cpumask_hier(struct cpuset *root_cs,
        struct cpuset *cp;
        struct cgroup_subsys_state *pos_css;
 
-       if (update_root)
-               update_tasks_cpumask(root_cs, heap);
-
        rcu_read_lock();
        cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
-               /* skip the whole subtree if @cp have some CPU */
-               if (!cpumask_empty(cp->cpus_allowed)) {
-                       pos_css = css_rightmost_descendant(pos_css);
-                       continue;
+               if (cp == root_cs) {
+                       if (!update_root)
+                               continue;
+               } else {
+                       /* skip the whole subtree if @cp have some CPU */
+                       if (!cpumask_empty(cp->cpus_allowed)) {
+                               pos_css = css_rightmost_descendant(pos_css);
+                               continue;
+                       }
                }
                if (!css_tryget(&cp->css))
                        continue;
@@ -1130,15 +1138,17 @@ static void update_tasks_nodemask_hier(struct cpuset *root_cs,
        struct cpuset *cp;
        struct cgroup_subsys_state *pos_css;
 
-       if (update_root)
-               update_tasks_nodemask(root_cs, heap);
-
        rcu_read_lock();
        cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
-               /* skip the whole subtree if @cp have some CPU */
-               if (!nodes_empty(cp->mems_allowed)) {
-                       pos_css = css_rightmost_descendant(pos_css);
-                       continue;
+               if (cp == root_cs) {
+                       if (!update_root)
+                               continue;
+               } else {
+                       /* skip the whole subtree if @cp have some CPU */
+                       if (!nodes_empty(cp->mems_allowed)) {
+                               pos_css = css_rightmost_descendant(pos_css);
+                               continue;
+                       }
                }
                if (!css_tryget(&cp->css))
                        continue;
@@ -2237,7 +2247,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 
                rcu_read_lock();
                cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
-                       if (!css_tryget(&cs->css))
+                       if (cs == &top_cpuset || !css_tryget(&cs->css))
                                continue;
                        rcu_read_unlock();
 
index 2885e3e..b89d4cb 100644 (file)
@@ -1079,14 +1079,7 @@ static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
 {
        struct cgroup_subsys_state *prev_css, *next_css;
 
-       /*
-        * Root is not visited by cgroup iterators so it needs an
-        * explicit visit.
-        */
-       if (!last_visited)
-               return root;
-
-       prev_css = (last_visited == root) ? NULL : &last_visited->css;
+       prev_css = last_visited ? &last_visited->css : NULL;
 skip_node:
        next_css = css_next_descendant_pre(prev_css, &root->css);
 
index 9bf230a..c123628 100644 (file)
@@ -456,7 +456,7 @@ static int propagate_exception(struct dev_cgroup *devcg_root,
                 * methods), and online ones are safe to access outside RCU
                 * read lock without bumping refcnt.
                 */
-               if (!is_devcg_online(devcg))
+               if (pos == &devcg_root->css || !is_devcg_online(devcg))
                        continue;
 
                rcu_read_unlock();