Documentation: lzo: document part of the encoding

[pandora-kernel.git] / kernel / cgroup.c
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index 70776ae..940aced 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -149,12 +149,14 @@ struct cgroup_root cgrp_dfl_root;
   */
  static bool cgrp_dfl_root_visible;
  
+/*
+ * Set by the boot param of the same name.  When set, subsystems with NULL
+ * ->dfl_cftypes use ->legacy_cftypes on the default hierarchy.
+ */
+static bool cgroup_legacy_files_on_dfl;
+
  /* some controllers are not supported in the default hierarchy */
-static const unsigned int cgrp_dfl_root_inhibit_ss_mask = 0
-#ifdef CONFIG_CGROUP_DEBUG
-       | (1 << debug_cgrp_id)
-#endif
-       ;
+static unsigned int cgrp_dfl_root_inhibit_ss_mask;
  
  /* The list of hierarchy roots */
  
@@ -180,13 +182,15 @@ static u64 css_serial_nr_next = 1;
   */
  static int need_forkexit_callback __read_mostly;
  
-static struct cftype cgroup_base_files[];
+static struct cftype cgroup_dfl_base_files[];
+static struct cftype cgroup_legacy_base_files[];
  
  static void cgroup_put(struct cgroup *cgrp);
  static int rebind_subsystems(struct cgroup_root *dst_root,
                              unsigned int ss_mask);
  static int cgroup_destroy_locked(struct cgroup *cgrp);
-static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss);
+static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss,
+                     bool visible);
  static void css_release(struct percpu_ref *ref);
  static void kill_css(struct cgroup_subsys_state *css);
  static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
@@ -1031,11 +1035,68 @@ static void cgroup_get(struct cgroup *cgrp)
         css_get(&cgrp->self);
  }
  
+static bool cgroup_tryget(struct cgroup *cgrp)
+{
+       return css_tryget(&cgrp->self);
+}
+
  static void cgroup_put(struct cgroup *cgrp)
  {
         css_put(&cgrp->self);
  }
  
+/**
+ * cgroup_refresh_child_subsys_mask - update child_subsys_mask
+ * @cgrp: the target cgroup
+ *
+ * On the default hierarchy, a subsystem may request other subsystems to be
+ * enabled together through its ->depends_on mask.  In such cases, more
+ * subsystems than specified in "cgroup.subtree_control" may be enabled.
+ *
+ * This function determines which subsystems need to be enabled given the
+ * current @cgrp->subtree_control and records it in
+ * @cgrp->child_subsys_mask.  The resulting mask is always a superset of
+ * @cgrp->subtree_control and follows the usual hierarchy rules.
+ */
+static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp)
+{
+       struct cgroup *parent = cgroup_parent(cgrp);
+       unsigned int cur_ss_mask = cgrp->subtree_control;
+       struct cgroup_subsys *ss;
+       int ssid;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       if (!cgroup_on_dfl(cgrp)) {
+               cgrp->child_subsys_mask = cur_ss_mask;
+               return;
+       }
+
+       while (true) {
+               unsigned int new_ss_mask = cur_ss_mask;
+
+               for_each_subsys(ss, ssid)
+                       if (cur_ss_mask & (1 << ssid))
+                               new_ss_mask |= ss->depends_on;
+
+               /*
+                * Mask out subsystems which aren't available.  This can
+                * happen only if some depended-upon subsystems were bound
+                * to non-default hierarchies.
+                */
+               if (parent)
+                       new_ss_mask &= parent->child_subsys_mask;
+               else
+                       new_ss_mask &= cgrp->root->subsys_mask;
+
+               if (new_ss_mask == cur_ss_mask)
+                       break;
+               cur_ss_mask = new_ss_mask;
+       }
+
+       cgrp->child_subsys_mask = cur_ss_mask;
+}
+
  /**
   * cgroup_kn_unlock - unlocking helper for cgroup kernfs methods
   * @kn: the kernfs_node being serviced
@@ -1091,7 +1152,8 @@ static struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn)
          * protection against removal.  Ensure @cgrp stays accessible and
          * break the active_ref protection.
          */
-       cgroup_get(cgrp);
+       if (!cgroup_tryget(cgrp))
+               return NULL;
         kernfs_break_active_protection(kn);
  
         mutex_lock(&cgroup_mutex);
@@ -1208,12 +1270,15 @@ static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask)
                 up_write(&css_set_rwsem);
  
                 src_root->subsys_mask &= ~(1 << ssid);
-               src_root->cgrp.child_subsys_mask &= ~(1 << ssid);
+               src_root->cgrp.subtree_control &= ~(1 << ssid);
+               cgroup_refresh_child_subsys_mask(&src_root->cgrp);
  
                 /* default hierarchy doesn't enable controllers by default */
                 dst_root->subsys_mask |= 1 << ssid;
-               if (dst_root != &cgrp_dfl_root)
-                       dst_root->cgrp.child_subsys_mask |= 1 << ssid;
+               if (dst_root != &cgrp_dfl_root) {
+                       dst_root->cgrp.subtree_control |= 1 << ssid;
+                       cgroup_refresh_child_subsys_mask(&dst_root->cgrp);
+               }
  
                 if (ss->bind)
                         ss->bind(css);
@@ -1233,8 +1298,6 @@ static int cgroup_show_options(struct seq_file *seq,
         for_each_subsys(ss, ssid)
                 if (root->subsys_mask & (1 << ssid))
                         seq_printf(seq, ",%s", ss->name);
-       if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
-               seq_puts(seq, ",sane_behavior");
         if (root->flags & CGRP_ROOT_NOPREFIX)
                 seq_puts(seq, ",noprefix");
         if (root->flags & CGRP_ROOT_XATTR)
@@ -1268,6 +1331,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
         bool all_ss = false, one_ss = false;
         unsigned int mask = -1U;
         struct cgroup_subsys *ss;
+       int nr_opts = 0;
         int i;
  
  #ifdef CONFIG_CPUSETS
@@ -1277,6 +1341,8 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
         memset(opts, 0, sizeof(*opts));
  
         while ((token = strsep(&o, ",")) != NULL) {
+               nr_opts++;
+
                 if (!*token)
                         return -EINVAL;
                 if (!strcmp(token, "none")) {
@@ -1361,36 +1427,32 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
                         return -ENOENT;
         }
  
-       /* Consistency checks */
-
         if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) {
                 pr_warn("sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n");
-
-               if ((opts->flags & (CGRP_ROOT_NOPREFIX | CGRP_ROOT_XATTR)) ||
-                   opts->cpuset_clone_children || opts->release_agent ||
-                   opts->name) {
-                       pr_err("sane_behavior: noprefix, xattr, clone_children, release_agent and name are not allowed\n");
+               if (nr_opts != 1) {
+                       pr_err("sane_behavior: no other mount options allowed\n");
                         return -EINVAL;
                 }
-       } else {
-               /*
-                * If the 'all' option was specified select all the
-                * subsystems, otherwise if 'none', 'name=' and a subsystem
-                * name options were not specified, let's default to 'all'
-                */
-               if (all_ss || (!one_ss && !opts->none && !opts->name))
-                       for_each_subsys(ss, i)
-                               if (!ss->disabled)
-                                       opts->subsys_mask |= (1 << i);
-
-               /*
-                * We either have to specify by name or by subsystems. (So
-                * all empty hierarchies must have a name).
-                */
-               if (!opts->subsys_mask && !opts->name)
-                       return -EINVAL;
+               return 0;
         }
  
+       /*
+        * If the 'all' option was specified, select all the subsystems;
+        * otherwise, if none of 'none', 'name=' or a subsystem name was
+        * specified, default to 'all'.
+        */
+       if (all_ss || (!one_ss && !opts->none && !opts->name))
+               for_each_subsys(ss, i)
+                       if (!ss->disabled)
+                               opts->subsys_mask |= (1 << i);
+
+       /*
+        * We either have to specify by name or by subsystems. (So all
+        * empty hierarchies must have a name).
+        */
+       if (!opts->subsys_mask && !opts->name)
+               return -EINVAL;
+
         /*
          * Option noprefix was introduced just for backward compatibility
          * with the old cpuset, so we allow noprefix only if mounting just
@@ -1399,7 +1461,6 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
         if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask))
                 return -EINVAL;
  
-
         /* Can't specify "none" and some subsystems */
         if (opts->subsys_mask && opts->none)
                 return -EINVAL;
@@ -1414,8 +1475,8 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
         struct cgroup_sb_opts opts;
         unsigned int added_mask, removed_mask;
  
-       if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) {
-               pr_err("sane_behavior: remount is not allowed\n");
+       if (root == &cgrp_dfl_root) {
+               pr_err("remount is not allowed\n");
                 return -EINVAL;
         }
  
@@ -1434,11 +1495,10 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
         removed_mask = root->subsys_mask & ~opts.subsys_mask;
  
         /* Don't allow flags or name to change at remount */
-       if (((opts.flags ^ root->flags) & CGRP_ROOT_OPTION_MASK) ||
+       if ((opts.flags ^ root->flags) ||
             (opts.name && strcmp(opts.name, root->name))) {
                 pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n",
-                      opts.flags & CGRP_ROOT_OPTION_MASK, opts.name ?: "",
-                      root->flags & CGRP_ROOT_OPTION_MASK, root->name);
+                      opts.flags, opts.name ?: "", root->flags, root->name);
                 ret = -EINVAL;
                 goto out_unlock;
         }
@@ -1563,6 +1623,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
  {
         LIST_HEAD(tmp_links);
         struct cgroup *root_cgrp = &root->cgrp;
+       struct cftype *base_files;
         struct css_set *cset;
         int i, ret;
  
@@ -1600,7 +1661,12 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
         }
         root_cgrp->kn = root->kf_root->kn;
  
-       ret = cgroup_addrm_files(root_cgrp, cgroup_base_files, true);
+       if (root == &cgrp_dfl_root)
+               base_files = cgroup_dfl_base_files;
+       else
+               base_files = cgroup_legacy_base_files;
+
+       ret = cgroup_addrm_files(root_cgrp, base_files, true);
         if (ret)
                 goto destroy_root;
  
@@ -1638,7 +1704,7 @@ destroy_root:
  exit_root_id:
         cgroup_exit_root_id(root);
  cancel_ref:
-       percpu_ref_cancel_init(&root_cgrp->self.refcnt);
+       percpu_ref_exit(&root_cgrp->self.refcnt);
  out:
         free_cgrp_cset_links(&tmp_links);
         return ret;
@@ -1672,7 +1738,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                 goto out_unlock;
  
         /* look for a matching existing root */
-       if (!opts.subsys_mask && !opts.none && !opts.name) {
+       if (opts.flags & CGRP_ROOT_SANE_BEHAVIOR) {
                 cgrp_dfl_root_visible = true;
                 root = &cgrp_dfl_root;
                 cgroup_get(&root->cgrp);
@@ -1730,15 +1796,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                         goto out_unlock;
                 }
  
-               if ((root->flags ^ opts.flags) & CGRP_ROOT_OPTION_MASK) {
-                       if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) {
-                               pr_err("sane_behavior: new mount options should match the existing superblock\n");
-                               ret = -EINVAL;
-                               goto out_unlock;
-                       } else {
-                               pr_warn("new mount options do not match the existing superblock, will be ignored\n");
-                       }
-               }
+               if (root->flags ^ opts.flags)
+                       pr_warn("new mount options do not match the existing superblock, will be ignored\n");
  
                 /*
                  * We want to reuse @root whose lifetime is governed by its
@@ -2457,9 +2516,7 @@ static int cgroup_release_agent_show(struct seq_file *seq, void *v)
  
  static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
  {
-       struct cgroup *cgrp = seq_css(seq)->cgroup;
-
-       seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp));
+       seq_puts(seq, "0\n");
         return 0;
  }
  
@@ -2496,7 +2553,7 @@ static int cgroup_controllers_show(struct seq_file *seq, void *v)
  {
         struct cgroup *cgrp = seq_css(seq)->cgroup;
  
-       cgroup_print_ss_mask(seq, cgroup_parent(cgrp)->child_subsys_mask);
+       cgroup_print_ss_mask(seq, cgroup_parent(cgrp)->subtree_control);
         return 0;
  }
  
@@ -2505,7 +2562,7 @@ static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
  {
         struct cgroup *cgrp = seq_css(seq)->cgroup;
  
-       cgroup_print_ss_mask(seq, cgrp->child_subsys_mask);
+       cgroup_print_ss_mask(seq, cgrp->subtree_control);
         return 0;
  }
  
@@ -2611,6 +2668,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
                                             loff_t off)
  {
         unsigned int enable = 0, disable = 0;
+       unsigned int css_enable, css_disable, old_ctrl, new_ctrl;
         struct cgroup *cgrp, *child;
         struct cgroup_subsys *ss;
         char *tok;
@@ -2650,11 +2708,26 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
  
         for_each_subsys(ss, ssid) {
                 if (enable & (1 << ssid)) {
-                       if (cgrp->child_subsys_mask & (1 << ssid)) {
+                       if (cgrp->subtree_control & (1 << ssid)) {
                                 enable &= ~(1 << ssid);
                                 continue;
                         }
  
+                       /* unavailable or not enabled on the parent? */
+                       if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) ||
+                           (cgroup_parent(cgrp) &&
+                            !(cgroup_parent(cgrp)->subtree_control & (1 << ssid)))) {
+                               ret = -ENOENT;
+                               goto out_unlock;
+                       }
+
+                       /*
+                        * @ss is already enabled through dependency and
+                        * we'll just make it visible.  Skip draining.
+                        */
+                       if (cgrp->child_subsys_mask & (1 << ssid))
+                               continue;
+
                         /*
                          * Because css offlining is asynchronous, userland
                          * might try to re-enable the same controller while
@@ -2677,23 +2750,15 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
  
                                 return restart_syscall();
                         }
-
-                       /* unavailable or not enabled on the parent? */
-                       if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) ||
-                           (cgroup_parent(cgrp) &&
-                            !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ssid)))) {
-                               ret = -ENOENT;
-                               goto out_unlock;
-                       }
                 } else if (disable & (1 << ssid)) {
-                       if (!(cgrp->child_subsys_mask & (1 << ssid))) {
+                       if (!(cgrp->subtree_control & (1 << ssid))) {
                                 disable &= ~(1 << ssid);
                                 continue;
                         }
  
                         /* a child has it enabled? */
                         cgroup_for_each_live_child(child, cgrp) {
-                               if (child->child_subsys_mask & (1 << ssid)) {
+                               if (child->subtree_control & (1 << ssid)) {
                                         ret = -EBUSY;
                                         goto out_unlock;
                                 }
@@ -2707,7 +2772,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
         }
  
         /*
-        * Except for the root, child_subsys_mask must be zero for a cgroup
+        * Except for the root, subtree_control must be zero for a cgroup
          * with tasks so that child cgroups don't compete against tasks.
          */
         if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) {
@@ -2716,36 +2781,75 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
         }
  
         /*
-        * Create csses for enables and update child_subsys_mask.  This
-        * changes cgroup_e_css() results which in turn makes the
-        * subsequent cgroup_update_dfl_csses() associate all tasks in the
-        * subtree to the updated csses.
+        * Update subsys masks and calculate what needs to be done.  More
+        * subsystems than specified may need to be enabled or disabled
+        * depending on subsystem dependencies.
+        */
+       cgrp->subtree_control |= enable;
+       cgrp->subtree_control &= ~disable;
+
+       old_ctrl = cgrp->child_subsys_mask;
+       cgroup_refresh_child_subsys_mask(cgrp);
+       new_ctrl = cgrp->child_subsys_mask;
+
+       css_enable = ~old_ctrl & new_ctrl;
+       css_disable = old_ctrl & ~new_ctrl;
+       enable |= css_enable;
+       disable |= css_disable;
+
+       /*
+        * Create new csses or make the existing ones visible.  A css is
+        * created invisible if it's being implicitly enabled through
+        * dependency.  An invisible css is made visible when the userland
+        * explicitly enables it.
          */
         for_each_subsys(ss, ssid) {
                 if (!(enable & (1 << ssid)))
                         continue;
  
                 cgroup_for_each_live_child(child, cgrp) {
-                       ret = create_css(child, ss);
+                       if (css_enable & (1 << ssid))
+                               ret = create_css(child, ss,
+                                       cgrp->subtree_control & (1 << ssid));
+                       else
+                               ret = cgroup_populate_dir(child, 1 << ssid);
                         if (ret)
                                 goto err_undo_css;
                 }
         }
  
-       cgrp->child_subsys_mask |= enable;
-       cgrp->child_subsys_mask &= ~disable;
-
+       /*
+        * At this point, cgroup_e_css() results reflect the new csses
+        * making the following cgroup_update_dfl_csses() properly update
+        * css associations of all tasks in the subtree.
+        */
         ret = cgroup_update_dfl_csses(cgrp);
         if (ret)
                 goto err_undo_css;
  
-       /* all tasks are now migrated away from the old csses, kill them */
+       /*
+        * All tasks are migrated out of disabled csses.  Kill or hide
+        * them.  A css is hidden when the userland requests it to be
+        * disabled while other subsystems are still depending on it.  Such
+        * a css must not actively control resources and must be in vanilla
+        * state if it's made visible again later.  Controllers which may
+        * be depended upon should provide ->css_reset() for this purpose.
+        */
         for_each_subsys(ss, ssid) {
                 if (!(disable & (1 << ssid)))
                         continue;
  
-               cgroup_for_each_live_child(child, cgrp)
-                       kill_css(cgroup_css(child, ss));
+               cgroup_for_each_live_child(child, cgrp) {
+                       struct cgroup_subsys_state *css = cgroup_css(child, ss);
+
+                       if (css_disable & (1 << ssid)) {
+                               kill_css(css);
+                       } else {
+                               cgroup_clear_dir(child, 1 << ssid);
+                               if (ss->css_reset)
+                                       ss->css_reset(css);
+                       }
+               }
         }
  
         kernfs_activate(cgrp->kn);
@@ -2755,8 +2859,9 @@ out_unlock:
         return ret ?: nbytes;
  
  err_undo_css:
-       cgrp->child_subsys_mask &= ~enable;
-       cgrp->child_subsys_mask |= disable;
+       cgrp->subtree_control &= ~enable;
+       cgrp->subtree_control |= disable;
+       cgroup_refresh_child_subsys_mask(cgrp);
  
         for_each_subsys(ss, ssid) {
                 if (!(enable & (1 << ssid)))
@@ -2764,8 +2869,14 @@ err_undo_css:
  
                 cgroup_for_each_live_child(child, cgrp) {
                         struct cgroup_subsys_state *css = cgroup_css(child, ss);
-                       if (css)
+
+                       if (!css)
+                               continue;
+
+                       if (css_enable & (1 << ssid))
                                 kill_css(css);
+                       else
+                               cgroup_clear_dir(child, 1 << ssid);
                 }
         }
         goto out_unlock;
@@ -2878,9 +2989,9 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
  
         /*
          * This isn't a proper migration and its usefulness is very
-        * limited.  Disallow if sane_behavior.
+        * limited.  Disallow on the default hierarchy.
          */
-       if (cgroup_sane_behavior(cgrp))
+       if (cgroup_on_dfl(cgrp))
                 return -EPERM;
  
         /*
@@ -2964,9 +3075,9 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
  
         for (cft = cfts; cft->name[0] != '\0'; cft++) {
                 /* does cft->flags tell us to skip this file on @cgrp? */
-               if ((cft->flags & CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
+               if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
                         continue;
-               if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp))
+               if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
                         continue;
                 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
                         continue;
@@ -3024,6 +3135,9 @@ static void cgroup_exit_cftypes(struct cftype *cfts)
                         kfree(cft->kf_ops);
                 cft->kf_ops = NULL;
                 cft->ss = NULL;
+
+               /* revert flags set by cgroup core while adding @cfts */
+               cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL);
         }
  }
  
@@ -3109,7 +3223,7 @@ int cgroup_rm_cftypes(struct cftype *cfts)
   * function currently returns 0 as long as @cfts registration is successful
   * even if some file creation attempts on existing cgroups fail.
   */
-int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
+static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
  {
         int ret;
  
@@ -3134,6 +3248,49 @@ int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
         return ret;
  }
  
+/**
+ * cgroup_add_dfl_cftypes - add an array of cftypes for default hierarchy
+ * @ss: target cgroup subsystem
+ * @cfts: zero-length name terminated array of cftypes
+ *
+ * Similar to cgroup_add_cftypes() but the added files are only used for
+ * the default hierarchy.
+ */
+int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
+{
+       struct cftype *cft;
+
+       for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
+               cft->flags |= __CFTYPE_ONLY_ON_DFL;
+       return cgroup_add_cftypes(ss, cfts);
+}
+
+/**
+ * cgroup_add_legacy_cftypes - add an array of cftypes for legacy hierarchies
+ * @ss: target cgroup subsystem
+ * @cfts: zero-length name terminated array of cftypes
+ *
+ * Similar to cgroup_add_cftypes() but the added files are only used for
+ * the legacy hierarchies.
+ */
+int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
+{
+       struct cftype *cft;
+
+       /*
+        * If cgroup_legacy_files_on_dfl, we want to show the legacy files
+        * on the dfl hierarchy, but only if the target subsystem hasn't
+        * been updated for the dfl hierarchy yet.
+        */
+       if (!cgroup_legacy_files_on_dfl ||
+           ss->dfl_cftypes != ss->legacy_cftypes) {
+               for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
+                       cft->flags |= __CFTYPE_NOT_ON_DFL;
+       }
+
+       return cgroup_add_cftypes(ss, cfts);
+}
+
  /**
   * cgroup_task_count - count the number of tasks in a cgroup.
   * @cgrp: the cgroup in question
@@ -3699,8 +3856,9 @@ after:
   *
   * All this extra complexity was caused by the original implementation
   * committing to an entirely unnecessary property.  In the long term, we
- * want to do away with it.  Explicitly scramble sort order if
- * sane_behavior so that no such expectation exists in the new interface.
+ * want to do away with it.  Explicitly scramble sort order if on the
+ * default hierarchy so that no such expectation exists in the new
+ * interface.
   *
   * Scrambling is done by swapping every two consecutive bits, which is
   * non-identity one-to-one mapping which disturbs sort order sufficiently.
@@ -3715,7 +3873,7 @@ static pid_t pid_fry(pid_t pid)
  
  static pid_t cgroup_pid_fry(struct cgroup *cgrp, pid_t pid)
  {
-       if (cgroup_sane_behavior(cgrp))
+       if (cgroup_on_dfl(cgrp))
                 return pid_fry(pid);
         else
                 return pid;
@@ -3818,7 +3976,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
         css_task_iter_end(&it);
         length = n;
         /* now sort & (if procs) strip out duplicates */
-       if (cgroup_sane_behavior(cgrp))
+       if (cgroup_on_dfl(cgrp))
                 sort(array, length, sizeof(pid_t), fried_cmppid, NULL);
         else
                 sort(array, length, sizeof(pid_t), cmppid, NULL);
@@ -4040,7 +4198,8 @@ static int cgroup_clone_children_write(struct cgroup_subsys_state *css,
         return 0;
  }
  
-static struct cftype cgroup_base_files[] = {
+/* cgroup core interface files for the default hierarchy */
+static struct cftype cgroup_dfl_base_files[] = {
         {
                 .name = "cgroup.procs",
                 .seq_start = cgroup_pidlist_start,
@@ -4051,47 +4210,53 @@ static struct cftype cgroup_base_files[] = {
                 .write = cgroup_procs_write,
                 .mode = S_IRUGO | S_IWUSR,
         },
-       {
-               .name = "cgroup.clone_children",
-               .flags = CFTYPE_INSANE,
-               .read_u64 = cgroup_clone_children_read,
-               .write_u64 = cgroup_clone_children_write,
-       },
-       {
-               .name = "cgroup.sane_behavior",
-               .flags = CFTYPE_ONLY_ON_ROOT,
-               .seq_show = cgroup_sane_behavior_show,
-       },
         {
                 .name = "cgroup.controllers",
-               .flags = CFTYPE_ONLY_ON_DFL | CFTYPE_ONLY_ON_ROOT,
+               .flags = CFTYPE_ONLY_ON_ROOT,
                 .seq_show = cgroup_root_controllers_show,
         },
         {
                 .name = "cgroup.controllers",
-               .flags = CFTYPE_ONLY_ON_DFL | CFTYPE_NOT_ON_ROOT,
+               .flags = CFTYPE_NOT_ON_ROOT,
                 .seq_show = cgroup_controllers_show,
         },
         {
                 .name = "cgroup.subtree_control",
-               .flags = CFTYPE_ONLY_ON_DFL,
                 .seq_show = cgroup_subtree_control_show,
                 .write = cgroup_subtree_control_write,
         },
         {
                 .name = "cgroup.populated",
-               .flags = CFTYPE_ONLY_ON_DFL | CFTYPE_NOT_ON_ROOT,
+               .flags = CFTYPE_NOT_ON_ROOT,
                 .seq_show = cgroup_populated_show,
         },
+       { }     /* terminate */
+};
  
-       /*
-        * Historical crazy stuff.  These don't have "cgroup."  prefix and
-        * don't exist if sane_behavior.  If you're depending on these, be
-        * prepared to be burned.
-        */
+/* cgroup core interface files for the legacy hierarchies */
+static struct cftype cgroup_legacy_base_files[] = {
+       {
+               .name = "cgroup.procs",
+               .seq_start = cgroup_pidlist_start,
+               .seq_next = cgroup_pidlist_next,
+               .seq_stop = cgroup_pidlist_stop,
+               .seq_show = cgroup_pidlist_show,
+               .private = CGROUP_FILE_PROCS,
+               .write = cgroup_procs_write,
+               .mode = S_IRUGO | S_IWUSR,
+       },
+       {
+               .name = "cgroup.clone_children",
+               .read_u64 = cgroup_clone_children_read,
+               .write_u64 = cgroup_clone_children_write,
+       },
+       {
+               .name = "cgroup.sane_behavior",
+               .flags = CFTYPE_ONLY_ON_ROOT,
+               .seq_show = cgroup_sane_behavior_show,
+       },
         {
                 .name = "tasks",
-               .flags = CFTYPE_INSANE,         /* use "procs" instead */
                 .seq_start = cgroup_pidlist_start,
                 .seq_next = cgroup_pidlist_next,
                 .seq_stop = cgroup_pidlist_stop,
@@ -4102,13 +4267,12 @@ static struct cftype cgroup_base_files[] = {
         },
         {
                 .name = "notify_on_release",
-               .flags = CFTYPE_INSANE,
                 .read_u64 = cgroup_read_notify_on_release,
                 .write_u64 = cgroup_write_notify_on_release,
         },
         {
                 .name = "release_agent",
-               .flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT,
+               .flags = CFTYPE_ONLY_ON_ROOT,
                 .seq_show = cgroup_release_agent_show,
                 .write = cgroup_release_agent_write,
                 .max_write_len = PATH_MAX - 1,
@@ -4175,6 +4339,8 @@ static void css_free_work_fn(struct work_struct *work)
                 container_of(work, struct cgroup_subsys_state, destroy_work);
         struct cgroup *cgrp = css->cgroup;
  
+       percpu_ref_exit(&css->refcnt);
+
         if (css->ss) {
                 /* css free path */
                 if (css->parent)
@@ -4236,6 +4402,15 @@ static void css_release_work_fn(struct work_struct *work)
                 /* cgroup release path */
                 cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
                 cgrp->id = -1;
+
+               /*
+                * There are two control paths which try to determine
+                * cgroup from dentry without going through kernfs -
+                * cgroupstats_build() and css_tryget_online_from_dir().
+                * Those are supported by RCU protecting clearing of
+                * cgrp->kn->priv backpointer.
+                */
+               RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv, NULL);
         }
  
         mutex_unlock(&cgroup_mutex);
@@ -4314,12 +4489,14 @@ static void offline_css(struct cgroup_subsys_state *css)
   * create_css - create a cgroup_subsys_state
   * @cgrp: the cgroup new css will be associated with
   * @ss: the subsys of new css
+ * @visible: whether to create control knobs for the new css or not
   *
   * Create a new css associated with @cgrp - @ss pair.  On success, the new
- * css is online and installed in @cgrp with all interface files created.
- * Returns 0 on success, -errno on failure.
+ * css is online and installed in @cgrp with all interface files created if
+ * @visible.  Returns 0 on success, -errno on failure.
   */
-static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
+static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss,
+                     bool visible)
  {
         struct cgroup *parent = cgroup_parent(cgrp);
         struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss);
@@ -4343,9 +4520,11 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
                 goto err_free_percpu_ref;
         css->id = err;
  
-       err = cgroup_populate_dir(cgrp, 1 << ss->id);
-       if (err)
-               goto err_free_id;
+       if (visible) {
+               err = cgroup_populate_dir(cgrp, 1 << ss->id);
+               if (err)
+                       goto err_free_id;
+       }
  
         /* @css is ready to be brought online now, make it visible */
         list_add_tail_rcu(&css->sibling, &parent_css->children);
@@ -4372,7 +4551,7 @@ err_list_del:
  err_free_id:
         cgroup_idr_remove(&ss->css_idr, css->id);
  err_free_percpu_ref:
-       percpu_ref_cancel_init(&css->refcnt);
+       percpu_ref_exit(&css->refcnt);
  err_free_css:
         call_rcu(&css->rcu_head, css_free_rcu_fn);
         return err;
@@ -4385,8 +4564,14 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
         struct cgroup_root *root;
         struct cgroup_subsys *ss;
         struct kernfs_node *kn;
+       struct cftype *base_files;
         int ssid, ret;
  
+       /* Do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable.
+        */
+       if (strchr(name, '\n'))
+               return -EINVAL;
+
         parent = cgroup_kn_lock_live(parent_kn);
         if (!parent)
                 return -ENODEV;
@@ -4455,14 +4640,20 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
         if (ret)
                 goto out_destroy;
  
-       ret = cgroup_addrm_files(cgrp, cgroup_base_files, true);
+       if (cgroup_on_dfl(cgrp))
+               base_files = cgroup_dfl_base_files;
+       else
+               base_files = cgroup_legacy_base_files;
+
+       ret = cgroup_addrm_files(cgrp, base_files, true);
         if (ret)
                 goto out_destroy;
  
         /* let's create and online css's */
         for_each_subsys(ss, ssid) {
                 if (parent->child_subsys_mask & (1 << ssid)) {
-                       ret = create_css(cgrp, ss);
+                       ret = create_css(cgrp, ss,
+                                        parent->subtree_control & (1 << ssid));
                         if (ret)
                                 goto out_destroy;
                 }
@@ -4470,10 +4661,12 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
  
         /*
          * On the default hierarchy, a child doesn't automatically inherit
-        * child_subsys_mask from the parent.  Each is configured manually.
+        * subtree_control from the parent.  Each is configured manually.
          */
-       if (!cgroup_on_dfl(cgrp))
-               cgrp->child_subsys_mask = parent->child_subsys_mask;
+       if (!cgroup_on_dfl(cgrp)) {
+               cgrp->subtree_control = parent->subtree_control;
+               cgroup_refresh_child_subsys_mask(cgrp);
+       }
  
         kernfs_activate(kn);
  
@@ -4483,7 +4676,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
  out_free_id:
         cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
  out_cancel_ref:
-       percpu_ref_cancel_init(&cgrp->self.refcnt);
+       percpu_ref_exit(&cgrp->self.refcnt);
  out_free_cgrp:
         kfree(cgrp);
  out_unlock:
@@ -4656,16 +4849,6 @@ static int cgroup_rmdir(struct kernfs_node *kn)
  
         cgroup_kn_unlock(kn);
  
-       /*
-        * There are two control paths which try to determine cgroup from
-        * dentry without going through kernfs - cgroupstats_build() and
-        * css_tryget_online_from_dir().  Those are supported by RCU
-        * protecting clearing of cgrp->kn->priv backpointer, which should
-        * happen after all files under it have been removed.
-        */
-       if (!ret)
-               RCU_INIT_POINTER(*(void __rcu __force **)&kn->priv, NULL);
-
         cgroup_put(cgrp);
         return ret;
  }
@@ -4736,8 +4919,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
   */
  int __init cgroup_init_early(void)
  {
-       static struct cgroup_sb_opts __initdata opts =
-               { .flags = CGRP_ROOT_SANE_BEHAVIOR };
+       static struct cgroup_sb_opts __initdata opts;
         struct cgroup_subsys *ss;
         int i;
  
@@ -4775,7 +4957,8 @@ int __init cgroup_init(void)
         unsigned long key;
         int ssid, err;
  
-       BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
+       BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
+       BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
  
         mutex_lock(&cgroup_mutex);
  
@@ -4807,9 +4990,22 @@ int __init cgroup_init(void)
                  * disabled flag and cftype registration needs kmalloc,
                  * both of which aren't available during early_init.
                  */
-               if (!ss->disabled) {
-                       cgrp_dfl_root.subsys_mask |= 1 << ss->id;
-                       WARN_ON(cgroup_add_cftypes(ss, ss->base_cftypes));
+               if (ss->disabled)
+                       continue;
+
+               cgrp_dfl_root.subsys_mask |= 1 << ss->id;
+
+               if (cgroup_legacy_files_on_dfl && !ss->dfl_cftypes)
+                       ss->dfl_cftypes = ss->legacy_cftypes;
+
+               if (!ss->dfl_cftypes)
+                       cgrp_dfl_root_inhibit_ss_mask |= 1 << ss->id;
+
+               if (ss->dfl_cftypes == ss->legacy_cftypes) {
+                       WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes));
+               } else {
+                       WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes));
+                       WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes));
                 }
         }
  
@@ -5205,6 +5401,14 @@ static int __init cgroup_disable(char *str)
  }
  __setup("cgroup_disable=", cgroup_disable);
  
+static int __init cgroup_set_legacy_files_on_dfl(char *str)
+{
+       printk("cgroup: using legacy files on the default hierarchy\n");
+       cgroup_legacy_files_on_dfl = true;
+       return 1;       /* option handled; keep it from being passed to init */
+}
+__setup("cgroup__DEVEL__legacy_files_on_dfl", cgroup_set_legacy_files_on_dfl);
+
  /**
   * css_tryget_online_from_dir - get corresponding css from a cgroup dentry
   * @dentry: directory dentry of interest
@@ -5231,7 +5435,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
         /*
          * This path doesn't originate from kernfs and @kn could already
          * have been or be removed at any point.  @kn->priv is RCU
-        * protected for this access.  See cgroup_rmdir() for details.
+        * protected for this access.  See css_release_work_fn() for details.
          */
         cgrp = rcu_dereference(kn->priv);
         if (cgrp)
@@ -5399,6 +5603,6 @@ static struct cftype debug_files[] =  {
  struct cgroup_subsys debug_cgrp_subsys = {
         .css_alloc = debug_css_alloc,
         .css_free = debug_css_free,
-       .base_cftypes = debug_files,
+       .legacy_cftypes = debug_files,  /* ->dfl_cftypes left unset */
  };
  #endif /* CONFIG_CGROUP_DEBUG */