cgroup: s/CGRP_CLONE_CHILDREN/CGRP_CPUSET_CLONE_CHILDREN/
[pandora-kernel.git] / kernel / cgroup.c
index 998ab59..2895880 100644 (file)
@@ -242,6 +242,8 @@ static DEFINE_SPINLOCK(hierarchy_id_lock);
  */
 static int need_forkexit_callback __read_mostly;
 
+static int cgroup_destroy_locked(struct cgroup *cgrp);
+
 #ifdef CONFIG_PROVE_LOCKING
 int cgroup_lock_is_held(void)
 {
@@ -294,11 +296,6 @@ static int notify_on_release(const struct cgroup *cgrp)
        return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
 }
 
-static int clone_children(const struct cgroup *cgrp)
-{
-       return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
-}
-
 /*
  * for_each_subsys() allows you to iterate on each subsystem attached to
  * an active hierarchy
@@ -874,7 +871,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
                 * Release the subsystem state objects.
                 */
                for_each_subsys(cgrp->root, ss)
-                       ss->destroy(cgrp);
+                       ss->css_free(cgrp);
 
                cgrp->root->number_of_cgroups--;
                mutex_unlock(&cgroup_mutex);
@@ -1099,7 +1096,7 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
                seq_puts(seq, ",xattr");
        if (strlen(root->release_agent_path))
                seq_printf(seq, ",release_agent=%s", root->release_agent_path);
-       if (clone_children(&root->top_cgroup))
+       if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags))
                seq_puts(seq, ",clone_children");
        if (strlen(root->name))
                seq_printf(seq, ",name=%s", root->name);
@@ -1111,7 +1108,7 @@ struct cgroup_sb_opts {
        unsigned long subsys_mask;
        unsigned long flags;
        char *release_agent;
-       bool clone_children;
+       bool cpuset_clone_children;
        char *name;
        /* User explicitly requested empty subsystem */
        bool none;
@@ -1162,7 +1159,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
                        continue;
                }
                if (!strcmp(token, "clone_children")) {
-                       opts->clone_children = true;
+                       opts->cpuset_clone_children = true;
                        continue;
                }
                if (!strcmp(token, "xattr")) {
@@ -1381,6 +1378,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
        INIT_LIST_HEAD(&cgrp->children);
        INIT_LIST_HEAD(&cgrp->files);
        INIT_LIST_HEAD(&cgrp->css_sets);
+       INIT_LIST_HEAD(&cgrp->allcg_node);
        INIT_LIST_HEAD(&cgrp->release_list);
        INIT_LIST_HEAD(&cgrp->pidlists);
        mutex_init(&cgrp->pidlist_mutex);
@@ -1471,8 +1469,8 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
                strcpy(root->release_agent_path, opts->release_agent);
        if (opts->name)
                strcpy(root->name, opts->name);
-       if (opts->clone_children)
-               set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags);
+       if (opts->cpuset_clone_children)
+               set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags);
        return root;
 }
 
@@ -1650,7 +1648,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 
                free_cg_links(&tmp_cg_links);
 
-               BUG_ON(!list_empty(&root_cgrp->sibling));
                BUG_ON(!list_empty(&root_cgrp->children));
                BUG_ON(root->number_of_cgroups != 1);
 
@@ -1699,7 +1696,6 @@ static void cgroup_kill_sb(struct super_block *sb) {
 
        BUG_ON(root->number_of_cgroups != 1);
        BUG_ON(!list_empty(&cgrp->children));
-       BUG_ON(!list_empty(&cgrp->sibling));
 
        mutex_lock(&cgroup_mutex);
        mutex_lock(&cgroup_root_mutex);
@@ -1757,9 +1753,11 @@ static struct kobject *cgroup_kobj;
  */
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 {
+       struct dentry *dentry = cgrp->dentry;
        char *start;
-       struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
-                                                     cgroup_lock_is_held());
+
+       rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(),
+                          "cgroup_path() called without proper locking");
 
        if (!dentry || cgrp == dummytop) {
                /*
@@ -1783,8 +1781,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
                if (!cgrp)
                        break;
 
-               dentry = rcu_dereference_check(cgrp->dentry,
-                                              cgroup_lock_is_held());
+               dentry = cgrp->dentry;
                if (!cgrp->parent)
                        continue;
                if (--start < buf)
@@ -2653,10 +2650,17 @@ static int cgroup_create_file(struct dentry *dentry, umode_t mode,
 
                /* start off with i_nlink == 2 (for "." entry) */
                inc_nlink(inode);
+               inc_nlink(dentry->d_parent->d_inode);
 
-               /* start with the directory inode held, so that we can
-                * populate it without racing with another mkdir */
-               mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+               /*
+                * Control reaches here with cgroup_mutex held.
+                * @inode->i_mutex should nest outside cgroup_mutex but we
+                * want to populate it immediately without releasing
+                * cgroup_mutex.  As @inode isn't visible to anyone else
+                * yet, trylock will always succeed without affecting
+                * lockdep checks.
+                */
+               WARN_ON_ONCE(!mutex_trylock(&inode->i_mutex));
        } else if (S_ISREG(mode)) {
                inode->i_size = 0;
                inode->i_fop = &cgroup_file_operations;
@@ -2667,32 +2671,6 @@ static int cgroup_create_file(struct dentry *dentry, umode_t mode,
        return 0;
 }
 
-/*
- * cgroup_create_dir - create a directory for an object.
- * @cgrp: the cgroup we create the directory for. It must have a valid
- *        ->parent field. And we are going to fill its ->dentry field.
- * @dentry: dentry of the new cgroup
- * @mode: mode to set on new directory.
- */
-static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
-                               umode_t mode)
-{
-       struct dentry *parent;
-       int error = 0;
-
-       parent = cgrp->parent->dentry;
-       error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
-       if (!error) {
-               dentry->d_fsdata = cgrp;
-               inc_nlink(parent->d_inode);
-               rcu_assign_pointer(cgrp->dentry, dentry);
-               dget(dentry);
-       }
-       dput(dentry);
-
-       return error;
-}
-
 /**
  * cgroup_file_mode - deduce file mode of a control file
  * @cft: the control file in question
@@ -2986,6 +2964,92 @@ static void cgroup_enable_task_cg_lists(void)
        write_unlock(&css_set_lock);
 }
 
+/**
+ * cgroup_next_descendant_pre - find the next descendant for pre-order walk
+ * @pos: the current position (%NULL to initiate traversal)
+ * @cgroup: cgroup whose descendants to walk
+ *
+ * To be used by cgroup_for_each_descendant_pre().  Find the next
+ * descendant to visit for pre-order traversal of @cgroup's descendants.
+ */
+struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
+                                         struct cgroup *cgroup)
+{
+       struct cgroup *next;
+
+       WARN_ON_ONCE(!rcu_read_lock_held());
+
+       /* if first iteration, pretend we just visited @cgroup */
+       if (!pos) {
+               if (list_empty(&cgroup->children))
+                       return NULL;
+               pos = cgroup;
+       }
+
+       /* visit the first child if exists */
+       next = list_first_or_null_rcu(&pos->children, struct cgroup, sibling);
+       if (next)
+               return next;
+
+       /* no child, visit my or the closest ancestor's next sibling */
+       do {
+               next = list_entry_rcu(pos->sibling.next, struct cgroup,
+                                     sibling);
+               if (&next->sibling != &pos->parent->children)
+                       return next;
+
+               pos = pos->parent;
+       } while (pos != cgroup);
+
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre);
+
+static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos)
+{
+       struct cgroup *last;
+
+       do {
+               last = pos;
+               pos = list_first_or_null_rcu(&pos->children, struct cgroup,
+                                            sibling);
+       } while (pos);
+
+       return last;
+}
+
+/**
+ * cgroup_next_descendant_post - find the next descendant for post-order walk
+ * @pos: the current position (%NULL to initiate traversal)
+ * @cgroup: cgroup whose descendants to walk
+ *
+ * To be used by cgroup_for_each_descendant_post().  Find the next
+ * descendant to visit for post-order traversal of @cgroup's descendants.
+ */
+struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
+                                          struct cgroup *cgroup)
+{
+       struct cgroup *next;
+
+       WARN_ON_ONCE(!rcu_read_lock_held());
+
+       /* if first iteration, visit the leftmost descendant */
+       if (!pos) {
+               next = cgroup_leftmost_descendant(cgroup);
+               return next != cgroup ? next : NULL;
+       }
+
+       /* if there's an unvisited sibling, visit its leftmost descendant */
+       next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);
+       if (&next->sibling != &pos->parent->children)
+               return cgroup_leftmost_descendant(next);
+
+       /* no sibling left, visit parent */
+       next = pos->parent;
+       return next != cgroup ? next : NULL;
+}
+EXPORT_SYMBOL_GPL(cgroup_next_descendant_post);
+
 void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
        __acquires(css_set_lock)
 {
@@ -3836,7 +3900,7 @@ fail:
 static u64 cgroup_clone_children_read(struct cgroup *cgrp,
                                    struct cftype *cft)
 {
-       return clone_children(cgrp);
+       return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
 }
 
 static int cgroup_clone_children_write(struct cgroup *cgrp,
@@ -3844,9 +3908,9 @@ static int cgroup_clone_children_write(struct cgroup *cgrp,
                                     u64 val)
 {
        if (val)
-               set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
+               set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
        else
-               clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
+               clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
        return 0;
 }
 
@@ -3959,7 +4023,7 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
        css->flags = 0;
        css->id = NULL;
        if (cgrp == dummytop)
-               set_bit(CSS_ROOT, &css->flags);
+               css->flags |= CSS_ROOT;
        BUG_ON(cgrp->subsys[ss->subsys_id]);
        cgrp->subsys[ss->subsys_id] = css;
 
@@ -3972,6 +4036,46 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
        INIT_WORK(&css->dput_work, css_dput_fn);
 }
 
+/* invoke ->css_online() on a new CSS and mark it online if successful */
+static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+       int ret = 0;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       if (ss->css_online)
+               ret = ss->css_online(cgrp);
+       if (!ret)
+               cgrp->subsys[ss->subsys_id]->flags |= CSS_ONLINE;
+       return ret;
+}
+
+/* if the CSS is online, invoke ->css_offline() on it and mark it offline */
+static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
+       __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
+{
+       struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       if (!(css->flags & CSS_ONLINE))
+               return;
+
+       /*
+        * css_offline() should be called with cgroup_mutex unlocked.  See
+        * 3fa59dfbc3 ("cgroup: fix potential deadlock in pre_destroy") for
+        * details.  This temporary unlocking should go away once
+        * cgroup_mutex is unexported from controllers.
+        */
+       if (ss->css_offline) {
+               mutex_unlock(&cgroup_mutex);
+               ss->css_offline(cgrp);
+               mutex_lock(&cgroup_mutex);
+       }
+
+       cgrp->subsys[ss->subsys_id]->flags &= ~CSS_ONLINE;
+}
+
 /*
  * cgroup_create - create a cgroup
  * @parent: cgroup that will be parent of the new cgroup
@@ -4002,7 +4106,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
         */
        if (!cgroup_lock_live_group(parent)) {
                err = -ENODEV;
-               goto err_free;
+               goto err_free_cgrp;
        }
 
        /* Grab a reference on the superblock so the hierarchy doesn't
@@ -4021,25 +4125,26 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
        if (notify_on_release(parent))
                set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
 
-       if (clone_children(parent))
-               set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
+       if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
+               set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
 
        for_each_subsys(root, ss) {
                struct cgroup_subsys_state *css;
 
-               css = ss->create(cgrp);
+               css = ss->css_alloc(cgrp);
                if (IS_ERR(css)) {
                        err = PTR_ERR(css);
-                       goto err_destroy;
+                       goto err_free_all;
                }
                init_cgroup_css(css, ss, cgrp);
                if (ss->use_id) {
                        err = alloc_css_id(ss, parent, cgrp);
                        if (err)
-                               goto err_destroy;
+                               goto err_free_all;
                }
-               /* At error, ->destroy() callback has to free assigned ID. */
-               if (clone_children(parent) && ss->post_clone)
+               /* At error, ->css_free() callback has to free assigned ID. */
+               if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags) &&
+                   ss->post_clone)
                        ss->post_clone(cgrp);
 
                if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
@@ -4052,49 +4157,60 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
                }
        }
 
-       list_add(&cgrp->sibling, &cgrp->parent->children);
-       root->number_of_cgroups++;
-
-       err = cgroup_create_dir(cgrp, dentry, mode);
+       /*
+        * Create directory.  cgroup_create_file() returns with the new
+        * directory locked on success so that it can be populated without
+        * dropping cgroup_mutex.
+        */
+       err = cgroup_create_file(dentry, S_IFDIR | mode, sb);
        if (err < 0)
-               goto err_remove;
+               goto err_free_all;
+       lockdep_assert_held(&dentry->d_inode->i_mutex);
+
+       /* allocation complete, commit to creation */
+       dentry->d_fsdata = cgrp;
+       cgrp->dentry = dentry;
+       list_add_tail(&cgrp->allcg_node, &root->allcg_list);
+       list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
+       root->number_of_cgroups++;
 
        /* each css holds a ref to the cgroup's dentry */
        for_each_subsys(root, ss)
                dget(dentry);
 
-       /* The cgroup directory was pre-locked for us */
-       BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
-
-       list_add_tail(&cgrp->allcg_node, &root->allcg_list);
+       /* creation succeeded, notify subsystems */
+       for_each_subsys(root, ss) {
+               err = online_css(ss, cgrp);
+               if (err)
+                       goto err_destroy;
+       }
 
        err = cgroup_populate_dir(cgrp, true, root->subsys_mask);
-       /* If err < 0, we have a half-filled directory - oh well ;) */
+       if (err)
+               goto err_destroy;
 
        mutex_unlock(&cgroup_mutex);
        mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
 
        return 0;
 
- err_remove:
-
-       list_del(&cgrp->sibling);
-       root->number_of_cgroups--;
-
- err_destroy:
-
+err_free_all:
        for_each_subsys(root, ss) {
                if (cgrp->subsys[ss->subsys_id])
-                       ss->destroy(cgrp);
+                       ss->css_free(cgrp);
        }
-
        mutex_unlock(&cgroup_mutex);
-
        /* Release the reference count that we took on the superblock */
        deactivate_super(sb);
-err_free:
+err_free_cgrp:
        kfree(cgrp);
        return err;
+
+err_destroy:
+       cgroup_destroy_locked(cgrp);
+       mutex_unlock(&cgroup_mutex);
+       mutex_unlock(&dentry->d_inode->i_mutex);
+       return err;
 }
 
 static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
@@ -4146,22 +4262,20 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
        return 0;
 }
 
-static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
+static int cgroup_destroy_locked(struct cgroup *cgrp)
+       __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
 {
-       struct cgroup *cgrp = dentry->d_fsdata;
-       struct dentry *d;
-       struct cgroup *parent;
+       struct dentry *d = cgrp->dentry;
+       struct cgroup *parent = cgrp->parent;
        DEFINE_WAIT(wait);
        struct cgroup_event *event, *tmp;
        struct cgroup_subsys *ss;
 
-       /* the vfs holds both inode->i_mutex already */
-       mutex_lock(&cgroup_mutex);
-       parent = cgrp->parent;
-       if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
-               mutex_unlock(&cgroup_mutex);
+       lockdep_assert_held(&d->d_inode->i_mutex);
+       lockdep_assert_held(&cgroup_mutex);
+
+       if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children))
                return -EBUSY;
-       }
 
        /*
         * Block new css_tryget() by deactivating refcnt and mark @cgrp
@@ -4177,16 +4291,9 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
        }
        set_bit(CGRP_REMOVED, &cgrp->flags);
 
-       /*
-        * Tell subsystems to initate destruction.  pre_destroy() should be
-        * called with cgroup_mutex unlocked.  See 3fa59dfbc3 ("cgroup: fix
-        * potential deadlock in pre_destroy") for details.
-        */
-       mutex_unlock(&cgroup_mutex);
+       /* tell subsystems to initiate destruction */
        for_each_subsys(cgrp->root, ss)
-               if (ss->pre_destroy)
-                       ss->pre_destroy(cgrp);
-       mutex_lock(&cgroup_mutex);
+               offline_css(ss, cgrp);
 
        /*
         * Put all the base refs.  Each css holds an extra reference to the
@@ -4204,12 +4311,10 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
        raw_spin_unlock(&release_list_lock);
 
        /* delete this cgroup from parent->children */
-       list_del_init(&cgrp->sibling);
-
+       list_del_rcu(&cgrp->sibling);
        list_del_init(&cgrp->allcg_node);
 
-       d = dget(cgrp->dentry);
-
+       dget(d);
        cgroup_d_remove_dir(d);
        dput(d);
 
@@ -4230,10 +4335,20 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
        }
        spin_unlock(&cgrp->event_list_lock);
 
-       mutex_unlock(&cgroup_mutex);
        return 0;
 }
 
+static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
+{
+       int ret;
+
+       mutex_lock(&cgroup_mutex);
+       ret = cgroup_destroy_locked(dentry->d_fsdata);
+       mutex_unlock(&cgroup_mutex);
+
+       return ret;
+}
+
 static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss)
 {
        INIT_LIST_HEAD(&ss->cftsets);
@@ -4254,13 +4369,15 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 
        printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
 
+       mutex_lock(&cgroup_mutex);
+
        /* init base cftset */
        cgroup_init_cftsets(ss);
 
        /* Create the top cgroup state for this subsystem */
        list_add(&ss->sibling, &rootnode.subsys_list);
        ss->root = &rootnode;
-       css = ss->create(dummytop);
+       css = ss->css_alloc(dummytop);
        /* We don't handle early failures gracefully */
        BUG_ON(IS_ERR(css));
        init_cgroup_css(css, ss, dummytop);
@@ -4269,7 +4386,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
         * pointer to this state - since the subsystem is
         * newly registered, all tasks and hence the
         * init_css_set is in the subsystem's top cgroup. */
-       init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
+       init_css_set.subsys[ss->subsys_id] = css;
 
        need_forkexit_callback |= ss->fork || ss->exit;
 
@@ -4279,6 +4396,9 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
        BUG_ON(!list_empty(&init_task.tasks));
 
        ss->active = 1;
+       BUG_ON(online_css(ss, dummytop));
+
+       mutex_unlock(&cgroup_mutex);
 
        /* this function shouldn't be used with modular subsystems, since they
         * need to register a subsys_id, among other things */
@@ -4296,12 +4416,12 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
  */
 int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 {
-       int i;
        struct cgroup_subsys_state *css;
+       int i, ret;
 
        /* check name and function validity */
        if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
-           ss->create == NULL || ss->destroy == NULL)
+           ss->css_alloc == NULL || ss->css_free == NULL)
                return -EINVAL;
 
        /*
@@ -4330,10 +4450,11 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
        subsys[ss->subsys_id] = ss;
 
        /*
-        * no ss->create seems to need anything important in the ss struct, so
-        * this can happen first (i.e. before the rootnode attachment).
+        * no ss->css_alloc seems to need anything important in the ss
+        * struct, so this can happen first (i.e. before the rootnode
+        * attachment).
         */
-       css = ss->create(dummytop);
+       css = ss->css_alloc(dummytop);
        if (IS_ERR(css)) {
                /* failure case - need to deassign the subsys[] slot. */
                subsys[ss->subsys_id] = NULL;
@@ -4348,14 +4469,9 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
        init_cgroup_css(css, ss, dummytop);
        /* init_idr must be after init_cgroup_css because it sets css->id. */
        if (ss->use_id) {
-               int ret = cgroup_init_idr(ss, css);
-               if (ret) {
-                       dummytop->subsys[ss->subsys_id] = NULL;
-                       ss->destroy(dummytop);
-                       subsys[ss->subsys_id] = NULL;
-                       mutex_unlock(&cgroup_mutex);
-                       return ret;
-               }
+               ret = cgroup_init_idr(ss, css);
+               if (ret)
+                       goto err_unload;
        }
 
        /*
@@ -4388,10 +4504,19 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
        write_unlock(&css_set_lock);
 
        ss->active = 1;
+       ret = online_css(ss, dummytop);
+       if (ret)
+               goto err_unload;
 
        /* success! */
        mutex_unlock(&cgroup_mutex);
        return 0;
+
+err_unload:
+       mutex_unlock(&cgroup_mutex);
+       /* @ss can't be mounted here as try_module_get() would fail */
+       cgroup_unload_subsys(ss);
+       return ret;
 }
 EXPORT_SYMBOL_GPL(cgroup_load_subsys);
 
@@ -4418,6 +4543,15 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
        BUG_ON(ss->root != &rootnode);
 
        mutex_lock(&cgroup_mutex);
+
+       offline_css(ss, dummytop);
+       ss->active = 0;
+
+       if (ss->use_id) {
+               idr_remove_all(&ss->idr);
+               idr_destroy(&ss->idr);
+       }
+
        /* deassign the subsys_id */
        subsys[ss->subsys_id] = NULL;
 
@@ -4433,7 +4567,6 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
                struct css_set *cg = link->cg;
 
                hlist_del(&cg->hlist);
-               BUG_ON(!cg->subsys[ss->subsys_id]);
                cg->subsys[ss->subsys_id] = NULL;
                hhead = css_set_hash(cg->subsys);
                hlist_add_head(&cg->hlist, hhead);
@@ -4441,12 +4574,12 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
        write_unlock(&css_set_lock);
 
        /*
-        * remove subsystem's css from the dummytop and free it - need to free
-        * before marking as null because ss->destroy needs the cgrp->subsys
-        * pointer to find their state. note that this also takes care of
-        * freeing the css_id.
+        * remove subsystem's css from the dummytop and free it - need to
+        * free before marking as null because ss->css_free needs the
+        * cgrp->subsys pointer to find their state. note that this also
+        * takes care of freeing the css_id.
         */
-       ss->destroy(dummytop);
+       ss->css_free(dummytop);
        dummytop->subsys[ss->subsys_id] = NULL;
 
        mutex_unlock(&cgroup_mutex);
@@ -4490,8 +4623,8 @@ int __init cgroup_init_early(void)
 
                BUG_ON(!ss->name);
                BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
-               BUG_ON(!ss->create);
-               BUG_ON(!ss->destroy);
+               BUG_ON(!ss->css_alloc);
+               BUG_ON(!ss->css_free);
                if (ss->subsys_id != i) {
                        printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
                               ss->name, ss->subsys_id);
@@ -5303,7 +5436,7 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
 }
 
 #ifdef CONFIG_CGROUP_DEBUG
-static struct cgroup_subsys_state *debug_create(struct cgroup *cont)
+static struct cgroup_subsys_state *debug_css_alloc(struct cgroup *cont)
 {
        struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
 
@@ -5313,7 +5446,7 @@ static struct cgroup_subsys_state *debug_create(struct cgroup *cont)
        return css;
 }
 
-static void debug_destroy(struct cgroup *cont)
+static void debug_css_free(struct cgroup *cont)
 {
        kfree(cont->subsys[debug_subsys_id]);
 }
@@ -5442,8 +5575,8 @@ static struct cftype debug_files[] =  {
 
 struct cgroup_subsys debug_subsys = {
        .name = "debug",
-       .create = debug_create,
-       .destroy = debug_destroy,
+       .css_alloc = debug_css_alloc,
+       .css_free = debug_css_free,
        .subsys_id = debug_subsys_id,
        .base_cftypes = debug_files,
 };