* Call subsys's pre_destroy handler.
* This is called before css refcnt check.
*/
-static void cgroup_call_pre_destroy(struct cgroup *cgrp)
+static int cgroup_call_pre_destroy(struct cgroup *cgrp)
{
struct cgroup_subsys *ss;
+ int ret = 0;
+
for_each_subsys(cgrp->root, ss)
- if (ss->pre_destroy)
- ss->pre_destroy(ss, cgrp);
- return;
+ if (ss->pre_destroy) {
+ ret = ss->pre_destroy(ss, cgrp);
+ if (ret)
+ break;
+ }
+ return ret;
}
static void free_cgroup_rcu(struct rcu_head *obj)
remove_dir(dentry);
}
+/*
+ * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
+ * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
+ * reference to css->refcnt. In general, this refcnt is expected to goes down
+ * to zero, soon.
+ *
+ * CGRP_WAIT_ON_RMDIR flag is modified under cgroup's inode->i_mutex;
+ */
+DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
+
+static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp)
+{
+ if (unlikely(test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
+ wake_up_all(&cgroup_rmdir_waitq);
+}
+
static int rebind_subsystems(struct cgroupfs_root *root,
unsigned long final_bits)
{
}
ret = rebind_subsystems(root, opts.subsys_bits);
+ if (ret)
+ goto out_unlock;
/* (re)populate subsystem files */
- if (!ret)
- cgroup_populate_dir(cgrp);
+ cgroup_populate_dir(cgrp);
if (opts.release_agent)
strcpy(root->release_agent_path, opts.release_agent);
out_unlock:
- if (opts.release_agent)
- kfree(opts.release_agent);
+ kfree(opts.release_agent);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
return ret;
/* First find the desired set of subsystems */
ret = parse_cgroupfs_options(data, &opts);
if (ret) {
- if (opts.release_agent)
- kfree(opts.release_agent);
+ kfree(opts.release_agent);
return ret;
}
root = kzalloc(sizeof(*root), GFP_KERNEL);
if (!root) {
- if (opts.release_agent)
- kfree(opts.release_agent);
+ kfree(opts.release_agent);
return -ENOMEM;
}
set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
synchronize_rcu();
put_css_set(cg);
+
+ /*
+ * wake up rmdir() waiter. the rmdir should fail since the cgroup
+ * is no longer empty.
+ */
+ cgroup_wakeup_rmdir_waiters(cgrp);
return 0;
}
.rename = cgroup_rename,
};
-static int cgroup_create_file(struct dentry *dentry, int mode,
+static int cgroup_create_file(struct dentry *dentry, mode_t mode,
struct super_block *sb)
{
static const struct dentry_operations cgroup_dops = {
* @mode: mode to set on new directory.
*/
static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
- int mode)
+ mode_t mode)
{
struct dentry *parent;
int error = 0;
return error;
}
+/**
+ * cgroup_file_mode - deduce file mode of a control file
+ * @cft: the control file in question
+ *
+ * returns cft->mode if ->mode is not 0
+ * returns S_IRUGO|S_IWUSR if it has both a read and a write handler
+ * returns S_IRUGO if it has only a read handler
+ * returns S_IWUSR if it has only a write hander
+ */
+static mode_t cgroup_file_mode(const struct cftype *cft)
+{
+ mode_t mode = 0;
+
+ if (cft->mode)
+ return cft->mode;
+
+ if (cft->read || cft->read_u64 || cft->read_s64 ||
+ cft->read_map || cft->read_seq_string)
+ mode |= S_IRUGO;
+
+ if (cft->write || cft->write_u64 || cft->write_s64 ||
+ cft->write_string || cft->trigger)
+ mode |= S_IWUSR;
+
+ return mode;
+}
+
int cgroup_add_file(struct cgroup *cgrp,
struct cgroup_subsys *subsys,
const struct cftype *cft)
struct dentry *dir = cgrp->dentry;
struct dentry *dentry;
int error;
+ mode_t mode;
char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
dentry = lookup_one_len(name, dir, strlen(name));
if (!IS_ERR(dentry)) {
- error = cgroup_create_file(dentry, 0644 | S_IFREG,
+ mode = cgroup_file_mode(cft);
+ error = cgroup_create_file(dentry, mode | S_IFREG,
cgrp->root->sb);
if (!error)
dentry->d_fsdata = (void *)cft;
.write_u64 = cgroup_tasks_write,
.release = cgroup_tasks_release,
.private = FILE_TASKLIST,
+ .mode = S_IRUGO | S_IWUSR,
},
{
* Must be called with the mutex on the parent inode held
*/
static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
- int mode)
+ mode_t mode)
{
struct cgroup *cgrp;
struct cgroupfs_root *root = parent->root;
struct cgroup *cgrp = dentry->d_fsdata;
struct dentry *d;
struct cgroup *parent;
+ DEFINE_WAIT(wait);
+ int ret;
/* the vfs holds both inode->i_mutex already */
-
+again:
mutex_lock(&cgroup_mutex);
if (atomic_read(&cgrp->count) != 0) {
mutex_unlock(&cgroup_mutex);
* Call pre_destroy handlers of subsys. Notify subsystems
* that rmdir() request comes.
*/
- cgroup_call_pre_destroy(cgrp);
+ ret = cgroup_call_pre_destroy(cgrp);
+ if (ret)
+ return ret;
mutex_lock(&cgroup_mutex);
parent = cgrp->parent;
-
- if (atomic_read(&cgrp->count)
- || !list_empty(&cgrp->children)
- || !cgroup_clear_css_refs(cgrp)) {
+ if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
mutex_unlock(&cgroup_mutex);
return -EBUSY;
}
+ /*
+ * css_put/get is provided for subsys to grab refcnt to css. In typical
+ * case, subsystem has no reference after pre_destroy(). But, under
+ * hierarchy management, some *temporal* refcnt can be hold.
+ * To avoid returning -EBUSY to a user, waitqueue is used. If subsys
+ * is really busy, it should return -EBUSY at pre_destroy(). wake_up
+ * is called when css_put() is called and refcnt goes down to 0.
+ */
+ set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
+ prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
+
+ if (!cgroup_clear_css_refs(cgrp)) {
+ mutex_unlock(&cgroup_mutex);
+ schedule();
+ finish_wait(&cgroup_rmdir_waitq, &wait);
+ clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
+ if (signal_pending(current))
+ return -EINTR;
+ goto again;
+ }
+ /* NO css_tryget() can success after here. */
+ finish_wait(&cgroup_rmdir_waitq, &wait);
+ clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
spin_lock(&release_list_lock);
set_bit(CGRP_REMOVED, &cgrp->flags);
{
struct cgroup *cgrp = css->cgroup;
rcu_read_lock();
- if ((atomic_dec_return(&css->refcnt) == 1) &&
- notify_on_release(cgrp)) {
- set_bit(CGRP_RELEASABLE, &cgrp->flags);
- check_for_release(cgrp);
+ if (atomic_dec_return(&css->refcnt) == 1) {
+ if (notify_on_release(cgrp)) {
+ set_bit(CGRP_RELEASABLE, &cgrp->flags);
+ check_for_release(cgrp);
+ }
+ cgroup_wakeup_rmdir_waiters(cgrp);
}
rcu_read_unlock();
}
}
bool css_is_ancestor(struct cgroup_subsys_state *child,
- struct cgroup_subsys_state *root)
+ const struct cgroup_subsys_state *root)
{
struct css_id *child_id = rcu_dereference(child->id);
struct css_id *root_id = rcu_dereference(root->id);