* Call without callback_mutex or task_lock() held. May be
* called with or without cgroup_mutex held. Thanks in part to
* 'the_top_cpuset_hack', the task's cpuset pointer will never
- * be NULL. This routine also might acquire callback_mutex and
- * current->mm->mmap_sem during call.
+ * be NULL. This routine also might acquire callback_mutex during
+ * call.
*
* Reading current->cpuset->mems_generation doesn't need task_lock
* to guard the current->cpuset derefence, because it is guarded
trialcs = *cs;
/*
- * An empty cpus_allowed is ok if there are no tasks in the cpuset.
+ * An empty cpus_allowed is ok only if the cpuset has no tasks.
* Since cpulist_parse() fails on an empty mask, we special case
* that parsing. The validate_change() call ensures that cpusets
* with tasks have cpus.
* so that the migration code can allocate pages on these nodes.
*
* Call holding cgroup_mutex, so current's cpuset won't change
- * during this call, as cgroup_mutex holds off any attach_task()
+ * during this call, as manage_mutex holds off any cpuset_attach()
* calls. Therefore we don't need to take task_lock around the
* call to guarantee_online_mems(), as we know no one is changing
* our task's cpuset.
* @from: cpuset in which the tasks currently reside
* @to: cpuset to which the tasks will be moved
*
- * Called with manage_sem held
- * callback_mutex must not be held, as attach_task() will take it.
+ * Called with cgroup_mutex held
+ * callback_mutex must not be held, as cpuset_attach() will take it.
*
* The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
* calling callback functions for each.
* last CPU or node from a cpuset, then move the tasks in the empty
* cpuset to its next-highest non-empty parent.
*
- * The parent cpuset has some superset of the 'mems' nodes that the
- * newly empty cpuset held, so no migration of memory is necessary.
- *
- * Called with both manage_sem and callback_sem held
+ * Called with cgroup_mutex held
+ * callback_mutex must not be held, as cpuset_attach() will take it.
*/
static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
{
struct cpuset *parent;
- /* the cgroup's css_sets list is in use if there are tasks
- in the cpuset; the list is empty if there are none;
- the cs->css.refcnt seems always 0 */
+ /*
+ * The cgroup's css_sets list is in use if there are tasks
+ * in the cpuset; the list is empty if there are none;
+ * the cs->css.refcnt seems always 0.
+ */
if (list_empty(&cs->css.cgroup->css_sets))
return;
* has online cpus, so can't be empty).
*/
parent = cs->parent;
- while (cpus_empty(parent->cpus_allowed)) {
- /*
- * this empty cpuset should now be considered to
- * have been used, and therefore eligible for
- * release when empty (if it is notify_on_release)
- */
+ while (cpus_empty(parent->cpus_allowed) ||
+ nodes_empty(parent->mems_allowed))
parent = parent->parent;
- }
move_member_tasks_to_cpuset(cs, parent);
}
* Walk the specified cpuset subtree and look for empty cpusets.
* The tasks of such cpuset must be moved to a parent cpuset.
*
- * Note that such a notify_on_release cpuset must have had, at some time,
- * member tasks or cpuset descendants and cpus and memory, before it can
- * be a candidate for release.
- *
* Called with cgroup_mutex held. We take callback_mutex to modify
* cpus_allowed and mems_allowed.
*
list_add_tail((struct list_head *)&root->stack_list, &queue);
- mutex_lock(&callback_mutex);
while (!list_empty(&queue)) {
cp = container_of(queue.next, struct cpuset, stack_list);
list_del(queue.next);
list_add_tail(&child->stack_list, &queue);
}
cont = cp->css.cgroup;
+
+ /* Continue past cpusets with all cpus, mems online */
+ if (cpus_subset(cp->cpus_allowed, cpu_online_map) &&
+ nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
+ continue;
+
/* Remove offline cpus and mems from this cpuset. */
+ mutex_lock(&callback_mutex);
cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map);
nodes_and(cp->mems_allowed, cp->mems_allowed,
node_states[N_HIGH_MEMORY]);
- if ((cpus_empty(cp->cpus_allowed) ||
- nodes_empty(cp->mems_allowed))) {
- /* Move tasks from the empty cpuset to a parent */
- mutex_unlock(&callback_mutex);
+ mutex_unlock(&callback_mutex);
+
+ /* Move tasks from the empty cpuset to a parent */
+ if (cpus_empty(cp->cpus_allowed) ||
+ nodes_empty(cp->mems_allowed))
remove_tasks_in_empty_cpuset(cp);
- mutex_lock(&callback_mutex);
- }
}
- mutex_unlock(&callback_mutex);
- return;
}
/*
* - Used for /proc/<pid>/cpuset.
* - No need to task_lock(tsk) on this tsk->cpuset reference, as it
* doesn't really matter if tsk->cpuset changes after we read it,
- * and we take cgroup_mutex, keeping attach_task() from changing it
+ * and we take cgroup_mutex, keeping cpuset_attach() from changing it
* anyway.
*/
static int proc_cpuset_show(struct seq_file *m, void *unused_v)
#endif /* CONFIG_PROC_PID_CPUSET */
/* Display task cpus_allowed, mems_allowed in /proc/<pid>/status file. */
-char *cpuset_task_status_allowed(struct task_struct *task, char *buffer)
-{
- buffer += sprintf(buffer, "Cpus_allowed:\t");
- buffer += cpumask_scnprintf(buffer, PAGE_SIZE, task->cpus_allowed);
- buffer += sprintf(buffer, "\n");
- buffer += sprintf(buffer, "Mems_allowed:\t");
- buffer += nodemask_scnprintf(buffer, PAGE_SIZE, task->mems_allowed);
- buffer += sprintf(buffer, "\n");
- return buffer;
+void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
+{
+ seq_printf(m, "Cpus_allowed:\t");
+ m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count,
+ task->cpus_allowed);
+ seq_printf(m, "\n");
+ seq_printf(m, "Mems_allowed:\t");
+ m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count,
+ task->mems_allowed);
+ seq_printf(m, "\n");
}