X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=kernel%2Fcpuset.c;h=41822e2027c1b710219a3d8e25d4112b5cf3abeb;hb=390a36aadf39e241c83035469aae48ed1a144088;hp=e4c31e6b87161fb5dae01ece138377480dad0c1b;hpb=39bd0d15eca5af15ee1492964f317ecdb024a9d6;p=pandora-kernel.git diff --git a/kernel/cpuset.c b/kernel/cpuset.c index e4c31e6b8716..41822e2027c1 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -77,6 +77,26 @@ struct cpuset { unsigned long flags; /* "unsigned long" so bitops work */ + /* + * On default hierarchy: + * + * The user-configured masks can only be changed by writing to + * cpuset.cpus and cpuset.mems, and won't be limited by the + * parent masks. + * + * The effective masks is the real masks that apply to the tasks + * in the cpuset. They may be changed if the configured masks are + * changed or hotplug happens. + * + * effective_mask == configured_mask & parent's effective_mask, + * and if it ends up empty, it will inherit the parent's mask. + * + * + * On legacy hierachy: + * + * The user-configured masks are always the same with effective masks. + */ + /* user-configured CPUs and Memory Nodes allow to tasks */ cpumask_var_t cpus_allowed; nodemask_t mems_allowed; @@ -313,9 +333,9 @@ static struct file_system_type cpuset_fs_type = { */ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) { - while (!cpumask_intersects(cs->cpus_allowed, cpu_online_mask)) + while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) cs = parent_cs(cs); - cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask); + cpumask_and(pmask, cs->effective_cpus, cpu_online_mask); } /* @@ -331,9 +351,9 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) */ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) { - while (!nodes_intersects(cs->mems_allowed, node_states[N_MEMORY])) + while (!nodes_intersects(cs->effective_mems, node_states[N_MEMORY])) cs = parent_cs(cs); - nodes_and(*pmask, cs->mems_allowed, node_states[N_MEMORY]); + nodes_and(*pmask, cs->effective_mems, node_states[N_MEMORY]); } /* @@ -450,9 +470,9 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) par = parent_cs(cur); - /* We must be a subset of our parent cpuset */ + /* On legacy hiearchy, we must be a subset of our parent cpuset. */ ret = -EACCES; - if (!is_cpuset_subset(trial, par)) + if (!cgroup_on_dfl(cur->css.cgroup) && !is_cpuset_subset(trial, par)) goto out; /* @@ -795,45 +815,6 @@ void rebuild_sched_domains(void) mutex_unlock(&cpuset_mutex); } -/* - * effective_cpumask_cpuset - return nearest ancestor with non-empty cpus - * @cs: the cpuset in interest - * - * A cpuset's effective cpumask is the cpumask of the nearest ancestor - * with non-empty cpus. We use effective cpumask whenever: - * - we update tasks' cpus_allowed. (they take on the ancestor's cpumask - * if the cpuset they reside in has no cpus) - * - we want to retrieve task_cs(tsk)'s cpus_allowed. - * - * Called with cpuset_mutex held. cpuset_cpus_allowed_fallback() is an - * exception. See comments there. - */ -static struct cpuset *effective_cpumask_cpuset(struct cpuset *cs) -{ - while (cpumask_empty(cs->cpus_allowed)) - cs = parent_cs(cs); - return cs; -} - -/* - * effective_nodemask_cpuset - return nearest ancestor with non-empty mems - * @cs: the cpuset in interest - * - * A cpuset's effective nodemask is the nodemask of the nearest ancestor - * with non-empty memss. We use effective nodemask whenever: - * - we update tasks' mems_allowed. (they take on the ancestor's nodemask - * if the cpuset they reside in has no mems) - * - we want to retrieve task_cs(tsk)'s mems_allowed. - * - * Called with cpuset_mutex held. - */ -static struct cpuset *effective_nodemask_cpuset(struct cpuset *cs) -{ - while (nodes_empty(cs->mems_allowed)) - cs = parent_cs(cs); - return cs; -} - /** * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset. * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed @@ -844,13 +825,12 @@ static struct cpuset *effective_nodemask_cpuset(struct cpuset *cs) */ static void update_tasks_cpumask(struct cpuset *cs) { - struct cpuset *cpus_cs = effective_cpumask_cpuset(cs); struct css_task_iter it; struct task_struct *task; css_task_iter_start(&cs->css, &it); while ((task = css_task_iter_next(&it))) - set_cpus_allowed_ptr(task, cpus_cs->cpus_allowed); + set_cpus_allowed_ptr(task, cs->effective_cpus); css_task_iter_end(&it); } @@ -988,15 +968,13 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to) { struct task_struct *tsk = current; - struct cpuset *mems_cs; tsk->mems_allowed = *to; do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); rcu_read_lock(); - mems_cs = effective_nodemask_cpuset(task_cs(tsk)); - guarantee_online_mems(mems_cs, &tsk->mems_allowed); + guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed); rcu_read_unlock(); } @@ -1065,13 +1043,12 @@ static void *cpuset_being_rebound; static void update_tasks_nodemask(struct cpuset *cs) { static nodemask_t newmems; /* protected by cpuset_mutex */ - struct cpuset *mems_cs = effective_nodemask_cpuset(cs); struct css_task_iter it; struct task_struct *task; cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ - guarantee_online_mems(mems_cs, &newmems); + guarantee_online_mems(cs, &newmems); /* * The mpol_rebind_mm() call takes mmap_sem, which we couldn't @@ -1497,8 +1474,6 @@ static void cpuset_attach(struct cgroup_subsys_state *css, struct task_struct *leader = cgroup_taskset_first(tset); struct cpuset *cs = css_cs(css); struct cpuset *oldcs = cpuset_attach_old_cs; - struct cpuset *cpus_cs = effective_cpumask_cpuset(cs); - struct cpuset *mems_cs = effective_nodemask_cpuset(cs); mutex_lock(&cpuset_mutex); @@ -1506,9 +1481,9 @@ static void cpuset_attach(struct cgroup_subsys_state *css, if (cs == &top_cpuset) cpumask_copy(cpus_attach, cpu_possible_mask); else - guarantee_online_cpus(cpus_cs, cpus_attach); + guarantee_online_cpus(cs, cpus_attach); - guarantee_online_mems(mems_cs, &cpuset_attach_nodemask_to); + guarantee_online_mems(cs, &cpuset_attach_nodemask_to); cgroup_taskset_for_each(task, tset) { /* @@ -1525,11 +1500,9 @@ static void cpuset_attach(struct cgroup_subsys_state *css, * Change mm, possibly for multiple threads in a threadgroup. This is * expensive and may sleep. */ - cpuset_attach_nodemask_to = cs->mems_allowed; + cpuset_attach_nodemask_to = cs->effective_mems; mm = get_task_mm(leader); if (mm) { - struct cpuset *mems_oldcs = effective_nodemask_cpuset(oldcs); - mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); /* @@ -1540,7 +1513,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css, * mm from. */ if (is_memory_migrate(cs)) { - cpuset_migrate_mm(mm, &mems_oldcs->old_mems_allowed, + cpuset_migrate_mm(mm, &oldcs->old_mems_allowed, &cpuset_attach_nodemask_to); } mmput(mm); @@ -2107,6 +2080,65 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) } } +static void hotplug_update_tasks_legacy(struct cpuset *cs, + struct cpumask *off_cpus, + nodemask_t *off_mems) +{ + bool is_empty; + + mutex_lock(&callback_mutex); + cpumask_andnot(cs->cpus_allowed, cs->cpus_allowed, off_cpus); + cpumask_andnot(cs->effective_cpus, cs->effective_cpus, off_cpus); + nodes_andnot(cs->mems_allowed, cs->mems_allowed, *off_mems); + nodes_andnot(cs->effective_mems, cs->effective_mems, *off_mems); + mutex_unlock(&callback_mutex); + + /* + * Don't call update_tasks_cpumask() if the cpuset becomes empty, + * as the tasks will be migratecd to an ancestor. + */ + if (!cpumask_empty(off_cpus) && !cpumask_empty(cs->cpus_allowed)) + update_tasks_cpumask(cs); + if (!nodes_empty(*off_mems) && !nodes_empty(cs->mems_allowed)) + update_tasks_nodemask(cs); + + is_empty = cpumask_empty(cs->cpus_allowed) || + nodes_empty(cs->mems_allowed); + + mutex_unlock(&cpuset_mutex); + + /* + * Move tasks to the nearest ancestor with execution resources, + * This is full cgroup operation which will also call back into + * cpuset. Should be done outside any lock. + */ + if (is_empty) + remove_tasks_in_empty_cpuset(cs); + + mutex_lock(&cpuset_mutex); +} + +static void hotplug_update_tasks(struct cpuset *cs, + struct cpumask *off_cpus, + nodemask_t *off_mems) +{ + mutex_lock(&callback_mutex); + cpumask_andnot(cs->effective_cpus, cs->effective_cpus, off_cpus); + if (cpumask_empty(cs->effective_cpus)) + cpumask_copy(cs->effective_cpus, + parent_cs(cs)->effective_cpus); + + nodes_andnot(cs->effective_mems, cs->effective_mems, *off_mems); + if (nodes_empty(cs->effective_mems)) + cs->effective_mems = parent_cs(cs)->effective_mems; + mutex_unlock(&callback_mutex); + + if (!cpumask_empty(off_cpus)) + update_tasks_cpumask(cs); + if (!nodes_empty(*off_mems)) + update_tasks_nodemask(cs); +} + /** * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug * @cs: cpuset in interest @@ -2119,9 +2151,6 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs) { static cpumask_t off_cpus; static nodemask_t off_mems; - bool is_empty; - bool on_dfl = cgroup_on_dfl(cs->css.cgroup); - retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); @@ -2136,61 +2165,16 @@ retry: goto retry; } - cpumask_andnot(&off_cpus, cs->cpus_allowed, top_cpuset.cpus_allowed); - nodes_andnot(off_mems, cs->mems_allowed, top_cpuset.mems_allowed); - - mutex_lock(&callback_mutex); - cpumask_andnot(cs->cpus_allowed, cs->cpus_allowed, &off_cpus); - - /* Inherit the effective mask of the parent, if it becomes empty. */ - cpumask_andnot(cs->effective_cpus, cs->effective_cpus, &off_cpus); - if (on_dfl && cpumask_empty(cs->effective_cpus)) - cpumask_copy(cs->effective_cpus, parent_cs(cs)->effective_cpus); - mutex_unlock(&callback_mutex); - - /* - * If on_dfl, we need to update tasks' cpumask for empty cpuset to - * take on ancestor's cpumask. Otherwise, don't call - * update_tasks_cpumask() if the cpuset becomes empty, as the tasks - * in it will be migrated to an ancestor. - */ - if ((on_dfl && cpumask_empty(cs->cpus_allowed)) || - (!cpumask_empty(&off_cpus) && !cpumask_empty(cs->cpus_allowed))) - update_tasks_cpumask(cs); - - mutex_lock(&callback_mutex); - nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems); - - /* Inherit the effective mask of the parent, if it becomes empty */ - nodes_andnot(cs->effective_mems, cs->effective_mems, off_mems); - if (on_dfl && nodes_empty(cs->effective_mems)) - cs->effective_mems = parent_cs(cs)->effective_mems; - mutex_unlock(&callback_mutex); - - /* - * If on_dfl, we need to update tasks' nodemask for empty cpuset to - * take on ancestor's nodemask. Otherwise, don't call - * update_tasks_nodemask() if the cpuset becomes empty, as the - * tasks in it will be migratd to an ancestor. - */ - if ((on_dfl && nodes_empty(cs->mems_allowed)) || - (!nodes_empty(off_mems) && !nodes_empty(cs->mems_allowed))) - update_tasks_nodemask(cs); + cpumask_andnot(&off_cpus, cs->effective_cpus, + top_cpuset.effective_cpus); + nodes_andnot(off_mems, cs->effective_mems, top_cpuset.effective_mems); - is_empty = cpumask_empty(cs->cpus_allowed) || - nodes_empty(cs->mems_allowed); + if (cgroup_on_dfl(cs->css.cgroup)) + hotplug_update_tasks(cs, &off_cpus, &off_mems); + else + hotplug_update_tasks_legacy(cs, &off_cpus, &off_mems); mutex_unlock(&cpuset_mutex); - - /* - * If on_dfl, we'll keep tasks in empty cpusets. - * - * Otherwise move tasks to the nearest ancestor with execution - * resources. This is full cgroup operation which will - * also call back into cpuset. Should be done outside any lock. - */ - if (!on_dfl && is_empty) - remove_tasks_in_empty_cpuset(cs); } /** @@ -2214,6 +2198,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) static cpumask_t new_cpus; static nodemask_t new_mems; bool cpus_updated, mems_updated; + bool on_dfl = cgroup_on_dfl(top_cpuset.css.cgroup); mutex_lock(&cpuset_mutex); @@ -2221,13 +2206,14 @@ static void cpuset_hotplug_workfn(struct work_struct *work) cpumask_copy(&new_cpus, cpu_active_mask); new_mems = node_states[N_MEMORY]; - cpus_updated = !cpumask_equal(top_cpuset.cpus_allowed, &new_cpus); - mems_updated = !nodes_equal(top_cpuset.mems_allowed, new_mems); + cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus); + mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems); /* synchronize cpus_allowed to cpu_active_mask */ if (cpus_updated) { mutex_lock(&callback_mutex); - cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); + if (!on_dfl) + cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); cpumask_copy(top_cpuset.effective_cpus, &new_cpus); mutex_unlock(&callback_mutex); /* we don't mess with cpumasks of tasks in top_cpuset */ @@ -2236,7 +2222,8 @@ static void cpuset_hotplug_workfn(struct work_struct *work) /* synchronize mems_allowed to N_MEMORY */ if (mems_updated) { mutex_lock(&callback_mutex); - top_cpuset.mems_allowed = new_mems; + if (!on_dfl) + top_cpuset.mems_allowed = new_mems; top_cpuset.effective_mems = new_mems; mutex_unlock(&callback_mutex); update_tasks_nodemask(&top_cpuset); @@ -2331,23 +2318,17 @@ void __init cpuset_init_smp(void) void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) { - struct cpuset *cpus_cs; - mutex_lock(&callback_mutex); rcu_read_lock(); - cpus_cs = effective_cpumask_cpuset(task_cs(tsk)); - guarantee_online_cpus(cpus_cs, pmask); + guarantee_online_cpus(task_cs(tsk), pmask); rcu_read_unlock(); mutex_unlock(&callback_mutex); } void cpuset_cpus_allowed_fallback(struct task_struct *tsk) { - struct cpuset *cpus_cs; - rcu_read_lock(); - cpus_cs = effective_cpumask_cpuset(task_cs(tsk)); - do_set_cpus_allowed(tsk, cpus_cs->cpus_allowed); + do_set_cpus_allowed(tsk, task_cs(tsk)->effective_cpus); rcu_read_unlock(); /* @@ -2386,13 +2367,11 @@ void cpuset_init_current_mems_allowed(void) nodemask_t cpuset_mems_allowed(struct task_struct *tsk) { - struct cpuset *mems_cs; nodemask_t mask; mutex_lock(&callback_mutex); rcu_read_lock(); - mems_cs = effective_nodemask_cpuset(task_cs(tsk)); - guarantee_online_mems(mems_cs, &mask); + guarantee_online_mems(task_cs(tsk), &mask); rcu_read_unlock(); mutex_unlock(&callback_mutex);