/* On legacy hiearchy, we must be a subset of our parent cpuset. */
ret = -EACCES;
- if (!cgroup_on_dfl(cur->css.cgroup) && !is_cpuset_subset(trial, par))
+ if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+ !is_cpuset_subset(trial, par))
goto out;
/*
* be changed to have empty cpus_allowed or mems_allowed.
*/
ret = -ENOSPC;
- if ((cgroup_has_tasks(cur->css.cgroup) || cur->attach_in_progress)) {
+ if ((cgroup_is_populated(cur->css.cgroup) || cur->attach_in_progress)) {
if (!cpumask_empty(cur->cpus_allowed) &&
cpumask_empty(trial->cpus_allowed))
goto out;
rcu_read_lock();
cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
- if (cp == root_cs)
- continue;
-
/* skip the whole subtree if @cp doesn't have any CPU */
if (cpumask_empty(cp->cpus_allowed)) {
pos_css = css_rightmost_descendant(pos_css);
int csn; /* how many cpuset ptrs in csa so far */
int i, j, k; /* indices for partition finding loops */
cpumask_var_t *doms; /* resulting partition; i.e. sched domains */
+ cpumask_var_t non_isolated_cpus; /* load balanced CPUs */
struct sched_domain_attr *dattr; /* attributes for custom domains */
int ndoms = 0; /* number of sched domains in result */
int nslot; /* next empty doms[] struct cpumask slot */
dattr = NULL;
csa = NULL;
+ if (!alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL))
+ goto done;
+ cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
+
/* Special case for the 99% of systems with one, full, sched domain */
if (is_sched_load_balance(&top_cpuset)) {
ndoms = 1;
*dattr = SD_ATTR_INIT;
update_domain_attr_tree(dattr, &top_cpuset);
}
- cpumask_copy(doms[0], top_cpuset.effective_cpus);
+ cpumask_and(doms[0], top_cpuset.effective_cpus,
+ non_isolated_cpus);
goto done;
}
* the corresponding sched domain.
*/
if (!cpumask_empty(cp->cpus_allowed) &&
- !is_sched_load_balance(cp))
+ !(is_sched_load_balance(cp) &&
+ cpumask_intersects(cp->cpus_allowed, non_isolated_cpus)))
continue;
if (is_sched_load_balance(cp))
if (apn == b->pn) {
cpumask_or(dp, dp, b->effective_cpus);
+ cpumask_and(dp, dp, non_isolated_cpus);
if (dattr)
update_domain_attr_tree(dattr + nslot, b);
BUG_ON(nslot != ndoms);
done:
+ free_cpumask_var(non_isolated_cpus);
kfree(csa);
/*
* If it becomes empty, inherit the effective mask of the
* parent, which is guaranteed to have some CPUs.
*/
- if (cpumask_empty(new_cpus))
+ if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+ cpumask_empty(new_cpus))
cpumask_copy(new_cpus, parent->effective_cpus);
/* Skip the whole subtree if the cpumask remains the same. */
cpumask_copy(cp->effective_cpus, new_cpus);
spin_unlock_irq(&callback_lock);
- WARN_ON(!cgroup_on_dfl(cp->css.cgroup) &&
+ WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
update_tasks_cpumask(cp);
* If it becomes empty, inherit the effective mask of the
* parent, which is guaranteed to have some MEMs.
*/
- if (nodes_empty(*new_mems))
+ if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+ nodes_empty(*new_mems))
*new_mems = parent->effective_mems;
/* Skip the whole subtree if the nodemask remains the same. */
cp->effective_mems = *new_mems;
spin_unlock_irq(&callback_lock);
- WARN_ON(!cgroup_on_dfl(cp->css.cgroup) &&
+ WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
!nodes_equal(cp->mems_allowed, cp->effective_mems));
update_tasks_nodemask(cp);
spin_unlock_irq(&callback_lock);
/* use trialcs->mems_allowed as a temp variable */
- update_nodemasks_hier(cs, &cs->mems_allowed);
+ update_nodemasks_hier(cs, &trialcs->mems_allowed);
done:
return retval;
}
/* allow moving tasks into an empty cpuset if on default hierarchy */
ret = -ENOSPC;
- if (!cgroup_on_dfl(css->cgroup) &&
+ if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
(cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
goto out_unlock;
{
/* static buf protected by cpuset_mutex */
static nodemask_t cpuset_attach_nodemask_to;
- struct mm_struct *mm;
struct task_struct *task;
- struct task_struct *leader = cgroup_taskset_first(tset);
+ struct task_struct *leader;
struct cpuset *cs = css_cs(css);
struct cpuset *oldcs = cpuset_attach_old_cs;
}
/*
- * Change mm, possibly for multiple threads in a threadgroup. This is
- * expensive and may sleep.
+ * Change mm for all threadgroup leaders. This is expensive and may
+ * sleep and should be moved outside migration path proper.
*/
cpuset_attach_nodemask_to = cs->effective_mems;
- mm = get_task_mm(leader);
- if (mm) {
- mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
-
- /*
- * old_mems_allowed is the same with mems_allowed here, except
- * if this task is being moved automatically due to hotplug.
- * In that case @mems_allowed has been updated and is empty,
- * so @old_mems_allowed is the right nodesets that we migrate
- * mm from.
- */
- if (is_memory_migrate(cs)) {
- cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,
- &cpuset_attach_nodemask_to);
+ cgroup_taskset_for_each_leader(leader, tset) {
+ struct mm_struct *mm = get_task_mm(leader);
+
+ if (mm) {
+ mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
+
+ /*
+ * old_mems_allowed is the same with mems_allowed
+ * here, except if this task is being moved
+ * automatically due to hotplug. In that case
+ * @mems_allowed has been updated and is empty, so
+ * @old_mems_allowed is the right nodesets that we
+ * migrate mm from.
+ */
+ if (is_memory_migrate(cs)) {
+ cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,
+ &cpuset_attach_nodemask_to);
+ }
+ mmput(mm);
}
- mmput(mm);
}
cs->old_mems_allowed = cpuset_attach_nodemask_to;
case FILE_MEMORY_PRESSURE_ENABLED:
cpuset_memory_pressure_enabled = !!val;
break;
- case FILE_MEMORY_PRESSURE:
- retval = -EACCES;
- break;
case FILE_SPREAD_PAGE:
retval = update_flag(CS_SPREAD_PAGE, cs, val);
break;
{
struct cpuset *cs = css_cs(seq_css(sf));
cpuset_filetype_t type = seq_cft(sf)->private;
- ssize_t count;
- char *buf, *s;
int ret = 0;
- count = seq_get_buf(sf, &buf);
- s = buf;
-
spin_lock_irq(&callback_lock);
switch (type) {
case FILE_CPULIST:
- s += cpulist_scnprintf(s, count, cs->cpus_allowed);
+ seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed));
break;
case FILE_MEMLIST:
- s += nodelist_scnprintf(s, count, cs->mems_allowed);
+ seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed));
break;
case FILE_EFFECTIVE_CPULIST:
- s += cpulist_scnprintf(s, count, cs->effective_cpus);
+ seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_cpus));
break;
case FILE_EFFECTIVE_MEMLIST:
- s += nodelist_scnprintf(s, count, cs->effective_mems);
+ seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems));
break;
default:
ret = -EINVAL;
- goto out_unlock;
}
- if (s < buf + count - 1) {
- *s++ = '\n';
- seq_commit(sf, s - buf);
- } else {
- seq_commit(sf, -1);
- }
-out_unlock:
spin_unlock_irq(&callback_lock);
return ret;
}
{
.name = "memory_pressure",
.read_u64 = cpuset_read_u64,
- .write_u64 = cpuset_write_u64,
- .private = FILE_MEMORY_PRESSURE,
- .mode = S_IRUGO,
},
{
cpuset_inc();
spin_lock_irq(&callback_lock);
- if (cgroup_on_dfl(cs->css.cgroup)) {
+ if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
cpumask_copy(cs->effective_cpus, parent->effective_cpus);
cs->effective_mems = parent->effective_mems;
}
spin_lock_irq(&callback_lock);
cs->mems_allowed = parent->mems_allowed;
+ cs->effective_mems = parent->mems_allowed;
cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
+ cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
spin_unlock_irq(&callback_lock);
out_unlock:
mutex_unlock(&cpuset_mutex);
mutex_lock(&cpuset_mutex);
spin_lock_irq(&callback_lock);
- if (cgroup_on_dfl(root_css->cgroup)) {
+ if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
top_cpuset.mems_allowed = node_possible_map;
} else {
cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
mems_updated = !nodes_equal(new_mems, cs->effective_mems);
- if (cgroup_on_dfl(cs->css.cgroup))
+ if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
hotplug_update_tasks(cs, &new_cpus, &new_mems,
cpus_updated, mems_updated);
else
static cpumask_t new_cpus;
static nodemask_t new_mems;
bool cpus_updated, mems_updated;
- bool on_dfl = cgroup_on_dfl(top_cpuset.css.cgroup);
+ bool on_dfl = cgroup_subsys_on_dfl(cpuset_cgrp_subsys);
mutex_lock(&cpuset_mutex);
*/
}
-void cpuset_init_current_mems_allowed(void)
+void __init cpuset_init_current_mems_allowed(void)
{
nodes_setall(current->mems_allowed);
}
* @node: is this an allowed node?
* @gfp_mask: memory allocation flags
*
- * If we're in interrupt, yes, we can always allocate. If __GFP_THISNODE is
- * set, yes, we can always allocate. If node is in our task's mems_allowed,
- * yes. If it's not a __GFP_HARDWALL request and this node is in the nearest
- * hardwalled cpuset ancestor to this task's cpuset, yes. If the task has been
- * OOM killed and has access to memory reserves as specified by the TIF_MEMDIE
- * flag, yes.
+ * If we're in interrupt, yes, we can always allocate. If @node is set in
+ * current's mems_allowed, yes. If it's not a __GFP_HARDWALL request and this
+ * node is set in the nearest hardwalled cpuset ancestor to current's cpuset,
+ * yes. If current has access to memory reserves due to TIF_MEMDIE, yes.
* Otherwise, no.
*
- * The __GFP_THISNODE placement logic is really handled elsewhere,
- * by forcibly using a zonelist starting at a specified node, and by
- * (in get_page_from_freelist()) refusing to consider the zones for
- * any node on the zonelist except the first. By the time any such
- * calls get to this routine, we should just shut up and say 'yes'.
- *
* GFP_USER allocations are marked with the __GFP_HARDWALL bit,
* and do not allow allocations outside the current tasks cpuset
* unless the task has been OOM killed as is marked TIF_MEMDIE.
int allowed; /* is allocation in zone z allowed? */
unsigned long flags;
- if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
+ if (in_interrupt())
return 1;
if (node_isset(node, current->mems_allowed))
return 1;
return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
}
-#define CPUSET_NODELIST_LEN (256)
-
/**
* cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed
* @tsk: pointer to task_struct of some task.
*/
void cpuset_print_task_mems_allowed(struct task_struct *tsk)
{
- /* Statically allocated to prevent using excess stack. */
- static char cpuset_nodelist[CPUSET_NODELIST_LEN];
- static DEFINE_SPINLOCK(cpuset_buffer_lock);
struct cgroup *cgrp;
- spin_lock(&cpuset_buffer_lock);
rcu_read_lock();
cgrp = task_cs(tsk)->css.cgroup;
- nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
- tsk->mems_allowed);
pr_info("%s cpuset=", tsk->comm);
pr_cont_cgroup_name(cgrp);
- pr_cont(" mems_allowed=%s\n", cpuset_nodelist);
+ pr_cont(" mems_allowed=%*pbl\n", nodemask_pr_args(&tsk->mems_allowed));
rcu_read_unlock();
- spin_unlock(&cpuset_buffer_lock);
}
/*
/* Display task mems_allowed in /proc/<pid>/status file. */
void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
{
- seq_puts(m, "Mems_allowed:\t");
- seq_nodemask(m, &task->mems_allowed);
- seq_puts(m, "\n");
- seq_puts(m, "Mems_allowed_list:\t");
- seq_nodemask_list(m, &task->mems_allowed);
- seq_puts(m, "\n");
+ seq_printf(m, "Mems_allowed:\t%*pb\n",
+ nodemask_pr_args(&task->mems_allowed));
+ seq_printf(m, "Mems_allowed_list:\t%*pbl\n",
+ nodemask_pr_args(&task->mems_allowed));
}