From 470fd646444c65a5d062a371f5ec8dcedee61239 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Thu, 18 Oct 2007 23:40:46 -0700 Subject: [PATCH] hotplug cpu: migrate a task within its cpuset When a cpu is disabled, move_task_off_dead_cpu() is called for tasks that have been running on that cpu. Currently, such a task is migrated: 1) to any cpu on the same node as the disabled cpu, which is both online and among that task's cpus_allowed 2) to any cpu which is both online and among that task's cpus_allowed It is typical of a multithreaded application running on a large NUMA system to have its tasks confined to a cpuset so as to cluster them near the memory that they share. Furthermore, it is typical to explicitly place such a task on a specific cpu in that cpuset. And in that case the task's cpus_allowed includes only a single cpu. This patch would insert a preference to migrate such a task to some cpu within its cpuset (and set its cpus_allowed to its entire cpuset). With this patch, migrate the task to: 1) to any cpu on the same node as the disabled cpu, which is both online and among that task's cpus_allowed 2) to any online cpu within the task's cpuset 3) to any cpu which is both online and among that task's cpus_allowed In order to do this, move_task_off_dead_cpu() must make a call to cpuset_cpus_allowed_locked(), a new subset of cpuset_cpus_allowed(), that will not block. (name change - per Oleg's suggestion) Calls are made to cpuset_lock() and cpuset_unlock() in migration_call() to set the cpuset mutex during the whole migrate_live_tasks() and migrate_dead_tasks() procedure. [akpm@linux-foundation.org: build fix] [pj@sgi.com: Fix indentation and spacing] Signed-off-by: Cliff Wickman Cc: Oleg Nesterov Cc: Christoph Lameter Cc: Paul Jackson Cc: Ingo Molnar Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cpu-hotplug.txt | 4 +++- include/linux/cpuset.h | 5 +++++ kernel/cpuset.c | 15 ++++++++++++++- kernel/sched.c | 12 +++++++++++- 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index b6d24c22274b..a741f658a3c9 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt @@ -220,7 +220,9 @@ A: The following happen, listed in no particular order :-) CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the CPU is being offlined while tasks are frozen due to a suspend operation in progress -- All process is migrated away from this outgoing CPU to a new CPU +- All processes are migrated away from this outgoing CPU to new CPUs. + The new CPU is chosen from each process' current cpuset, which may be + a subset of all online CPUs. - All interrupts targeted to this CPU is migrated to a new CPU - timers/bottom half/task lets are also migrated to a new CPU - Once all services are migrated, kernel calls an arch specific routine diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 31adfde1c95f..ecae585ec3da 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -21,6 +21,7 @@ extern int cpuset_init_early(void); extern int cpuset_init(void); extern void cpuset_init_smp(void); extern cpumask_t cpuset_cpus_allowed(struct task_struct *p); +extern cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); @@ -87,6 +88,10 @@ static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p) { return cpu_possible_map; } +static inline cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p) +{ + return cpu_possible_map; +} static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) { diff --git a/kernel/cpuset.c b/kernel/cpuset.c index fa31cb9f9898..50f5dc463688 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1818,10 +1818,23 @@ cpumask_t cpuset_cpus_allowed(struct task_struct *tsk) cpumask_t mask; mutex_lock(&callback_mutex); + mask = cpuset_cpus_allowed_locked(tsk); + mutex_unlock(&callback_mutex); + + return mask; +} + +/** + * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset. + * Must be called with callback_mutex held. + **/ +cpumask_t cpuset_cpus_allowed_locked(struct task_struct *tsk) +{ + cpumask_t mask; + task_lock(tsk); guarantee_online_cpus(task_cs(tsk), &mask); task_unlock(tsk); - mutex_unlock(&callback_mutex); return mask; } diff --git a/kernel/sched.c b/kernel/sched.c index a7e30462600f..4071306e1088 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5160,8 +5160,16 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) /* No more Mr. Nice Guy. */ if (dest_cpu == NR_CPUS) { + cpumask_t cpus_allowed = cpuset_cpus_allowed_locked(p); + /* + * Try to stay on the same cpuset, where the + * current cpuset may be a subset of all cpus. + * The cpuset_cpus_allowed_locked() variant of + * cpuset_cpus_allowed() will not block. It must be + * called within calls to cpuset_lock/cpuset_unlock. + */ rq = task_rq_lock(p, &flags); - cpus_setall(p->cpus_allowed); + p->cpus_allowed = cpus_allowed; dest_cpu = any_online_cpu(p->cpus_allowed); task_rq_unlock(rq, &flags); @@ -5527,6 +5535,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) case CPU_DEAD: case CPU_DEAD_FROZEN: + cpuset_lock(); /* around calls to cpuset_cpus_allowed_lock() */ migrate_live_tasks(cpu); rq = cpu_rq(cpu); kthread_stop(rq->migration_thread); @@ -5540,6 +5549,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) rq->idle->sched_class = &idle_sched_class; migrate_dead_tasks(cpu); spin_unlock_irq(&rq->lock); + cpuset_unlock(); migrate_nr_uninterruptible(rq); BUG_ON(rq->nr_running != 0); -- 2.39.2