git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
sched: kill migration thread in CPU_POST_DEAD instead of CPU_DEAD
author Amit Arora <amitarora@in.ibm.com>
Wed, 19 May 2010 09:05:57 +0000 (14:35 +0530)
committer Greg Kroah-Hartman <gregkh@suse.de>
Mon, 20 Sep 2010 20:17:45 +0000 (13:17 -0700)
[Fixed in a different manner upstream, due to rewrites in this area]

Problem: In a stress test where some heavy tests were running along with
regular CPU offlining and onlining, a hang was observed. The system appears to
hang at the point where migration_call() tries to kill the migration_thread
of the dying CPU, which has just been moved to the current CPU. This migration
thread does not get a chance to run (and die) because rt_throttled is set to 1
on the current CPU, and it is never cleared because the hrtimer that is supposed
to reset the rt bandwidth (sched_rt_period_timer) is tied to the CPU which we
just marked dead!

Solution: This patch defers killing the migration thread until the
"CPU_POST_DEAD" event. By then all the timers (including sched_rt_period_timer)
should have been migrated (along with other callbacks).

Signed-off-by: Amit Arora <amitarora@in.ibm.com>
Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
kernel/sched.c

index 9990074169fa4c6c5f99f840e4b0c24a898036ab..ba2d610f4529bce17541a89ad7e07cbb87e20caf 100644 (file)
@@ -7752,14 +7752,24 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
                cpu_rq(cpu)->migration_thread = NULL;
                break;
 
+       case CPU_POST_DEAD:
+               /*
+                * Bring the migration thread down in CPU_POST_DEAD event,
+                * since the timers should have got migrated by now and thus
+                * we should not see a deadlock between trying to kill the
+                * migration thread and the sched_rt_period_timer.
+                */
+               rq = cpu_rq(cpu);
+               kthread_stop(rq->migration_thread);
+               put_task_struct(rq->migration_thread);
+               rq->migration_thread = NULL;
+               break;
+
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                cpuset_lock(); /* around calls to cpuset_cpus_allowed_lock() */
                migrate_live_tasks(cpu);
                rq = cpu_rq(cpu);
-               kthread_stop(rq->migration_thread);
-               put_task_struct(rq->migration_thread);
-               rq->migration_thread = NULL;
                /* Idle task back to normal (off runqueue, low prio) */
                spin_lock_irq(&rq->lock);
                update_rq_clock(rq);