From c80d40e1f29243cf7d932b8e35126bcae0e875b4 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 22 May 2014 10:44:06 +1000 Subject: [PATCH] cpu-hotplug-stop-machine-plug-race-window-that-leads-to-ipi-to-offline-cpu-v5 Signed-off-by: Srivatsa S. Bhat Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Reviewed-by: Tejun Heo Cc: Rusty Russell Cc: Frederic Weisbecker Cc: Christoph Hellwig Cc: Mel Gorman Cc: Rik van Riel Cc: Borislav Petkov Cc: Steven Rostedt Cc: Mike Galbraith Cc: Gautham R Shenoy Cc: "Paul E. McKenney" Cc: Oleg Nesterov Cc: Rafael J. Wysocki Signed-off-by: Andrew Morton --- kernel/stop_machine.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 03c77ced3339..9486c3ad5f77 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -193,27 +193,37 @@ static int multi_cpu_stop(void *data) cpu_relax(); /* - * In the case of CPU offline, we don't want the other CPUs to - * send IPIs to the active_cpu (the one going offline) after it - * has disabled interrupts in the _DISABLE_IRQ state (because, - * then it will notice the IPIs only after it goes offline). So - * we split this state into _INACTIVE and _ACTIVE, and thereby - * ensure that the active_cpu disables interrupts only after - * the other CPUs do the same thing. + * We use 2 separate stages to disable interrupts, namely + * _INACTIVE and _ACTIVE, to ensure that the inactive CPUs + * disable their interrupts first, followed by the active CPUs. + * + * This is done to avoid a race in the CPU offline path, which + * can lead to receiving IPIs on the outgoing CPU *after* it + * has gone offline. + * + * During CPU offline, we don't want the other CPUs to send + * IPIs to the active_cpu (the outgoing CPU) *after* it has + * disabled interrupts (because, then it will notice the IPIs + * only after it has gone offline). We can prevent this by + * making the other CPUs disable their interrupts first - that + * way, they will run the stop-machine code with interrupts + * disabled, and hence won't send IPIs after that point. */ if (msdata->state != curstate) { curstate = msdata->state; switch (curstate) { case MULTI_STOP_DISABLE_IRQ_INACTIVE: - if (is_active) - break; - - /* Else, fall-through */ - + if (!is_active) { + local_irq_disable(); + hard_irq_disable(); + } + break; case MULTI_STOP_DISABLE_IRQ_ACTIVE: - local_irq_disable(); - hard_irq_disable(); + if (is_active) { + local_irq_disable(); + hard_irq_disable(); + } break; case MULTI_STOP_RUN: if (is_active) -- 2.39.5