From 2440c01e10f07adcbc2094ba12ae4ad6094bd2b6 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:17 +0000 Subject: [PATCH] powerpc/kdump: Only save CPU state first time through the secondary CPU capture code We might enter the secondary CPU capture code twice, e.g. if we have to unstick some CPUs with a system reset. In this case we don't want to overwrite the state on CPUs that had made it into the capture code OK, so use the cpus_state_saved cpumask for that and make it local to crash_ipi_callback. For controlling progress, now use atomic_t cpus_in_crash to count how many CPUs have made it into the kdump code, and time_to_dump to tell everyone it's time to dump. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index b942980e9650..28be3452e67a 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -46,7 +46,8 @@ /* This keeps a track of which one is the crashing cpu. */ int crashing_cpu = -1; -static cpumask_t cpus_in_crash = CPU_MASK_NONE; +static atomic_t cpus_in_crash; +static int time_to_dump; #define CRASH_HANDLER_MAX 3 /* NULL terminated list of shutdown handles */ @@ -67,21 +68,27 @@ static int handle_fault(struct pt_regs *regs) void crash_ipi_callback(struct pt_regs *regs) { + static cpumask_t cpus_state_saved = CPU_MASK_NONE; + int cpu = smp_processor_id(); if (!cpu_online(cpu)) return; hard_irq_disable(); - if (!cpumask_test_cpu(cpu, &cpus_in_crash)) + if (!cpumask_test_cpu(cpu, &cpus_state_saved)) { crash_save_cpu(regs, cpu); - cpumask_set_cpu(cpu, &cpus_in_crash); + cpumask_set_cpu(cpu, &cpus_state_saved); + } + + atomic_inc(&cpus_in_crash); + smp_mb__after_atomic_inc(); /* * Starting the kdump boot. * This barrier is needed to make sure that all CPUs are stopped. 
*/ - while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash)) + while (!time_to_dump) cpu_relax(); if (ppc_md.kexec_cpu_down) @@ -115,19 +122,18 @@ again: * respond. */ msecs = IPI_TIMEOUT; - while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) { + while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0)) mdelay(1); - } /* Would it be better to replace the trap vector here? */ - if (cpumask_weight(&cpus_in_crash) >= ncpus) { + if (atomic_read(&cpus_in_crash) >= ncpus) { printk(KERN_EMERG "IPI complete\n"); return; } printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n", - ncpus - cpumask_weight(&cpus_in_crash)); + ncpus - atomic_read(&cpus_in_crash)); /* * If we have a panic timeout set then we can't wait indefinitely @@ -155,10 +161,10 @@ again: * crash code again. We need to reset cpus_in_crash so we * wait for everyone to do this. */ - cpus_in_crash = CPU_MASK_NONE; + atomic_set(&cpus_in_crash, 0); smp_mb(); - while (cpumask_weight(&cpus_in_crash) < ncpus) + while (atomic_read(&cpus_in_crash) < ncpus) cpu_relax(); } @@ -316,7 +322,6 @@ void default_machine_crash_shutdown(struct pt_regs *regs) * such that another IPI will not be sent. */ crashing_cpu = smp_processor_id(); - crash_save_cpu(regs, crashing_cpu); /* * If we came in via system reset, wait a while for the secondary @@ -326,7 +331,11 @@ void default_machine_crash_shutdown(struct pt_regs *regs) mdelay(PRIMARY_TIMEOUT); crash_kexec_prepare_cpus(crashing_cpu); - cpumask_set_cpu(crashing_cpu, &cpus_in_crash); + + crash_save_cpu(regs, crashing_cpu); + + time_to_dump = 1; + crash_kexec_wait_realmode(crashing_cpu); machine_kexec_mask_interrupts(); -- 2.39.5