From f25e21afe6675372e409b0bcecb4f9349e3ce8ef Mon Sep 17 00:00:00 2001
From: Jack Steiner
Date: Tue, 8 Nov 2011 11:19:55 +1100
Subject: [PATCH] x86: reduce clock calibration time during slave cpu startup

Reduce the startup time for slave cpus.

Add hooks for an arch-specific clock calibration function.  These hooks
are used on x86.  If a newly started cpu has the same phys_proc_id as an
already-active core, uses the TSC for the delay loop, and has
CONSTANT_TSC, reuse that core's already-calculated value of
loops_per_jiffy instead of recalibrating.

This patch reduces the time required to start slave cpus on a 4096 cpu
system from:

	465 sec OLD
	 62 sec NEW

This reduces boot time on a 4096p system by almost 7 minutes.  Nice...

Signed-off-by: Jack Steiner
Cc: Ingo Molnar
Cc: "H. Peter Anvin"
Cc: Thomas Gleixner
Cc: John Stultz
Signed-off-by: Andrew Morton
---
 arch/x86/kernel/smpboot.c | 16 +++++++++++-----
 arch/x86/kernel/tsc.c     | 19 +++++++++++++++++++
 init/calibrate.c          | 15 +++++++++++++++
 3 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9f548cb4a958..00eef55c8327 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -207,22 +207,28 @@ static void __cpuinit smp_callin(void)
 	 * Need to setup vector mappings before we enable interrupts.
 	 */
 	setup_vector_irq(smp_processor_id());
+
+	/*
+	 * Save our processor parameters. Note: this information
+	 * is needed for clock calibration.
+	 */
+	smp_store_cpu_info(cpuid);
+
 	/*
 	 * Get our bogomips.
+	 * Update loops_per_jiffy in cpu_data. Previous call to
+	 * smp_store_cpu_info() stored a value that is close but not as
+	 * accurate as the value just calculated.
 	 *
 	 * Need to enable IRQs because it can take longer and then
 	 * the NMI watchdog might kill us.
 	 */
 	local_irq_enable();
 	calibrate_delay();
+	cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
 	local_irq_disable();
 	pr_debug("Stack at about %p\n", &cpuid);
 
-	/*
-	 * Save our processor parameters
-	 */
-	smp_store_cpu_info(cpuid);
-
 	/*
 	 * This must be done before setting cpu_online_mask
 	 * or calling notify_cpu_starting.
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index db483369f10b..ade03e38d237 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -995,3 +995,22 @@ void __init tsc_init(void)
 
 	check_system_tsc_reliable();
 }
+/*
+ * If we have a constant TSC and are using the TSC for the delay loop,
+ * we can skip clock calibration if another cpu in the same socket has already
+ * been calibrated. This assumes that CONSTANT_TSC applies to all
+ * cpus in the socket - this should be a safe assumption.
+ */
+unsigned long __cpuinit calibrate_delay_is_known(void)
+{
+	int i, cpu = smp_processor_id();
+
+	if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
+		return 0;
+
+	for_each_online_cpu(i)
+		if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id)
+			return cpu_data(i).loops_per_jiffy;
+	return 0;
+}
+
diff --git a/init/calibrate.c b/init/calibrate.c
index 24df7976816c..5f117ca9e069 100644
--- a/init/calibrate.c
+++ b/init/calibrate.c
@@ -246,6 +246,19 @@ recalibrate:
 
 static DEFINE_PER_CPU(unsigned long, cpu_loops_per_jiffy) = { 0 };
 
+/*
+ * Check if cpu calibration delay is already known. For example,
+ * some processors with multi-core sockets may have all cores
+ * with the same calibration delay.
+ *
+ * Architectures should override this function if a faster calibration
+ * method is available.
+ */
+unsigned long __attribute__((weak)) __cpuinit calibrate_delay_is_known(void)
+{
+	return 0;
+}
+
 void __cpuinit calibrate_delay(void)
 {
 	unsigned long lpj;
@@ -265,6 +278,8 @@ void __cpuinit calibrate_delay(void)
 		lpj = lpj_fine;
 		pr_info("Calibrating delay loop (skipped), "
 			"value calculated using timer frequency.. ");
+	} else if ((lpj = calibrate_delay_is_known())) {
+		;
 	} else if ((lpj = calibrate_delay_direct()) != 0) {
 		if (!printed)
 			pr_info("Calibrating delay using timer "
-- 
2.39.2
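
For context, the generic side of this patch uses the kernel's weak-symbol
override idiom: init/calibrate.c supplies a weak calibrate_delay_is_known()
that returns 0 ("not known"), and an architecture provides a strong
definition to short-circuit calibration, as the x86 hunk above does.  A
minimal sketch of what an override on some other architecture might look
like (hypothetical code, not part of this patch;
arch_timer_rate_is_constant() is a made-up placeholder for an
arch-specific check):

	/*
	 * Hypothetical strong definition; it overrides the
	 * __attribute__((weak)) default in init/calibrate.c.
	 * Returning 0 falls back to full calibration.
	 */
	unsigned long __cpuinit calibrate_delay_is_known(void)
	{
		/* placeholder: does the delay-loop clock tick at the
		 * same rate on every cpu of this machine? */
		if (!arch_timer_rate_is_constant())
			return 0;

		/* reuse the value already calibrated on the boot cpu */
		return loops_per_jiffy;
	}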