From a424848670408739db5b496a7cb184e45aff05ee Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Wed, 4 Jun 2014 13:46:03 -0700
Subject: [PATCH] rcu: Bind grace-period kthreads to non-NO_HZ_FULL CPUs

Binding the grace-period kthreads to the timekeeping CPU resulted in
significant performance decreases for some workloads.  For more detail,
see:

https://lkml.org/lkml/2014/6/3/395 for benchmark numbers

https://lkml.org/lkml/2014/6/4/218 for CPU statistics

It turns out that it is necessary to bind the grace-period kthreads
to the timekeeping CPU only when all CPUs but CPU 0 are nohz_full CPUs
on the one hand or if CONFIG_NO_HZ_FULL_SYSIDLE=y on the other.
In other cases, it suffices to bind the grace-period kthreads to the
set of non-nohz_full CPUs.

This commit therefore creates a tick_nohz_not_full_mask that is the
complement of tick_nohz_full_mask, and then binds the grace-period
kthreads to the set of CPUs indicated by this new mask, which covers
the CONFIG_NO_HZ_FULL_SYSIDLE=n case.  The CONFIG_NO_HZ_FULL_SYSIDLE=y
case still binds the grace-period kthreads to the timekeeping CPU.

This commit also includes the tick_nohz_full_enabled() check suggested
by Frederic Weisbecker.

Reported-by: Jet Chen
Signed-off-by: Paul E. McKenney
[ paulmck: Created housekeeping_affine() per fweisbec feedback.
]
---
 include/linux/tick.h     | 19 +++++++++++++++++++
 kernel/rcu/tree_plugin.h | 14 +++++++++-----
 kernel/time/tick-sched.c |  6 ++++++
 3 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index b84773cb9f4c..c39af3261351 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include /* NOTE(review): header name lost in extraction; presumably <linux/sched.h> for set_cpus_allowed_ptr() -- confirm */
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 
@@ -162,6 +163,7 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
 #ifdef CONFIG_NO_HZ_FULL
 extern bool tick_nohz_full_running;
 extern cpumask_var_t tick_nohz_full_mask;
+extern cpumask_var_t tick_nohz_not_full_mask; /* defined in kernel/time/tick-sched.c */
 
 static inline bool tick_nohz_full_enabled(void)
 {
@@ -194,6 +196,23 @@ static inline void tick_nohz_full_kick_all(void) { }
 static inline void __tick_nohz_task_switch(struct task_struct *tsk) { }
 #endif
 
+static inline bool is_housekeeping_cpu(int cpu) /* is @cpu in tick_nohz_not_full_mask (or is nohz_full inactive)? */
+{
+#ifdef CONFIG_NO_HZ_FULL
+	if (tick_nohz_full_enabled())
+		return cpumask_test_cpu(cpu, tick_nohz_not_full_mask); /* nohz_full active: answer comes from the mask */
+#endif
+	return true; /* NO_HZ_FULL compiled out or not enabled: every CPU qualifies */
+}
+
+static inline void housekeeping_affine(struct task_struct *t) /* restrict @t to tick_nohz_not_full_mask */
+{
+#ifdef CONFIG_NO_HZ_FULL
+	if (tick_nohz_full_enabled()) /* otherwise this function does nothing */
+		set_cpus_allowed_ptr(t, tick_nohz_not_full_mask); /* return value is not checked */
+#endif
+}
+
 static inline void tick_nohz_full_check(void)
 {
 	if (tick_nohz_full_enabled())
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 1e99cb746d3c..569b390daa15 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2861,12 +2861,16 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
  */
 static void rcu_bind_gp_kthread(void)
 {
-#ifdef CONFIG_NO_HZ_FULL
-	int cpu = tick_do_timer_cpu;
+	int __maybe_unused cpu; /* only assigned in the CONFIG_NO_HZ_FULL_SYSIDLE branch */
 
-	if (cpu < 0 || cpu >= nr_cpu_ids)
+	if (!tick_nohz_full_enabled()) /* no rebinding at all when nohz_full is not enabled */
 		return;
-	if (raw_smp_processor_id() != cpu)
+#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
+	cpu = tick_do_timer_cpu; /* SYSIDLE=y: target is the single timekeeping CPU */
+	if (cpu >= 0 && cpu < nr_cpu_ids && raw_smp_processor_id() != cpu) /* valid CPU id and not already running there */
 		set_cpus_allowed_ptr(current, cpumask_of(cpu));
-#endif /* #ifdef CONFIG_NO_HZ_FULL */
+#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+	if (!is_housekeeping_cpu(raw_smp_processor_id())) /* SYSIDLE=n: rebind only when currently outside the not-full mask */
+		housekeeping_affine(current); /* allow any CPU in tick_nohz_not_full_mask */
+#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 6558b7ac112d..07ae1cc39063 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -154,6 +154,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 
 #ifdef CONFIG_NO_HZ_FULL
 cpumask_var_t tick_nohz_full_mask;
+cpumask_var_t tick_nohz_not_full_mask; /* written by tick_nohz_full_setup() and tick_nohz_init_all() below */
 bool tick_nohz_full_running;
 
 static bool can_stop_full_tick(void)
@@ -281,6 +282,7 @@ static int __init tick_nohz_full_setup(char *str)
 	int cpu;
 
 	alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
+	alloc_bootmem_cpumask_var(&tick_nohz_not_full_mask); /* allocated alongside tick_nohz_full_mask, before parsing */
 	if (cpulist_parse(str, tick_nohz_full_mask) < 0) {
 		pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
 		return 1;
@@ -291,6 +293,8 @@ static int __init tick_nohz_full_setup(char *str)
 		pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
 		cpumask_clear_cpu(cpu, tick_nohz_full_mask);
 	}
+	cpumask_andnot(tick_nohz_not_full_mask, /* not_full = possible CPUs minus nohz_full CPUs */
+		       cpu_possible_mask, tick_nohz_full_mask);
 	tick_nohz_full_running = true;
 
 	return 1;
@@ -335,6 +339,8 @@ static int tick_nohz_init_all(void)
 	err = 0;
 	cpumask_setall(tick_nohz_full_mask);
 	cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask);
+	cpumask_clear(tick_nohz_not_full_mask); /* NOTE(review): no allocation of tick_nohz_not_full_mask is visible in this hunk -- confirm it is allocated before this write */
+	cpumask_set_cpu(smp_processor_id(), tick_nohz_not_full_mask); /* only the current CPU ends up in the not-full mask */
 	tick_nohz_full_running = true;
 #endif
 	return err;
-- 
2.39.5