Merge branch 'rcu/idle' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck...
author     Ingo Molnar <mingo@kernel.org>
           Thu, 27 Sep 2012 06:09:38 +0000 (08:09 +0200)
committer  Ingo Molnar <mingo@kernel.org>
           Thu, 27 Sep 2012 06:09:38 +0000 (08:09 +0200)
Pull the RCU adaptive-idle feature from Paul E. McKenney:

 "This series adds RCU APIs that allow non-idle tasks to
  enter RCU idle mode and provides x86 code to make use of them, allowing
  RCU to treat user-mode execution as an extended quiescent state when the
  new RCU_USER_QS kernel configuration parameter is specified.  Work is
  in progress to port this to a few other architectures, but is not part
  of this series."

Signed-off-by: Ingo Molnar <mingo@kernel.org>
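
In outline, the new APIs pair around every user-mode episode: the architecture
calls rcu_user_exit() on kernel entry and rcu_user_enter() just before resuming
userspace, so RCU can treat the interval in between as quiescent. A minimal
sketch of the intended pairing, assuming hypothetical arch hook names that are
not part of this series:

        #include <linux/rcupdate.h>

        /* Hypothetical arch slow-path hooks, reached via the TIF_NOHZ flag. */

        static void arch_kernel_entry(void)     /* syscall/exception entry */
        {
                rcu_user_exit();        /* leave the extended quiescent state;
                                         * RCU read-side sections are legal */
        }

        static void arch_return_to_user(void)   /* before resuming userspace */
        {
                rcu_user_enter();       /* userspace becomes an extended
                                         * quiescent state for RCU */
        }
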
17 files changed:
arch/Kconfig
arch/um/drivers/mconsole_kern.c
arch/x86/Kconfig
arch/x86/include/asm/rcu.h [new file with mode: 0644]
arch/x86/include/asm/thread_info.h
arch/x86/kernel/entry_64.S
arch/x86/kernel/ptrace.c
arch/x86/kernel/signal.c
arch/x86/kernel/traps.c
arch/x86/mm/fault.c
include/linux/rcupdate.h
include/linux/sched.h
init/Kconfig
kernel/rcutree.c
kernel/rcutree.h
kernel/rcutree_plugin.h
kernel/sched/core.c

index 72f2fa189cc5200bb98143c6b35de8ffc9e0aaee..1401a758797316ef402c6a35d5957643931a7f8e 100644 (file)
@@ -281,4 +281,14 @@ config SECCOMP_FILTER
 
          See Documentation/prctl/seccomp_filter.txt for details.
 
+config HAVE_RCU_USER_QS
+       bool
+       help
+         Provide kernel entry/exit hooks necessary for userspace
+         RCU extended quiescent state. Syscalls need to be wrapped inside
+         rcu_user_exit()-rcu_user_enter() through the slow path using the
+         TIF_NOHZ flag. Exception handlers must be wrapped as well. Irqs
+         are already protected inside rcu_irq_enter/rcu_irq_exit() but
+         preemption or signal handling on irq exit still needs to be protected.
+
 source "kernel/gcov/Kconfig"
index 664a60e8dfb442fe2cb75c1ed5ba388a7bdb42b0..c17de0db6736e1bcef0fca9ab1a3d624cd271580 100644 (file)
@@ -705,6 +705,7 @@ static void stack_proc(void *arg)
        struct task_struct *from = current, *to = arg;
 
        to->thread.saved_task = from;
+       rcu_switch(from, to);
        switch_to(from, to, from);
 }
 
index 50a1d1f9b6d3ac28c6b2294cd579d71cda2e75c1..20c49b8450b870ae112eb641204396d19c18d46b 100644 (file)
@@ -97,6 +97,7 @@ config X86
        select KTIME_SCALAR if X86_32
        select GENERIC_STRNCPY_FROM_USER
        select GENERIC_STRNLEN_USER
+       select HAVE_RCU_USER_QS if X86_64
 
 config INSTRUCTION_DECODER
        def_bool (KPROBES || PERF_EVENTS || UPROBES)
diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/rcu.h
new file mode 100644 (file)
index 0000000..d1ac07a
--- /dev/null
@@ -0,0 +1,32 @@
+#ifndef _ASM_X86_RCU_H
+#define _ASM_X86_RCU_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/rcupdate.h>
+#include <asm/ptrace.h>
+
+static inline void exception_enter(struct pt_regs *regs)
+{
+       rcu_user_exit();
+}
+
+static inline void exception_exit(struct pt_regs *regs)
+{
+#ifdef CONFIG_RCU_USER_QS
+       if (user_mode(regs))
+               rcu_user_enter();
+#endif
+}
+
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_RCU_USER_QS
+# define SCHEDULE_USER call schedule_user
+#else
+# define SCHEDULE_USER call schedule
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
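
As the traps.c hunks below show, exception handlers bracket their bodies with
these helpers. A minimal sketch with a hypothetical handler (do_example_trap
is illustrative only, not part of this series):

        #include <asm/rcu.h>    /* exception_enter()/exception_exit() */

        dotraplinkage void do_example_trap(struct pt_regs *regs, long error_code)
        {
                exception_enter(regs);  /* exit user EQS if we trapped from userspace */
                /* handler body: RCU read-side critical sections are safe here */
                exception_exit(regs);   /* re-enter user EQS only when returning to user mode */
        }
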
index 89f794f007ec1e4aa5bbd029bcb32182fffe1f48..c535d847e3b5f75dff0d8d2dd7988935fc35b63f 100644 (file)
@@ -89,6 +89,7 @@ struct thread_info {
 #define TIF_NOTSC              16      /* TSC is not accessible in userland */
 #define TIF_IA32               17      /* IA32 compatibility process */
 #define TIF_FORK               18      /* ret_from_fork */
+#define TIF_NOHZ               19      /* in adaptive nohz mode */
 #define TIF_MEMDIE             20      /* is terminating due to OOM killer */
 #define TIF_DEBUG              21      /* uses debug registers */
 #define TIF_IO_BITMAP          22      /* uses I/O bitmap */
@@ -114,6 +115,7 @@ struct thread_info {
 #define _TIF_NOTSC             (1 << TIF_NOTSC)
 #define _TIF_IA32              (1 << TIF_IA32)
 #define _TIF_FORK              (1 << TIF_FORK)
+#define _TIF_NOHZ              (1 << TIF_NOHZ)
 #define _TIF_DEBUG             (1 << TIF_DEBUG)
 #define _TIF_IO_BITMAP         (1 << TIF_IO_BITMAP)
 #define _TIF_FORCED_TF         (1 << TIF_FORCED_TF)
@@ -126,12 +128,13 @@ struct thread_info {
 /* work to do in syscall_trace_enter() */
 #define _TIF_WORK_SYSCALL_ENTRY        \
        (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT |   \
-        _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
+        _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT |     \
+        _TIF_NOHZ)
 
 /* work to do in syscall_trace_leave() */
 #define _TIF_WORK_SYSCALL_EXIT \
        (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP |    \
-        _TIF_SYSCALL_TRACEPOINT)
+        _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ)
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK                                                 \
@@ -141,7 +144,8 @@ struct thread_info {
 
 /* work to do on any return to user space */
 #define _TIF_ALLWORK_MASK                                              \
-       ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT)
+       ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT |       \
+       _TIF_NOHZ)
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK                                            \
index 69babd8c834f920b4d54c48e1f41a08d4f7fef6f..1a8f3cbb6ee30accdaa986189152009c634d8da6 100644 (file)
@@ -56,6 +56,7 @@
 #include <asm/ftrace.h>
 #include <asm/percpu.h>
 #include <asm/asm.h>
+#include <asm/rcu.h>
 #include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
@@ -565,7 +566,7 @@ sysret_careful:
        TRACE_IRQS_ON
        ENABLE_INTERRUPTS(CLBR_NONE)
        pushq_cfi %rdi
-       call schedule
+       SCHEDULE_USER
        popq_cfi %rdi
        jmp sysret_check
 
@@ -678,7 +679,7 @@ int_careful:
        TRACE_IRQS_ON
        ENABLE_INTERRUPTS(CLBR_NONE)
        pushq_cfi %rdi
-       call schedule
+       SCHEDULE_USER
        popq_cfi %rdi
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
@@ -974,7 +975,7 @@ retint_careful:
        TRACE_IRQS_ON
        ENABLE_INTERRUPTS(CLBR_NONE)
        pushq_cfi %rdi
-       call  schedule
+       SCHEDULE_USER
        popq_cfi %rdi
        GET_THREAD_INFO(%rcx)
        DISABLE_INTERRUPTS(CLBR_NONE)
@@ -1449,7 +1450,7 @@ paranoid_userspace:
 paranoid_schedule:
        TRACE_IRQS_ON
        ENABLE_INTERRUPTS(CLBR_ANY)
-       call schedule
+       SCHEDULE_USER
        DISABLE_INTERRUPTS(CLBR_ANY)
        TRACE_IRQS_OFF
        jmp paranoid_userspace
index c4c6a5c2bf0f393ffa8588a1fa7376bcaa9513bb..9f94f8ec26e40f60f35e36e3151c6115b876ba73 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/signal.h>
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/rcupdate.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -1463,6 +1464,8 @@ long syscall_trace_enter(struct pt_regs *regs)
 {
        long ret = 0;
 
+       rcu_user_exit();
+
        /*
         * If we stepped into a sysenter/syscall insn, it trapped in
         * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
@@ -1526,4 +1529,6 @@ void syscall_trace_leave(struct pt_regs *regs)
                        !test_thread_flag(TIF_SYSCALL_EMU);
        if (step || test_thread_flag(TIF_SYSCALL_TRACE))
                tracehook_report_syscall_exit(regs, step);
+
+       rcu_user_enter();
 }
index b280908a376e20efc6773b76c6e80353ef8680f4..bca0ab903e577dd96c017cd2b1deeded1e28ea60 100644 (file)
@@ -779,6 +779,8 @@ static void do_signal(struct pt_regs *regs)
 void
 do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 {
+       rcu_user_exit();
+
 #ifdef CONFIG_X86_MCE
        /* notify userspace of pending MCEs */
        if (thread_info_flags & _TIF_MCE_NOTIFY)
@@ -804,6 +806,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 #ifdef CONFIG_X86_32
        clear_thread_flag(TIF_IRET);
 #endif /* CONFIG_X86_32 */
+
+       rcu_user_enter();
 }
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
index b481341c9369da649908b57c3e8c39b1586b0111..378967578f22fc073fc40bff23aa381b94ec7864 100644 (file)
@@ -55,6 +55,7 @@
 #include <asm/i387.h>
 #include <asm/fpu-internal.h>
 #include <asm/mce.h>
+#include <asm/rcu.h>
 
 #include <asm/mach_traps.h>
 
@@ -180,11 +181,15 @@ vm86_trap:
 #define DO_ERROR(trapnr, signr, str, name)                             \
 dotraplinkage void do_##name(struct pt_regs *regs, long error_code)    \
 {                                                                      \
-       if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)  \
-                                                       == NOTIFY_STOP) \
+       exception_enter(regs);                                          \
+       if (notify_die(DIE_TRAP, str, regs, error_code,                 \
+                       trapnr, signr) == NOTIFY_STOP) {                \
+               exception_exit(regs);                                   \
                return;                                                 \
+       }                                                               \
        conditional_sti(regs);                                          \
        do_trap(trapnr, signr, str, regs, error_code, NULL);            \
+       exception_exit(regs);                                           \
 }
 
 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)                \
@@ -195,11 +200,15 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code)       \
        info.si_errno = 0;                                              \
        info.si_code = sicode;                                          \
        info.si_addr = (void __user *)siaddr;                           \
-       if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)  \
-                                                       == NOTIFY_STOP) \
+       exception_enter(regs);                                          \
+       if (notify_die(DIE_TRAP, str, regs, error_code,                 \
+                       trapnr, signr) == NOTIFY_STOP) {                \
+               exception_exit(regs);                                   \
                return;                                                 \
+       }                                                               \
        conditional_sti(regs);                                          \
        do_trap(trapnr, signr, str, regs, error_code, &info);           \
+       exception_exit(regs);                                           \
 }
 
 DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV,
@@ -222,12 +231,14 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check,
 /* Runs on IST stack */
 dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
 {
+       exception_enter(regs);
        if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
-                       X86_TRAP_SS, SIGBUS) == NOTIFY_STOP)
-               return;
-       preempt_conditional_sti(regs);
-       do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
-       preempt_conditional_cli(regs);
+                      X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
+               preempt_conditional_sti(regs);
+               do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
+               preempt_conditional_cli(regs);
+       }
+       exception_exit(regs);
 }
 
 dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
@@ -235,6 +246,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
        static const char str[] = "double fault";
        struct task_struct *tsk = current;
 
+       exception_enter(regs);
        /* Return not checked because double fault cannot be ignored */
        notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
 
@@ -255,16 +267,29 @@ do_general_protection(struct pt_regs *regs, long error_code)
 {
        struct task_struct *tsk;
 
+       exception_enter(regs);
        conditional_sti(regs);
 
 #ifdef CONFIG_X86_32
-       if (regs->flags & X86_VM_MASK)
-               goto gp_in_vm86;
+       if (regs->flags & X86_VM_MASK) {
+               local_irq_enable();
+               handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
+               goto exit;
+       }
 #endif
 
        tsk = current;
-       if (!user_mode(regs))
-               goto gp_in_kernel;
+       if (!user_mode(regs)) {
+               if (fixup_exception(regs))
+                       goto exit;
+
+               tsk->thread.error_code = error_code;
+               tsk->thread.trap_nr = X86_TRAP_GP;
+               if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
+                              X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
+                       die("general protection fault", regs, error_code);
+               goto exit;
+       }
 
        tsk->thread.error_code = error_code;
        tsk->thread.trap_nr = X86_TRAP_GP;
@@ -279,25 +304,8 @@ do_general_protection(struct pt_regs *regs, long error_code)
        }
 
        force_sig(SIGSEGV, tsk);
-       return;
-
-#ifdef CONFIG_X86_32
-gp_in_vm86:
-       local_irq_enable();
-       handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
-       return;
-#endif
-
-gp_in_kernel:
-       if (fixup_exception(regs))
-               return;
-
-       tsk->thread.error_code = error_code;
-       tsk->thread.trap_nr = X86_TRAP_GP;
-       if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
-                       X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP)
-               return;
-       die("general protection fault", regs, error_code);
+exit:
+       exception_exit(regs);
 }
 
 /* May run on IST stack. */
@@ -312,15 +320,16 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
            ftrace_int3_handler(regs))
                return;
 #endif
+       exception_enter(regs);
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
        if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
                                SIGTRAP) == NOTIFY_STOP)
-               return;
+               goto exit;
 #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
 
        if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
                        SIGTRAP) == NOTIFY_STOP)
-               return;
+               goto exit;
 
        /*
         * Let others (NMI) know that the debug stack is in use
@@ -331,6 +340,8 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
        do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
        preempt_conditional_cli(regs);
        debug_stack_usage_dec();
+exit:
+       exception_exit(regs);
 }
 
 #ifdef CONFIG_X86_64
@@ -391,6 +402,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
        unsigned long dr6;
        int si_code;
 
+       exception_enter(regs);
+
        get_debugreg(dr6, 6);
 
        /* Filter out all the reserved bits which are preset to 1 */
@@ -406,7 +419,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 
        /* Catch kmemcheck conditions first of all! */
        if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
-               return;
+               goto exit;
 
        /* DR6 may or may not be cleared by the CPU */
        set_debugreg(0, 6);
@@ -421,7 +434,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 
        if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
                                                        SIGTRAP) == NOTIFY_STOP)
-               return;
+               goto exit;
 
        /*
         * Let others (NMI) know that the debug stack is in use
@@ -437,7 +450,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
                                        X86_TRAP_DB);
                preempt_conditional_cli(regs);
                debug_stack_usage_dec();
-               return;
+               goto exit;
        }
 
        /*
@@ -458,7 +471,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
        preempt_conditional_cli(regs);
        debug_stack_usage_dec();
 
-       return;
+exit:
+       exception_exit(regs);
 }
 
 /*
@@ -555,14 +569,17 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
 #ifdef CONFIG_X86_32
        ignore_fpu_irq = 1;
 #endif
-
+       exception_enter(regs);
        math_error(regs, error_code, X86_TRAP_MF);
+       exception_exit(regs);
 }
 
 dotraplinkage void
 do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
 {
+       exception_enter(regs);
        math_error(regs, error_code, X86_TRAP_XF);
+       exception_exit(regs);
 }
 
 dotraplinkage void
@@ -629,6 +646,7 @@ EXPORT_SYMBOL_GPL(math_state_restore);
 dotraplinkage void __kprobes
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
+       exception_enter(regs);
 #ifdef CONFIG_MATH_EMULATION
        if (read_cr0() & X86_CR0_EM) {
                struct math_emu_info info = { };
@@ -637,6 +655,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
 
                info.regs = regs;
                math_emulate(&info);
+               exception_exit(regs);
                return;
        }
 #endif
@@ -644,12 +663,15 @@ do_device_not_available(struct pt_regs *regs, long error_code)
 #ifdef CONFIG_X86_32
        conditional_sti(regs);
 #endif
+       exception_exit(regs);
 }
 
 #ifdef CONFIG_X86_32
 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 {
        siginfo_t info;
+
+       exception_enter(regs);
        local_irq_enable();
 
        info.si_signo = SIGILL;
@@ -657,10 +679,11 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
        info.si_code = ILL_BADSTK;
        info.si_addr = NULL;
        if (notify_die(DIE_TRAP, "iret exception", regs, error_code,
-                       X86_TRAP_IRET, SIGILL) == NOTIFY_STOP)
-               return;
-       do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
-               &info);
+                       X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) {
+               do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
+                       &info);
+       }
+       exception_exit(regs);
 }
 #endif
 
index 76dcd9d8e0bcf6113ac4862f2e4b2fc9acff4012..7dde46d68a25a562b54fcdd300f232b051c10153 100644 (file)
@@ -18,6 +18,7 @@
 #include <asm/pgalloc.h>               /* pgd_*(), ...                 */
 #include <asm/kmemcheck.h>             /* kmemcheck_*(), ...           */
 #include <asm/fixmap.h>                        /* VSYSCALL_START               */
+#include <asm/rcu.h>                   /* exception_enter(), ...       */
 
 /*
  * Page fault error code bits:
@@ -1000,8 +1001,8 @@ static int fault_in_kernel_space(unsigned long address)
  * and the problem, and then passes it off to one of the appropriate
  * routines.
  */
-dotraplinkage void __kprobes
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
+static void __kprobes
+__do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
        struct vm_area_struct *vma;
        struct task_struct *tsk;
@@ -1209,3 +1210,11 @@ good_area:
 
        up_read(&mm->mmap_sem);
 }
+
+dotraplinkage void __kprobes
+do_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+       exception_enter(regs);
+       __do_page_fault(regs, error_code);
+       exception_exit(regs);
+}
index 0fbbd52e01f9c731d19d29f543d71e2c83fadb91..7c968e4f929ea49806c5e17e53bdfdfb997e4c79 100644 (file)
@@ -191,6 +191,21 @@ extern void rcu_idle_enter(void);
 extern void rcu_idle_exit(void);
 extern void rcu_irq_enter(void);
 extern void rcu_irq_exit(void);
+
+#ifdef CONFIG_RCU_USER_QS
+extern void rcu_user_enter(void);
+extern void rcu_user_exit(void);
+extern void rcu_user_enter_after_irq(void);
+extern void rcu_user_exit_after_irq(void);
+extern void rcu_user_hooks_switch(struct task_struct *prev,
+                                 struct task_struct *next);
+#else
+static inline void rcu_user_enter(void) { }
+static inline void rcu_user_exit(void) { }
+static inline void rcu_user_enter_after_irq(void) { }
+static inline void rcu_user_exit_after_irq(void) { }
+#endif /* CONFIG_RCU_USER_QS */
+
 extern void exit_rcu(void);
 
 /**
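
Note that the !CONFIG_RCU_USER_QS stubs compile to empty inlines, so generic
code may call these hooks unconditionally; only rcu_user_hooks_switch() lacks
a stub, because its sole caller, rcu_switch() in sched.h below, is already
guarded by #ifdef. A hedged sketch of an unconditional call site (the function
name is hypothetical):

        #include <linux/rcupdate.h>

        static void example_enter_kernel(void)
        {
                rcu_user_exit();        /* no-op unless CONFIG_RCU_USER_QS=y */
                /* ... kernel work that may legally use RCU ... */
                rcu_user_enter();       /* back to user extended QS (or no-op) */
        }
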
index 23bddac4bad8d08f3781d1e8a453aa41edb28632..335720a1fc33362bfa62e238eaea609a01a04246 100644 (file)
@@ -1885,6 +1885,14 @@ static inline void rcu_copy_process(struct task_struct *p)
 
 #endif
 
+static inline void rcu_switch(struct task_struct *prev,
+                             struct task_struct *next)
+{
+#ifdef CONFIG_RCU_USER_QS
+       rcu_user_hooks_switch(prev, next);
+#endif
+}
+
 static inline void tsk_restore_flags(struct task_struct *task,
                                unsigned long orig_flags, unsigned long flags)
 {
index af6c7f8ba019ae25519f858850b2637b1fc76c8e..c26b8a1d2b576f8b0286168bbed0647a7ded67a1 100644 (file)
@@ -441,6 +441,24 @@ config PREEMPT_RCU
          This option enables preemptible-RCU code that is common between
          the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
 
+config RCU_USER_QS
+       bool "Consider userspace as in RCU extended quiescent state"
+       depends on HAVE_RCU_USER_QS && SMP
+       help
+         This option sets hooks on kernel / userspace boundaries and
+         puts RCU in an extended quiescent state when the CPU runs in
+         userspace. It means that when a CPU runs in userspace, it is
+         excluded from the global RCU state machine and thus doesn't
+         need to keep the timer tick on for RCU.
+
+config RCU_USER_QS_FORCE
+       bool "Force userspace extended QS by default"
+       depends on RCU_USER_QS
+       help
+         Set the hooks at the user/kernel boundaries by default in order to
+         test this feature, which treats userspace as an extended quiescent
+         state, until it has a real user such as a full adaptive nohz option.
+
 config RCU_FANOUT
        int "Tree-based hierarchical RCU fanout value"
        range 2 64 if 64BIT
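
To try the feature on x86-64 (the only architecture that selects
HAVE_RCU_USER_QS in this series), a possible .config fragment; the FORCE
option is only needed because nothing else sets TIF_NOHZ yet:

        CONFIG_SMP=y
        CONFIG_RCU_USER_QS=y
        CONFIG_RCU_USER_QS_FORCE=y
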
index 7387e46009d957ddae08560809c6661a83fbb176..4fb2376ddf0660575970a29db0637ac4c70c2020 100644 (file)
@@ -206,6 +206,9 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
        .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
        .dynticks = ATOMIC_INIT(1),
+#if defined(CONFIG_RCU_USER_QS) && !defined(CONFIG_RCU_USER_QS_FORCE)
+       .ignore_user_qs = true,
+#endif
 };
 
 static int blimit = 10;                /* Maximum callbacks per rcu_do_batch. */
@@ -322,16 +325,17 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 }
 
 /*
- * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
+ * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state
  *
  * If the new value of the ->dynticks_nesting counter now is zero,
  * we really have entered idle, and must do the appropriate accounting.
  * The caller must have disabled interrupts.
  */
-static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
+static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
+                               bool user)
 {
        trace_rcu_dyntick("Start", oldval, 0);
-       if (!is_idle_task(current)) {
+       if (!user && !is_idle_task(current)) {
                struct task_struct *idle = idle_task(smp_processor_id());
 
                trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
@@ -348,7 +352,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
        WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 
        /*
-        * The idle task is not permitted to enter the idle loop while
+        * It is illegal to enter an extended quiescent state while
         * in an RCU read-side critical section.
         */
        rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
@@ -359,6 +363,25 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
                           "Illegal idle entry in RCU-sched read-side critical section.");
 }
 
+/*
+ * Enter an RCU extended quiescent state, which can be either the
+ * idle loop or adaptive-tickless usermode execution.
+ */
+static void rcu_eqs_enter(bool user)
+{
+       long long oldval;
+       struct rcu_dynticks *rdtp;
+
+       rdtp = &__get_cpu_var(rcu_dynticks);
+       oldval = rdtp->dynticks_nesting;
+       WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
+       if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
+               rdtp->dynticks_nesting = 0;
+       else
+               rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
+       rcu_eqs_enter_common(rdtp, oldval, user);
+}
+
 /**
  * rcu_idle_enter - inform RCU that current CPU is entering idle
  *
@@ -374,21 +397,70 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
 void rcu_idle_enter(void)
 {
        unsigned long flags;
-       long long oldval;
+
+       local_irq_save(flags);
+       rcu_eqs_enter(false);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(rcu_idle_enter);
+
+#ifdef CONFIG_RCU_USER_QS
+/**
+ * rcu_user_enter - inform RCU that we are resuming userspace.
+ *
+ * Enter RCU idle mode right before resuming userspace.  No use of RCU
+ * is permitted between this call and rcu_user_exit(). This way the
+ * CPU doesn't need to maintain the tick for RCU maintenance purposes
+ * when the CPU runs in userspace.
+ */
+void rcu_user_enter(void)
+{
+       unsigned long flags;
        struct rcu_dynticks *rdtp;
 
+       /*
+        * Some contexts may involve an exception occurring in an irq,
+        * leading to this nesting:
+        * rcu_irq_enter() rcu_user_exit() rcu_user_enter() rcu_irq_exit()
+        * This would mess up the dynticks_nesting count though. And rcu_irq_*()
+        * helpers are enough to protect RCU uses inside the exception. So
+        * just return immediately if we detect we are in an IRQ.
+        */
+       if (in_interrupt())
+               return;
+
+       WARN_ON_ONCE(!current->mm);
+
        local_irq_save(flags);
        rdtp = &__get_cpu_var(rcu_dynticks);
-       oldval = rdtp->dynticks_nesting;
-       WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
-       if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
-               rdtp->dynticks_nesting = 0;
-       else
-               rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
-       rcu_idle_enter_common(rdtp, oldval);
+       if (!rdtp->ignore_user_qs && !rdtp->in_user) {
+               rdtp->in_user = true;
+               rcu_eqs_enter(true);
+       }
        local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(rcu_idle_enter);
+
+/**
+ * rcu_user_enter_after_irq - inform RCU that we are going to resume userspace
+ * after the current irq returns.
+ *
+ * This is similar to rcu_user_enter() but in the context of a non-nesting
+ * irq. After this call, RCU enters idle mode when the interrupt
+ * returns.
+ */
+void rcu_user_enter_after_irq(void)
+{
+       unsigned long flags;
+       struct rcu_dynticks *rdtp;
+
+       local_irq_save(flags);
+       rdtp = &__get_cpu_var(rcu_dynticks);
+       /* Ensure this irq is interrupting a non-idle RCU state.  */
+       WARN_ON_ONCE(!(rdtp->dynticks_nesting & DYNTICK_TASK_MASK));
+       rdtp->dynticks_nesting = 1;
+       local_irq_restore(flags);
+}
+#endif /* CONFIG_RCU_USER_QS */
 
 /**
  * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
@@ -420,18 +492,19 @@ void rcu_irq_exit(void)
        if (rdtp->dynticks_nesting)
                trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting);
        else
-               rcu_idle_enter_common(rdtp, oldval);
+               rcu_eqs_enter_common(rdtp, oldval, true);
        local_irq_restore(flags);
 }
 
 /*
- * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle
+ * rcu_eqs_exit_common - current CPU moving away from extended quiescent state
  *
  * If the new value of the ->dynticks_nesting counter was previously zero,
  * we really have exited idle, and must do the appropriate accounting.
  * The caller must have disabled interrupts.
  */
-static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
+static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
+                              int user)
 {
        smp_mb__before_atomic_inc();  /* Force ordering w/previous sojourn. */
        atomic_inc(&rdtp->dynticks);
@@ -440,7 +513,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
        WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
        rcu_cleanup_after_idle(smp_processor_id());
        trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
-       if (!is_idle_task(current)) {
+       if (!user && !is_idle_task(current)) {
                struct task_struct *idle = idle_task(smp_processor_id());
 
                trace_rcu_dyntick("Error on exit: not idle task",
@@ -452,6 +525,25 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
        }
 }
 
+/*
+ * Exit an RCU extended quiescent state, which can be either the
+ * idle loop or adaptive-tickless usermode execution.
+ */
+static void rcu_eqs_exit(bool user)
+{
+       struct rcu_dynticks *rdtp;
+       long long oldval;
+
+       rdtp = &__get_cpu_var(rcu_dynticks);
+       oldval = rdtp->dynticks_nesting;
+       WARN_ON_ONCE(oldval < 0);
+       if (oldval & DYNTICK_TASK_NEST_MASK)
+               rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
+       else
+               rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
+       rcu_eqs_exit_common(rdtp, oldval, user);
+}
+
 /**
  * rcu_idle_exit - inform RCU that current CPU is leaving idle
  *
@@ -464,23 +556,69 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
  * now starting.
  */
 void rcu_idle_exit(void)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       rcu_eqs_exit(false);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(rcu_idle_exit);
+
+#ifdef CONFIG_RCU_USER_QS
+/**
+ * rcu_user_exit - inform RCU that we are exiting userspace.
+ *
+ * Exit RCU idle mode while entering the kernel because it can
+ * run an RCU read-side critical section at any time.
+ */
+void rcu_user_exit(void)
 {
        unsigned long flags;
        struct rcu_dynticks *rdtp;
-       long long oldval;
+
+       /*
+        * Some contexts may involve an exception occurring in an irq,
+        * leading to this nesting:
+        * rcu_irq_enter() rcu_user_exit() rcu_user_enter() rcu_irq_exit()
+        * This would mess up the dynticks_nesting count though. And rcu_irq_*()
+        * helpers are enough to protect RCU uses inside the exception. So
+        * just return immediately if we detect we are in an IRQ.
+        */
+       if (in_interrupt())
+               return;
 
        local_irq_save(flags);
        rdtp = &__get_cpu_var(rcu_dynticks);
-       oldval = rdtp->dynticks_nesting;
-       WARN_ON_ONCE(oldval < 0);
-       if (oldval & DYNTICK_TASK_NEST_MASK)
-               rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
-       else
-               rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-       rcu_idle_exit_common(rdtp, oldval);
+       if (rdtp->in_user) {
+               rdtp->in_user = false;
+               rcu_eqs_exit(true);
+       }
        local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(rcu_idle_exit);
+
+/**
+ * rcu_user_exit_after_irq - inform RCU that we won't resume to userspace
+ * idle mode after the current non-nesting irq returns.
+ *
+ * This is similar to rcu_user_exit() but in the context of an irq.
+ * This is called when the irq has interrupted a userspace RCU idle mode
+ * context. When the current non-nesting interrupt returns after this call,
+ * the CPU won't restore the RCU idle mode.
+ */
+void rcu_user_exit_after_irq(void)
+{
+       unsigned long flags;
+       struct rcu_dynticks *rdtp;
+
+       local_irq_save(flags);
+       rdtp = &__get_cpu_var(rcu_dynticks);
+       /* Ensure we are interrupting an RCU idle mode. */
+       WARN_ON_ONCE(rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK);
+       rdtp->dynticks_nesting += DYNTICK_TASK_EXIT_IDLE;
+       local_irq_restore(flags);
+}
+#endif /* CONFIG_RCU_USER_QS */
 
 /**
  * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
@@ -515,7 +653,7 @@ void rcu_irq_enter(void)
        if (oldval)
                trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting);
        else
-               rcu_idle_exit_common(rdtp, oldval);
+               rcu_eqs_exit_common(rdtp, oldval, true);
        local_irq_restore(flags);
 }
 
@@ -579,6 +717,21 @@ int rcu_is_cpu_idle(void)
 }
 EXPORT_SYMBOL(rcu_is_cpu_idle);
 
+#ifdef CONFIG_RCU_USER_QS
+void rcu_user_hooks_switch(struct task_struct *prev,
+                          struct task_struct *next)
+{
+       struct rcu_dynticks *rdtp;
+
+       /* Interrupts are disabled in context switch */
+       rdtp = &__get_cpu_var(rcu_dynticks);
+       if (!rdtp->ignore_user_qs) {
+               clear_tsk_thread_flag(prev, TIF_NOHZ);
+               set_tsk_thread_flag(next, TIF_NOHZ);
+       }
+}
+#endif /* #ifdef CONFIG_RCU_USER_QS */
+
 #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
 
 /*
@@ -2473,6 +2626,9 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
        rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
        WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
        WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
+#ifdef CONFIG_RCU_USER_QS
+       WARN_ON_ONCE(rdp->dynticks->in_user);
+#endif
        rdp->cpu = cpu;
        rdp->rsp = rsp;
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
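
The nesting arithmetic in rcu_eqs_enter()/rcu_eqs_exit() above can be modeled
in plain userspace C. The constants below are simplified stand-ins for the
kernel's DYNTICK_TASK_* encoding in rcupdate.h, not the real values:

        #include <assert.h>
        #include <stdio.h>

        #define TASK_NEST_VALUE 0x100LL         /* one task-level nest step */
        #define TASK_NEST_MASK  (~0xffLL)       /* task-level nesting bits */
        #define TASK_EXIT_IDLE  TASK_NEST_VALUE /* baseline: not in an EQS */

        static long long nesting = TASK_EXIT_IDLE;      /* ->dynticks_nesting */

        static void eqs_enter(void)             /* cf. rcu_eqs_enter() */
        {
                assert(nesting & TASK_NEST_MASK);
                if ((nesting & TASK_NEST_MASK) == TASK_NEST_VALUE)
                        nesting = 0;            /* outermost: now in the EQS */
                else
                        nesting -= TASK_NEST_VALUE;
        }

        static void eqs_exit(void)              /* cf. rcu_eqs_exit() */
        {
                if (nesting & TASK_NEST_MASK)
                        nesting += TASK_NEST_VALUE;
                else
                        nesting = TASK_EXIT_IDLE;       /* left the EQS */
        }

        int main(void)
        {
                eqs_enter();    /* e.g. rcu_user_enter() */
                printf("in EQS:  %lld\n", nesting);     /* 0 */
                eqs_exit();     /* e.g. rcu_user_exit() */
                printf("out EQS: %lld\n", nesting);     /* 256 */
                return 0;
        }
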
index 7576fd4d8ce62854ee13a48089ac6811a7b11e25..5faf05d683265222127e6496668b6821018ff999 100644 (file)
@@ -102,6 +102,10 @@ struct rcu_dynticks {
                                    /* idle-period nonlazy_posted snapshot. */
        int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
+#ifdef CONFIG_RCU_USER_QS
+       bool ignore_user_qs;        /* Treat userspace as extended QS or not */
+       bool in_user;               /* Is the CPU in userland from RCU POV? */
+#endif
 };
 
 /* RCU's kthread states for tracing. */
index 9c71c1b18e0359a6e5d36639804abc325146f09f..f921154881870b0d4d489b84fdb1f40ccde7495b 100644 (file)
@@ -1757,6 +1757,26 @@ static void rcu_prepare_for_idle(int cpu)
        if (!tne)
                return;
 
+       /* Adaptive-tick mode, where usermode execution is idle to RCU. */
+       if (!is_idle_task(current)) {
+               rdtp->dyntick_holdoff = jiffies - 1;
+               if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
+                       trace_rcu_prep_idle("User dyntick with callbacks");
+                       rdtp->idle_gp_timer_expires =
+                               round_up(jiffies + RCU_IDLE_GP_DELAY,
+                                        RCU_IDLE_GP_DELAY);
+               } else if (rcu_cpu_has_callbacks(cpu)) {
+                       rdtp->idle_gp_timer_expires =
+                               round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
+                       trace_rcu_prep_idle("User dyntick with lazy callbacks");
+               } else {
+                       return;
+               }
+               tp = &rdtp->idle_gp_timer;
+               mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
+               return;
+       }
+
        /*
         * If this is an idle re-entry, for example, due to use of
         * RCU_NONIDLE() or the new idle-loop tracing API within the idle
index 1a48cdbc86314624696d9c9f554f588a938e65b8..3c4dec0594d6ba86aca9ee2f7662586c49bb36b0 100644 (file)
@@ -2081,6 +2081,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 #endif
 
        /* Here we just switch the register state and the stack. */
+       rcu_switch(prev, next);
        switch_to(prev, next, prev);
 
        barrier();
@@ -3468,6 +3469,21 @@ asmlinkage void __sched schedule(void)
 }
 EXPORT_SYMBOL(schedule);
 
+#ifdef CONFIG_RCU_USER_QS
+asmlinkage void __sched schedule_user(void)
+{
+       /*
+        * If we come here after a random call to set_need_resched(),
+        * or we have been woken up remotely but the IPI has not yet arrived,
+        * we haven't yet exited the RCU idle mode. Do it here manually until
+        * we find a better solution.
+        */
+       rcu_user_exit();
+       schedule();
+       rcu_user_enter();
+}
+#endif
+
 /**
  * schedule_preempt_disabled - called with preemption disabled
  *
@@ -3569,6 +3585,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
        /* Catch callers which need to be fixed */
        BUG_ON(ti->preempt_count || !irqs_disabled());
 
+       rcu_user_exit();
        do {
                add_preempt_count(PREEMPT_ACTIVE);
                local_irq_enable();