git.karo-electronics.de Git - linux-beck.git/commitdiff
Merge branches 'doc.2015.10.06a', 'percpu-rwsem.2015.10.06a' and 'torture.2015.10.06a'
author    Paul E. McKenney <paulmck@linux.vnet.ibm.com>
          Wed, 7 Oct 2015 23:06:25 +0000 (16:06 -0700)
committer Paul E. McKenney <paulmck@linux.vnet.ibm.com>
          Wed, 7 Oct 2015 23:06:25 +0000 (16:06 -0700)
doc.2015.10.06a:  Documentation updates.
percpu-rwsem.2015.10.06a:  Optimization of per-CPU reader-writer semaphores.
torture.2015.10.06a:  Torture-test updates.

19 files changed:
Documentation/RCU/stallwarn.txt
Documentation/RCU/torture.txt
Documentation/RCU/whatisRCU.txt
Documentation/locking/locktorture.txt
Documentation/memory-barriers.txt
include/linux/percpu-rwsem.h
include/linux/rcu_sync.h [new file with mode: 0644]
kernel/locking/locktorture.c
kernel/locking/percpu-rwsem.c
kernel/rcu/Makefile
kernel/rcu/rcutorture.c
kernel/rcu/sync.c [new file with mode: 0644]
kernel/torture.c
tools/testing/selftests/rcutorture/bin/kvm.sh
tools/testing/selftests/rcutorture/configs/lock/CFLIST
tools/testing/selftests/rcutorture/configs/lock/LOCK05 [new file with mode: 0644]
tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot [new file with mode: 0644]
tools/testing/selftests/rcutorture/configs/lock/LOCK06 [new file with mode: 0644]
tools/testing/selftests/rcutorture/configs/lock/LOCK06.boot [new file with mode: 0644]

diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index efb9454875ab7453255532887225ad0cb2d5d2fd..0f7fb4298e7e0b657a7df678ee0852de7ab7887d 100644 (file)
@@ -205,6 +205,13 @@ o  For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the
        behavior, you might need to replace some of the cond_resched()
        calls with calls to cond_resched_rcu_qs().
 
+o      Booting Linux using a console connection that is too slow to
+       keep up with the boot-time console-message rate.  For example,
+       a 115Kbaud serial console can be -way- too slow to keep up
+       with boot-time message rates, and will frequently result in
+       RCU CPU stall warning messages, especially if you have added
+       debug printk()s.
+
 o      Anything that prevents RCU's grace-period kthreads from running.
        This can result in the "All QSes seen" console-log message.
        This message will include information on when the kthread last
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index dac02a6219b1cae3c657512b293b957627cec830..118e7c176ce76aaca534c50fe7fe01da1cb3b710 100644 (file)
@@ -166,40 +166,27 @@ test_no_idle_hz   Whether or not to test the ability of RCU to operate in
 
 torture_type   The type of RCU to test, with string values as follows:
 
-               "rcu":  rcu_read_lock(), rcu_read_unlock() and call_rcu().
-
-               "rcu_sync":  rcu_read_lock(), rcu_read_unlock(), and
-                       synchronize_rcu().
-
-               "rcu_expedited": rcu_read_lock(), rcu_read_unlock(), and
-                       synchronize_rcu_expedited().
+               "rcu":  rcu_read_lock(), rcu_read_unlock() and call_rcu(),
+                       along with expedited, synchronous, and polling
+                       variants.
 
                "rcu_bh": rcu_read_lock_bh(), rcu_read_unlock_bh(), and
-                       call_rcu_bh().
-
-               "rcu_bh_sync": rcu_read_lock_bh(), rcu_read_unlock_bh(),
-                       and synchronize_rcu_bh().
+                       call_rcu_bh(), along with expedited and synchronous
+                       variants.
 
-               "rcu_bh_expedited": rcu_read_lock_bh(), rcu_read_unlock_bh(),
-                       and synchronize_rcu_bh_expedited().
+               "rcu_busted": This tests an intentionally incorrect version
+                       of RCU in order to help test rcutorture itself.
 
                "srcu": srcu_read_lock(), srcu_read_unlock() and
-                       call_srcu().
-
-               "srcu_sync": srcu_read_lock(), srcu_read_unlock() and
-                       synchronize_srcu().
-
-               "srcu_expedited": srcu_read_lock(), srcu_read_unlock() and
-                       synchronize_srcu_expedited().
+                       call_srcu(), along with expedited and
+                       synchronous variants.
 
                "sched": preempt_disable(), preempt_enable(), and
-                       call_rcu_sched().
-
-               "sched_sync": preempt_disable(), preempt_enable(), and
-                       synchronize_sched().
+                       call_rcu_sched(), along with expedited,
+                       synchronous, and polling variants.
 
-               "sched_expedited": preempt_disable(), preempt_enable(), and
-                       synchronize_sched_expedited().
+               "tasks": voluntary context switch and call_rcu_tasks(),
+                       along with expedited and synchronous variants.
 
                Defaults to "rcu".
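
For example, the consolidated "rcu" and "rcu_bh" entries above follow the
standard module-parameter conventions, selectable either at module-load
time or (for a built-in rcutorture) on the kernel command line, in the
same spelling used by the .boot fragments later in this merge:

	modprobe rcutorture torture_type=rcu_bh

	rcutorture.torture_type=rcu_bh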
 
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index adc2184009c5ecec9ba4615291ca8998743b48e2..dc49c6712b17ff4968d3c4fdf2b304e0292fb5be 100644 (file)
@@ -364,7 +364,7 @@ uses of RCU may be found in listRCU.txt, arrayRCU.txt, and NMI-RCU.txt.
        };
        DEFINE_SPINLOCK(foo_mutex);
 
-       struct foo *gbl_foo;
+       struct foo __rcu *gbl_foo;
 
        /*
         * Create a new struct foo that is the same as the one currently
@@ -386,7 +386,7 @@ uses of RCU may be found in listRCU.txt, arrayRCU.txt, and NMI-RCU.txt.
 
                new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
                spin_lock(&foo_mutex);
-               old_fp = gbl_foo;
+               old_fp = rcu_dereference_protected(gbl_foo, lockdep_is_held(&foo_mutex));
                *new_fp = *old_fp;
                new_fp->a = new_a;
                rcu_assign_pointer(gbl_foo, new_fp);
@@ -487,7 +487,7 @@ The foo_update_a() function might then be written as follows:
 
                new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
                spin_lock(&foo_mutex);
-               old_fp = gbl_foo;
+               old_fp = rcu_dereference_protected(gbl_foo, lockdep_is_held(&foo_mutex));
                *new_fp = *old_fp;
                new_fp->a = new_a;
                rcu_assign_pointer(gbl_foo, new_fp);
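
For contrast with the rcu_dereference_protected() calls added above, the
read side of the same whatisRCU.txt example fetches gbl_foo under
rcu_read_lock(); a minimal sketch (do_something() is a stand-in):

	rcu_read_lock();
	do_something(rcu_dereference(gbl_foo)->a);
	rcu_read_unlock();

rcu_dereference_protected() is the update-side counterpart: because
foo_mutex is held, it can omit the read-side ordering while still letting
lockdep verify the locking.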
diff --git a/Documentation/locking/locktorture.txt b/Documentation/locking/locktorture.txt
index 619f2bb136a545f1932609f3b93b898145f6a5cc..a2ef3a929bf189bb43f7425f8e5e8a5564d23833 100644 (file)
@@ -52,6 +52,9 @@ torture_type    Type of lock to torture. By default, only spinlocks will
 
                     o "mutex_lock": mutex_lock() and mutex_unlock() pairs.
 
+                    o "rtmutex_lock": rtmutex_lock() and rtmutex_unlock()
+                                      pairs. Kernel must have CONFIG_RT_MUTEX=y.
+
                     o "rwsem_lock": read/write down() and up() semaphore pairs.
 
 torture_runnable  Start locktorture at boot time in the case where the
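
For example, the new rtmutex type above can be exercised with a plain
module load (the LOCK05.boot fragment added later in this series uses the
boot-parameter form of the same selection):

	modprobe locktorture torture_type=rtmutex_lock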
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 2ba8461b0631de759fefd2a12918a6c4f4ee7562..8e7cf9ad3db131c3f82621043fd7184ed025d088 100644 (file)
@@ -1710,6 +1710,17 @@ There are some more advanced barrier functions:
      operations" subsection for information on where to use these.
 
 
+ (*) lockless_dereference();
+     This can be thought of as a pointer-fetch wrapper around the
+     smp_read_barrier_depends() data-dependency barrier.
+
+     This is also similar to rcu_dereference(), but is intended for
+     cases where object lifetime is handled by some mechanism other than
+     RCU, for example, when the objects are removed only when the system
+     goes down.  In addition, lockless_dereference() is used in some
+     data structures that can be used both with and without RCU.
+
+
  (*) dma_wmb();
  (*) dma_rmb();
 
@@ -1789,7 +1800,6 @@ The Linux kernel has a number of locking constructs:
  (*) mutexes
  (*) semaphores
  (*) R/W semaphores
- (*) RCU
 
 In all cases there are variants on "ACQUIRE" operations and "RELEASE" operations
 for each construct.  These operations all imply certain barriers:
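
A short usage sketch for lockless_dereference() (global_cfg and struct cfg
are illustrative names, not from this patch): the object is published once
with rcu_assign_pointer() and never freed until shutdown, so no RCU
read-side critical section is needed on the reader:

	struct cfg *c = lockless_dereference(global_cfg);

	if (c)
		pr_info("mode=%d\n", c->mode);

The embedded smp_read_barrier_depends() orders the dependent load of
c->mode after the fetch of the pointer itself, which matters on DEC Alpha.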
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 834c4e52cb2d1cab57ba33a3e87c025bc7e4a7cc..c2fa3ecb0dce57dde9ad4a92a35dc4178d7a159e 100644 (file)
@@ -5,11 +5,12 @@
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
 #include <linux/wait.h>
+#include <linux/rcu_sync.h>
 #include <linux/lockdep.h>
 
 struct percpu_rw_semaphore {
+       struct rcu_sync         rss;
        unsigned int __percpu   *fast_read_ctr;
-       atomic_t                write_ctr;
        struct rw_semaphore     rw_sem;
        atomic_t                slow_read_ctr;
        wait_queue_head_t       write_waitq;
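
For reference, the caller-visible API around this structure is unchanged by
the conversion; a minimal usage sketch (mysem is an illustrative name,
error handling trimmed):

	static struct percpu_rw_semaphore mysem;

	/* setup, e.g. from an init function: */
	BUG_ON(percpu_init_rwsem(&mysem));

	/* reader -- may sleep, normally the per-CPU fastpath: */
	percpu_down_read(&mysem);
	/* ... read-side section ... */
	percpu_up_read(&mysem);

	/* writer -- forces readers onto the slowpath via rcu_sync_enter(): */
	percpu_down_write(&mysem);
	/* ... exclusive section ... */
	percpu_up_write(&mysem);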
diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h
new file mode 100644 (file)
index 0000000..a63a33e
--- /dev/null
@@ -0,0 +1,86 @@
+/*
+ * RCU-based infrastructure for lightweight reader-writer locking
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (c) 2015, Red Hat, Inc.
+ *
+ * Author: Oleg Nesterov <oleg@redhat.com>
+ */
+
+#ifndef _LINUX_RCU_SYNC_H_
+#define _LINUX_RCU_SYNC_H_
+
+#include <linux/wait.h>
+#include <linux/rcupdate.h>
+
+enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC };
+
+/* Structure to mediate between updaters and fastpath-using readers.  */
+struct rcu_sync {
+       int                     gp_state;
+       int                     gp_count;
+       wait_queue_head_t       gp_wait;
+
+       int                     cb_state;
+       struct rcu_head         cb_head;
+
+       enum rcu_sync_type      gp_type;
+};
+
+extern void rcu_sync_lockdep_assert(struct rcu_sync *);
+
+/**
+ * rcu_sync_is_idle() - Are readers permitted to use their fastpaths?
+ * @rsp: Pointer to rcu_sync structure to use for synchronization
+ *
+ * Returns true if readers are permitted to use their fastpaths.
+ * Must be invoked within an RCU read-side critical section whose
+ * flavor matches that of the rcu_sync structure.
+ */
+static inline bool rcu_sync_is_idle(struct rcu_sync *rsp)
+{
+#ifdef CONFIG_PROVE_RCU
+       rcu_sync_lockdep_assert(rsp);
+#endif
+       return !rsp->gp_state; /* GP_IDLE */
+}
+
+extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type);
+extern void rcu_sync_enter(struct rcu_sync *);
+extern void rcu_sync_exit(struct rcu_sync *);
+extern void rcu_sync_dtor(struct rcu_sync *);
+
+#define __RCU_SYNC_INITIALIZER(name, type) {                           \
+               .gp_state = 0,                                          \
+               .gp_count = 0,                                          \
+               .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \
+               .cb_state = 0,                                          \
+               .gp_type = type,                                        \
+       }
+
+#define        __DEFINE_RCU_SYNC(name, type)   \
+       struct rcu_sync name = __RCU_SYNC_INITIALIZER(name, type)
+
+#define DEFINE_RCU_SYNC(name)          \
+       __DEFINE_RCU_SYNC(name, RCU_SYNC)
+
+#define DEFINE_RCU_SCHED_SYNC(name)    \
+       __DEFINE_RCU_SYNC(name, RCU_SCHED_SYNC)
+
+#define DEFINE_RCU_BH_SYNC(name)       \
+       __DEFINE_RCU_SYNC(name, RCU_BH_SYNC)
+
+#endif /* _LINUX_RCU_SYNC_H_ */
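
A sketch of how these primitives combine, modeled on the percpu-rwsem
conversion later in this merge (gate, fast_path(), slow_path(), and
do_update() are illustrative names):

	static struct rcu_sync gate;	/* rcu_sync_init(&gate, RCU_SCHED_SYNC) */

	/* reader: sched-flavor read-side critical section */
	preempt_disable();
	if (rcu_sync_is_idle(&gate))
		fast_path();
	else
		slow_path();
	preempt_enable();

	/* updater */
	rcu_sync_enter(&gate);	/* readers see !idle; waits for a GP if needed */
	do_update();
	rcu_sync_exit(&gate);	/* fastpath re-enabled one GP later via call_rcu() */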
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 32244186f1f2ae0e7a6343ad084f416aa0cda055..8ef1919d63b2401a4832404d92c598c3e656b090 100644 (file)
  *
  * Copyright (C) IBM Corporation, 2014
  *
- * Author: Paul E. McKenney <paulmck@us.ibm.com>
+ * Authors: Paul E. McKenney <paulmck@us.ibm.com>
+ *          Davidlohr Bueso <dave@stgolabs.net>
  *     Based on kernel/rcu/torture.c.
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/kthread.h>
+#include <linux/sched/rt.h>
 #include <linux/spinlock.h>
 #include <linux/rwlock.h>
 #include <linux/mutex.h>
@@ -34,6 +36,7 @@
 #include <linux/moduleparam.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
+#include <linux/percpu-rwsem.h>
 #include <linux/torture.h>
 
 MODULE_LICENSE("GPL");
@@ -91,11 +94,13 @@ struct lock_torture_ops {
        void (*init)(void);
        int (*writelock)(void);
        void (*write_delay)(struct torture_random_state *trsp);
+       void (*task_boost)(struct torture_random_state *trsp);
        void (*writeunlock)(void);
        int (*readlock)(void);
        void (*read_delay)(struct torture_random_state *trsp);
        void (*readunlock)(void);
-       unsigned long flags;
+
+       unsigned long flags; /* for irq spinlocks */
        const char *name;
 };
 
@@ -139,9 +144,15 @@ static void torture_lock_busted_write_unlock(void)
          /* BUGGY, do not use in real life!!! */
 }
 
+static void torture_boost_dummy(struct torture_random_state *trsp)
+{
+       /* Only rtmutexes care about priority */
+}
+
 static struct lock_torture_ops lock_busted_ops = {
        .writelock      = torture_lock_busted_write_lock,
        .write_delay    = torture_lock_busted_write_delay,
+       .task_boost     = torture_boost_dummy,
        .writeunlock    = torture_lock_busted_write_unlock,
        .readlock       = NULL,
        .read_delay     = NULL,
@@ -185,6 +196,7 @@ static void torture_spin_lock_write_unlock(void) __releases(torture_spinlock)
 static struct lock_torture_ops spin_lock_ops = {
        .writelock      = torture_spin_lock_write_lock,
        .write_delay    = torture_spin_lock_write_delay,
+       .task_boost     = torture_boost_dummy,
        .writeunlock    = torture_spin_lock_write_unlock,
        .readlock       = NULL,
        .read_delay     = NULL,
@@ -211,6 +223,7 @@ __releases(torture_spinlock)
 static struct lock_torture_ops spin_lock_irq_ops = {
        .writelock      = torture_spin_lock_write_lock_irq,
        .write_delay    = torture_spin_lock_write_delay,
+       .task_boost     = torture_boost_dummy,
        .writeunlock    = torture_lock_spin_write_unlock_irq,
        .readlock       = NULL,
        .read_delay     = NULL,
@@ -275,6 +288,7 @@ static void torture_rwlock_read_unlock(void) __releases(torture_rwlock)
 static struct lock_torture_ops rw_lock_ops = {
        .writelock      = torture_rwlock_write_lock,
        .write_delay    = torture_rwlock_write_delay,
+       .task_boost     = torture_boost_dummy,
        .writeunlock    = torture_rwlock_write_unlock,
        .readlock       = torture_rwlock_read_lock,
        .read_delay     = torture_rwlock_read_delay,
@@ -315,6 +329,7 @@ __releases(torture_rwlock)
 static struct lock_torture_ops rw_lock_irq_ops = {
        .writelock      = torture_rwlock_write_lock_irq,
        .write_delay    = torture_rwlock_write_delay,
+       .task_boost     = torture_boost_dummy,
        .writeunlock    = torture_rwlock_write_unlock_irq,
        .readlock       = torture_rwlock_read_lock_irq,
        .read_delay     = torture_rwlock_read_delay,
@@ -354,6 +369,7 @@ static void torture_mutex_unlock(void) __releases(torture_mutex)
 static struct lock_torture_ops mutex_lock_ops = {
        .writelock      = torture_mutex_lock,
        .write_delay    = torture_mutex_delay,
+       .task_boost     = torture_boost_dummy,
        .writeunlock    = torture_mutex_unlock,
        .readlock       = NULL,
        .read_delay     = NULL,
@@ -361,6 +377,90 @@ static struct lock_torture_ops mutex_lock_ops = {
        .name           = "mutex_lock"
 };
 
+#ifdef CONFIG_RT_MUTEXES
+static DEFINE_RT_MUTEX(torture_rtmutex);
+
+static int torture_rtmutex_lock(void) __acquires(torture_rtmutex)
+{
+       rt_mutex_lock(&torture_rtmutex);
+       return 0;
+}
+
+static void torture_rtmutex_boost(struct torture_random_state *trsp)
+{
+       int policy;
+       struct sched_param param;
+       const unsigned int factor = 50000; /* yes, quite arbitrary */
+
+       if (!rt_task(current)) {
+               /*
+                * (1) Boost priority once every ~50k operations. When the
+                * task tries to take the lock, the rtmutex code will account
+                * for the new priority and do any corresponding pi-dance.
+                */
+               if (!(torture_random(trsp) %
+                     (cxt.nrealwriters_stress * factor))) {
+                       policy = SCHED_FIFO;
+                       param.sched_priority = MAX_RT_PRIO - 1;
+               } else /* common case, do nothing */
+                       return;
+       } else {
+               /*
+                * The task will remain boosted for another ~500k operations,
+                * then restored back to its original prio, and so forth.
+                *
+                * When @trsp is NULL, we want to force-reset the task for
+                * stopping the kthread.
+                */
+               if (!trsp || !(torture_random(trsp) %
+                              (cxt.nrealwriters_stress * factor * 2))) {
+                       policy = SCHED_NORMAL;
+                       param.sched_priority = 0;
+               } else /* common case, do nothing */
+                       return;
+       }
+
+       sched_setscheduler_nocheck(current, policy, &param);
+}
+
+static void torture_rtmutex_delay(struct torture_random_state *trsp)
+{
+       const unsigned long shortdelay_us = 2;
+       const unsigned long longdelay_ms = 100;
+
+       /*
+        * We want a short delay mostly to emulate likely code, and
+        * we want a long delay occasionally to force massive contention.
+        */
+       if (!(torture_random(trsp) %
+             (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
+               mdelay(longdelay_ms);
+       if (!(torture_random(trsp) %
+             (cxt.nrealwriters_stress * 2 * shortdelay_us)))
+               udelay(shortdelay_us);
+#ifdef CONFIG_PREEMPT
+       if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
+               preempt_schedule();  /* Allow test to be preempted. */
+#endif
+}
+
+static void torture_rtmutex_unlock(void) __releases(torture_rtmutex)
+{
+       rt_mutex_unlock(&torture_rtmutex);
+}
+
+static struct lock_torture_ops rtmutex_lock_ops = {
+       .writelock      = torture_rtmutex_lock,
+       .write_delay    = torture_rtmutex_delay,
+       .task_boost     = torture_rtmutex_boost,
+       .writeunlock    = torture_rtmutex_unlock,
+       .readlock       = NULL,
+       .read_delay     = NULL,
+       .readunlock     = NULL,
+       .name           = "rtmutex_lock"
+};
+#endif
+
 static DECLARE_RWSEM(torture_rwsem);
 static int torture_rwsem_down_write(void) __acquires(torture_rwsem)
 {
@@ -419,6 +519,7 @@ static void torture_rwsem_up_read(void) __releases(torture_rwsem)
 static struct lock_torture_ops rwsem_lock_ops = {
        .writelock      = torture_rwsem_down_write,
        .write_delay    = torture_rwsem_write_delay,
+       .task_boost     = torture_boost_dummy,
        .writeunlock    = torture_rwsem_up_write,
        .readlock       = torture_rwsem_down_read,
        .read_delay     = torture_rwsem_read_delay,
@@ -426,6 +527,48 @@ static struct lock_torture_ops rwsem_lock_ops = {
        .name           = "rwsem_lock"
 };
 
+#include <linux/percpu-rwsem.h>
+static struct percpu_rw_semaphore pcpu_rwsem;
+
+static void torture_percpu_rwsem_init(void)
+{
+       BUG_ON(percpu_init_rwsem(&pcpu_rwsem));
+}
+
+static int torture_percpu_rwsem_down_write(void) __acquires(pcpu_rwsem)
+{
+       percpu_down_write(&pcpu_rwsem);
+       return 0;
+}
+
+static void torture_percpu_rwsem_up_write(void) __releases(pcpu_rwsem)
+{
+       percpu_up_write(&pcpu_rwsem);
+}
+
+static int torture_percpu_rwsem_down_read(void) __acquires(pcpu_rwsem)
+{
+       percpu_down_read(&pcpu_rwsem);
+       return 0;
+}
+
+static void torture_percpu_rwsem_up_read(void) __releases(pcpu_rwsem)
+{
+       percpu_up_read(&pcpu_rwsem);
+}
+
+static struct lock_torture_ops percpu_rwsem_lock_ops = {
+       .init           = torture_percpu_rwsem_init,
+       .writelock      = torture_percpu_rwsem_down_write,
+       .write_delay    = torture_rwsem_write_delay,
+       .task_boost     = torture_boost_dummy,
+       .writeunlock    = torture_percpu_rwsem_up_write,
+       .readlock       = torture_percpu_rwsem_down_read,
+       .read_delay     = torture_rwsem_read_delay,
+       .readunlock     = torture_percpu_rwsem_up_read,
+       .name           = "percpu_rwsem_lock"
+};
+
 /*
  * Lock torture writer kthread.  Repeatedly acquires and releases
  * the lock, checking for duplicate acquisitions.
@@ -442,6 +585,7 @@ static int lock_torture_writer(void *arg)
                if ((torture_random(&rand) & 0xfffff) == 0)
                        schedule_timeout_uninterruptible(1);
 
+               cxt.cur_ops->task_boost(&rand);
                cxt.cur_ops->writelock();
                if (WARN_ON_ONCE(lock_is_write_held))
                        lwsp->n_lock_fail++;
@@ -456,6 +600,8 @@ static int lock_torture_writer(void *arg)
 
                stutter_wait("lock_torture_writer");
        } while (!torture_must_stop());
+
+       cxt.cur_ops->task_boost(NULL); /* reset prio */
        torture_kthread_stopping("lock_torture_writer");
        return 0;
 }
@@ -642,7 +788,11 @@ static int __init lock_torture_init(void)
                &spin_lock_ops, &spin_lock_irq_ops,
                &rw_lock_ops, &rw_lock_irq_ops,
                &mutex_lock_ops,
+#ifdef CONFIG_RT_MUTEXES
+               &rtmutex_lock_ops,
+#endif
                &rwsem_lock_ops,
+               &percpu_rwsem_lock_ops,
        };
 
        if (!torture_init_begin(torture_type, verbose, &torture_runnable))
@@ -661,11 +811,11 @@ static int __init lock_torture_init(void)
                for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
                        pr_alert(" %s", torture_ops[i]->name);
                pr_alert("\n");
-               torture_init_end();
-               return -EINVAL;
+               firsterr = -EINVAL;
+               goto unwind;
        }
        if (cxt.cur_ops->init)
-               cxt.cur_ops->init(); /* no "goto unwind" prior to this point!!! */
+               cxt.cur_ops->init();
 
        if (nwriters_stress >= 0)
                cxt.nrealwriters_stress = nwriters_stress;
@@ -676,6 +826,10 @@ static int __init lock_torture_init(void)
        if (strncmp(torture_type, "mutex", 5) == 0)
                cxt.debug_lock = true;
 #endif
+#ifdef CONFIG_DEBUG_RT_MUTEXES
+       if (strncmp(torture_type, "rtmutex", 7) == 0)
+               cxt.debug_lock = true;
+#endif
 #ifdef CONFIG_DEBUG_SPINLOCK
        if ((strncmp(torture_type, "spin", 4) == 0) ||
            (strncmp(torture_type, "rw_lock", 7) == 0))
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index f3256725486738878d0f640216a95d91d16359d4..f231e0bb311ce0827d281d34f737a3a06405c072 100644 (file)
@@ -17,50 +17,43 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
 
        /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
        __init_rwsem(&brw->rw_sem, name, rwsem_key);
-       atomic_set(&brw->write_ctr, 0);
+       rcu_sync_init(&brw->rss, RCU_SCHED_SYNC);
        atomic_set(&brw->slow_read_ctr, 0);
        init_waitqueue_head(&brw->write_waitq);
        return 0;
 }
+EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
 
 void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
 {
+       /*
+        * XXX: temporary kludge. The error path in alloc_super()
+        * assumes that percpu_free_rwsem() is safe after kzalloc().
+        */
+       if (!brw->fast_read_ctr)
+               return;
+
+       rcu_sync_dtor(&brw->rss);
        free_percpu(brw->fast_read_ctr);
        brw->fast_read_ctr = NULL; /* catch use after free bugs */
 }
 
 /*
- * This is the fast-path for down_read/up_read, it only needs to ensure
- * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
- * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
- * serialize with the preempt-disabled section below.
- *
- * The nontrivial part is that we should guarantee acquire/release semantics
- * in case when
- *
- *     R_W: down_write() comes after up_read(), the writer should see all
- *          changes done by the reader
- * or
- *     W_R: down_read() comes after up_write(), the reader should see all
- *          changes done by the writer
+ * This is the fast-path for down_read/up_read. If it succeeds we rely
+ * on the barriers provided by rcu_sync_enter/exit; see the comments in
+ * percpu_down_write() and percpu_up_write().
  *
  * If this helper fails the callers rely on the normal rw_semaphore and
  * atomic_dec_and_test(), so in this case we have the necessary barriers.
- *
- * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or
- * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
- * reader inside the critical section. See the comments in down_write and
- * up_write below.
  */
 static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
 {
-       bool success = false;
+       bool success;
 
        preempt_disable();
-       if (likely(!atomic_read(&brw->write_ctr))) {
+       success = rcu_sync_is_idle(&brw->rss);
+       if (likely(success))
                __this_cpu_add(*brw->fast_read_ctr, val);
-               success = true;
-       }
        preempt_enable();
 
        return success;
@@ -77,16 +70,17 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
 void percpu_down_read(struct percpu_rw_semaphore *brw)
 {
        might_sleep();
-       if (likely(update_fast_ctr(brw, +1))) {
-               rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
+       rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
+
+       if (likely(update_fast_ctr(brw, +1)))
                return;
-       }
 
-       down_read(&brw->rw_sem);
+       /* Avoid rwsem_acquire_read() and rwsem_release() */
+       __down_read(&brw->rw_sem);
        atomic_inc(&brw->slow_read_ctr);
-       /* avoid up_read()->rwsem_release() */
        __up_read(&brw->rw_sem);
 }
+EXPORT_SYMBOL_GPL(percpu_down_read);
 
 int percpu_down_read_trylock(struct percpu_rw_semaphore *brw)
 {
@@ -112,6 +106,7 @@ void percpu_up_read(struct percpu_rw_semaphore *brw)
        if (atomic_dec_and_test(&brw->slow_read_ctr))
                wake_up_all(&brw->write_waitq);
 }
+EXPORT_SYMBOL_GPL(percpu_up_read);
 
 static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
 {
@@ -126,33 +121,17 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
        return sum;
 }
 
-/*
- * A writer increments ->write_ctr to force the readers to switch to the
- * slow mode, note the atomic_read() check in update_fast_ctr().
- *
- * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
- * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
- * counter it represents the number of active readers.
- *
- * Finally the writer takes ->rw_sem for writing and blocks the new readers,
- * then waits until the slow counter becomes zero.
- */
 void percpu_down_write(struct percpu_rw_semaphore *brw)
 {
-       /* tell update_fast_ctr() there is a pending writer */
-       atomic_inc(&brw->write_ctr);
        /*
-        * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
-        *    so that update_fast_ctr() can't succeed.
-        *
-        * 2. Ensures we see the result of every previous this_cpu_add() in
-        *    update_fast_ctr().
+        * Make rcu_sync_is_idle() == F and thus disable the fast-path in
+        * percpu_down_read() and percpu_up_read(), and wait for gp pass.
         *
-        * 3. Ensures that if any reader has exited its critical section via
-        *    fast-path, it executes a full memory barrier before we return.
-        *    See R_W case in the comment above update_fast_ctr().
+        * The latter synchronizes us with the preceding readers which used
+        * the fast-path, so we cannot miss the result of __this_cpu_add()
+        * or anything else inside their critical sections.
         */
-       synchronize_sched_expedited();
+       rcu_sync_enter(&brw->rss);
 
        /* exclude other writers, and block the new readers completely */
        down_write(&brw->rw_sem);
@@ -163,16 +142,17 @@ void percpu_down_write(struct percpu_rw_semaphore *brw)
        /* wait for all readers to complete their percpu_up_read() */
        wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
 }
+EXPORT_SYMBOL_GPL(percpu_down_write);
 
 void percpu_up_write(struct percpu_rw_semaphore *brw)
 {
        /* release the lock, but the readers can't use the fast-path */
        up_write(&brw->rw_sem);
        /*
-        * Insert the barrier before the next fast-path in down_read,
-        * see W_R case in the comment above update_fast_ctr().
+        * Enable the fast-path in percpu_down_read() and percpu_up_read()
+        * but only after another gp pass; this adds the necessary barrier
+        * to ensure the reader can't miss the changes done by us.
         */
-       synchronize_sched_expedited();
-       /* the last writer unblocks update_fast_ctr() */
-       atomic_dec(&brw->write_ctr);
+       rcu_sync_exit(&brw->rss);
 }
+EXPORT_SYMBOL_GPL(percpu_up_write);
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 50a808424b06af45fdd0fd6ab3ae732e1917680d..61a16569ffbf53c2b19b9b67d908edcd225dc201 100644 (file)
@@ -1,4 +1,4 @@
-obj-y += update.o
+obj-y += update.o sync.o
 obj-$(CONFIG_SRCU) += srcu.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_TREE_RCU) += tree.o
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index f9ec6cbe77d365f37c76768e157c046db48f2b1f..d89328e260df6f4f953b649fea98587aacf12318 100644 (file)
@@ -695,7 +695,7 @@ static bool __maybe_unused torturing_tasks(void)
 
 #define RCUTORTURE_TASKS_OPS
 
-static bool torturing_tasks(void)
+static bool __maybe_unused torturing_tasks(void)
 {
        return false;
 }
@@ -768,7 +768,6 @@ static int rcu_torture_boost(void *arg)
                                }
                                call_rcu_time = jiffies;
                        }
-                       cond_resched_rcu_qs();
                        stutter_wait("rcu_torture_boost");
                        if (torture_must_stop())
                                goto checkwait;
@@ -1208,7 +1207,6 @@ rcu_torture_reader(void *arg)
                __this_cpu_inc(rcu_torture_batch[completed]);
                preempt_enable();
                cur_ops->readunlock(idx);
-               cond_resched_rcu_qs();
                stutter_wait("rcu_torture_reader");
        } while (!torture_must_stop());
        if (irqreader && cur_ops->irq_capable) {
@@ -1742,15 +1740,15 @@ rcu_torture_init(void)
                for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
                        pr_alert(" %s", torture_ops[i]->name);
                pr_alert("\n");
-               torture_init_end();
-               return -EINVAL;
+               firsterr = -EINVAL;
+               goto unwind;
        }
        if (cur_ops->fqs == NULL && fqs_duration != 0) {
                pr_alert("rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n");
                fqs_duration = 0;
        }
        if (cur_ops->init)
-               cur_ops->init(); /* no "goto unwind" prior to this point!!! */
+               cur_ops->init();
 
        if (nreaders >= 0) {
                nrealreaders = nreaders;
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
new file mode 100644 (file)
index 0000000..be922c9
--- /dev/null
@@ -0,0 +1,223 @@
+/*
+ * RCU-based infrastructure for lightweight reader-writer locking
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (c) 2015, Red Hat, Inc.
+ *
+ * Author: Oleg Nesterov <oleg@redhat.com>
+ */
+
+#include <linux/rcu_sync.h>
+#include <linux/sched.h>
+
+#ifdef CONFIG_PROVE_RCU
+#define __INIT_HELD(func)      .held = func,
+#else
+#define __INIT_HELD(func)
+#endif
+
+static const struct {
+       void (*sync)(void);
+       void (*call)(struct rcu_head *, void (*)(struct rcu_head *));
+       void (*wait)(void);
+#ifdef CONFIG_PROVE_RCU
+       int  (*held)(void);
+#endif
+} gp_ops[] = {
+       [RCU_SYNC] = {
+               .sync = synchronize_rcu,
+               .call = call_rcu,
+               .wait = rcu_barrier,
+               __INIT_HELD(rcu_read_lock_held)
+       },
+       [RCU_SCHED_SYNC] = {
+               .sync = synchronize_sched,
+               .call = call_rcu_sched,
+               .wait = rcu_barrier_sched,
+               __INIT_HELD(rcu_read_lock_sched_held)
+       },
+       [RCU_BH_SYNC] = {
+               .sync = synchronize_rcu_bh,
+               .call = call_rcu_bh,
+               .wait = rcu_barrier_bh,
+               __INIT_HELD(rcu_read_lock_bh_held)
+       },
+};
+
+enum { GP_IDLE = 0, GP_PENDING, GP_PASSED };
+enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };
+
+#define        rss_lock        gp_wait.lock
+
+#ifdef CONFIG_PROVE_RCU
+void rcu_sync_lockdep_assert(struct rcu_sync *rsp)
+{
+       RCU_LOCKDEP_WARN(!gp_ops[rsp->gp_type].held(),
+                        "suspicious rcu_sync_is_idle() usage");
+}
+#endif
+
+/**
+ * rcu_sync_init() - Initialize an rcu_sync structure
+ * @rsp: Pointer to rcu_sync structure to be initialized
+ * @type: Flavor of RCU with which to synchronize rcu_sync structure
+ */
+void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type)
+{
+       memset(rsp, 0, sizeof(*rsp));
+       init_waitqueue_head(&rsp->gp_wait);
+       rsp->gp_type = type;
+}
+
+/**
+ * rcu_sync_enter() - Force readers onto slowpath
+ * @rsp: Pointer to rcu_sync structure to use for synchronization
+ *
+ * This function is used by updaters who need readers to make use of
+ * a slowpath during the update.  After this function returns, all
+ * subsequent calls to rcu_sync_is_idle() will return false, which
+ * tells readers to stay off their fastpaths.  A later call to
+ * rcu_sync_exit() re-enables reader fastpaths.
+ *
+ * When called in isolation, rcu_sync_enter() must wait for a grace
+ * period; however, closely spaced calls to rcu_sync_enter() can
+ * optimize away the grace-period wait via a state machine implemented
+ * by rcu_sync_enter(), rcu_sync_exit(), and rcu_sync_func().
+ */
+void rcu_sync_enter(struct rcu_sync *rsp)
+{
+       bool need_wait, need_sync;
+
+       spin_lock_irq(&rsp->rss_lock);
+       need_wait = rsp->gp_count++;
+       need_sync = rsp->gp_state == GP_IDLE;
+       if (need_sync)
+               rsp->gp_state = GP_PENDING;
+       spin_unlock_irq(&rsp->rss_lock);
+
+       BUG_ON(need_wait && need_sync);
+
+       if (need_sync) {
+               gp_ops[rsp->gp_type].sync();
+               rsp->gp_state = GP_PASSED;
+               wake_up_all(&rsp->gp_wait);
+       } else if (need_wait) {
+               wait_event(rsp->gp_wait, rsp->gp_state == GP_PASSED);
+       } else {
+               /*
+                * Possible when there's a pending CB from a rcu_sync_exit().
+                * Nobody has yet been allowed the 'fast' path and thus we can
+                * avoid doing any sync(). The callback will get 'dropped'.
+                */
+               BUG_ON(rsp->gp_state != GP_PASSED);
+       }
+}
+
+/**
+ * rcu_sync_func() - Callback function managing reader access to fastpath
+ * @rsp: Pointer to rcu_sync structure to use for synchronization
+ *
+ * This function is passed to one of the call_rcu() functions by
+ * rcu_sync_exit(), so that it is invoked after a grace period following
+ * that invocation of rcu_sync_exit().  It takes action based on events that
+ * have taken place in the meantime, so that closely spaced rcu_sync_enter()
+ * and rcu_sync_exit() pairs need not wait for a grace period.
+ *
+ * If another rcu_sync_enter() is invoked before the grace period
+ * ended, reset state to allow the next rcu_sync_exit() to let the
+ * readers back onto their fastpaths (after a grace period).  If both
+ * another rcu_sync_enter() and its matching rcu_sync_exit() are invoked
+ * before the grace period ended, re-invoke call_rcu() on behalf of that
+ * rcu_sync_exit().  Otherwise, set all state back to idle so that readers
+ * can again use their fastpaths.
+ */
+static void rcu_sync_func(struct rcu_head *rcu)
+{
+       struct rcu_sync *rsp = container_of(rcu, struct rcu_sync, cb_head);
+       unsigned long flags;
+
+       BUG_ON(rsp->gp_state != GP_PASSED);
+       BUG_ON(rsp->cb_state == CB_IDLE);
+
+       spin_lock_irqsave(&rsp->rss_lock, flags);
+       if (rsp->gp_count) {
+               /*
+                * A new rcu_sync_enter() has happened; drop the callback.
+                */
+               rsp->cb_state = CB_IDLE;
+       } else if (rsp->cb_state == CB_REPLAY) {
+               /*
+                * A new rcu_sync_exit() has happened; requeue the callback
+                * to catch a later GP.
+                */
+               rsp->cb_state = CB_PENDING;
+               gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func);
+       } else {
+               /*
+                * We're at least a GP after rcu_sync_exit(); everybody will now
+                * have observed the write-side critical section. Let 'em rip!
+                */
+               rsp->cb_state = CB_IDLE;
+               rsp->gp_state = GP_IDLE;
+       }
+       spin_unlock_irqrestore(&rsp->rss_lock, flags);
+}
+
+/**
+ * rcu_sync_exit() - Allow readers back onto fast path after grace period
+ * @rsp: Pointer to rcu_sync structure to use for synchronization
+ *
+ * This function is used by updaters who have completed, and can therefore
+ * now allow readers to make use of their fastpaths after a grace period
+ * has elapsed.  After this grace period has completed, all subsequent
+ * calls to rcu_sync_is_idle() will return true, which tells readers that
+ * they can once again use their fastpaths.
+ */
+void rcu_sync_exit(struct rcu_sync *rsp)
+{
+       spin_lock_irq(&rsp->rss_lock);
+       if (!--rsp->gp_count) {
+               if (rsp->cb_state == CB_IDLE) {
+                       rsp->cb_state = CB_PENDING;
+                       gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func);
+               } else if (rsp->cb_state == CB_PENDING) {
+                       rsp->cb_state = CB_REPLAY;
+               }
+       }
+       spin_unlock_irq(&rsp->rss_lock);
+}
+
+/**
+ * rcu_sync_dtor() - Clean up an rcu_sync structure
+ * @rsp: Pointer to rcu_sync structure to be cleaned up
+ */
+void rcu_sync_dtor(struct rcu_sync *rsp)
+{
+       int cb_state;
+
+       BUG_ON(rsp->gp_count);
+
+       spin_lock_irq(&rsp->rss_lock);
+       if (rsp->cb_state == CB_REPLAY)
+               rsp->cb_state = CB_PENDING;
+       cb_state = rsp->cb_state;
+       spin_unlock_irq(&rsp->rss_lock);
+
+       if (cb_state != CB_IDLE) {
+               gp_ops[rsp->gp_type].wait();
+               BUG_ON(rsp->cb_state != CB_IDLE);
+       }
+}
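
The effect of the gp_state/cb_state machinery above can be seen in one
possible worked timeline (a sketch, not additional code in the patch):

	rcu_sync_enter();  /* gp_count 0->1, GP_IDLE -> GP_PENDING, sync(), GP_PASSED */
	rcu_sync_exit();   /* gp_count 1->0, CB_IDLE -> CB_PENDING, call_rcu() queued */
	rcu_sync_enter();  /* before the GP ends: gp_count 0->1, no sync() required */
	rcu_sync_exit();   /* gp_count 1->0, CB_PENDING -> CB_REPLAY */
	                   /* rcu_sync_func(): CB_REPLAY -> CB_PENDING, requeued */
	                   /* rcu_sync_func() again: CB_IDLE, GP_IDLE */

Thus closely spaced updaters pay for at most one synchronous grace period.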
diff --git a/kernel/torture.c b/kernel/torture.c
index 3e4840633d3ee7bd926f1fe67f8b0a4b324514da..44aa462d033f700a86e1439d1b24053daca4094a 100644 (file)
@@ -523,6 +523,7 @@ static int stutter;
  */
 void stutter_wait(const char *title)
 {
+       cond_resched_rcu_qs();
        while (READ_ONCE(stutter_pause_test) ||
               (torture_runnable && !READ_ONCE(*torture_runnable))) {
                if (stutter_pause_test)
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index fbe2dbff1e210c2f4711d2f581823d6b14c85799..f6483609ebc246f38d268dc97a9ddfa4072cbf7b 100755 (executable)
@@ -75,7 +75,7 @@ usage () {
 while test $# -gt 0
 do
        case "$1" in
-       --bootargs)
+       --bootargs|--bootarg)
                checkarg --bootargs "(list of kernel boot arguments)" "$#" "$2" '.*' '^--'
                TORTURE_BOOTARGS="$2"
                shift
@@ -88,7 +88,7 @@ do
        --buildonly)
                TORTURE_BUILDONLY=1
                ;;
-       --configs)
+       --configs|--config)
                checkarg --configs "(list of config files)" "$#" "$2" '^[^/]*$' '^--'
                configs="$2"
                shift
@@ -134,7 +134,7 @@ do
        --no-initrd)
                TORTURE_INITRD=""; export TORTURE_INITRD
                ;;
-       --qemu-args)
+       --qemu-args|--qemu-arg)
                checkarg --qemu-args "-qemu args" $# "$2" '^-' '^error'
                TORTURE_QEMU_ARG="$2"
                shift
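
With the new option aliases in place, the locktorture scenarios added below
can be run as, for example (a sketch assuming the script's pre-existing
--torture selector; nwriters_stress is a locktorture module parameter):

	tools/testing/selftests/rcutorture/bin/kvm.sh --torture lock \
		--configs "LOCK05 LOCK06" \
		--bootargs "locktorture.nwriters_stress=8"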
diff --git a/tools/testing/selftests/rcutorture/configs/lock/CFLIST b/tools/testing/selftests/rcutorture/configs/lock/CFLIST
index 6910b73707617cc2ddf855be82e488420a35bed1..b9611c523723982e726548742b700d29ed0be7c4 100644 (file)
@@ -1,4 +1,6 @@
 LOCK01
 LOCK02
 LOCK03
-LOCK04
\ No newline at end of file
+LOCK04
+LOCK05
+LOCK06
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK05 b/tools/testing/selftests/rcutorture/configs/lock/LOCK05
new file mode 100644 (file)
index 0000000..1d1da14
--- /dev/null
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot
new file mode 100644 (file)
index 0000000..8ac3730
--- /dev/null
@@ -0,0 +1 @@
+locktorture.torture_type=rtmutex_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK06 b/tools/testing/selftests/rcutorture/configs/lock/LOCK06
new file mode 100644 (file)
index 0000000..1d1da14
--- /dev/null
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK06.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK06.boot
new file mode 100644 (file)
index 0000000..f92219c
--- /dev/null
@@ -0,0 +1 @@
+locktorture.torture_type=percpu_rwsem_lock