git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
x86, ticketlock: Convert spin loop to C
author: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Fri, 24 Jun 2011 01:19:14 +0000 (18:19 -0700)
committer: H. Peter Anvin <hpa@linux.intel.com>
Fri, 22 Jul 2011 18:13:26 +0000 (11:13 -0700)
The inner loop of __ticket_spin_lock isn't doing anything very special,
so reimplement it in C.

For the 8-bit ticket lock variant, we use a register union to get direct
access to the lower and upper bytes in the tickets, but unfortunately gcc
won't generate a direct comparison between the two halves of the register,
so the generated asm isn't quite as pretty as the hand-coded version.
However, benchmarking shows that this is actually a small improvement in
runtime performance on some benchmarks, and never a slowdown.

We also need a barrier at the end of the lock loop to make sure that the
compiler doesn't move any instructions from within the locked region into
the region where we don't yet own the lock.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Link: http://lkml.kernel.org/r/f04120629b8b1cfa1c306373e34320687305a518.1308878118.git.jeremy.fitzhardinge@citrix.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
arch/x86/include/asm/spinlock.h

index d6d578459ceacd2b588328a2f2ed154d4eb28a39..f48a6e32e487e8e83682e281be2d9cf845ebfea7 100644 (file)
 #if (NR_CPUS < 256)
 static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
 {
-       unsigned short inc = 1 << TICKET_SHIFT;
-
-       asm volatile (
-               LOCK_PREFIX "xaddw %w0, %1\n"
-               "1:\t"
-               "cmpb %h0, %b0\n\t"
-               "je 2f\n\t"
-               "rep ; nop\n\t"
-               "movb %1, %b0\n\t"
-               /* don't need lfence here, because loads are in-order */
-               "jmp 1b\n"
-               "2:"
-               : "+Q" (inc), "+m" (lock->slock)
-               :
-               : "memory", "cc");
+       register union {
+               struct __raw_tickets tickets;
+               unsigned short slock;
+       } inc = { .slock = 1 << TICKET_SHIFT };
+
+       asm volatile (LOCK_PREFIX "xaddw %w0, %1\n"
+                     : "+Q" (inc), "+m" (lock->slock) : : "memory", "cc");
+
+       for (;;) {
+               if (inc.tickets.head == inc.tickets.tail)
+                       goto out;
+               cpu_relax();
+               inc.tickets.head = ACCESS_ONCE(lock->tickets.head);
+       }
+out:   barrier();              /* make sure nothing creeps before the lock is taken */
 }
 
 static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
@@ -105,22 +105,22 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
 {
        unsigned inc = 1 << TICKET_SHIFT;
-       unsigned tmp;
+       __ticket_t tmp;
 
-       asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
-                    "movzwl %w0, %2\n\t"
-                    "shrl $16, %0\n\t"
-                    "1:\t"
-                    "cmpl %0, %2\n\t"
-                    "je 2f\n\t"
-                    "rep ; nop\n\t"
-                    "movzwl %1, %2\n\t"
-                    /* don't need lfence here, because loads are in-order */
-                    "jmp 1b\n"
-                    "2:"
-                    : "+r" (inc), "+m" (lock->slock), "=&r" (tmp)
-                    :
-                    : "memory", "cc");
+       asm volatile(LOCK_PREFIX "xaddl %0, %1\n\t"
+                    : "+r" (inc), "+m" (lock->slock)
+                    : : "memory", "cc");
+
+       tmp = inc;
+       inc >>= TICKET_SHIFT;
+
+       for (;;) {
+               if ((__ticket_t)inc == tmp)
+                       goto out;
+               cpu_relax();
+               tmp = ACCESS_ONCE(lock->tickets.head);
+       }
+out:   barrier();              /* make sure nothing creeps before the lock is taken */
 }
 
 static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)